/*******************************************************************************
 * Copyright 2016 Intel Corporation.
 *
 *
 * This software and the related documents are Intel copyrighted materials, and your use of them is governed by
 * the express license under which they were provided to you ('License'). Unless the License provides otherwise,
 * you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related
 * documents without Intel's prior written permission.
 * This software and the related documents are provided as is, with no express or implied warranties, other than
 * those that are expressly stated in the License.
 *******************************************************************************/

/* Intel(R) Integrated Performance Primitives (Intel(R) IPP) */

#include "prfiltermed_t.h"
/*
 * Decide how a destination volume is cut into tiles for threaded filtering.
 *
 * dstRoiSize   - volume to be processed.
 * kernelVolume - not used by this routine.
 * numThreads   - number of threads available; used as a divisor, so it must be non-zero.
 * pCubeVolume  - receives the size of a regular tile.
 * pLastVolume  - receives the size of the last tile along each axis.
 * splitImage   - receives the tile counts; preset to 1 x 1 x 1 before any split is attempted.
 *
 * The volume stays in one piece when there is a single thread or when the
 * per-thread depth is still below TILE_S. Otherwise ipprSplitToCubes_T is
 * asked to split it using a cube of full width and height and TILE_S depth.
 */
static void ownrGetFilterMedianCubeSize(IpprVolume dstRoiSize, IpprVolume kernelVolume, Ipp32u numThreads, IpprVolume *pCubeVolume,
                                        IpprVolume *pLastVolume, IpprPoint *splitImage)
{
    int depthPerThread = dstRoiSize.depth / (int)numThreads;
    int keepWhole;

    /* Round a too-thin slab up to TILE_S when the volume is deep enough for it. */
    if (depthPerThread < TILE_S && TILE_S <= dstRoiSize.depth)
        depthPerThread = TILE_S;

    splitImage->x = 1;
    splitImage->y = 1;
    splitImage->z = 1;

    keepWhole = ((numThreads == 1) || (depthPerThread < TILE_S)) && (dstRoiSize.depth != 0);
    if (!keepWhole) {
        IpprVolume minCube;

        minCube.width = dstRoiSize.width;
        minCube.height = dstRoiSize.height;
        minCube.depth = TILE_S;
        /* split the volume to cubes */
        ipprSplitToCubes_T(dstRoiSize, minCube, splitImage, pCubeVolume, pLastVolume);
        return;
    }

    /* Single tile: the regular and the last tile are both the whole volume. */
    pCubeVolume->width = dstRoiSize.width;
    pLastVolume->width = pCubeVolume->width;
    pCubeVolume->height = dstRoiSize.height;
    pLastVolume->height = pCubeVolume->height;
    pCubeVolume->depth = dstRoiSize.depth;
    pLastVolume->depth = pCubeVolume->depth;
}

/*
 * Compute the spec and work-buffer sizes needed by the threaded 3D median filter.
 *
 * kernelVolume - filter kernel size; every dimension must be positive.
 * dstRoiVolume - destination volume; every dimension must be positive.
 * dataType     - element type, forwarded to ipprFilterMedianGetSize.
 * numChannels  - channel count, forwarded to ipprFilterMedianGetSize.
 * pSpecSizeL   - receives the spec size, including the FilterMedianInfo header.
 * pBufferSize  - receives the work-buffer size: one per-tile buffer per thread.
 *
 * Returns ippStsNullPtrErr for NULL outputs, ippStsSizeErr for non-positive
 * sizes or when the total buffer size does not fit the int output, any
 * negative status reported by ipprFilterMedianGetSize, otherwise the status of
 * the last ipprFilterMedianGetSize call. Outputs are written only on success.
 */
IPPFUN(IppStatus, ipprFilterMedianGetSize_T,
       (IpprVolume kernelVolume, IpprVolume dstRoiVolume, IppDataType dataType, int numChannels, int *pSpecSizeL, int *pBufferSize))
{
    Ipp32s numThreads;
    IpprVolume pTileSize = {0, 0, 0}, pLastSize = {0, 0, 0};
    IpprPoint splitImage = {0, 0, 0};
    int tileSpecSize = 0, tileBufSize = 0;
    int lastSpecSize = 0, lastBufSize = 0;
    int specSize, bufSize;
    long long totalBufSize;
    IppStatus status;

    if (pSpecSizeL == 0 || pBufferSize == 0)
        return ippStsNullPtrErr;
    if (dstRoiVolume.width <= 0 || dstRoiVolume.height <= 0 || dstRoiVolume.depth <= 0)
        return ippStsSizeErr;
    if (kernelVolume.width <= 0 || kernelVolume.height <= 0 || kernelVolume.depth <= 0)
        return ippStsSizeErr;

    ippGetNumThreads_T(&numThreads);

    ownrGetFilterMedianCubeSize(dstRoiVolume, kernelVolume, numThreads, &pTileSize, &pLastSize, &splitImage);
    /* Make the "last" tile at least as large as a regular tile in every dimension. */
    if (pLastSize.width < pTileSize.width)
        pLastSize.width = pTileSize.width;
    if (pLastSize.height < pTileSize.height)
        pLastSize.height = pTileSize.height;
    if (pLastSize.depth < pTileSize.depth)
        pLastSize.depth = pTileSize.depth;

    /* Query both tile shapes and check each status: an error on the first call must not be overwritten. */
    status = ipprFilterMedianGetSize(kernelVolume, pTileSize, dataType, numChannels, &tileSpecSize, &tileBufSize);
    if (status < 0)
        return status;
    status = ipprFilterMedianGetSize(kernelVolume, pLastSize, dataType, numChannels, &lastSpecSize, &lastBufSize);
    if (status < 0)
        return status;

    /* Size for the more demanding of the two shapes. */
    specSize = (tileSpecSize > lastSpecSize) ? tileSpecSize : lastSpecSize;
    bufSize = (tileBufSize > lastBufSize) ? tileBufSize : lastBufSize;

    /* The outputs are 32-bit ints; reject a total that would not fit (2147483647 is the largest such value). */
    totalBufSize = (long long)bufSize * (long long)numThreads;
    if (totalBufSize > 2147483647LL)
        return ippStsSizeErr;

    *pSpecSizeL = specSize + (int)sizeof(FilterMedianInfo);
    *pBufferSize = (int)totalBufSize;
    return status;
}
/*
 * Initialize the spec used by the threaded 3D median filter.
 *
 * The memory at pSpecT is laid out as a FilterMedianInfo header followed by
 * the IpprFilterMedianSpec. The kernel size is stored in the header and the
 * rest is initialized by ipprFilterMedianInit.
 *
 * Returns ippStsNullPtrErr when pSpecT is NULL, otherwise the status of
 * ipprFilterMedianInit.
 */
IPPFUN(IppStatus, ipprFilterMedianInit_T, (IpprVolume kernelVolume, IppDataType dataType, int numChannels, IpprFilterMedianSpec_T *pSpecT))
{
    FilterMedianInfo *pFilterMedianInfo;
    IpprFilterMedianSpec *pSpec;

    /* Validate before doing any pointer arithmetic: offsetting a NULL pointer is undefined. */
    if (pSpecT == 0)
        return ippStsNullPtrErr;

    pFilterMedianInfo = (FilterMedianInfo *)pSpecT;
    pSpec = (IpprFilterMedianSpec *)((Ipp8u *)pSpecT + sizeof(FilterMedianInfo));

    pFilterMedianInfo->kernelVolume = kernelVolume;
    return ipprFilterMedianInit(kernelVolume, dataType, numChannels, pSpec);
}

IppStatus ipprFilterMedian_8u_C1V_T_Fun(int t, void *arg)
{
    ipprFilterMedian_8u_T_Str *ts = (ipprFilterMedian_8u_T_Str *)arg;
    const Ipp8u *pSrc = (const Ipp8u *)ts->pSrc; // const
    int srcPlaneStep = ts->srcPlaneStep;
    int srcStep = ts->srcStep;
    Ipp8u *pDst = ts->pDst;
    int dstPlaneStep = ts->dstPlaneStep;
    int dstStep = ts->dstStep;
    IpprBorderType border = ts->border;
    const Ipp8u *borderValue = ts->borderValue;
    const IpprFilterMedianSpec_T *pSpec = (const IpprFilterMedianSpec_T *)ts->pSpec; // const
    Ipp8u *pBuffer = ts->pBuffer;
    int bufSize = ts->bufSize;
    int numChannels = ts->numChannels;
    IpprPoint splitImage = ts->splitImage;
    IpprVolume pTileSize = ts->pTileSize;
    IpprVolume pLastSize = ts->pLastSize;

    IpprVolume roiSizeS;
    roiSizeS.depth = pTileSize.depth;
    int w, h, d;
    IpprBorderType borderTrd = border;
    IpprBorderType borderTrdW = borderTrd;
    IpprBorderType borderTrdD = borderTrd;
    Ipp8u *pSrcRoi;
    Ipp8u *pDstRoi;
    int threadIdx = 0;
    IppStatus status = ippStsOk;

    w = t % splitImage.x;
    h = (t % (splitImage.x * splitImage.y)) / splitImage.x;
    d = t / (splitImage.x * splitImage.y);

    pSrcRoi = (Ipp8u *)((Ipp8u *)(pSrc + w * pTileSize.width * numChannels) + h * pTileSize.height * srcStep + d * pTileSize.depth * srcPlaneStep);
    pDstRoi = (Ipp8u *)((Ipp8u *)(pDst + w * pTileSize.width * numChannels) + h * pTileSize.height * dstStep + d * pTileSize.depth * dstPlaneStep);
    roiSizeS.depth = pTileSize.depth;
    roiSizeS.height = pTileSize.height;
    roiSizeS.width = pTileSize.width;
    if (pLastSize.depth && (d == (int)(splitImage.z - 1)))
        roiSizeS.depth = pLastSize.depth;
    if (pLastSize.height && (h == (int)(splitImage.y - 1)))
        roiSizeS.height = pLastSize.height;
    if (pLastSize.width && (w == (int)(splitImage.x - 1)))
        roiSizeS.width = pLastSize.width;

    if ((splitImage.y > 1)) {
        if (h == 0)
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom);
        else if (h == (int)(splitImage.y - 1))
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemTop);
        else
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom | (int)ipprBorderInMemTop);
    }
    borderTrdW = borderTrd;
    if ((splitImage.x > 1)) {
        if (w == 0)
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight);
        else if (w == (int)(splitImage.x - 1))
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemLeft);
        else
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight | (int)ipprBorderInMemLeft);
    }
    borderTrdD = borderTrdW;
    if ((splitImage.z > 1)) {
        if (d == 0)
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack);
        else if (d == (int)(splitImage.z - 1))
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemFront);
        else
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack | (int)ipprBorderInMemFront);
    }
    ippGetThreadIdx_T(&threadIdx);
    Ipp8u *pBuf = pBuffer + bufSize * threadIdx;
    status = ipprFilterMedian_8u_C1V(pSrcRoi, srcPlaneStep, srcStep, pDstRoi, dstPlaneStep, dstStep, roiSizeS, borderTrdD, borderValue,
                                     (IpprFilterMedianSpec *)pSpec, pBuf);
    return status;
}
IppStatus ipprFilterMedian_16u_C1V_T_Fun(int t, void *arg)
{
    ipprFilterMedian_16u_T_Str *ts = (ipprFilterMedian_16u_T_Str *)arg;
    const Ipp16u *pSrc = (const Ipp16u *)ts->pSrc; // const
    int srcPlaneStep = ts->srcPlaneStep;
    int srcStep = ts->srcStep;
    Ipp16u *pDst = ts->pDst;
    int dstPlaneStep = ts->dstPlaneStep;
    int dstStep = ts->dstStep;
    IpprBorderType border = ts->border;
    const Ipp16u *borderValue = ts->borderValue;
    const IpprFilterMedianSpec_T *pSpec = (const IpprFilterMedianSpec_T *)ts->pSpec; // const
    Ipp8u *pBuffer = ts->pBuffer;
    int bufSize = ts->bufSize;
    int numChannels = ts->numChannels;
    IpprPoint splitImage = ts->splitImage;
    IpprVolume pTileSize = ts->pTileSize;
    IpprVolume pLastSize = ts->pLastSize;

    IpprVolume roiSizeS;
    roiSizeS.depth = pTileSize.depth;
    int w, h, d;
    IpprBorderType borderTrd = border;
    IpprBorderType borderTrdW = borderTrd;
    IpprBorderType borderTrdD = borderTrd;
    Ipp16u *pSrcRoi;
    Ipp16u *pDstRoi;
    int threadIdx = 0;
    IppStatus status = ippStsOk;
    w = t % splitImage.x;
    h = (t % (splitImage.x * splitImage.y)) / splitImage.x;
    d = t / (splitImage.x * splitImage.y);

    pSrcRoi = (Ipp16u *)((Ipp8u *)(pSrc + w * pTileSize.width * numChannels) + h * pTileSize.height * srcStep + d * pTileSize.depth * srcPlaneStep);
    pDstRoi = (Ipp16u *)((Ipp8u *)(pDst + w * pTileSize.width * numChannels) + h * pTileSize.height * dstStep + d * pTileSize.depth * dstPlaneStep);

    roiSizeS.depth = pTileSize.depth;
    roiSizeS.height = pTileSize.height;
    roiSizeS.width = pTileSize.width;
    if (pLastSize.depth && (d == (int)(splitImage.z - 1)))
        roiSizeS.depth = pLastSize.depth;
    if (pLastSize.height && (h == (int)(splitImage.y - 1)))
        roiSizeS.height = pLastSize.height;
    if (pLastSize.width && (w == (int)(splitImage.x - 1)))
        roiSizeS.width = pLastSize.width;

    if ((splitImage.y > 1)) {
        if (h == 0)
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom);
        else if (h == (int)(splitImage.y - 1))
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemTop);
        else
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom | (int)ipprBorderInMemTop);
    }
    borderTrdW = borderTrd;
    if ((splitImage.x > 1)) {
        if (w == 0)
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight);
        else if (w == (int)(splitImage.x - 1))
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemLeft);
        else
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight | (int)ipprBorderInMemLeft);
    }
    borderTrdD = borderTrdW;
    if ((splitImage.z > 1)) {
        if (d == 0)
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack);
        else if (d == (int)(splitImage.z - 1))
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemFront);
        else
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack | (int)ipprBorderInMemFront);
    }
    ippGetThreadIdx_T(&threadIdx);

    Ipp8u *pBuf = pBuffer + bufSize * threadIdx;
    status = ipprFilterMedian_16u_C1V(pSrcRoi, srcPlaneStep, srcStep, pDstRoi, dstPlaneStep, dstStep, roiSizeS, borderTrdD, borderValue,
                                      (IpprFilterMedianSpec *)pSpec, pBuf);
    return status;
}
IppStatus ipprFilterMedian_16s_C1V_T_Fun(int t, void *arg)
{
    ipprFilterMedian_16s_T_Str *ts = (ipprFilterMedian_16s_T_Str *)arg;
    const Ipp16s *pSrc = (const Ipp16s *)ts->pSrc; // const
    int srcPlaneStep = ts->srcPlaneStep;
    int srcStep = ts->srcStep;
    Ipp16s *pDst = ts->pDst;
    int dstPlaneStep = ts->dstPlaneStep;
    int dstStep = ts->dstStep;
    IpprBorderType border = ts->border;
    const Ipp16s *borderValue = ts->borderValue;
    const IpprFilterMedianSpec_T *pSpec = (const IpprFilterMedianSpec_T *)ts->pSpec; // const
    Ipp8u *pBuffer = ts->pBuffer;
    int bufSize = ts->bufSize;
    int numChannels = ts->numChannels;
    IpprPoint splitImage = ts->splitImage;
    IpprVolume pTileSize = ts->pTileSize;
    IpprVolume pLastSize = ts->pLastSize;

    IpprVolume roiSizeS;
    roiSizeS.depth = pTileSize.depth;
    int w, h, d;
    IpprBorderType borderTrd = border;
    IpprBorderType borderTrdW = borderTrd;
    IpprBorderType borderTrdD = borderTrd;
    Ipp16s *pSrcRoi;
    Ipp16s *pDstRoi;
    int threadIdx = 0;
    IppStatus status = ippStsOk;

    w = t % splitImage.x;
    h = (t % (splitImage.x * splitImage.y)) / splitImage.x;
    d = t / (splitImage.x * splitImage.y);

    pSrcRoi = (Ipp16s *)((Ipp8u *)(pSrc + w * pTileSize.width * numChannels) + h * pTileSize.height * srcStep + d * pTileSize.depth * srcPlaneStep);
    pDstRoi = (Ipp16s *)((Ipp8u *)(pDst + w * pTileSize.width * numChannels) + h * pTileSize.height * dstStep + d * pTileSize.depth * dstPlaneStep);
    roiSizeS.depth = pTileSize.depth;
    roiSizeS.height = pTileSize.height;
    roiSizeS.width = pTileSize.width;
    if (pLastSize.depth && (d == (int)(splitImage.z - 1)))
        roiSizeS.depth = pLastSize.depth;
    if (pLastSize.height && (h == (int)(splitImage.y - 1)))
        roiSizeS.height = pLastSize.height;
    if (pLastSize.width && (w == (int)(splitImage.x - 1)))
        roiSizeS.width = pLastSize.width;

    if ((splitImage.y > 1)) {
        if (h == 0)
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom);
        else if (h == (int)(splitImage.y - 1))
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemTop);
        else
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom | (int)ipprBorderInMemTop);
    }
    borderTrdW = borderTrd;
    if ((splitImage.x > 1)) {
        if (w == 0)
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight);
        else if (w == (int)(splitImage.x - 1))
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemLeft);
        else
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight | (int)ipprBorderInMemLeft);
    }
    borderTrdD = borderTrdW;
    if ((splitImage.z > 1)) {
        if (d == 0)
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack);
        else if (d == (int)(splitImage.z - 1))
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemFront);
        else
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack | (int)ipprBorderInMemFront);
    }
    ippGetThreadIdx_T(&threadIdx);
    Ipp8u *pBuf = pBuffer + bufSize * threadIdx;
    status = ipprFilterMedian_16s_C1V(pSrcRoi, srcPlaneStep, srcStep, pDstRoi, dstPlaneStep, dstStep, roiSizeS, borderTrdD, borderValue,
                                      (IpprFilterMedianSpec *)pSpec, pBuf);
    return status;
}
IppStatus ipprFilterMedian_32f_C1V_T_Fun(int t, void *arg)
{
    ipprFilterMedian_32f_T_Str *ts = (ipprFilterMedian_32f_T_Str *)arg;
    const Ipp32f *pSrc = (const Ipp32f *)ts->pSrc; // const
    int srcPlaneStep = ts->srcPlaneStep;
    int srcStep = ts->srcStep;
    Ipp32f *pDst = ts->pDst;
    int dstPlaneStep = ts->dstPlaneStep;
    int dstStep = ts->dstStep;
    IpprBorderType border = ts->border;
    const Ipp32f *borderValue = ts->borderValue;
    const IpprFilterMedianSpec_T *pSpec = (const IpprFilterMedianSpec_T *)ts->pSpec; // const
    Ipp8u *pBuffer = ts->pBuffer;
    int bufSize = ts->bufSize;
    int numChannels = ts->numChannels;
    IpprPoint splitImage = ts->splitImage;
    IpprVolume pTileSize = ts->pTileSize;
    IpprVolume pLastSize = ts->pLastSize;

    IpprVolume roiSizeS;
    roiSizeS.depth = pTileSize.depth;
    int w, h, d;
    IpprBorderType borderTrd = border;
    IpprBorderType borderTrdW = borderTrd;
    IpprBorderType borderTrdD = borderTrd;
    Ipp32f *pSrcRoi;
    Ipp32f *pDstRoi;
    int threadIdx = 0;
    IppStatus status = ippStsOk;

    w = t % splitImage.x;
    h = (t % (splitImage.x * splitImage.y)) / splitImage.x;
    d = t / (splitImage.x * splitImage.y);

    pSrcRoi = (Ipp32f *)((Ipp8u *)(pSrc + w * pTileSize.width * numChannels) + h * pTileSize.height * srcStep + d * pTileSize.depth * srcPlaneStep);
    pDstRoi = (Ipp32f *)((Ipp8u *)(pDst + w * pTileSize.width * numChannels) + h * pTileSize.height * dstStep + d * pTileSize.depth * dstPlaneStep);
    roiSizeS.depth = pTileSize.depth;
    roiSizeS.height = pTileSize.height;
    roiSizeS.width = pTileSize.width;
    if (pLastSize.depth && (d == (int)(splitImage.z - 1)))
        roiSizeS.depth = pLastSize.depth;
    if (pLastSize.height && (h == (int)(splitImage.y - 1)))
        roiSizeS.height = pLastSize.height;
    if (pLastSize.width && (w == (int)(splitImage.x - 1)))
        roiSizeS.width = pLastSize.width;

    if ((splitImage.y > 1)) {
        if (h == 0)
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom);
        else if (h == (int)(splitImage.y - 1))
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemTop);
        else
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom | (int)ipprBorderInMemTop);
    }
    borderTrdW = borderTrd;
    if ((splitImage.x > 1)) {
        if (w == 0)
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight);
        else if (w == (int)(splitImage.x - 1))
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemLeft);
        else
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight | (int)ipprBorderInMemLeft);
    }
    borderTrdD = borderTrdW;
    if ((splitImage.z > 1)) {
        if (d == 0)
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack);
        else if (d == (int)(splitImage.z - 1))
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemFront);
        else
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack | (int)ipprBorderInMemFront);
    }
    ippGetThreadIdx_T(&threadIdx);
    Ipp8u *pBuf = pBuffer + bufSize * threadIdx;
    status = ipprFilterMedian_32f_C1V(pSrcRoi, srcPlaneStep, srcStep, pDstRoi, dstPlaneStep, dstStep, roiSizeS, borderTrdD, borderValue,
                                      (IpprFilterMedianSpec *)pSpec, pBuf);
    return status;
}
IppStatus ipprFilterMedian_64f_C1V_T_Fun(int t, void *arg)
{
    ipprFilterMedian_64f_T_Str *ts = (ipprFilterMedian_64f_T_Str *)arg;
    const Ipp64f *pSrc = (const Ipp64f *)ts->pSrc; // const
    int srcPlaneStep = ts->srcPlaneStep;
    int srcStep = ts->srcStep;
    Ipp64f *pDst = ts->pDst;
    int dstPlaneStep = ts->dstPlaneStep;
    int dstStep = ts->dstStep;
    IpprBorderType border = ts->border;
    const Ipp64f *borderValue = ts->borderValue;
    const IpprFilterMedianSpec_T *pSpec = (const IpprFilterMedianSpec_T *)ts->pSpec; // const
    Ipp8u *pBuffer = ts->pBuffer;
    int bufSize = ts->bufSize;
    int numChannels = ts->numChannels;
    IpprPoint splitImage = ts->splitImage;
    IpprVolume pTileSize = ts->pTileSize;
    IpprVolume pLastSize = ts->pLastSize;

    IpprVolume roiSizeS;
    roiSizeS.depth = pTileSize.depth;
    int w, h, d;
    IpprBorderType borderTrd = border;
    IpprBorderType borderTrdW = borderTrd;
    IpprBorderType borderTrdD = borderTrd;
    Ipp64f *pSrcRoi;
    Ipp64f *pDstRoi;
    int threadIdx = 0;
    IppStatus status = ippStsOk;

    w = t % splitImage.x;
    h = (t % (splitImage.x * splitImage.y)) / splitImage.x;
    d = t / (splitImage.x * splitImage.y);

    pSrcRoi = (Ipp64f *)((Ipp8u *)(pSrc + w * pTileSize.width * numChannels) + h * pTileSize.height * srcStep + d * pTileSize.depth * srcPlaneStep);
    pDstRoi = (Ipp64f *)((Ipp8u *)(pDst + w * pTileSize.width * numChannels) + h * pTileSize.height * dstStep + d * pTileSize.depth * dstPlaneStep);
    roiSizeS.depth = pTileSize.depth;
    roiSizeS.height = pTileSize.height;
    roiSizeS.width = pTileSize.width;
    if (pLastSize.depth && (d == (int)(splitImage.z - 1)))
        roiSizeS.depth = pLastSize.depth;
    if (pLastSize.height && (h == (int)(splitImage.y - 1)))
        roiSizeS.height = pLastSize.height;
    if (pLastSize.width && (w == (int)(splitImage.x - 1)))
        roiSizeS.width = pLastSize.width;

    if ((splitImage.y > 1)) {
        if (h == 0)
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom);
        else if (h == (int)(splitImage.y - 1))
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemTop);
        else
            borderTrd = (IpprBorderType)((int)border | (int)ipprBorderInMemBottom | (int)ipprBorderInMemTop);
    }
    borderTrdW = borderTrd;
    if ((splitImage.x > 1)) {
        if (w == 0)
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight);
        else if (w == (int)(splitImage.x - 1))
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemLeft);
        else
            borderTrdW = (IpprBorderType)((int)borderTrd | (int)ipprBorderInMemRight | (int)ipprBorderInMemLeft);
    }
    borderTrdD = borderTrdW;
    if ((splitImage.z > 1)) {
        if (d == 0)
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack);
        else if (d == (int)(splitImage.z - 1))
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemFront);
        else
            borderTrdD = (IpprBorderType)((int)borderTrdW | (int)ipprBorderInMemBack | (int)ipprBorderInMemFront);
    }
    ippGetThreadIdx_T(&threadIdx);
    Ipp8u *pBuf = pBuffer + bufSize * threadIdx;
    status = ipprFilterMedian_64f_C1V(pSrcRoi, srcPlaneStep, srcStep, pDstRoi, dstPlaneStep, dstStep, roiSizeS, borderTrdD, borderValue,
                                      (IpprFilterMedianSpec *)pSpec, pBuf);
    return status;
}

/*
 * Threaded 3D median filter for 8u single-channel volumes.
 *
 * Validates the arguments and asks ownrGetFilterMedianCubeSize how to split
 * roiVolume. With one thread or one tile it calls ipprFilterMedian_8u_C1V on
 * the whole volume; otherwise it hands one ipprFilterMedian_8u_C1V_T_Fun task
 * per tile to ippParallelFor_T.
 *
 * pSpecT  - spec laid out as a FilterMedianInfo header followed by the
 *           IpprFilterMedianSpec (the layout written by ipprFilterMedianInit_T).
 * pBuffer - work buffer; in the tiled path each task uses the slice starting
 *           at bufSize * threadIdx.
 *
 * Returns ippStsNullPtrErr for NULL pointers, ippStsSizeErr for a non-positive
 * roiVolume, a negative status from ipprFilterMedianGetSize, otherwise the
 * status of the filter call or of ippParallelFor_T.
 */
IPPFUN(IppStatus, ipprFilterMedian_8u_C1V_T,
       (const Ipp8u *pSrc, int srcPlaneStep, int srcStep, Ipp8u *pDst, int dstPlaneStep, int dstStep, IpprVolume roiVolume, IpprBorderType borderType,
        const Ipp8u borderValue[1], const IpprFilterMedianSpec_T *pSpecT, Ipp8u *pBuffer))
{
    int numChannels = 1;
    Ipp32s numThreads = 1;
    const FilterMedianInfo *pFilterMedianInfo; /* header holding the kernel size */
    IpprFilterMedianSpec *pSpec;
    IpprVolume kernelVolume;
    IpprPoint splitImage = {0, 0, 0};
    IpprVolume pTileSize = {0, 0, 0}, pLastSize = {0, 0, 0};
    int numTiles;

    if (pSrc == 0 || pDst == 0)
        return ippStsNullPtrErr;
    if (roiVolume.width <= 0 || roiVolume.height <= 0 || roiVolume.depth <= 0)
        return ippStsSizeErr;
    if (pSpecT == 0 || pBuffer == 0)
        return ippStsNullPtrErr;

    pFilterMedianInfo = (const FilterMedianInfo *)pSpecT;
    pSpec = (IpprFilterMedianSpec *)((Ipp8u *)pSpecT + sizeof(FilterMedianInfo));
    kernelVolume = pFilterMedianInfo->kernelVolume;

    ippGetNumThreads_T(&numThreads);
    ownrGetFilterMedianCubeSize(roiVolume, kernelVolume, (Ipp32u)numThreads, &pTileSize, &pLastSize, &splitImage);
    /* Make the "last" tile at least as large as a regular tile in every dimension. */
    if (pLastSize.width < pTileSize.width)
        pLastSize.width = pTileSize.width;
    if (pLastSize.height < pTileSize.height)
        pLastSize.height = pTileSize.height;
    if (pLastSize.depth < pTileSize.depth)
        pLastSize.depth = pTileSize.depth;

    numTiles = splitImage.x * splitImage.y * splitImage.z;
    if ((numThreads == 1) || (numTiles == 1)) {
        /* Intel IPP function call */
        return ipprFilterMedian_8u_C1V(pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, roiVolume, borderType, borderValue, pSpec,
                                       pBuffer);
    }

    ipprFilterMedian_8u_T_Str ts;
    int specSize = 0, bufSize = 0;
    /* bufSize becomes the per-thread stride into pBuffer; do not use it if the query failed. */
    IppStatus status = ipprFilterMedianGetSize(kernelVolume, pLastSize, ipp8u, numChannels, &specSize, &bufSize);
    if (status < 0)
        return status;

    fMedThreadingStructureEncode_8u_T((const Ipp8u *)pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, borderType, borderValue,
                                      (IpprFilterMedianSpec *)pSpec, pBuffer, bufSize, numChannels, splitImage, pTileSize, pLastSize, &ts);
    return ippParallelFor_T(numTiles, (void *)&ts, ipprFilterMedian_8u_C1V_T_Fun);
}
/*
 * Threaded 3D median filter for 16u single-channel volumes.
 *
 * Validates the arguments and asks ownrGetFilterMedianCubeSize how to split
 * roiVolume. With one thread or one tile it calls ipprFilterMedian_16u_C1V on
 * the whole volume; otherwise it hands one ipprFilterMedian_16u_C1V_T_Fun task
 * per tile to ippParallelFor_T.
 *
 * pSpecT  - spec laid out as a FilterMedianInfo header followed by the
 *           IpprFilterMedianSpec (the layout written by ipprFilterMedianInit_T).
 * pBuffer - work buffer; in the tiled path each task uses the slice starting
 *           at bufSize * threadIdx.
 *
 * Returns ippStsNullPtrErr for NULL pointers, ippStsSizeErr for a non-positive
 * roiVolume, a negative status from ipprFilterMedianGetSize, otherwise the
 * status of the filter call or of ippParallelFor_T.
 */
IPPFUN(IppStatus, ipprFilterMedian_16u_C1V_T,
       (const Ipp16u *pSrc, int srcPlaneStep, int srcStep, Ipp16u *pDst, int dstPlaneStep, int dstStep, IpprVolume roiVolume,
        IpprBorderType borderType, const Ipp16u borderValue[1], const IpprFilterMedianSpec_T *pSpecT, Ipp8u *pBuffer))
{
    int numChannels = 1;
    Ipp32s numThreads = 1;
    const FilterMedianInfo *pFilterMedianInfo; /* header holding the kernel size */
    IpprFilterMedianSpec *pSpec;
    IpprVolume kernelVolume;
    IpprPoint splitImage = {0, 0, 0};
    IpprVolume pTileSize = {0, 0, 0}, pLastSize = {0, 0, 0};
    int numTiles;

    if (pSrc == 0 || pDst == 0)
        return ippStsNullPtrErr;
    if (roiVolume.width <= 0 || roiVolume.height <= 0 || roiVolume.depth <= 0)
        return ippStsSizeErr;
    if (pSpecT == 0 || pBuffer == 0)
        return ippStsNullPtrErr;

    pFilterMedianInfo = (const FilterMedianInfo *)pSpecT;
    pSpec = (IpprFilterMedianSpec *)((Ipp8u *)pSpecT + sizeof(FilterMedianInfo));
    kernelVolume = pFilterMedianInfo->kernelVolume;

    ippGetNumThreads_T(&numThreads);
    ownrGetFilterMedianCubeSize(roiVolume, kernelVolume, (Ipp32u)numThreads, &pTileSize, &pLastSize, &splitImage);
    /* Make the "last" tile at least as large as a regular tile in every dimension. */
    if (pLastSize.width < pTileSize.width)
        pLastSize.width = pTileSize.width;
    if (pLastSize.height < pTileSize.height)
        pLastSize.height = pTileSize.height;
    if (pLastSize.depth < pTileSize.depth)
        pLastSize.depth = pTileSize.depth;

    numTiles = splitImage.x * splitImage.y * splitImage.z;
    if ((numThreads == 1) || (numTiles == 1)) {
        /* Intel IPP function call */
        return ipprFilterMedian_16u_C1V(pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, roiVolume, borderType, borderValue, pSpec,
                                        pBuffer);
    }

    ipprFilterMedian_16u_T_Str ts;
    int specSize = 0, bufSize = 0;
    /* bufSize becomes the per-thread stride into pBuffer; do not use it if the query failed. */
    IppStatus status = ipprFilterMedianGetSize(kernelVolume, pLastSize, ipp16u, numChannels, &specSize, &bufSize);
    if (status < 0)
        return status;

    fMedThreadingStructureEncode_16u_T((const Ipp16u *)pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, borderType, borderValue, pSpec,
                                       pBuffer, bufSize, numChannels, splitImage, pTileSize, pLastSize, &ts);
    return ippParallelFor_T(numTiles, (void *)&ts, ipprFilterMedian_16u_C1V_T_Fun);
}
/*
 * Threaded 3D median filter for 16s single-channel volumes.
 *
 * Validates the arguments and asks ownrGetFilterMedianCubeSize how to split
 * roiVolume. With one thread or one tile it calls ipprFilterMedian_16s_C1V on
 * the whole volume; otherwise it hands one ipprFilterMedian_16s_C1V_T_Fun task
 * per tile to ippParallelFor_T.
 *
 * pSpecT  - spec laid out as a FilterMedianInfo header followed by the
 *           IpprFilterMedianSpec (the layout written by ipprFilterMedianInit_T).
 * pBuffer - work buffer; in the tiled path each task uses the slice starting
 *           at bufSize * threadIdx.
 *
 * Returns ippStsNullPtrErr for NULL pointers, ippStsSizeErr for a non-positive
 * roiVolume, a negative status from ipprFilterMedianGetSize, otherwise the
 * status of the filter call or of ippParallelFor_T.
 */
IPPFUN(IppStatus, ipprFilterMedian_16s_C1V_T,
       (const Ipp16s *pSrc, int srcPlaneStep, int srcStep, Ipp16s *pDst, int dstPlaneStep, int dstStep, IpprVolume roiVolume,
        IpprBorderType borderType, const Ipp16s borderValue[1], const IpprFilterMedianSpec_T *pSpecT, Ipp8u *pBuffer))
{
    int numChannels = 1;
    Ipp32s numThreads = 1;
    const FilterMedianInfo *pFilterMedianInfo; /* header holding the kernel size */
    IpprFilterMedianSpec *pSpec;
    IpprVolume kernelVolume;
    IpprPoint splitImage = {0, 0, 0};
    IpprVolume pTileSize = {0, 0, 0}, pLastSize = {0, 0, 0};
    int numTiles;

    if (pSrc == 0 || pDst == 0)
        return ippStsNullPtrErr;
    if (roiVolume.width <= 0 || roiVolume.height <= 0 || roiVolume.depth <= 0)
        return ippStsSizeErr;
    if (pSpecT == 0 || pBuffer == 0)
        return ippStsNullPtrErr;

    pFilterMedianInfo = (const FilterMedianInfo *)pSpecT;
    pSpec = (IpprFilterMedianSpec *)((Ipp8u *)pSpecT + sizeof(FilterMedianInfo));
    kernelVolume = pFilterMedianInfo->kernelVolume;

    ippGetNumThreads_T(&numThreads);
    ownrGetFilterMedianCubeSize(roiVolume, kernelVolume, (Ipp32u)numThreads, &pTileSize, &pLastSize, &splitImage);
    /* Make the "last" tile at least as large as a regular tile in every dimension. */
    if (pLastSize.width < pTileSize.width)
        pLastSize.width = pTileSize.width;
    if (pLastSize.height < pTileSize.height)
        pLastSize.height = pTileSize.height;
    if (pLastSize.depth < pTileSize.depth)
        pLastSize.depth = pTileSize.depth;

    numTiles = splitImage.x * splitImage.y * splitImage.z;
    if ((numThreads == 1) || (numTiles == 1)) {
        /* Intel IPP function call */
        return ipprFilterMedian_16s_C1V(pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, roiVolume, borderType, borderValue, pSpec,
                                        pBuffer);
    }

    ipprFilterMedian_16s_T_Str ts;
    int specSize = 0, bufSize = 0;
    /* bufSize becomes the per-thread stride into pBuffer; do not use it if the query failed. */
    IppStatus status = ipprFilterMedianGetSize(kernelVolume, pLastSize, ipp16s, numChannels, &specSize, &bufSize);
    if (status < 0)
        return status;

    fMedThreadingStructureEncode_16s_T((Ipp16s *)pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, borderType, borderValue, pSpec,
                                       pBuffer, bufSize, numChannels, splitImage, pTileSize, pLastSize, &ts);
    return ippParallelFor_T(numTiles, (void *)&ts, ipprFilterMedian_16s_C1V_T_Fun);
}
/*
 * Threaded 3D median filter for 32f single-channel volumes.
 *
 * Validates the arguments and asks ownrGetFilterMedianCubeSize how to split
 * roiVolume. With one thread or one tile it calls ipprFilterMedian_32f_C1V on
 * the whole volume; otherwise it hands one ipprFilterMedian_32f_C1V_T_Fun task
 * per tile to ippParallelFor_T.
 *
 * pSpecT  - spec laid out as a FilterMedianInfo header followed by the
 *           IpprFilterMedianSpec (the layout written by ipprFilterMedianInit_T).
 * pBuffer - work buffer; in the tiled path each task uses the slice starting
 *           at bufSize * threadIdx.
 *
 * Returns ippStsNullPtrErr for NULL pointers, ippStsSizeErr for a non-positive
 * roiVolume, a negative status from ipprFilterMedianGetSize, otherwise the
 * status of the filter call or of ippParallelFor_T.
 */
IPPFUN(IppStatus, ipprFilterMedian_32f_C1V_T,
       (const Ipp32f *pSrc, int srcPlaneStep, int srcStep, Ipp32f *pDst, int dstPlaneStep, int dstStep, IpprVolume roiVolume,
        IpprBorderType borderType, const Ipp32f borderValue[1], const IpprFilterMedianSpec_T *pSpecT, Ipp8u *pBuffer))
{
    int numChannels = 1;
    Ipp32s numThreads = 1;
    const FilterMedianInfo *pFilterMedianInfo; /* header holding the kernel size */
    IpprFilterMedianSpec *pSpec;
    IpprVolume kernelVolume;
    IpprPoint splitImage = {0, 0, 0};
    IpprVolume pTileSize = {0, 0, 0}, pLastSize = {0, 0, 0};
    int numTiles;

    if (pSrc == 0 || pDst == 0)
        return ippStsNullPtrErr;
    if (roiVolume.width <= 0 || roiVolume.height <= 0 || roiVolume.depth <= 0)
        return ippStsSizeErr;
    if (pSpecT == 0 || pBuffer == 0)
        return ippStsNullPtrErr;

    pFilterMedianInfo = (const FilterMedianInfo *)pSpecT;
    pSpec = (IpprFilterMedianSpec *)((Ipp8u *)pSpecT + sizeof(FilterMedianInfo));
    kernelVolume = pFilterMedianInfo->kernelVolume;

    ippGetNumThreads_T(&numThreads);
    ownrGetFilterMedianCubeSize(roiVolume, kernelVolume, (Ipp32u)numThreads, &pTileSize, &pLastSize, &splitImage);
    /* Make the "last" tile at least as large as a regular tile in every dimension. */
    if (pLastSize.width < pTileSize.width)
        pLastSize.width = pTileSize.width;
    if (pLastSize.height < pTileSize.height)
        pLastSize.height = pTileSize.height;
    if (pLastSize.depth < pTileSize.depth)
        pLastSize.depth = pTileSize.depth;

    numTiles = splitImage.x * splitImage.y * splitImage.z;
    if ((numThreads == 1) || (numTiles == 1)) {
        /* Intel IPP function call */
        return ipprFilterMedian_32f_C1V(pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, roiVolume, borderType, borderValue, pSpec,
                                        pBuffer);
    }

    ipprFilterMedian_32f_T_Str ts;
    int specSize = 0, bufSize = 0;
    /* bufSize becomes the per-thread stride into pBuffer; do not use it if the query failed. */
    IppStatus status = ipprFilterMedianGetSize(kernelVolume, pLastSize, ipp32f, numChannels, &specSize, &bufSize);
    if (status < 0)
        return status;

    fMedThreadingStructureEncode_32f_T((Ipp32f *)pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, borderType, borderValue, pSpec,
                                       pBuffer, bufSize, numChannels, splitImage, pTileSize, pLastSize, &ts);
    return ippParallelFor_T(numTiles, (void *)&ts, ipprFilterMedian_32f_C1V_T_Fun);
}
/*
 * Threaded 3D median filter for 64f single-channel volumes.
 *
 * Validates the arguments and asks ownrGetFilterMedianCubeSize how to split
 * roiVolume. With one thread or one tile it calls ipprFilterMedian_64f_C1V on
 * the whole volume; otherwise it hands one ipprFilterMedian_64f_C1V_T_Fun task
 * per tile to ippParallelFor_T.
 *
 * pSpecT  - spec laid out as a FilterMedianInfo header followed by the
 *           IpprFilterMedianSpec (the layout written by ipprFilterMedianInit_T).
 * pBuffer - work buffer; in the tiled path each task uses the slice starting
 *           at bufSize * threadIdx.
 *
 * Returns ippStsNullPtrErr for NULL pointers, ippStsSizeErr for a non-positive
 * roiVolume, a negative status from ipprFilterMedianGetSize, otherwise the
 * status of the filter call or of ippParallelFor_T.
 */
IPPFUN(IppStatus, ipprFilterMedian_64f_C1V_T,
       (const Ipp64f *pSrc, int srcPlaneStep, int srcStep, Ipp64f *pDst, int dstPlaneStep, int dstStep, IpprVolume roiVolume,
        IpprBorderType borderType, const Ipp64f borderValue[1], const IpprFilterMedianSpec_T *pSpecT, Ipp8u *pBuffer))
{
    int numChannels = 1;
    Ipp32s numThreads = 1;
    const FilterMedianInfo *pFilterMedianInfo; /* header holding the kernel size */
    IpprFilterMedianSpec *pSpec;
    IpprVolume kernelVolume;
    IpprPoint splitImage = {0, 0, 0};
    IpprVolume pTileSize = {0, 0, 0}, pLastSize = {0, 0, 0};
    int numTiles;

    if (pSrc == 0 || pDst == 0)
        return ippStsNullPtrErr;
    if (roiVolume.width <= 0 || roiVolume.height <= 0 || roiVolume.depth <= 0)
        return ippStsSizeErr;
    if (pSpecT == 0 || pBuffer == 0)
        return ippStsNullPtrErr;

    pFilterMedianInfo = (const FilterMedianInfo *)pSpecT;
    pSpec = (IpprFilterMedianSpec *)((Ipp8u *)pSpecT + sizeof(FilterMedianInfo));
    kernelVolume = pFilterMedianInfo->kernelVolume;

    ippGetNumThreads_T(&numThreads);
    ownrGetFilterMedianCubeSize(roiVolume, kernelVolume, (Ipp32u)numThreads, &pTileSize, &pLastSize, &splitImage);
    /* Make the "last" tile at least as large as a regular tile in every dimension. */
    if (pLastSize.width < pTileSize.width)
        pLastSize.width = pTileSize.width;
    if (pLastSize.height < pTileSize.height)
        pLastSize.height = pTileSize.height;
    if (pLastSize.depth < pTileSize.depth)
        pLastSize.depth = pTileSize.depth;

    numTiles = splitImage.x * splitImage.y * splitImage.z;
    if ((numThreads == 1) || (numTiles == 1)) {
        /* Intel IPP function call */
        return ipprFilterMedian_64f_C1V(pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, roiVolume, borderType, borderValue, pSpec,
                                        pBuffer);
    }

    ipprFilterMedian_64f_T_Str ts;
    int specSize = 0, bufSize = 0;
    /* bufSize becomes the per-thread stride into pBuffer; do not use it if the query failed. */
    IppStatus status = ipprFilterMedianGetSize(kernelVolume, pLastSize, ipp64f, numChannels, &specSize, &bufSize);
    if (status < 0)
        return status;

    fMedThreadingStructureEncode_64f_T((const Ipp64f *)pSrc, srcPlaneStep, srcStep, pDst, dstPlaneStep, dstStep, borderType, borderValue, pSpec,
                                       pBuffer, bufSize, numChannels, splitImage, pTileSize, pLastSize, &ts);
    return ippParallelFor_T(numTiles, (void *)&ts, ipprFilterMedian_64f_C1V_T_Fun);
}
