/*******************************************************************************
 * Copyright 2015 Intel Corporation.
 *
 *
 * This software and the related documents are Intel copyrighted materials, and your use of them is governed by
 * the express license under which they were provided to you ('License'). Unless the License provides otherwise,
 * you may not use, modify, copy, publish, distribute, disclose or transmit this software or the related
 * documents without Intel's prior written permission.
 * This software and the related documents are provided as is, with no express or implied warranties, other than
 * those that are expressly stated in the License.
 *******************************************************************************/

/*
//   Purpose: Functions of Sqr operation
//   Contents:
//       ippiSqr_16s_C1IRSfs_T
//       ippiSqr_16s32s_C1RSfs_T
*/

#include "pisimplearithm_t.h"

static IppStatus ippiSqr_16s_C1IRSfs_T_Fun(int i, void *arg)
{
    ippiSimpleArithmetics_16s_T_Str *ts = (ippiSimpleArithmetics_16s_T_Str *)arg;

    Ipp16s *pSrcDst = ts->pDst;
    int srcDstStep = ts->dstStep;

    IppiSize roiSize;
    roiSize.width = ts->roiSize.width;
    roiSize.height = ts->roiSize.height;
    int scaleFactor = ts->scaleFactor;
    IppiPoint splitImage = ts->splitImage;
    IppiSize tileSize = ts->tileSize;
    IppiSize tailSize = ts->tailSize;

    Ipp16s *pSrcDstRoi;
    IppiPoint roiOffset = {0, 0};

    owniGetTileParamsByIndex_T(i, splitImage, tileSize, tailSize, &roiOffset, &roiSize);

    /* compute pointer to ROI */
    pSrcDstRoi = owniGetImagePointer_16s_C1(pSrcDst, srcDstStep, roiOffset.x, roiOffset.y);

    return ippiSqr_16s_C1IRSfs(pSrcDstRoi, srcDstStep, roiSize, scaleFactor);
}

/*
 * Tiled in-place Sqr for a 16s, single-channel image.
 *
 * Parameters:
 *   pSrcDst      - pointer to the image that is read and overwritten
 *   srcDstStep   - step of the image, forwarded unchanged to ippiSqr_16s_C1IRSfs
 *   roiSize      - size of the region to process
 *   scaleFactor  - scale factor, forwarded unchanged to ippiSqr_16s_C1IRSfs
 *
 * Returns:
 *   ippStsNullPtrErr  when pSrcDst is NULL
 *   ippStsSizeErr     when roiSize has a non-positive width or height
 *   otherwise the status of the tile split, of ippiSqr_16s_C1IRSfs (single
 *   tile) or of ippParallelFor_T (several tiles).
 */
IPPFUN(IppStatus, ippiSqr_16s_C1IRSfs_T, (Ipp16s * pSrcDst, int srcDstStep, IppiSize roiSize, int scaleFactor))
{
    if (!pSrcDst)
        return ippStsNullPtrErr;
    if (roiSize.width <= 0 || roiSize.height <= 0)
        return ippStsSizeErr;

    int bytesPerPixel = sizeof(Ipp16s);
    int tileFloor = IPP_MIN(IPP64_MIN_ADD_2D / bytesPerPixel, IPP_MAX_32S / bytesPerPixel);

    IppiPoint grid;
    IppiSize tile;
    IppiSize tail;

    /* work out how the ROI is cut into tiles */
    IppStatus sts = owniSplitUniform2D_T(roiSize, tileFloor, &grid, &tile, &tail);
    if (sts != ippStsNoErr)
        return sts;

    /* a 1x1 grid means the ROI is a single tile: call the kernel directly */
    if (grid.x == 1 && grid.y == 1)
        return ippiSqr_16s_C1IRSfs(pSrcDst, srcDstStep, roiSize, scaleFactor);

    /* otherwise pack the arguments and hand one job per tile to ippParallelFor_T */
    int tileCount = grid.x * grid.y;
    ippiSimpleArithmetics_16s_T_Str job;
    simpleArithmeticsThreadingStructureEncode_16s((Ipp16s *)0, 0, (Ipp16s *)0, 0, pSrcDst, srcDstStep, roiSize, scaleFactor, grid, tile, tail,
                                                  &job);

    return ippParallelFor_T(tileCount, (void *)&job, ippiSqr_16s_C1IRSfs_T_Fun);
}

static IppStatus ippiSqr_16s32s_C1RSfs_T_Fun(int i, void *arg)
{
    ippiSimpleArithmetics_16s32s_T_Str *ts = (ippiSimpleArithmetics_16s32s_T_Str *)arg;

    IppStatus status = ippStsNoErr;

    const Ipp16s *pSrc = ts->pSrc1;
    Ipp32s *pDst = ts->pDst;

    int j;
    int srcStep = ts->src1Step;
    int dstStep = ts->dstStep;
    int scaleFactor = ts->scaleFactor;

    IppiSize roiSize;
    IppiSize tileSize = ts->tileSize;
    IppiSize tailSize = ts->tailSize;

    roiSize.width = ts->roiSize.width;
    roiSize.height = ts->roiSize.height;

    IppiPoint splitImage = ts->splitImage;
    IppiPoint roiOffset = {0, 0};

    Ipp16s *pSrcRoi;
    Ipp16s *pLineSrc;

    Ipp32s *pDstRoi;
    Ipp32s *pLineDst;

    status = owniGetTileParamsByIndex_T(i, splitImage, tileSize, tailSize, &roiOffset, &roiSize);
    if (status != ippStsNoErr)
        return status;

    pSrcRoi = owniGetImagePointer_16s_C1(pSrc, srcStep, roiOffset.x, roiOffset.y);
    pDstRoi = owniGetImagePointer_32s_C1(pDst, dstStep, roiOffset.x, roiOffset.y);

    for (j = 0; j < roiSize.height; ++j) {
        pLineSrc = (Ipp16s *)((Ipp8u *)pSrcRoi + j * srcStep);
        pLineDst = (Ipp32s *)((Ipp8u *)pDstRoi + j * dstStep);

        status = ippsMul_16s32s_Sfs(pLineSrc, pLineSrc, pLineDst, roiSize.width, scaleFactor);
        if (status != ippStsNoErr)
            break;
    }

    return status;
}

/*
 * Tiled Sqr from a 16s source image into a 32s destination image,
 * single channel.
 *
 * Each row is squared by multiplying it by itself with ippsMul_16s32s_Sfs.
 * When the ROI forms a single tile the rows are processed here; otherwise one
 * job per tile is dispatched through ippParallelFor_T.
 *
 * Parameters:
 *   pSrc         - pointer to the source image (read only)
 *   srcStep      - distance in bytes between consecutive source rows
 *   pDst         - pointer to the destination image
 *   dstStep      - distance in bytes between consecutive destination rows
 *   roiSize      - size of the region to process
 *   scaleFactor  - scale factor, forwarded unchanged to ippsMul_16s32s_Sfs
 *
 * Returns:
 *   ippStsNullPtrErr  when pSrc or pDst is NULL
 *   ippStsSizeErr     when roiSize has a non-positive width or height
 *   otherwise the status of the tile split, the first non-OK status from
 *   ippsMul_16s32s_Sfs (single tile), or the status of ippParallelFor_T.
 */
IPPFUN(IppStatus, ippiSqr_16s32s_C1RSfs_T, (const Ipp16s *pSrc, int srcStep, Ipp32s *pDst, int dstStep, IppiSize roiSize, int scaleFactor))
{
    if (pSrc == NULL || pDst == NULL)
        return ippStsNullPtrErr;
    if (roiSize.width <= 0 || roiSize.height <= 0)
        return ippStsSizeErr;

    IppStatus status = ippStsNoErr;

    int numTiles;
    /* NOTE(review): the tile bound is derived from the 16s source pixel size, not the 32s destination - confirm this is intended */
    int pixelSize = sizeof(Ipp16s);
    int minTileSize = IPP_MIN(IPP64_MIN_ADD_2D / pixelSize, IPP_MAX_32S / pixelSize);

    IppiPoint splitImage;

    IppiSize tileSize;
    IppiSize tailSize;

    status = owniSplitUniform2D_T(roiSize, minTileSize, &splitImage, &tileSize, &tailSize);
    if (status != ippStsNoErr)
        return status;

    if (splitImage.x == 1 && splitImage.y == 1) {
        /* single tile: walk the whole ROI row by row in the calling thread */
        const Ipp16s *pLineSrc;
        Ipp32s *pLineDst;

        for (int i = 0; i < roiSize.height; ++i) {
            /* steps are byte offsets; widen before multiplying because row * step can exceed the int range */
            pLineSrc = (const Ipp16s *)((const Ipp8u *)pSrc + (long long)i * srcStep);
            pLineDst = (Ipp32s *)((Ipp8u *)pDst + (long long)i * dstStep);

            status = ippsMul_16s32s_Sfs(pLineSrc, pLineSrc, pLineDst, roiSize.width, scaleFactor);
            if (status != ippStsNoErr)
                break;
        }
    } else {
        ippiSimpleArithmetics_16s32s_T_Str ts;

        numTiles = splitImage.x * splitImage.y;
        /* the source is passed as both operands because the square is computed as src * src */
        simpleArithmeticsThreadingStructureEncode_16s32s(pSrc, srcStep, pSrc, srcStep, pDst, dstStep, roiSize, scaleFactor, splitImage, tileSize,
                                                         tailSize, &ts);

        status = ippParallelFor_T(numTiles, (void *)&ts, ippiSqr_16s32s_C1RSfs_T_Fun);
    }

    return status;
}
