/*
 * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * This file was originally generated by JSLC
 * and then hand edited for performance.
 */

#include <jni.h>
#include <math.h>
#include "SSEUtils.h"
#include "com_sun_scenario_effect_impl_sw_sse_SSELinearConvolvePeer.h"

/*
 * Thresholds used when converting an accumulated float component into
 * an integer in the range 0..255: anything below cmin is flushed to 0
 * and anything above cmax is saturated to 255.
 */
#define cmin 1.0f
#define cmax (255.0f - 1.0f/32.0f)

/*
 * Converts a float component to an integer in the range 0..255.
 * Values in [cmin, cmax] are truncated toward zero by the cast.
 * The range test is written as (f >= cmin) rather than !(f < cmin) so
 * that a NaN argument yields 0 instead of reaching the float-to-int
 * cast, whose result is undefined for NaN.
 * The argument may be evaluated more than once and therefore must not
 * have side effects.
 */
#define fvaltobyte(f) \
    (((f) >= cmin) ? (((f) > cmax) ? 255 : ((jint) (f))) : 0)

/*
 * Fills a dstw x dsth region of the destination pixels by accumulating,
 * for every destination pixel, "count" weighted samples of the source
 * taken along a straight line.
 *
 * dstPixels_arr, dstw, dsth, dstscan
 *     destination int array, the number of pixels written per row, the
 *     number of rows, and the element distance between row starts.
 * srcPixels_arr, srcw, srch, srcscan
 *     source int array and its dimensions; these are only handed on to
 *     laccumsample().
 * weights_arr, count
 *     the first "count" floats of weights_arr are the per-sample weights;
 *     at most 128 are supported.
 * srcx0, srcy0
 *     source coordinates of the upper left corner of the first
 *     destination pixel.
 * offsetx, offsety
 *     added to the source position of a destination pixel to obtain the
 *     position of its first sample.
 * deltax, deltay
 *     step between consecutive samples.
 * dxcol, dycol, dxrow, dyrow
 *     change in source position per destination column / row.
 *
 * The call returns without writing anything when an array reference is
 * NULL, when count exceeds 128, when the weights cannot be copied, or
 * when the destination region does not fit inside dstPixels_arr.
 *
 * NOTE(review): laccumsample() is declared outside this file; presumably
 * it clips sample positions against srcw/srch itself, since no source
 * bounds are enforced here - confirm against its definition.
 */
JNIEXPORT void JNICALL
Java_com_sun_scenario_effect_impl_sw_sse_SSELinearConvolvePeer_filterVector
    (JNIEnv *env, jobject lcpthis,
     jintArray dstPixels_arr, jint dstw, jint dsth, jint dstscan,
     jintArray srcPixels_arr, jint srcw, jint srch, jint srcscan,
     jfloatArray weights_arr, jint count,
     jfloat srcx0, jfloat srcy0,
     jfloat offsetx, jfloat offsety,
     jfloat deltax, jfloat deltay,
     jfloat dxcol, jfloat dycol, jfloat dxrow, jfloat dyrow)
{
    if (dstPixels_arr == NULL || srcPixels_arr == NULL || weights_arr == NULL) {
        return;
    }

    // The weights are copied into a fixed size stack buffer.
    if (count > 128) return;
    jfloat weights[128];
    env->GetFloatArrayRegion(weights_arr, 0, count, weights);
    // A negative count, or one larger than the array, raises an exception
    // and leaves the buffer unfilled; no further JNI calls may be made
    // while it is pending, so hand it straight back to the caller.
    if (env->ExceptionCheck()) return;

    // Make sure every index (dy * dstscan + dx) that the loops below will
    // write falls inside the destination array.  The length has to be
    // queried here because JNI calls are not allowed inside the critical
    // region.  The arithmetic is done in 64 bits so it cannot overflow.
    if (dstw > 0 && dsth > 0) {
        jlong lastrow = (jlong) (dsth - 1) * dstscan;
        if (lastrow < 0 ||
            lastrow + (dstw - 1) >= (jlong) env->GetArrayLength(dstPixels_arr))
        {
            return;
        }
    }

    jint *srcPixels = (jint *)env->GetPrimitiveArrayCritical(srcPixels_arr, 0);
    if (srcPixels == NULL) return;
    jint *dstPixels = (jint *)env->GetPrimitiveArrayCritical(dstPixels_arr, 0);
    if (dstPixels == NULL) {
        env->ReleasePrimitiveArrayCritical(srcPixels_arr, srcPixels, JNI_ABORT);
        return;
    }

    jint dstrow = 0;
    // srcxy0 point at UL corner, shift them to center of 1st dest pixel:
    srcx0 += (dxrow + dxcol) * 0.5f;
    srcy0 += (dyrow + dycol) * 0.5f;
    for (jint dy = 0; dy < dsth; dy++) {
        // Source position corresponding to the current destination pixel.
        jfloat srcx = srcx0;
        jfloat srcy = srcy0;
        for (jint dx = 0; dx < dstw; dx++) {
            // Per-component accumulators, indexed by the FVAL_* constants.
            jfloat fvals[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
            jfloat sampx = srcx + offsetx;
            jfloat sampy = srcy + offsety;
            for (jint i = 0; i < count; ++i) {
                laccumsample(srcPixels, sampx, sampy, srcw, srch, srcscan,
                             weights[i], fvals);
                sampx += deltax;
                sampy += deltay;
            }
            // Pack the clamped components with A in the top byte,
            // followed by R, G and B.
            dstPixels[dstrow + dx] =
                (fvaltobyte(fvals[FVAL_A]) << 24) +
                (fvaltobyte(fvals[FVAL_R]) << 16) +
                (fvaltobyte(fvals[FVAL_G]) <<  8) +
                (fvaltobyte(fvals[FVAL_B])      );
            srcx += dxcol;
            srcy += dycol;
        }
        srcx0 += dxrow;
        srcy0 += dyrow;
        dstrow += dstscan;
    }

    // The destination was modified (mode 0); the source was only read,
    // so its release uses JNI_ABORT.
    env->ReleasePrimitiveArrayCritical(dstPixels_arr, dstPixels, 0);
    env->ReleasePrimitiveArrayCritical(srcPixels_arr, srcPixels, JNI_ABORT);
}

/*
 * In the nomenclature of the argument list for this method, "row" refers
 * to the coordinate which increments once for each new stream of single
 * axis data that we are blurring in a single pass.  And "col" refers to
 * the other coordinate that increments along the row.
 * Rows are horizontal in the first pass and vertical in the second pass.
 * Cols are vice versa.
 */
JNIEXPORT void JNICALL
Java_com_sun_scenario_effect_impl_sw_sse_SSELinearConvolvePeer_filterHV
    (JNIEnv *env, jobject lcpthis,
     jintArray dstPixels_arr, jint dstcols, jint dstrows, jint dcolinc, jint drowinc,
     jintArray srcPixels_arr, jint srccols, jint srcrows, jint scolinc, jint srowinc,
     jfloatArray kvals_arr)
{
    jint kernelSize = env->GetArrayLength(kvals_arr) / 2;
    if (kernelSize > 128) return;
    jfloat kvals[256];
    env->GetFloatArrayRegion(kvals_arr, 0, kernelSize * 2, kvals);

    jint *srcPixels = (jint *)env->GetPrimitiveArrayCritical(srcPixels_arr, 0);
    if (srcPixels == NULL) return;
    jint *dstPixels = (jint *)env->GetPrimitiveArrayCritical(dstPixels_arr, 0);
    if (dstPixels == NULL) {
        env->ReleasePrimitiveArrayCritical(srcPixels_arr, srcPixels, JNI_ABORT);
        return;
    }

    // cvals stores the component values from the surrounding K pixels
    // from x-r to x+r
    jfloat cvals[128*4];
    jint dstrow = 0;
    jint srcrow = 0;
    for (jint r = 0; r < dstrows; r++) {
        jint dstoff = dstrow;
        jint srcoff = srcrow;
        // Must clear out the array at the start of every line
        // Might be able to rely on the fact that the previous line must
        // have run out of data towards the end of the scan line, though.
        for (jint i = 0; i < kernelSize*4; i++) {
            cvals[i] = 0.0f;
        }
        jint koff = kernelSize;
        for (jint c = 0; c < dstcols; c++) {
            // Load the data for this x location into the array.
            jint i = (kernelSize - koff) * 4;
            jint rgb = (c < srccols) ? srcPixels[srcoff] : 0;
            cvals[i+0] = (jfloat) ((rgb >> 24) & 0xff);
            cvals[i+1] = (jfloat) ((rgb >> 16) & 0xff);
            cvals[i+2] = (jfloat) ((rgb >>  8) & 0xff);
            cvals[i+3] = (jfloat) ((rgb      ) & 0xff);
            // Bump the koff to the next spot to align the coefficients.
            if (--koff <= 0) {
                koff += kernelSize;
            }
            jfloat suma = 0.0f;
            jfloat sumr = 0.0f;
            jfloat sumg = 0.0f;
            jfloat sumb = 0.0f;
            for (i = 0; i < kernelSize*4; i += 4) {
                jfloat factor = kvals[koff + (i>>2)];
                suma += cvals[i+0] * factor;
                sumr += cvals[i+1] * factor;
                sumg += cvals[i+2] * factor;
                sumb += cvals[i+3] * factor;
            }
            dstPixels[dstoff] =
                (((suma < cmin) ? 0 : ((suma > cmax) ? 255 : ((jint) suma))) << 24) +
                (((sumr < cmin) ? 0 : ((sumr > cmax) ? 255 : ((jint) sumr))) << 16) +
                (((sumg < cmin) ? 0 : ((sumg > cmax) ? 255 : ((jint) sumg))) <<  8) +
                (((sumb < cmin) ? 0 : ((sumb > cmax) ? 255 : ((jint) sumb)))      );
            dstoff += dcolinc;
            srcoff += scolinc;
        }
        dstrow += drowinc;
        srcrow += srowinc;
    }

    env->ReleasePrimitiveArrayCritical(dstPixels_arr, dstPixels, 0);
    env->ReleasePrimitiveArrayCritical(srcPixels_arr, srcPixels, JNI_ABORT);
}
