summaryrefslogtreecommitdiffstats
path: root/src/com
diff options
context:
space:
mode:
author Michael Bien <[email protected]> 2011-09-21 06:25:20 +0200
committer Michael Bien <[email protected]> 2011-09-21 06:25:20 +0200
commit baf07b12a2a62003334d17113e8dad1e92b80029 (patch)
tree 7f1d345ed624bd38d909402051383cc1f43148b9 /src/com
parent ccfc0b128c0eeee54ded44fc3700de54e9532213 (diff)
bugfixes for parallel reduction primitive
- wrong cache size
- illegal read from cache on overflow -> read from global mem
Diffstat (limited to 'src/com')
-rw-r--r-- src/com/jogamp/opencl/util/pp/Reduction.java 2
-rw-r--r-- src/com/jogamp/opencl/util/pp/reduce.cl 4
2 files changed, 3 insertions, 3 deletions
diff --git a/src/com/jogamp/opencl/util/pp/Reduction.java b/src/com/jogamp/opencl/util/pp/Reduction.java
index c2d47f7c..77a37e78 100644
--- a/src/com/jogamp/opencl/util/pp/Reduction.java
+++ b/src/com/jogamp/opencl/util/pp/Reduction.java
@@ -122,7 +122,7 @@ public class Reduction<B extends Buffer> implements CLResource {
int workItems = CLUtil.roundUp(realSize, groupSize*2) / 2;
int groups = workItems / groupSize;
- int sharedBufferSize = groupSize / 2 * ELEMENT.SIZE*VECTOR_SIZE;
+ int sharedBufferSize = groupSize * ELEMENT.SIZE*VECTOR_SIZE;
int outputSize = groups * ELEMENT.SIZE*VECTOR_SIZE;
diff --git a/src/com/jogamp/opencl/util/pp/reduce.cl b/src/com/jogamp/opencl/util/pp/reduce.cl
index d820ffe2..5f124d62 100644
--- a/src/com/jogamp/opencl/util/pp/reduce.cl
+++ b/src/com/jogamp/opencl/util/pp/reduce.cl
@@ -50,9 +50,9 @@ kernel void reduce(const global TYPE* input, global TYPE* output, local TYPE* sh
#elif OP_MUL
shared[localID] = 1;
#elif OP_MIN
- shared[localID] = shared[0];
+ shared[localID] = input[0];
#elif OP_MAX
- shared[localID] = shared[0];
+ shared[localID] = input[0];
#endif
}