diff options
author | Michael Bien <[email protected]> | 2011-09-21 06:25:20 +0200 |
---|---|---|
committer | Michael Bien <[email protected]> | 2011-09-21 06:25:20 +0200 |
commit | baf07b12a2a62003334d17113e8dad1e92b80029 (patch) | |
tree | 7f1d345ed624bd38d909402051383cc1f43148b9 /src/com | |
parent | ccfc0b128c0eeee54ded44fc3700de54e9532213 (diff) |
bugfixes for parallel reduction primitive
- wrong cache size
- illegal read from cache on overflow -> read from global mem
Diffstat (limited to 'src/com')
-rw-r--r-- | src/com/jogamp/opencl/util/pp/Reduction.java | 2 | ||||
-rw-r--r-- | src/com/jogamp/opencl/util/pp/reduce.cl | 4 |
2 files changed, 3 insertions, 3 deletions
diff --git a/src/com/jogamp/opencl/util/pp/Reduction.java b/src/com/jogamp/opencl/util/pp/Reduction.java
index c2d47f7c..77a37e78 100644
--- a/src/com/jogamp/opencl/util/pp/Reduction.java
+++ b/src/com/jogamp/opencl/util/pp/Reduction.java
@@ -122,7 +122,7 @@ public class Reduction<B extends Buffer> implements CLResource {
         int workItems = CLUtil.roundUp(realSize, groupSize*2) / 2;
         int groups = workItems / groupSize;
-        int sharedBufferSize = groupSize / 2 * ELEMENT.SIZE*VECTOR_SIZE;
+        int sharedBufferSize = groupSize * ELEMENT.SIZE*VECTOR_SIZE;
         int outputSize = groups * ELEMENT.SIZE*VECTOR_SIZE;
diff --git a/src/com/jogamp/opencl/util/pp/reduce.cl b/src/com/jogamp/opencl/util/pp/reduce.cl
index d820ffe2..5f124d62 100644
--- a/src/com/jogamp/opencl/util/pp/reduce.cl
+++ b/src/com/jogamp/opencl/util/pp/reduce.cl
@@ -50,9 +50,9 @@ kernel void reduce(const global TYPE* input, global TYPE* output, local TYPE* sh
 #elif OP_MUL
         shared[localID] = 1;
 #elif OP_MIN
-        shared[localID] = shared[0];
+        shared[localID] = input[0];
 #elif OP_MAX
-        shared[localID] = shared[0];
+        shared[localID] = input[0];
 #endif
     }