summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/com/jogamp/opencl/util/CLUtil.java 12
-rw-r--r-- src/com/jogamp/opencl/util/pp/Reduction.java 100
-rw-r--r-- test/com/jogamp/opencl/CLCommandQueueTest.java 7
-rw-r--r-- test/com/jogamp/opencl/HighLevelBindingTest.java 3
-rw-r--r-- test/com/jogamp/opencl/LowLevelBindingTest.java 2
-rw-r--r-- test/com/jogamp/opencl/TestUtils.java 9
6 files changed, 100 insertions, 33 deletions
diff --git a/src/com/jogamp/opencl/util/CLUtil.java b/src/com/jogamp/opencl/util/CLUtil.java
index ff04f745..5c90be56 100644
--- a/src/com/jogamp/opencl/util/CLUtil.java
+++ b/src/com/jogamp/opencl/util/CLUtil.java
@@ -85,6 +85,18 @@ public class CLUtil {
}
/**
+ * Rounds the value up to the nearest multiple.
+ * <p>
+ * A value that is already an exact multiple of {@code requiredMultiple} is returned
+ * unchanged; otherwise the distance to the next multiple is added. The computation uses
+ * plain {@code int} arithmetic and is not checked for overflow.
+ * <p>
+ * NOTE(review): the Java remainder takes the sign of the dividend, so for a negative
+ * {@code value} the result is not the smallest multiple above it
+ * (e.g. {@code roundUp(-3, 4)} yields 4, not 0). Callers visible in this change pass
+ * element counts and work sizes - confirm that non-negative input is the intended contract.
+ *
+ * @param value the value to round up
+ * @param requiredMultiple the step the result has to be a multiple of; passing zero makes
+ *        the remainder operation throw {@link ArithmeticException}
+ * @return {@code value} when {@code value % requiredMultiple == 0}, otherwise
+ *         {@code value + requiredMultiple - (value % requiredMultiple)}
+ */
+ public static int roundUp(int value, int requiredMultiple) {
+ int r = value % requiredMultiple; // how far value lies past the previous multiple
+ if (r == 0) {
+ return value; // already aligned
+ } else {
+ return value + requiredMultiple - r; // step forward to the next multiple
+ }
+ }
+
+ /**
* Reads chars from input stream and puts them into the supplied StringBuilder.
* The stream is closed after successful or unsuccessful read.
*/
diff --git a/src/com/jogamp/opencl/util/pp/Reduction.java b/src/com/jogamp/opencl/util/pp/Reduction.java
index 690a573f..1e75819f 100644
--- a/src/com/jogamp/opencl/util/pp/Reduction.java
+++ b/src/com/jogamp/opencl/util/pp/Reduction.java
@@ -41,6 +41,8 @@ import com.jogamp.opencl.CLProgram;
import com.jogamp.opencl.CLResource;
import com.jogamp.opencl.CLWork.CLWork1D;
import com.jogamp.opencl.util.CLUtil;
+import com.jogamp.opencl.util.concurrent.CLQueueContext;
+import com.jogamp.opencl.util.concurrent.CLTask;
import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
@@ -98,6 +100,14 @@ public class Reduction<B extends Buffer> implements CLResource {
return new Reduction<B>(context, op, elementType);
}
+ public static <B extends Buffer> Reduction<B> create(CLCommandQueue queue, OP op, Class<B> elementType) { // convenience overload taking a queue instead of a context
+ return create(queue.getContext(), op, elementType); // delegates to the context-based factory using the queue's context
+ }
+
+ public static <B extends Buffer> CLTask<CLReductionQueueContext<B>, B> createTask(B input, B output, OP op, Class<B> elementType) { // packages a reduction as a CLTask
+ return new CLReductionTask<B>(input, output, op, elementType); // only stores the arguments; the reduction itself runs in CLReductionTask.execute()
+ }
+
public B reduce(CLCommandQueue queue, B input, B output) {
int length = input.capacity();
@@ -109,7 +119,7 @@ public class Reduction<B extends Buffer> implements CLResource {
int groupSize = (int)reduction.getKernel().getWorkGroupSize(queue.getDevice());
int realSize = length / VECTOR_SIZE;
- int workItems = roundUp(realSize, groupSize*2) / 2;
+ int workItems = CLUtil.roundUp(realSize, groupSize*2) / 2;
int groups = workItems / groupSize;
int sharedBufferSize = groupSize / 2 * ELEMENT.SIZE*VECTOR_SIZE;
@@ -148,15 +158,6 @@ public class Reduction<B extends Buffer> implements CLResource {
return output;
}
-
- private static int roundUp(int globalSize, int groupSize) {
- int r = globalSize % groupSize;
- if (r == 0) {
- return globalSize;
- } else {
- return globalSize + groupSize - r;
- }
- }
private <B extends Buffer> void finishMax(B output, ByteBuffer buffer) {
if(output instanceof ByteBuffer) {
@@ -333,8 +334,13 @@ public class Reduction<B extends Buffer> implements CLResource {
return program == null || program.isReleased();
}
+ @Override
+ public String toString() {
+ return getClass().getSimpleName()+"["+OPERATION+", "+ELEMENT+VECTOR_SIZE+"]"; // simple class name, then OPERATION, then ELEMENT directly followed by VECTOR_SIZE
+ }
+
- public enum OP {ADD, MUL, MIN,MAX}
+ public enum OP {ADD, MUL, MIN, MAX}
private enum TYPE {
@@ -377,15 +383,71 @@ public class Reduction<B extends Buffer> implements CLResource {
}
}
}
+
+ private static class CLReductionTask<B extends Buffer> extends CLTask<CLReductionQueueContext<B>, B> { // CLTask performing a single reduce() call on fixed input/output buffers
+
+ private final B input;
+ private final B output;
+ private final OP op;
+ private final Class<B> elementType;
+ private final Integer KEY; // value handed out by getContextKey(); computed once in the constructor
+
+ private CLReductionTask(B input, B output, OP op, Class<B> elementType) {
+ this.input = input;
+ this.output = output;
+ this.op = op;
+ this.elementType = elementType;
+ this.KEY = op.ordinal() + 100*TYPE.valueOf(elementType).ordinal(); // op ordinal plus 100 * element TYPE ordinal: distinct per (op, type) pair as long as OP has fewer than 100 constants
+ }
- public static void main(String[] args) {
- int groupSize = 1024;
- int localID = 0;
- for(int i = groupSize >> 1; i > 0; i >>= 1) {
- if(localID < i) {
- System.out.println("op("+localID+", "+(localID+i)+")");
- }
- System.out.println("sync "+i);
+ @Override
+ public CLReductionQueueContext<B> createQueueContext(CLCommandQueue queue) {
+ Reduction<B> reduction = Reduction.create(queue, op, elementType); // new Reduction for this task's op and element type, created via the queue
+ return new CLReductionQueueContext<B>(queue, reduction); // the returned context keeps the Reduction and releases it in release()
+ }
+
+ @Override
+ public B execute(CLReductionQueueContext<B> context) {
+ return context.reduction.reduce(context.queue, input, output); // runs the context's Reduction on the context's queue with this task's buffers
+ }
+
+ @Override
+ public Object getContextKey() {
+ return KEY; // equal for tasks created with the same op and element type
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName()+"["+op+", "+elementType+", "+KEY+"]";
+ }
+ }
+
+ /**
+ * Context required for executing {@link Reduction} {@link CLTask}s.
+ * <p>
+ * Holds the {@link Reduction} instance a task executes with; {@link #release()} and
+ * {@link #isReleased()} are forwarded to that instance.
+ * @author Michael Bien
+ */
+ public static class CLReductionQueueContext<B extends Buffer> extends CLQueueContext {
+
+ private final Reduction<B> reduction; // the Reduction used by CLReductionTask.execute()
+
+ private CLReductionQueueContext(CLCommandQueue queue, Reduction<B> reduction) { // private: instances are created by CLReductionTask.createQueueContext()
+ super(queue);
+ this.reduction = reduction;
+ }
+
+ @Override
+ public void release() {
+ reduction.release(); // releasing the context releases the wrapped Reduction
+ }
+
+ @Override
+ public boolean isReleased() {
+ return reduction.isReleased(); // mirrors the released state of the wrapped Reduction
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName()+"["+reduction+"]";
}
}
diff --git a/test/com/jogamp/opencl/CLCommandQueueTest.java b/test/com/jogamp/opencl/CLCommandQueueTest.java
index f5438d40..7efc3aa0 100644
--- a/test/com/jogamp/opencl/CLCommandQueueTest.java
+++ b/test/com/jogamp/opencl/CLCommandQueueTest.java
@@ -51,6 +51,7 @@ import static com.jogamp.opencl.TestUtils.*;
import static com.jogamp.opencl.CLEvent.*;
import static com.jogamp.opencl.CLVersion.*;
import static com.jogamp.common.nio.Buffers.*;
+import static com.jogamp.opencl.util.CLUtil.*;
import static com.jogamp.opencl.CLCommandQueue.Mode.*;
/**
@@ -102,7 +103,7 @@ public class CLCommandQueueTest {
CLDevice device = context.getDevices()[0];
int groupSize = device.getMaxWorkItemSizes()[0];
- final int elements = roundUp(groupSize, ONE_MB / SIZEOF_INT * 5); // 5MB per buffer
+ final int elements = roundUp(ONE_MB / SIZEOF_INT * 5, groupSize); // 5MB per buffer
CLBuffer<ByteBuffer> clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY);
CLBuffer<ByteBuffer> clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY);
@@ -232,7 +233,7 @@ public class CLCommandQueueTest {
CLDevice device = context.getDevices()[0];
int groupSize = device.getMaxWorkItemSizes()[0];
- final int elements = roundUp(groupSize, ONE_MB / SIZEOF_INT * 5); // 5MB per buffer
+ final int elements = roundUp(ONE_MB / SIZEOF_INT * 5, groupSize); // 5MB per buffer
CLBuffer<ByteBuffer> clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY);
CLBuffer<ByteBuffer> clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY);
@@ -301,7 +302,7 @@ public class CLCommandQueueTest {
CLDevice device = context.getDevices()[0];
int groupSize = device.getMaxWorkItemSizes()[0];
- final int elements = roundUp(groupSize, ONE_MB / SIZEOF_INT * 5); // 5MB per buffer
+ final int elements = roundUp(ONE_MB / SIZEOF_INT * 5, groupSize); // 5MB per buffer
CLBuffer<ByteBuffer> clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY);
CLBuffer<ByteBuffer> clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY);
diff --git a/test/com/jogamp/opencl/HighLevelBindingTest.java b/test/com/jogamp/opencl/HighLevelBindingTest.java
index 449ccd7b..db1a9333 100644
--- a/test/com/jogamp/opencl/HighLevelBindingTest.java
+++ b/test/com/jogamp/opencl/HighLevelBindingTest.java
@@ -58,6 +58,7 @@ import static com.jogamp.opencl.util.CLPlatformFilters.*;
import static com.jogamp.opencl.CLVersion.*;
import static com.jogamp.opencl.CLDevice.Type.*;
import static com.jogamp.common.nio.Buffers.*;
+import static com.jogamp.opencl.util.CLUtil.*;
/**
* Test testing the high level bindings.
@@ -376,7 +377,7 @@ public class HighLevelBindingTest {
int elementCount = 11444777; // Length of float arrays to process (odd # for illustration)
int localWorkSize = device.getMaxWorkItemSizes()[0]; // set and log Global and Local work size dimensions
- int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize
+ int globalWorkSize = roundUp(elementCount, localWorkSize); // rounded up to the nearest multiple of the LocalWorkSize
out.println("allocateing buffers of size: "+globalWorkSize);
diff --git a/test/com/jogamp/opencl/LowLevelBindingTest.java b/test/com/jogamp/opencl/LowLevelBindingTest.java
index 655bfc64..2c54a5ad 100644
--- a/test/com/jogamp/opencl/LowLevelBindingTest.java
+++ b/test/com/jogamp/opencl/LowLevelBindingTest.java
@@ -267,7 +267,7 @@ public class LowLevelBindingTest {
checkError("on clCreateCommandQueue", intBuffer.get(0));
int localWorkSize = Math.min(128, maxWGS); // set and log Global and Local work size dimensions
- int globalWorkSize = roundUp(localWorkSize, ELEMENT_COUNT); // rounded up to the nearest multiple of the LocalWorkSize
+ int globalWorkSize = roundUp(ELEMENT_COUNT, localWorkSize); // rounded up to the nearest multiple of the LocalWorkSize
out.println("allocateing buffers of size: "+globalWorkSize);
diff --git a/test/com/jogamp/opencl/TestUtils.java b/test/com/jogamp/opencl/TestUtils.java
index efe6855e..e7e5fabe 100644
--- a/test/com/jogamp/opencl/TestUtils.java
+++ b/test/com/jogamp/opencl/TestUtils.java
@@ -69,15 +69,6 @@ public class TestUtils {
return buffer;
}
- public static int roundUp(int groupSize, int globalSize) {
- int r = globalSize % groupSize;
- if (r == 0) {
- return globalSize;
- } else {
- return globalSize + groupSize - r;
- }
- }
-
public static void checkIfEqual(ByteBuffer a, ByteBuffer b, int elements) {
for(int i = 0; i < elements; i++) {
int aVal = a.getInt();