summaryrefslogtreecommitdiffstats
path: root/test/com/mbien/opencl/CLConcurrencyTest.java
diff options
context:
space:
mode:
Diffstat (limited to 'test/com/mbien/opencl/CLConcurrencyTest.java')
-rw-r--r-- test/com/mbien/opencl/CLConcurrencyTest.java 126
1 files changed, 120 insertions, 6 deletions
diff --git a/test/com/mbien/opencl/CLConcurrencyTest.java b/test/com/mbien/opencl/CLConcurrencyTest.java
index 057b57ee..dfaa4e49 100644
--- a/test/com/mbien/opencl/CLConcurrencyTest.java
+++ b/test/com/mbien/opencl/CLConcurrencyTest.java
@@ -1,7 +1,6 @@
package com.mbien.opencl;
import com.mbien.opencl.CLBuffer.Mem;
-import com.mbien.opencl.CLCommandQueue.Mode;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.junit.Test;
@@ -45,9 +44,8 @@ public class CLConcurrencyTest {
assertEquals(0, events.size());
- // asynchronous write of data to GPU device, blocking read later to get the computed results back.
- queue.putWriteBuffer(clBufferA, events, false) // write A
- .putWriteBuffer(clBufferB, events, false); // write B
+ queue.putWriteBuffer(clBufferA, false, events) // write A
+ .putWriteBuffer(clBufferB, false, events); // write B
assertEquals(2, events.size());
queue.putWaitForEvents(events);
@@ -56,10 +54,10 @@ public class CLConcurrencyTest {
assertEquals(0, events.size());
vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B
- queue.put1DRangeKernel(vectorAddKernel, events, 0, elements, 256);
+ queue.put1DRangeKernel(vectorAddKernel, 0, elements, 256, events);
vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferD); // D = A+B
- queue.put1DRangeKernel(vectorAddKernel, events, 0, elements, 256);
+ queue.put1DRangeKernel(vectorAddKernel, 0, elements, 256, events);
assertEquals(2, events.size());
queue.putWaitForEvent(events, 0)
@@ -80,4 +78,120 @@ public class CLConcurrencyTest {
}
+ @Test
+ public void concurrencyTest() throws IOException, InterruptedException {
+ // Runs the "VectorAddGM" kernel on two command queues from two threads and checks that both result buffers are equal.
+ out.println(" - - - queue synchronisation test - - - ");
+
+ final int elements = ONE_MB/SIZEOF_INT * 10; // each buffer holds elements*SIZEOF_INT bytes = 10 * ONE_MB (assuming SIZEOF_INT divides ONE_MB)
+
+ CLContext context = CLContext.create();
+
+ CLDevice[] devices = context.getCLDevices();
+ // the test needs two devices; with fewer it releases the context and returns without checking anything
+ if(devices.length < 2) {
+ out.println("aborting test... need at least 2 devices");
+ context.release();
+ return;
+ }
+ // result buffers: C is used by thread "C", D by thread "D". NOTE(review): both look like kernel outputs yet use Mem.READ_ONLY — confirm the flag
+ final CLBuffer<ByteBuffer> clBufferC = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ final CLBuffer<ByteBuffer> clBufferD = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ // input buffers, one A/B pair per thread; their host side is filled via fillBuffer inside each thread
+ final CLBuffer<ByteBuffer> clBufferA1 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ final CLBuffer<ByteBuffer> clBufferB1 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ final CLBuffer<ByteBuffer> clBufferA2 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ final CLBuffer<ByteBuffer> clBufferB2 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+
+ CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build();
+ // one kernel object per thread; argument index 3 is set to the element count on each of them
+ final CLKernel vectorAddKernel1 = program.getCLKernel("VectorAddGM")
+ .setArg(3, elements);
+
+ //TODO introduce public api for cloning/creating kernels
+ final CLKernel vectorAddKernel2 = vectorAddKernel1.copy()
+ .setArg(3, elements);
+
+ // NOTE(review): after the devices.length < 2 guard above, this expression always evaluates to 1
+ int secondDevice = devices.length > 1 ? 1 : 0;
+
+ final CLCommandQueue queue1 = devices[0 ].createCommandQueue();
+ final CLCommandQueue queue2 = devices[secondDevice].createCommandQueue();
+
+ if(secondDevice > 0)
+ System.out.println("using two devices");
+ // shared by both worker threads and the test thread; QueueBarrier is defined elsewhere — presumably 2 is the number of participating queues, TODO confirm
+ final QueueBarrier barrier = new QueueBarrier(2);
+ // worker "C": uploads A1/B1 on queue1, enqueues the kernel with C as third argument, enqueues a read of C, then reports to the barrier
+ Thread thread1 = new Thread("C") {
+
+ @Override
+ public void run() {
+
+ fillBuffer(clBufferA1.buffer, 12345);
+ fillBuffer(clBufferB1.buffer, 67890);
+ // NOTE(review): the boolean is presumably the blocking flag (false = asynchronous, true = wait) — confirm against CLCommandQueue
+// System.out.println("C buffer");
+ queue1.putWriteBuffer(clBufferA1, false) // write A
+ .putWriteBuffer(clBufferB1, true); // write B
+
+// System.out.println("C args");
+ vectorAddKernel1.setArgs(clBufferA1, clBufferB1, clBufferC); // C = A+B
+
+// System.out.println("C kernels");
+ CLEventList events1 = new CLEventList(2);
+ queue1.put1DRangeKernel(vectorAddKernel1, 0, elements, 256, events1)
+ .putReadBuffer(clBufferC, false, events1);
+ // events1 was passed to both enqueue calls above; it is handed to the barrier together with the queue
+ barrier.waitFor(queue1, events1);
+
+ }
+
+ };
+ // worker "D": same sequence as worker "C", but on queue2 with the second kernel object, A2/B2 as inputs and D as third argument
+ Thread thread2 = new Thread("D") {
+
+ @Override
+ public void run() {
+
+ fillBuffer(clBufferA2.buffer, 12345);
+ fillBuffer(clBufferB2.buffer, 67890);
+
+// System.out.println("D buffer");
+ queue2.putWriteBuffer(clBufferA2, false) // write A
+ .putWriteBuffer(clBufferB2, true); // write B
+
+// System.out.println("D args");
+ vectorAddKernel2.setArgs(clBufferA2, clBufferB2, clBufferD); // D = A+B
+
+// System.out.println("D kernels");
+ CLEventList events2 = new CLEventList(2);
+ queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, 256, events2)
+ .putReadBuffer(clBufferD, false, events2);
+ // events2 was passed to both enqueue calls above; it is handed to the barrier together with the queue
+ barrier.waitFor(queue2, events2);
+
+ }
+
+ };
+ // start both workers, then call barrier.await() on the test thread before comparing results; the threads themselves are never joined
+ out.println("starting threads");
+ thread1.start();
+ thread2.start();
+ barrier.await();
+ out.println("done");
+ // both workers pass identical arguments to fillBuffer, so C and D are expected to hold the same data
+ checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements);
+ // NOTE(review): context.release() is skipped if anything above throws — consider try/finally
+ context.release();
+
+// vectorAddKernel2.release();
+
+ out.println("results are valid");
+
+ }
+
+
+
+
} \ No newline at end of file