summary | refs | log | tree | commit | diff | stats
path: root/test/com/mbien/opencl
diff options
context:
space:
mode:
author: Michael Bien <[email protected]> 2010-01-14 17:38:34 +0100
committer: Michael Bien <[email protected]> 2010-01-14 17:38:34 +0100
commit: 9e650242da44a939e6a4c1e3c06d77c2e668a3e0 (patch)
tree: b0272560299430beb79915c9f701994bc40a10d5 /test/com/mbien/opencl
parent: 9343c3ef5829f74207a8d220cb3b082211b910f2 (diff)
cleaned up NioDirectOnly list, added clSetKernelArg to list.
added experimental QueueBarrier for easy synchronization between multiple concurrent CLCommandQueues. refactored CLCommandQueue, added putTask(). added another concurrency JUnit test.
Diffstat (limited to 'test/com/mbien/opencl')
-rw-r--r-- test/com/mbien/opencl/CLConcurrencyTest.java 126
1 files changed, 120 insertions, 6 deletions
diff --git a/test/com/mbien/opencl/CLConcurrencyTest.java b/test/com/mbien/opencl/CLConcurrencyTest.java
index 057b57ee..dfaa4e49 100644
--- a/test/com/mbien/opencl/CLConcurrencyTest.java
+++ b/test/com/mbien/opencl/CLConcurrencyTest.java
@@ -1,7 +1,6 @@
package com.mbien.opencl;
import com.mbien.opencl.CLBuffer.Mem;
-import com.mbien.opencl.CLCommandQueue.Mode;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.junit.Test;
@@ -45,9 +44,8 @@ public class CLConcurrencyTest {
assertEquals(0, events.size());
- // asynchronous write of data to GPU device, blocking read later to get the computed results back.
- queue.putWriteBuffer(clBufferA, events, false) // write A
- .putWriteBuffer(clBufferB, events, false); // write B
+ queue.putWriteBuffer(clBufferA, false, events) // write A
+ .putWriteBuffer(clBufferB, false, events); // write B
assertEquals(2, events.size());
queue.putWaitForEvents(events);
@@ -56,10 +54,10 @@ public class CLConcurrencyTest {
assertEquals(0, events.size());
vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B
- queue.put1DRangeKernel(vectorAddKernel, events, 0, elements, 256);
+ queue.put1DRangeKernel(vectorAddKernel, 0, elements, 256, events);
vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferD); // D = A+B
- queue.put1DRangeKernel(vectorAddKernel, events, 0, elements, 256);
+ queue.put1DRangeKernel(vectorAddKernel, 0, elements, 256, events);
assertEquals(2, events.size());
queue.putWaitForEvent(events, 0)
@@ -80,4 +78,120 @@ public class CLConcurrencyTest {
}
+ @Test
+ public void concurrencyTest() throws IOException, InterruptedException {
+ // Runs the "VectorAddGM" kernel on two command queues from two threads, then checks that the result buffers C and D match.
+ out.println(" - - - queue synchronisation test - - - ");
+
+ final int elements = ONE_MB/SIZEOF_INT * 10; // each buffer below is elements*SIZEOF_INT bytes, i.e. about 10 * ONE_MB
+
+ CLContext context = CLContext.create();
+
+ CLDevice[] devices = context.getCLDevices();
+ // With fewer than two devices the test is skipped (returns without asserting) after releasing the context.
+ if(devices.length < 2) {
+ out.println("aborting test... need at least 2 devices");
+ context.release();
+ return;
+ }
+ // NOTE(review): C and D are the third kernel argument ("C = A+B" / "D = A+B") yet use Mem.READ_ONLY like the inputs - confirm the intended flags.
+ final CLBuffer<ByteBuffer> clBufferC = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ final CLBuffer<ByteBuffer> clBufferD = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ // Each worker thread gets its own pair of input buffers: A1/B1 for thread "C", A2/B2 for thread "D".
+ final CLBuffer<ByteBuffer> clBufferA1 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ final CLBuffer<ByteBuffer> clBufferB1 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ final CLBuffer<ByteBuffer> clBufferA2 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+ final CLBuffer<ByteBuffer> clBufferB2 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+
+ CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build();
+ // Argument index 3 is set to the element count on both kernel objects; indices 0-2 are set later via setArgs.
+ final CLKernel vectorAddKernel1 = program.getCLKernel("VectorAddGM")
+ .setArg(3, elements);
+ // A second kernel object is used so that each thread calls setArgs on its own instance.
+ //TODO introduce public api for cloning/creating kernels
+ final CLKernel vectorAddKernel2 = vectorAddKernel1.copy()
+ .setArg(3, elements);
+
+ // NOTE(review): the early return above guarantees devices.length >= 2, so secondDevice is always 1 here.
+ int secondDevice = devices.length > 1 ? 1 : 0;
+
+ final CLCommandQueue queue1 = devices[0 ].createCommandQueue();
+ final CLCommandQueue queue2 = devices[secondDevice].createCommandQueue();
+
+ if(secondDevice > 0)
+ System.out.println("using two devices");
+ // Shared by both worker threads (waitFor) and the test thread (await); 2 is presumably the number of queues - confirm against QueueBarrier.
+ final QueueBarrier barrier = new QueueBarrier(2);
+
+ Thread thread1 = new Thread("C") {
+ // Worker for result buffer C: uploads A1/B1 on queue1, enqueues the kernel and the read-back of C.
+ @Override
+ public void run() {
+ // Same fill arguments as thread "D"; the final check relies on both threads producing identical inputs.
+ fillBuffer(clBufferA1.buffer, 12345);
+ fillBuffer(clBufferB1.buffer, 67890);
+ // The boolean is presumably the blocking flag (A non-blocking, B blocking) - TODO confirm against CLCommandQueue.
+// System.out.println("C buffer");
+ queue1.putWriteBuffer(clBufferA1, false) // write A
+ .putWriteBuffer(clBufferB1, true); // write B
+
+// System.out.println("C args");
+ vectorAddKernel1.setArgs(clBufferA1, clBufferB1, clBufferC); // C = A+B
+ // The event list is passed to both the kernel launch and the read of C, then handed to the barrier.
+// System.out.println("C kernels");
+ CLEventList events1 = new CLEventList(2);
+ queue1.put1DRangeKernel(vectorAddKernel1, 0, elements, 256, events1)
+ .putReadBuffer(clBufferC, false, events1);
+ // Exact semantics of waitFor are defined by QueueBarrier, which is not shown here.
+ barrier.waitFor(queue1, events1);
+
+ }
+
+ };
+
+ Thread thread2 = new Thread("D") {
+ // Worker for result buffer D: same steps as thread "C", using queue2, kernel 2 and buffers A2/B2.
+ @Override
+ public void run() {
+ // Same fill arguments as thread "C".
+ fillBuffer(clBufferA2.buffer, 12345);
+ fillBuffer(clBufferB2.buffer, 67890);
+
+// System.out.println("D buffer");
+ queue2.putWriteBuffer(clBufferA2, false) // write A
+ .putWriteBuffer(clBufferB2, true); // write B
+
+// System.out.println("D args");
+ vectorAddKernel2.setArgs(clBufferA2, clBufferB2, clBufferD); // D = A+B
+ // The event list is passed to both the kernel launch and the read of D, then handed to the barrier.
+// System.out.println("D kernels");
+ CLEventList events2 = new CLEventList(2);
+ queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, 256, events2)
+ .putReadBuffer(clBufferD, false, events2);
+
+ barrier.waitFor(queue2, events2);
+
+ }
+
+ };
+
+ out.println("starting threads");
+ thread1.start();
+ thread2.start();
+ barrier.await(); // presumably blocks the test thread until both workers have passed the barrier - confirm against QueueBarrier
+ out.println("done");
+
+ checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements);
+ // NOTE(review): there is no try/finally, so this release is skipped if anything above throws.
+ context.release();
+
+// vectorAddKernel2.release();
+
+ out.println("results are valid");
+
+ }
+
+
+
+
} \ No newline at end of file