diff options
author | Michael Bien <[email protected]> | 2010-01-14 17:38:34 +0100 |
---|---|---|
committer | Michael Bien <[email protected]> | 2010-01-14 17:38:34 +0100 |
commit | 9e650242da44a939e6a4c1e3c06d77c2e668a3e0 (patch) | |
tree | b0272560299430beb79915c9f701994bc40a10d5 /test/com/mbien/opencl | |
parent | 9343c3ef5829f74207a8d220cb3b082211b910f2 (diff) |
cleaned up NioDirectOnly list, added clSetKernelArg to list.
added experimental QueueBarrier for easy synchronization between multiple concurrent CLCommandQueues.
refactored CLCommandQueue, added putTask().
added another concurrency JUnit test.
Diffstat (limited to 'test/com/mbien/opencl')
-rw-r--r-- | test/com/mbien/opencl/CLConcurrencyTest.java | 126 |
1 files changed, 120 insertions, 6 deletions
diff --git a/test/com/mbien/opencl/CLConcurrencyTest.java b/test/com/mbien/opencl/CLConcurrencyTest.java index 057b57ee..dfaa4e49 100644 --- a/test/com/mbien/opencl/CLConcurrencyTest.java +++ b/test/com/mbien/opencl/CLConcurrencyTest.java @@ -1,7 +1,6 @@ package com.mbien.opencl; import com.mbien.opencl.CLBuffer.Mem; -import com.mbien.opencl.CLCommandQueue.Mode; import java.io.IOException; import java.nio.ByteBuffer; import org.junit.Test; @@ -45,9 +44,8 @@ public class CLConcurrencyTest { assertEquals(0, events.size()); - // asynchronous write of data to GPU device, blocking read later to get the computed results back. - queue.putWriteBuffer(clBufferA, events, false) // write A - .putWriteBuffer(clBufferB, events, false); // write B + queue.putWriteBuffer(clBufferA, false, events) // write A + .putWriteBuffer(clBufferB, false, events); // write B assertEquals(2, events.size()); queue.putWaitForEvents(events); @@ -56,10 +54,10 @@ public class CLConcurrencyTest { assertEquals(0, events.size()); vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B - queue.put1DRangeKernel(vectorAddKernel, events, 0, elements, 256); + queue.put1DRangeKernel(vectorAddKernel, 0, elements, 256, events); vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferD); // D = A+B - queue.put1DRangeKernel(vectorAddKernel, events, 0, elements, 256); + queue.put1DRangeKernel(vectorAddKernel, 0, elements, 256, events); assertEquals(2, events.size()); queue.putWaitForEvent(events, 0) @@ -80,4 +78,120 @@ public class CLConcurrencyTest { } + @Test + public void concurrencyTest() throws IOException, InterruptedException { + + out.println(" - - - queue synchronisation test - - - "); + + final int elements = ONE_MB/SIZEOF_INT * 10; // 20MB per buffer + + CLContext context = CLContext.create(); + + CLDevice[] devices = context.getCLDevices(); + + if(devices.length < 2) { + out.println("aborting test... need at least 2 devices"); + context.release(); + return; + } + + final CLBuffer<ByteBuffer> clBufferC = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + final CLBuffer<ByteBuffer> clBufferD = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + + final CLBuffer<ByteBuffer> clBufferA1 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + final CLBuffer<ByteBuffer> clBufferB1 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + final CLBuffer<ByteBuffer> clBufferA2 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + final CLBuffer<ByteBuffer> clBufferB2 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); + + final CLKernel vectorAddKernel1 = program.getCLKernel("VectorAddGM") + .setArg(3, elements); + + //TODO introduce public api for cloning/creating kernels + final CLKernel vectorAddKernel2 = vectorAddKernel1.copy() + .setArg(3, elements); + + + int secondDevice = devices.length > 1 ? 1 : 0; + + final CLCommandQueue queue1 = devices[0 ].createCommandQueue(); + final CLCommandQueue queue2 = devices[secondDevice].createCommandQueue(); + + if(secondDevice > 0) + System.out.println("using two devices"); + + final QueueBarrier barrier = new QueueBarrier(2); + + Thread thread1 = new Thread("C") { + + @Override + public void run() { + + fillBuffer(clBufferA1.buffer, 12345); + fillBuffer(clBufferB1.buffer, 67890); + +// System.out.println("C buffer"); + queue1.putWriteBuffer(clBufferA1, false) // write A + .putWriteBuffer(clBufferB1, true); // write B + +// System.out.println("C args"); + vectorAddKernel1.setArgs(clBufferA1, clBufferB1, clBufferC); // C = A+B + +// System.out.println("C kernels"); + CLEventList events1 = new CLEventList(2); + queue1.put1DRangeKernel(vectorAddKernel1, 0, elements, 256, events1) + .putReadBuffer(clBufferC, false, events1); + + barrier.waitFor(queue1, events1); + + } + + }; + + Thread thread2 = new Thread("D") { + + @Override + public void run() { + + fillBuffer(clBufferA2.buffer, 12345); + fillBuffer(clBufferB2.buffer, 67890); + +// System.out.println("D buffer"); + queue2.putWriteBuffer(clBufferA2, false) // write A + .putWriteBuffer(clBufferB2, true); // write B + +// System.out.println("D args"); + vectorAddKernel2.setArgs(clBufferA2, clBufferB2, clBufferD); // D = A+B + +// System.out.println("D kernels"); + CLEventList events2 = new CLEventList(2); + queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, 256, events2) + .putReadBuffer(clBufferD, false, events2); + + barrier.waitFor(queue2, events2); + + } + + }; + + out.println("starting threads"); + thread1.start(); + thread2.start(); + barrier.await(); + out.println("done"); + + checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements); + + context.release(); + +// vectorAddKernel2.release(); + + out.println("results are valid"); + + } + + + + }
\ No newline at end of file |