diff options
Diffstat (limited to 'test/com/mbien/opencl/CLConcurrencyTest.java')
-rw-r--r-- | test/com/mbien/opencl/CLConcurrencyTest.java | 126 |
1 files changed, 120 insertions, 6 deletions
diff --git a/test/com/mbien/opencl/CLConcurrencyTest.java b/test/com/mbien/opencl/CLConcurrencyTest.java
index 057b57ee..dfaa4e49 100644
--- a/test/com/mbien/opencl/CLConcurrencyTest.java
+++ b/test/com/mbien/opencl/CLConcurrencyTest.java
@@ -1,7 +1,6 @@
 package com.mbien.opencl;
 
 import com.mbien.opencl.CLBuffer.Mem;
-import com.mbien.opencl.CLCommandQueue.Mode;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import org.junit.Test;
@@ -45,9 +44,8 @@ public class CLConcurrencyTest {
 
         assertEquals(0, events.size());
 
-        // asynchronous write of data to GPU device, blocking read later to get the computed results back.
-        queue.putWriteBuffer(clBufferA, events, false) // write A
-             .putWriteBuffer(clBufferB, events, false); // write B
+        queue.putWriteBuffer(clBufferA, false, events) // write A
+             .putWriteBuffer(clBufferB, false, events); // write B
 
         assertEquals(2, events.size());
         queue.putWaitForEvents(events);
@@ -56,10 +54,10 @@ public class CLConcurrencyTest {
         assertEquals(0, events.size());
 
         vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B
-        queue.put1DRangeKernel(vectorAddKernel, events, 0, elements, 256);
+        queue.put1DRangeKernel(vectorAddKernel, 0, elements, 256, events);
 
         vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferD); // D = A+B
-        queue.put1DRangeKernel(vectorAddKernel, events, 0, elements, 256);
+        queue.put1DRangeKernel(vectorAddKernel, 0, elements, 256, events);
 
         assertEquals(2, events.size());
         queue.putWaitForEvent(events, 0)
@@ -80,4 +78,120 @@ public class CLConcurrencyTest {
 
     }
 
+    @Test
+    public void concurrencyTest() throws IOException, InterruptedException {
+        // Runs the same vector addition from two threads, each on its own queue and device, then compares both results.
+        out.println(" - - - queue synchronisation test - - - ");
+
+        final int elements = ONE_MB/SIZEOF_INT * 10; // 10MB per buffer
+
+        CLContext context = CLContext.create();
+
+        CLDevice[] devices = context.getCLDevices();
+
+        if(devices.length < 2) {
+            out.println("aborting test... need at least 2 devices");
+            context.release();
+            return;
+        }
+
+        final CLBuffer<ByteBuffer> clBufferC = context.createByteBuffer(elements*SIZEOF_INT, Mem.WRITE_ONLY); // kernel output
+        final CLBuffer<ByteBuffer> clBufferD = context.createByteBuffer(elements*SIZEOF_INT, Mem.WRITE_ONLY); // kernel output
+
+        final CLBuffer<ByteBuffer> clBufferA1 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+        final CLBuffer<ByteBuffer> clBufferB1 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+        final CLBuffer<ByteBuffer> clBufferA2 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+        final CLBuffer<ByteBuffer> clBufferB2 = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY);
+
+        CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build();
+
+        final CLKernel vectorAddKernel1 = program.getCLKernel("VectorAddGM")
+                                                 .setArg(3, elements);
+
+        //TODO introduce public api for cloning/creating kernels
+        final CLKernel vectorAddKernel2 = vectorAddKernel1.copy()
+                                                          .setArg(3, elements);
+
+
+        int secondDevice = devices.length > 1 ? 1 : 0; // always 1 here: fewer than 2 devices returned early above
+
+        final CLCommandQueue queue1 = devices[0].createCommandQueue();
+        final CLCommandQueue queue2 = devices[secondDevice].createCommandQueue();
+
+        if(secondDevice > 0)
+            System.out.println("using two devices");
+
+        final QueueBarrier barrier = new QueueBarrier(2);
+
+        Thread thread1 = new Thread("C") {
+
+            @Override
+            public void run() {
+
+                fillBuffer(clBufferA1.buffer, 12345);
+                fillBuffer(clBufferB1.buffer, 67890);
+
+                // enqueue the writes of both input buffers on queue1
+                queue1.putWriteBuffer(clBufferA1, false) // write A
+                      .putWriteBuffer(clBufferB1, true); // write B
+
+                // buffers for this run; arg 3 (element count) was already set on the kernel above
+                vectorAddKernel1.setArgs(clBufferA1, clBufferB1, clBufferC); // C = A+B
+
+                // enqueue the kernel and the read-back of C, passing events1 to both calls
+                CLEventList events1 = new CLEventList(2);
+                queue1.put1DRangeKernel(vectorAddKernel1, 0, elements, 256, events1)
+                      .putReadBuffer(clBufferC, false, events1);
+
+                barrier.waitFor(queue1, events1);
+
+            }
+
+        };
+
+        Thread thread2 = new Thread("D") {
+
+            @Override
+            public void run() {
+
+                fillBuffer(clBufferA2.buffer, 12345);
+                fillBuffer(clBufferB2.buffer, 67890);
+
+                // enqueue the writes of both input buffers on queue2
+                queue2.putWriteBuffer(clBufferA2, false) // write A
+                      .putWriteBuffer(clBufferB2, true); // write B
+
+                // buffers for this run; arg 3 (element count) was already set on the kernel copy above
+                vectorAddKernel2.setArgs(clBufferA2, clBufferB2, clBufferD); // D = A+B
+
+                // enqueue the kernel and the read-back of D, passing events2 to both calls
+                CLEventList events2 = new CLEventList(2);
+                queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, 256, events2)
+                      .putReadBuffer(clBufferD, false, events2);
+
+                barrier.waitFor(queue2, events2);
+
+            }
+
+        };
+
+        out.println("starting threads");
+        thread1.start();
+        thread2.start();
+        barrier.await();
+        out.println("done");
+
+        checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements);
+
+        context.release();
+
+        // NOTE(review): the kernel copy is never released explicitly (vectorAddKernel2.release() was disabled) - confirm context.release() covers it
+
+        out.println("results are valid");
+
+    }
+
+
+
+
 }
\ No newline at end of file |