From 9e650242da44a939e6a4c1e3c06d77c2e668a3e0 Mon Sep 17 00:00:00 2001 From: Michael Bien Date: Thu, 14 Jan 2010 17:38:34 +0100 Subject: cleaned up NioDirectOnly list, added clSetKernelArg to list. added experimental QueueBarrier for easy synchronization between multiple concurrent CLCommandQueues. refactored CLCommandQueue, added putTask(). added another concurrency JUnit test. --- src/com/mbien/opencl/CLCommandQueue.java | 56 ++++++++++++++++++++------------ src/com/mbien/opencl/CLKernel.java | 18 +++++++--- src/com/mbien/opencl/QueueBarrier.java | 48 +++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 24 deletions(-) create mode 100644 src/com/mbien/opencl/QueueBarrier.java (limited to 'src/com') diff --git a/src/com/mbien/opencl/CLCommandQueue.java b/src/com/mbien/opencl/CLCommandQueue.java index 38ec7274..4e88ff1d 100644 --- a/src/com/mbien/opencl/CLCommandQueue.java +++ b/src/com/mbien/opencl/CLCommandQueue.java @@ -48,11 +48,10 @@ public class CLCommandQueue implements CLResource { } public CLCommandQueue putWriteBuffer(CLBuffer writeBuffer, boolean blockingRead) { - return putWriteBuffer(writeBuffer, null, blockingRead); + return putWriteBuffer(writeBuffer, blockingRead, null); } - public CLCommandQueue putWriteBuffer(CLBuffer writeBuffer, CLEventList events, boolean blockingWrite) { - PointerBuffer pb = PointerBuffer.allocateDirect(2); + public CLCommandQueue putWriteBuffer(CLBuffer writeBuffer, boolean blockingWrite, CLEventList events) { int ret = cl.clEnqueueWriteBuffer( ID, writeBuffer.ID, blockingWrite ? 
CL_TRUE : CL_FALSE, @@ -70,11 +69,11 @@ public class CLCommandQueue implements CLResource { } public CLCommandQueue putReadBuffer(CLBuffer readBuffer, boolean blockingRead) { - putReadBuffer(readBuffer, null, blockingRead); + putReadBuffer(readBuffer, blockingRead, null); return this; } - public CLCommandQueue putReadBuffer(CLBuffer readBuffer, CLEventList events, boolean blockingRead) { + public CLCommandQueue putReadBuffer(CLBuffer readBuffer, boolean blockingRead, CLEventList events) { int ret = cl.clEnqueueReadBuffer( ID, readBuffer.ID, blockingRead ? CL_TRUE : CL_FALSE, @@ -150,11 +149,6 @@ public class CLCommandQueue implements CLResource { return this; } - public CLCommandQueue putTask() { - - return this; - } - public CLBuffer putMapBuffer() { return null; @@ -198,12 +192,35 @@ public class CLCommandQueue implements CLResource { return this; } + /** + * {@link #putTask} equivalent to calling + * {@link #put1DRangeKernel(CLKernel kernel, long globalWorkOffset, long globalWorkSize, long localWorkSize)} + * with globalWorkOffset = null, globalWorkSize set to 1, and localWorkSize set to 1. + */ + public CLCommandQueue putTask(CLKernel kernel) { + int ret = cl.clEnqueueTask(ID, kernel.ID, 0, null, null); + checkForError(ret, "can not enqueue Task"); + return this; + } + + /** + * @see #putTask(com.mbien.opencl.CLKernel) + */ + public CLCommandQueue putTask(CLKernel kernel, CLEventList events) { + int ret = cl.clEnqueueTask(ID, kernel.ID, 0, null, events==null ? 
null : events.IDs); + checkForError(ret, "can not enqueue Task"); + if(events != null) { + events.createEvent(context); + } + return this; + } + public CLCommandQueue put1DRangeKernel(CLKernel kernel, long globalWorkOffset, long globalWorkSize, long localWorkSize) { - this.put1DRangeKernel(kernel, null, globalWorkOffset, globalWorkSize, localWorkSize); + this.put1DRangeKernel(kernel, globalWorkOffset, globalWorkSize, localWorkSize, null); return this; } - public CLCommandQueue put1DRangeKernel(CLKernel kernel, CLEventList events, long globalWorkOffset, long globalWorkSize, long localWorkSize) { + public CLCommandQueue put1DRangeKernel(CLKernel kernel, long globalWorkOffset, long globalWorkSize, long localWorkSize, CLEventList events) { PointerBuffer globWO = null; PointerBuffer globWS = null; PointerBuffer locWS = null; @@ -218,25 +235,24 @@ public class CLCommandQueue implements CLResource { locWS = bufferC.put(1, localWorkSize).position(1); } - this.putNDRangeKernel(kernel, events, 1, globWO, globWS, locWS); + this.putNDRangeKernel(kernel, 1, globWO, globWS, locWS, events); return this; } public CLCommandQueue put2DRangeKernel(CLKernel kernel, long globalWorkOffsetX, long globalWorkOffsetY, long globalWorkSizeX, long globalWorkSizeY, long localWorkSizeX, long localWorkSizeY) { - this.put2DRangeKernel(kernel, null, + this.put2DRangeKernel(kernel, globalWorkOffsetX, globalWorkOffsetY, globalWorkSizeX, globalWorkSizeY, - localWorkSizeX, localWorkSizeY); + localWorkSizeX, localWorkSizeY, null); return this; } - public CLCommandQueue put2DRangeKernel(CLKernel kernel, CLEventList events, - long globalWorkOffsetX, long globalWorkOffsetY, + public CLCommandQueue put2DRangeKernel(CLKernel kernel, long globalWorkOffsetX, long globalWorkOffsetY, long globalWorkSizeX, long globalWorkSizeY, - long localWorkSizeX, long localWorkSizeY) { + long localWorkSizeX, long localWorkSizeY, CLEventList events) { PointerBuffer globalWorkOffset = null; PointerBuffer globalWorkSize = null; 
PointerBuffer localWorkSize = null; @@ -255,11 +271,11 @@ public class CLCommandQueue implements CLResource { } public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, PointerBuffer globalWorkOffset, PointerBuffer globalWorkSize, PointerBuffer localWorkSize) { - this.putNDRangeKernel(kernel, null, workDimension, globalWorkOffset, globalWorkSize, localWorkSize); + this.putNDRangeKernel(kernel, workDimension, globalWorkOffset, globalWorkSize, localWorkSize, null); return this; } - public CLCommandQueue putNDRangeKernel(CLKernel kernel, CLEventList events, int workDimension, PointerBuffer globalWorkOffset, PointerBuffer globalWorkSize, PointerBuffer localWorkSize) { + public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, PointerBuffer globalWorkOffset, PointerBuffer globalWorkSize, PointerBuffer localWorkSize, CLEventList events) { int ret = cl.clEnqueueNDRangeKernel( ID, kernel.ID, workDimension, diff --git a/src/com/mbien/opencl/CLKernel.java b/src/com/mbien/opencl/CLKernel.java index e7ac4b4d..2115a9f8 100644 --- a/src/com/mbien/opencl/CLKernel.java +++ b/src/com/mbien/opencl/CLKernel.java @@ -26,12 +26,15 @@ public class CLKernel implements CLResource { private final CLProgram program; private final CL cl; + private final ByteBuffer buffer; + private int argIndex; CLKernel(CLProgram program, long id) { this.ID = id; this.program = program; this.cl = program.context.cl; + this.buffer = BufferFactory.newDirectByteBuffer(8); long[] longArray = new long[1]; @@ -136,19 +139,19 @@ public class CLKernel implements CLResource { } private final Buffer wrap(float value) { - return BufferFactory.newDirectByteBuffer(4).putFloat(value).rewind(); + return buffer.putFloat(value).rewind(); } private final Buffer wrap(double value) { - return BufferFactory.newDirectByteBuffer(8).putDouble(value).rewind(); + return buffer.putDouble(value).rewind(); } private final Buffer wrap(int value) { - return 
BufferFactory.newDirectByteBuffer(4).putInt(value).rewind(); + return buffer.putInt(value).rewind(); } private final Buffer wrap(long value) { - return BufferFactory.newDirectByteBuffer(8).putLong(value).rewind(); + return buffer.putLong(value).rewind(); } public CLKernel rewind() { @@ -196,5 +199,12 @@ public class CLKernel implements CLResource { hash = 43 * hash + (this.program != null ? this.program.hashCode() : 0); return hash; } + + CLKernel copy() { + int[] err = new int[1]; + long newID = cl.clCreateKernel(program.ID, name, err, 0); + checkForError(err[0], "can not copy kernel"); + return new CLKernel(program, newID); + } } diff --git a/src/com/mbien/opencl/QueueBarrier.java b/src/com/mbien/opencl/QueueBarrier.java new file mode 100644 index 00000000..247ede7a --- /dev/null +++ b/src/com/mbien/opencl/QueueBarrier.java @@ -0,0 +1,48 @@ +package com.mbien.opencl; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +/** + * + * @author Michael Bien + */ +public class QueueBarrier { + + private final CountDownLatch latch; + + public QueueBarrier(int queueCount) { + this.latch = new CountDownLatch(queueCount); + } + + /** + * Blocks the current Thread until the given events on the CLCommandQueue occurred. + * This method may be invoked concurrently without synchronization on the QueueBarrier object + * as long as each Thread passes a distinct CLCommandQueue as parameter to this method. + */ + public QueueBarrier waitFor(CLCommandQueue queue, CLEventList events) { + queue.putWaitForEvents(events); + latch.countDown(); + return this; + } + + /** + * Blocks until all Threads which called {@link #waitFor} + * continue execution. + * This method blocks only once, all subsequent calls are ignored. 
+ */ + public QueueBarrier await() throws InterruptedException { + latch.await(); + return this; + } + /** + * @see #await() + * @param timeout the maximum time to wait + * @param unit the time unit of the {@code timeout} argument + */ + public QueueBarrier await(long timeout, TimeUnit unit) throws InterruptedException { + latch.await(timeout, unit); + return this; + } + +} -- cgit v1.2.3