diff options
-rw-r--r-- | src/com/mbien/opencl/CLCommandQueue.java | 206 | ||||
-rw-r--r-- | src/com/mbien/opencl/CLEvent.java | 37 | ||||
-rw-r--r-- | src/com/mbien/opencl/CLEventList.java | 95 | ||||
-rw-r--r-- | test/com/mbien/opencl/HighLevelBindingTest.java | 2 | ||||
-rw-r--r-- | test/com/mbien/opencl/LowLevelBindingTest.java | 15 |
5 files changed, 302 insertions, 53 deletions
diff --git a/src/com/mbien/opencl/CLCommandQueue.java b/src/com/mbien/opencl/CLCommandQueue.java index 3b0560ba..dd2ab075 100644 --- a/src/com/mbien/opencl/CLCommandQueue.java +++ b/src/com/mbien/opencl/CLCommandQueue.java @@ -1,6 +1,6 @@ package com.mbien.opencl; -import java.nio.Buffer; +import com.sun.gluegen.runtime.PointerBuffer; import java.util.ArrayList; import java.util.EnumSet; import java.util.List; @@ -13,8 +13,8 @@ import static com.mbien.opencl.CL.*; * command-queues allows applications to queue multiple independent commands without * requiring synchronization. Note that this should work as long as these objects are * not being shared.<b/> - * Sharing of objects across multiple command-queues will require the application to - * perform appropriate synchronization. + * Sharing of objects across multiple command-queues or using a CLCommandQueue + * from multiple threads will require the application to perform appropriate synchronization.<b/> * @author Michael Bien */ public class CLCommandQueue implements CLResource { @@ -24,11 +24,22 @@ public class CLCommandQueue implements CLResource { private final CLDevice device; private final CL cl; + /* + * These direct memory buffers are used to pass data between the JVM and OpenCL. 
+ */ + private final PointerBuffer bufferA; + private final PointerBuffer bufferB; + private final PointerBuffer bufferC; + CLCommandQueue(CLContext context, CLDevice device, long properties) { this.context = context; this.cl = context.cl; this.device = device; + this.bufferA = PointerBuffer.allocateDirect(2); + this.bufferB = PointerBuffer.allocateDirect(2); + this.bufferC = PointerBuffer.allocateDirect(2); + int[] status = new int[1]; this.ID = cl.clCreateCommandQueue(context.ID, device.ID, properties, status, 0); @@ -36,34 +47,50 @@ public class CLCommandQueue implements CLResource { throw new CLException(status[0], "can not create command queue on "+device); } - public CLCommandQueue putWriteBuffer(CLBuffer<?> writeBuffer, boolean blockingWrite) { + public CLCommandQueue putWriteBuffer(CLBuffer<?> writeBuffer, boolean blockingRead) { + return putWriteBuffer(writeBuffer, null, blockingRead); + } + + public CLCommandQueue putWriteBuffer(CLBuffer<?> writeBuffer, CLEventList events, boolean blockingWrite) { + PointerBuffer pb = PointerBuffer.allocateDirect(2); int ret = cl.clEnqueueWriteBuffer( ID, writeBuffer.ID, blockingWrite ? CL_TRUE : CL_FALSE, 0, writeBuffer.getSizeInBytes(), writeBuffer.buffer, -// 0, null, null); //TODO solve NPE in gluegen when PointerBuffer == null (fast dircet memory path) - 0, null, 0, null, 0); //TODO events + 0, null, events==null ? null : events.IDs); if(ret != CL_SUCCESS) throw new CLException(ret, "can not enqueue WriteBuffer: " + writeBuffer); + if(events != null) { + events.createEvent(context); + } + return this; } public CLCommandQueue putReadBuffer(CLBuffer<?> readBuffer, boolean blockingRead) { + putReadBuffer(readBuffer, null, blockingRead); + return this; + } + + public CLCommandQueue putReadBuffer(CLBuffer<?> readBuffer, CLEventList events, boolean blockingRead) { int ret = cl.clEnqueueReadBuffer( ID, readBuffer.ID, blockingRead ? 
CL_TRUE : CL_FALSE, 0, readBuffer.getSizeInBytes(), readBuffer.buffer, -// 0, null, null); //TODO solve NPE in gluegen when PointerBuffer == null (fast dircet memory path) - 0, null, 0, null, 0); //TODO events + 0, null, events==null ? null : events.IDs); if(ret != CL_SUCCESS) throw new CLException(ret, "can not enqueue ReadBuffer: " + readBuffer); - + + if(events != null) { + events.createEvent(context); + } + return this; } - +/* public CLCommandQueue putReadBuffer(CLBuffer<?> readBuffer, Buffer buffer, boolean blockingRead) { int ret = cl.clEnqueueReadBuffer( @@ -77,19 +104,24 @@ public class CLCommandQueue implements CLResource { return this; } +*/ - public CLCommandQueue putBarrier() { - int ret = cl.clEnqueueBarrier(ID); - checkForError(ret, "can not enqueue Barrier"); - return this; + public CLCommandQueue putCopyBuffer(CLBuffer<?> src, CLBuffer<?> dest, long bytesToCopy) { + return putCopyBuffer(src, dest, bytesToCopy, null); } - public CLCommandQueue putCopyBuffer(CLBuffer<?> src, CLBuffer<?> dest, long bytesToCopy) { + public CLCommandQueue putCopyBuffer(CLBuffer<?> src, CLBuffer<?> dest, long bytesToCopy, CLEventList events) { + int ret = cl.clEnqueueCopyBuffer( ID, src.ID, dest.ID, src.buffer.position(), dest.buffer.position(), bytesToCopy, -// 0, null, null); //TODO solve NPE in gluegen when PointerBuffer == null - 0, null, 0, null, 0); //TODO events + 0, null, events==null ? 
null : events.IDs); + checkForError(ret, "can not copy Buffer"); + + if(events != null) { + events.createEvent(context); + } + return this; } @@ -107,10 +139,6 @@ public class CLCommandQueue implements CLResource { return this; } - public CLCommandQueue putMarker() { - - return this; - } public CLCommandQueue putWriteImage() { @@ -141,66 +169,158 @@ public class CLCommandQueue implements CLResource { return this; } +*/ + public CLCommandQueue putMarker(CLEventList events) { + int ret = cl.clEnqueueMarker(CL_INT_MIN, events.IDs); + checkForError(ret, "can not enqueue marker"); + return this; + } - public CLCommandQueue putWaitForEvents() { + public CLCommandQueue putWaitForEvent(CLEventList list, int index) { + int marker = list.IDs.position(); + list.IDs.position(index); + int ret = cl.clWaitForEvents(1, list.IDs); + list.IDs.position(marker); + checkForError(ret, "error while waiting for events"); + return this; + } + public CLCommandQueue putWaitForEvents(CLEventList list) { + int ret = cl.clWaitForEvents(list.size, list.IDs); + checkForError(ret, "error while waiting for events"); + return this; + } + + public CLCommandQueue putBarrier() { + int ret = cl.clEnqueueBarrier(ID); + checkForError(ret, "can not enqueue Barrier"); return this; } -*/ public CLCommandQueue put1DRangeKernel(CLKernel kernel, long globalWorkOffset, long globalWorkSize, long localWorkSize) { - return this.putNDRangeKernel( - kernel, 1, - globalWorkOffset==0 ? null : new long[] {globalWorkOffset}, - globalWorkSize ==0 ? null : new long[] {globalWorkSize }, - localWorkSize ==0 ? 
null : new long[] {localWorkSize } ); + this.put1DRangeKernel(kernel, null, globalWorkOffset, globalWorkSize, localWorkSize); + return this; + } + + public CLCommandQueue put1DRangeKernel(CLKernel kernel, CLEventList events, long globalWorkOffset, long globalWorkSize, long localWorkSize) { + PointerBuffer globWO = null; + PointerBuffer globWS = null; + PointerBuffer locWS = null; + + if(globalWorkOffset != 0) { + globWO = bufferA.put(1, globalWorkOffset).position(1); + } + if(globalWorkSize != 0) { + globWS = bufferB.put(1, globalWorkSize).position(1); + } + if(globalWorkSize != 0) { + locWS = bufferC.put(1, localWorkSize).position(1); + } + + this.putNDRangeKernel(kernel, 1, globWO, globWS, locWS); + return this; } public CLCommandQueue put2DRangeKernel(CLKernel kernel, long globalWorkOffsetX, long globalWorkOffsetY, long globalWorkSizeX, long globalWorkSizeY, long localWorkSizeX, long localWorkSizeY) { - return this.putNDRangeKernel( - kernel, 2, - globalWorkOffsetX==0 && globalWorkOffsetY==0 ? null : new long[] {globalWorkOffsetX, globalWorkOffsetY}, - globalWorkSizeX ==0 && globalWorkSizeY ==0 ? null : new long[] {globalWorkSizeX, globalWorkSizeY }, - localWorkSizeX ==0 && localWorkSizeY ==0 ? 
null : new long[] {localWorkSizeX, localWorkSizeY } ); + this.put2DRangeKernel(kernel, null, + globalWorkOffsetX, globalWorkOffsetY, + globalWorkSizeX, globalWorkSizeY, + localWorkSizeX, localWorkSizeY); + + return this; } - public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long[] globalWorkOffset, long[] globalWorkSize, long[] localWorkSize) { + public CLCommandQueue put2DRangeKernel(CLKernel kernel, CLEventList events, + long globalWorkOffsetX, long globalWorkOffsetY, + long globalWorkSizeX, long globalWorkSizeY, + long localWorkSizeX, long localWorkSizeY) { + PointerBuffer globalWorkOffset = null; + PointerBuffer globalWorkSize = null; + PointerBuffer localWorkSize = null; + + if(globalWorkOffsetX != 0 && globalWorkOffsetY != 0) { + globalWorkOffset = bufferA.put(globalWorkOffsetX).put(globalWorkOffsetY).rewind(); + } + if(globalWorkSizeX != 0 && globalWorkSizeY != 0) { + globalWorkSize = bufferB.put(globalWorkSizeX).put(globalWorkSizeY).rewind(); + } + if(localWorkSizeX != 0 && localWorkSizeY !=0) { + localWorkSize = bufferC.put(localWorkSizeX).put(localWorkSizeY).rewind(); + } + this.putNDRangeKernel(kernel, 2, globalWorkOffset, globalWorkSize, localWorkSize); + return this; + } + + public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, PointerBuffer globalWorkOffset, PointerBuffer globalWorkSize, PointerBuffer localWorkSize) { + this.putNDRangeKernel(kernel, null, workDimension, globalWorkOffset, globalWorkSize, localWorkSize); + return this; + } + + public CLCommandQueue putNDRangeKernel(CLKernel kernel, CLEventList events, int workDimension, PointerBuffer globalWorkOffset, PointerBuffer globalWorkSize, PointerBuffer localWorkSize) { int ret = cl.clEnqueueNDRangeKernel( ID, kernel.ID, workDimension, - globalWorkOffset, 0, - globalWorkSize, 0, - localWorkSize, 0, - 0, - null, 0, - null, 0 ); + globalWorkOffset, + globalWorkSize, + localWorkSize, + 0, null, + events==null ? 
null : events.IDs); if(ret != CL_SUCCESS) throw new CLException(ret, "can not enqueue NDRangeKernel: " + kernel); + if(events != null) { + events.createEvent(context); + } + return this; } - public CLCommandQueue putAcquireGLObject(long glObject) { + this.putAcquireGLObject(glObject, null); + return this; + } + + public CLCommandQueue putAcquireGLObject(long glObject, CLEventList events) { CLGLI xl = (CLGLI) cl; - int ret = xl.clEnqueueAcquireGLObjects(ID, 1, new long[] {glObject}, 0, 0, null, 0, null, 0); + + PointerBuffer glObj = bufferA.put(1, glObject).position(1); + + int ret = xl.clEnqueueAcquireGLObjects(ID, 1, glObj, 0, null, + events==null ? null : events.IDs); if(ret != CL_SUCCESS) throw new CLException(ret, "can not aquire GLObject: " + glObject); + if(events != null) { + events.createEvent(context); + } + return this; } public CLCommandQueue putReleaseGLObject(long glObject) { + this.putReleaseGLObject(glObject, null); + return this; + } + + public CLCommandQueue putReleaseGLObject(long glObject, CLEventList events) { CLGLI xl = (CLGLI) cl; - int ret = xl.clEnqueueReleaseGLObjects(ID, 1, new long[] {glObject}, 0, 0, null, 0, null, 0); + + PointerBuffer glObj = bufferA.put(1, glObject).position(1); + + int ret = xl.clEnqueueReleaseGLObjects(ID, 1, glObj, 0, null, + events==null ? 
null : events.IDs); if(ret != CL_SUCCESS) throw new CLException(ret, "can not release GLObject: " + glObject); + if(events != null) { + events.createEvent(context); + } + return this; } diff --git a/src/com/mbien/opencl/CLEvent.java b/src/com/mbien/opencl/CLEvent.java index c4b62917..45117bc6 100644 --- a/src/com/mbien/opencl/CLEvent.java +++ b/src/com/mbien/opencl/CLEvent.java @@ -17,7 +17,7 @@ public class CLEvent implements CLResource { private final CLEventInfoAccessor eventInfo; - CLEvent(CLContext context, int id) { + CLEvent(CLContext context, long id) { this.context = context; this.cl = context.cl; this.ID = id; @@ -38,6 +38,41 @@ public class CLEvent implements CLResource { return CommandType.valueOf(status); } + @Override + public String toString() { + return "CLEvent [id: " + ID + + " name: " + getType() + + " status: " + getStatus()+"]"; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final CLEvent other = (CLEvent) obj; + if (this.context != other.context && (this.context == null || !this.context.equals(other.context))) { + return false; + } + if (this.ID != other.ID) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 5; + hash = 13 * hash + (this.context != null ? this.context.hashCode() : 0); + hash = 13 * hash + (int) (this.ID ^ (this.ID >>> 32)); + return hash; + } + + + private class CLEventInfoAccessor extends CLInfoAccessor { @Override diff --git a/src/com/mbien/opencl/CLEventList.java b/src/com/mbien/opencl/CLEventList.java new file mode 100644 index 00000000..66b07d55 --- /dev/null +++ b/src/com/mbien/opencl/CLEventList.java @@ -0,0 +1,95 @@ +package com.mbien.opencl; + +import com.sun.gluegen.runtime.PointerBuffer; +import java.util.Iterator; + +/** + * Fixed size list for storing CLEvents. 
+ * @author Michael Bien + */ +public final class CLEventList implements CLResource, Iterable<CLEvent> { + + private final CLEvent[] events; + + final PointerBuffer IDs; + int size; + + public CLEventList(int capacity) { + this.events = new CLEvent[capacity]; + this.IDs = PointerBuffer.allocateDirect(capacity); + } + + void createEvent(CLContext context) { + + if(events[size] != null) + events[size].release(); + + events[size] = new CLEvent(context, IDs.get()); + size++; + } + + /** + * Releases all CLEvents in this list. + */ + public void release() { + for (int i = 0; i < size; i++) { + events[i].release(); + events[i] = null; + } + size = 0; + IDs.rewind(); + } + + public CLEvent getEvent(int index) { + if(index >= size) + throw new IndexOutOfBoundsException("list contains "+size+" events, can not return event with index "+index); + return events[index]; + } + + /** + * Returns the current size of this list. + */ + public int size() { + return size; + } + + /** + * Returns the maximum size of this list. 
+ */ + public int capacity() { + return events.length; + } + + public Iterator<CLEvent> iterator() { + return new EventIterator(events, size); + } + + private static class EventIterator implements Iterator<CLEvent> { + + private final CLEvent[] events; + private final int size; + private int index; + + private EventIterator(CLEvent[] events, int size) { + this.events = events; + this.size = size; + } + + public boolean hasNext() { + return index < size; + } + + public CLEvent next() { + if(hasNext()) + return events[index++]; + else + return null; + } + + public void remove() { + throw new UnsupportedOperationException("remove() not supported."); + } + + } + +} diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java index 42e74667..5515cecc 100644 --- a/test/com/mbien/opencl/HighLevelBindingTest.java +++ b/test/com/mbien/opencl/HighLevelBindingTest.java @@ -153,7 +153,7 @@ public class HighLevelBindingTest { // Asynchronous write of data to GPU device, blocking read later queue.putWriteBuffer(clBufferA, false) .putWriteBuffer(clBufferB, false) - .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize }) + .put1DRangeKernel(vectorAddKernel, 0, globalWorkSize, localWorkSize) .putReadBuffer(clBufferC, true) .finish().release(); diff --git a/test/com/mbien/opencl/LowLevelBindingTest.java b/test/com/mbien/opencl/LowLevelBindingTest.java index 0a600102..94b86e71 100644 --- a/test/com/mbien/opencl/LowLevelBindingTest.java +++ b/test/com/mbien/opencl/LowLevelBindingTest.java @@ -1,6 +1,7 @@ package com.mbien.opencl; import com.sun.gluegen.runtime.CPU; +import com.sun.gluegen.runtime.PointerBuffer; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.IntBuffer; @@ -289,21 +290,19 @@ public class LowLevelBindingTest { out.println("used device memory: "+ (srcA.capacity()+srcB.capacity()+dest.capacity())/1000000 +"MB"); // Asynchronous write of data to GPU 
device - ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcA, CL.CL_FALSE, 0, srcA.capacity(), srcA, 0, null, 0, null, 0); + ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcA, CL.CL_FALSE, 0, srcA.capacity(), srcA, 0, null, null); checkError("on clEnqueueWriteBuffer", ret); - ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcB, CL.CL_FALSE, 0, srcB.capacity(), srcB, 0, null, 0, null, 0); + ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcB, CL.CL_FALSE, 0, srcB.capacity(), srcB, 0, null, null); checkError("on clEnqueueWriteBuffer", ret); // Launch kernel - ret = cl.clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, 0, - new long[]{ globalWorkSize }, 0, - new long[]{ localWorkSize }, 0, 0, - null, 0, - null, 0); + PointerBuffer gWS = PointerBuffer.allocateDirect(1).put(globalWorkSize).rewind(); + PointerBuffer lWS = PointerBuffer.allocateDirect(1).put(localWorkSize).rewind(); + ret = cl.clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, gWS, lWS, 0, null, null); checkError("on clEnqueueNDRangeKernel", ret); // Synchronous/blocking read of results - ret = cl.clEnqueueReadBuffer(commandQueue, devDst, CL.CL_TRUE, 0, dest.capacity(), dest, 0, null, 0, null, 0); + ret = cl.clEnqueueReadBuffer(commandQueue, devDst, CL.CL_TRUE, 0, dest.capacity(), dest, 0, null, null); checkError("on clEnqueueReadBuffer", ret); out.println("a+b=c result snapshot: "); |