aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/com/mbien/opencl/CLCommandQueue.java206
-rw-r--r--src/com/mbien/opencl/CLEvent.java37
-rw-r--r--src/com/mbien/opencl/CLEventList.java95
-rw-r--r--test/com/mbien/opencl/HighLevelBindingTest.java2
-rw-r--r--test/com/mbien/opencl/LowLevelBindingTest.java15
5 files changed, 302 insertions, 53 deletions
diff --git a/src/com/mbien/opencl/CLCommandQueue.java b/src/com/mbien/opencl/CLCommandQueue.java
index 3b0560ba..dd2ab075 100644
--- a/src/com/mbien/opencl/CLCommandQueue.java
+++ b/src/com/mbien/opencl/CLCommandQueue.java
@@ -1,6 +1,6 @@
package com.mbien.opencl;
-import java.nio.Buffer;
+import com.sun.gluegen.runtime.PointerBuffer;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
@@ -13,8 +13,8 @@ import static com.mbien.opencl.CL.*;
* command-queues allows applications to queue multiple independent commands without
* requiring synchronization. Note that this should work as long as these objects are
* not being shared.<b/>
- * Sharing of objects across multiple command-queues will require the application to
- * perform appropriate synchronization.
+ * Sharing of objects across multiple command-queues or using a CLCommandQueue
+ * from multiple Threads will require the application to perform appropriate synchronization.<b/>
* @author Michael Bien
*/
public class CLCommandQueue implements CLResource {
@@ -24,11 +24,22 @@ public class CLCommandQueue implements CLResource {
private final CLDevice device;
private final CL cl;
+ /*
+ * These direct memory buffers are used to pass data between the JVM and OpenCL.
+ */
+ private final PointerBuffer bufferA;
+ private final PointerBuffer bufferB;
+ private final PointerBuffer bufferC;
+
CLCommandQueue(CLContext context, CLDevice device, long properties) {
this.context = context;
this.cl = context.cl;
this.device = device;
+ this.bufferA = PointerBuffer.allocateDirect(2);
+ this.bufferB = PointerBuffer.allocateDirect(2);
+ this.bufferC = PointerBuffer.allocateDirect(2);
+
int[] status = new int[1];
this.ID = cl.clCreateCommandQueue(context.ID, device.ID, properties, status, 0);
@@ -36,34 +47,50 @@ public class CLCommandQueue implements CLResource {
throw new CLException(status[0], "can not create command queue on "+device);
}
+ /**
+ * Enqueues a write of the given buffer without recording an event.
+ * Delegates to the CLEventList overload with a null event list.
+ * @param writeBuffer the buffer to write.
+ * @param blockingWrite forwarded unchanged to the event-aware overload.
+ * @return this queue.
+ */
public CLCommandQueue putWriteBuffer(CLBuffer<?> writeBuffer, boolean blockingWrite) {
+ return putWriteBuffer(writeBuffer, null, blockingWrite);
+ }
+
+ /**
+ * Enqueues a write of writeBuffer.getSizeInBytes() bytes, starting at offset 0,
+ * via clEnqueueWriteBuffer.
+ * @param writeBuffer the buffer to write.
+ * @param events if not null, events.IDs is handed to OpenCL as the event output and
+ * events.createEvent(context) is called once the command was enqueued successfully.
+ * @param blockingWrite passed to OpenCL as CL_TRUE or CL_FALSE.
+ * @return this queue.
+ * @throws CLException if OpenCL does not return CL_SUCCESS.
+ */
+ public CLCommandQueue putWriteBuffer(CLBuffer<?> writeBuffer, CLEventList events, boolean blockingWrite) {
int ret = cl.clEnqueueWriteBuffer(
ID, writeBuffer.ID, blockingWrite ? CL_TRUE : CL_FALSE,
0, writeBuffer.getSizeInBytes(), writeBuffer.buffer,
-// 0, null, null); //TODO solve NPE in gluegen when PointerBuffer == null (fast dircet memory path)
- 0, null, 0, null, 0); //TODO events
+ 0, null, events==null ? null : events.IDs);
if(ret != CL_SUCCESS)
throw new CLException(ret, "can not enqueue WriteBuffer: " + writeBuffer);
+ if(events != null) {
+ events.createEvent(context);
+ }
+
return this;
}
public CLCommandQueue putReadBuffer(CLBuffer<?> readBuffer, boolean blockingRead) {
+ putReadBuffer(readBuffer, null, blockingRead);
+ return this;
+ }
+
+ public CLCommandQueue putReadBuffer(CLBuffer<?> readBuffer, CLEventList events, boolean blockingRead) {
int ret = cl.clEnqueueReadBuffer(
ID, readBuffer.ID, blockingRead ? CL_TRUE : CL_FALSE,
0, readBuffer.getSizeInBytes(), readBuffer.buffer,
-// 0, null, null); //TODO solve NPE in gluegen when PointerBuffer == null (fast dircet memory path)
- 0, null, 0, null, 0); //TODO events
+ 0, null, events==null ? null : events.IDs);
if(ret != CL_SUCCESS)
throw new CLException(ret, "can not enqueue ReadBuffer: " + readBuffer);
-
+
+ if(events != null) {
+ events.createEvent(context);
+ }
+
return this;
}
-
+/*
public CLCommandQueue putReadBuffer(CLBuffer<?> readBuffer, Buffer buffer, boolean blockingRead) {
int ret = cl.clEnqueueReadBuffer(
@@ -77,19 +104,24 @@ public class CLCommandQueue implements CLResource {
return this;
}
+*/
- public CLCommandQueue putBarrier() {
- int ret = cl.clEnqueueBarrier(ID);
- checkForError(ret, "can not enqueue Barrier");
- return this;
+ public CLCommandQueue putCopyBuffer(CLBuffer<?> src, CLBuffer<?> dest, long bytesToCopy) {
+ return putCopyBuffer(src, dest, bytesToCopy, null);
}
- public CLCommandQueue putCopyBuffer(CLBuffer<?> src, CLBuffer<?> dest, long bytesToCopy) {
+ public CLCommandQueue putCopyBuffer(CLBuffer<?> src, CLBuffer<?> dest, long bytesToCopy, CLEventList events) {
+
int ret = cl.clEnqueueCopyBuffer(
ID, src.ID, dest.ID, src.buffer.position(), dest.buffer.position(), bytesToCopy,
-// 0, null, null); //TODO solve NPE in gluegen when PointerBuffer == null
- 0, null, 0, null, 0); //TODO events
+ 0, null, events==null ? null : events.IDs);
+
checkForError(ret, "can not copy Buffer");
+
+ if(events != null) {
+ events.createEvent(context);
+ }
+
return this;
}
@@ -107,10 +139,6 @@ public class CLCommandQueue implements CLResource {
return this;
}
- public CLCommandQueue putMarker() {
-
- return this;
- }
public CLCommandQueue putWriteImage() {
@@ -141,66 +169,158 @@ public class CLCommandQueue implements CLResource {
return this;
}
+*/
+ /**
+ * Enqueues a marker on this queue and records the marker's event in the given list.
+ * @param events list receiving the marker event; must not be null since its ID buffer
+ * is passed to clEnqueueMarker.
+ * @return this queue.
+ */
+ public CLCommandQueue putMarker(CLEventList events) {
+ // the first argument is the command queue handle, not a constant
+ int ret = cl.clEnqueueMarker(ID, events.IDs);
+ checkForError(ret, "can not enqueue marker");
+ // register the new event like the other event-aware put* methods do,
+ // otherwise the list's size never reflects the marker
+ events.createEvent(context);
+ return this;
+ }
- public CLCommandQueue putWaitForEvents() {
+ /**
+ * Calls clWaitForEvents for the single event stored at the given index of the list.
+ * The position of the list's ID buffer is moved to the index for the call and
+ * restored afterwards.
+ * @param list the list holding the event.
+ * @param index index of the event, must be in the range [0, list.size).
+ * @return this queue.
+ * @throws IndexOutOfBoundsException if no event has been recorded at the index.
+ */
+ public CLCommandQueue putWaitForEvent(CLEventList list, int index) {
+ // slots at or beyond size hold no valid event handle; never hand them to native code
+ if(index < 0 || index >= list.size)
+ throw new IndexOutOfBoundsException("list contains "+list.size+" events, can not wait for event with index "+index);
+ int marker = list.IDs.position();
+ list.IDs.position(index);
+ int ret = cl.clWaitForEvents(1, list.IDs);
+ list.IDs.position(marker);
+ checkForError(ret, "error while waiting for events");
+ return this;
+ }
+ /**
+ * Calls clWaitForEvents for all list.size events recorded in the list.
+ * @param list the list holding the events.
+ * @return this queue.
+ */
+ public CLCommandQueue putWaitForEvents(CLEventList list) {
+ // CLEventList.createEvent reads each handle with a relative get, which leaves the
+ // buffer positioned behind the recorded events; start at slot 0 for the call
+ // and restore the position so later enqueues still append at the right slot
+ int marker = list.IDs.position();
+ list.IDs.position(0);
+ int ret = cl.clWaitForEvents(list.size, list.IDs);
+ list.IDs.position(marker);
+ checkForError(ret, "error while waiting for events");
+ return this;
+ }
+
+ public CLCommandQueue putBarrier() {
+ int ret = cl.clEnqueueBarrier(ID);
+ checkForError(ret, "can not enqueue Barrier");
return this;
}
-*/
public CLCommandQueue put1DRangeKernel(CLKernel kernel, long globalWorkOffset, long globalWorkSize, long localWorkSize) {
- return this.putNDRangeKernel(
- kernel, 1,
- globalWorkOffset==0 ? null : new long[] {globalWorkOffset},
- globalWorkSize ==0 ? null : new long[] {globalWorkSize },
- localWorkSize ==0 ? null : new long[] {localWorkSize } );
+ this.put1DRangeKernel(kernel, null, globalWorkOffset, globalWorkSize, localWorkSize);
+ return this;
+ }
+
+ /**
+ * Enqueues a one dimensional kernel execution.
+ * Each value equal to 0 is passed to putNDRangeKernel as null; non-zero values are
+ * written to slot 1 of this queue's scratch buffers (bufferA, bufferB, bufferC).
+ * @param kernel the kernel to enqueue.
+ * @param events forwarded to putNDRangeKernel, may be null.
+ * @param globalWorkOffset global work offset, 0 for none.
+ * @param globalWorkSize global work size, 0 for none.
+ * @param localWorkSize local work size, 0 for none.
+ * @return this queue.
+ */
+ public CLCommandQueue put1DRangeKernel(CLKernel kernel, CLEventList events, long globalWorkOffset, long globalWorkSize, long localWorkSize) {
+ PointerBuffer globWO = null;
+ PointerBuffer globWS = null;
+ PointerBuffer locWS = null;
+
+ if(globalWorkOffset != 0) {
+ globWO = bufferA.put(1, globalWorkOffset).position(1);
+ }
+ if(globalWorkSize != 0) {
+ globWS = bufferB.put(1, globalWorkSize).position(1);
+ }
+ // guard on the local size itself; testing globalWorkSize here passed a buffer
+ // containing 0 whenever only the global size was set
+ if(localWorkSize != 0) {
+ locWS = bufferC.put(1, localWorkSize).position(1);
+ }
+
+ // forward the event list, otherwise no event is ever recorded for this command
+ this.putNDRangeKernel(kernel, events, 1, globWO, globWS, locWS);
+ return this;
}
public CLCommandQueue put2DRangeKernel(CLKernel kernel, long globalWorkOffsetX, long globalWorkOffsetY,
long globalWorkSizeX, long globalWorkSizeY,
long localWorkSizeX, long localWorkSizeY) {
- return this.putNDRangeKernel(
- kernel, 2,
- globalWorkOffsetX==0 && globalWorkOffsetY==0 ? null : new long[] {globalWorkOffsetX, globalWorkOffsetY},
- globalWorkSizeX ==0 && globalWorkSizeY ==0 ? null : new long[] {globalWorkSizeX, globalWorkSizeY },
- localWorkSizeX ==0 && localWorkSizeY ==0 ? null : new long[] {localWorkSizeX, localWorkSizeY } );
+ this.put2DRangeKernel(kernel, null,
+ globalWorkOffsetX, globalWorkOffsetY,
+ globalWorkSizeX, globalWorkSizeY,
+ localWorkSizeX, localWorkSizeY);
+
+ return this;
}
- public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long[] globalWorkOffset, long[] globalWorkSize, long[] localWorkSize) {
+ /**
+ * Enqueues a two dimensional kernel execution.
+ * A pair is passed to putNDRangeKernel as null only if both of its components are 0;
+ * otherwise both components are written to this queue's scratch buffers
+ * (bufferA, bufferB, bufferC).
+ * @param kernel the kernel to enqueue.
+ * @param events forwarded to putNDRangeKernel, may be null.
+ * @return this queue.
+ */
+ public CLCommandQueue put2DRangeKernel(CLKernel kernel, CLEventList events,
+ long globalWorkOffsetX, long globalWorkOffsetY,
+ long globalWorkSizeX, long globalWorkSizeY,
+ long localWorkSizeX, long localWorkSizeY) {
+ PointerBuffer globalWorkOffset = null;
+ PointerBuffer globalWorkSize = null;
+ PointerBuffer localWorkSize = null;
+
+ // absolute puts: the scratch buffers hold two slots and other methods leave their
+ // position at 1, so relative puts could run past the end of the buffer
+ if(globalWorkOffsetX != 0 || globalWorkOffsetY != 0) {
+ globalWorkOffset = bufferA.put(0, globalWorkOffsetX).put(1, globalWorkOffsetY).position(0);
+ }
+ if(globalWorkSizeX != 0 || globalWorkSizeY != 0) {
+ globalWorkSize = bufferB.put(0, globalWorkSizeX).put(1, globalWorkSizeY).position(0);
+ }
+ if(localWorkSizeX != 0 || localWorkSizeY != 0) {
+ localWorkSize = bufferC.put(0, localWorkSizeX).put(1, localWorkSizeY).position(0);
+ }
+ // forward the event list, otherwise no event is ever recorded for this command
+ this.putNDRangeKernel(kernel, events, 2, globalWorkOffset, globalWorkSize, localWorkSize);
+ return this;
+ }
+
+ public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, PointerBuffer globalWorkOffset, PointerBuffer globalWorkSize, PointerBuffer localWorkSize) {
+ this.putNDRangeKernel(kernel, null, workDimension, globalWorkOffset, globalWorkSize, localWorkSize);
+ return this;
+ }
+
+ public CLCommandQueue putNDRangeKernel(CLKernel kernel, CLEventList events, int workDimension, PointerBuffer globalWorkOffset, PointerBuffer globalWorkSize, PointerBuffer localWorkSize) {
int ret = cl.clEnqueueNDRangeKernel(
ID, kernel.ID, workDimension,
- globalWorkOffset, 0,
- globalWorkSize, 0,
- localWorkSize, 0,
- 0,
- null, 0,
- null, 0 );
+ globalWorkOffset,
+ globalWorkSize,
+ localWorkSize,
+ 0, null,
+ events==null ? null : events.IDs);
if(ret != CL_SUCCESS)
throw new CLException(ret, "can not enqueue NDRangeKernel: " + kernel);
+ if(events != null) {
+ events.createEvent(context);
+ }
+
return this;
}
-
public CLCommandQueue putAcquireGLObject(long glObject) {
+ this.putAcquireGLObject(glObject, null);
+ return this;
+ }
+
+ public CLCommandQueue putAcquireGLObject(long glObject, CLEventList events) {
CLGLI xl = (CLGLI) cl;
- int ret = xl.clEnqueueAcquireGLObjects(ID, 1, new long[] {glObject}, 0, 0, null, 0, null, 0);
+
+ PointerBuffer glObj = bufferA.put(1, glObject).position(1);
+
+ int ret = xl.clEnqueueAcquireGLObjects(ID, 1, glObj, 0, null,
+ events==null ? null : events.IDs);
if(ret != CL_SUCCESS)
throw new CLException(ret, "can not aquire GLObject: " + glObject);
+ if(events != null) {
+ events.createEvent(context);
+ }
+
return this;
}
public CLCommandQueue putReleaseGLObject(long glObject) {
+ this.putReleaseGLObject(glObject, null);
+ return this;
+ }
+
+ public CLCommandQueue putReleaseGLObject(long glObject, CLEventList events) {
CLGLI xl = (CLGLI) cl;
- int ret = xl.clEnqueueReleaseGLObjects(ID, 1, new long[] {glObject}, 0, 0, null, 0, null, 0);
+
+ PointerBuffer glObj = bufferA.put(1, glObject).position(1);
+
+ int ret = xl.clEnqueueReleaseGLObjects(ID, 1, glObj, 0, null,
+ events==null ? null : events.IDs);
if(ret != CL_SUCCESS)
throw new CLException(ret, "can not release GLObject: " + glObject);
+ if(events != null) {
+ events.createEvent(context);
+ }
+
return this;
}
diff --git a/src/com/mbien/opencl/CLEvent.java b/src/com/mbien/opencl/CLEvent.java
index c4b62917..45117bc6 100644
--- a/src/com/mbien/opencl/CLEvent.java
+++ b/src/com/mbien/opencl/CLEvent.java
@@ -17,7 +17,7 @@ public class CLEvent implements CLResource {
private final CLEventInfoAccessor eventInfo;
- CLEvent(CLContext context, int id) {
+ CLEvent(CLContext context, long id) {
this.context = context;
this.cl = context.cl;
this.ID = id;
@@ -38,6 +38,41 @@ public class CLEvent implements CLResource {
return CommandType.valueOf(status);
}
+ /**
+ * Returns a description made of this event's ID, type and status.
+ */
+ @Override
+ public String toString() {
+ StringBuilder text = new StringBuilder("CLEvent [id: ");
+ text.append(ID);
+ text.append(" name: ").append(getType());
+ text.append(" status: ").append(getStatus());
+ text.append("]");
+ return text.toString();
+ }
+
+ /**
+ * Two events are equal if they are of the same class, their contexts are
+ * equal (or both null) and their IDs match.
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ final CLEvent that = (CLEvent) obj;
+ final boolean sameContext = this.context == that.context
+ || (this.context != null && this.context.equals(that.context));
+ return sameContext && this.ID == that.ID;
+ }
+
+ /**
+ * Hash code derived from the context and the ID, the same fields equals compares.
+ */
+ @Override
+ public int hashCode() {
+ final int contextHash = (this.context == null) ? 0 : this.context.hashCode();
+ final int idHash = (int) (this.ID ^ (this.ID >>> 32));
+ return 13 * (13 * 5 + contextHash) + idHash;
+ }
+
+
+
private class CLEventInfoAccessor extends CLInfoAccessor {
@Override
diff --git a/src/com/mbien/opencl/CLEventList.java b/src/com/mbien/opencl/CLEventList.java
new file mode 100644
index 00000000..66b07d55
--- /dev/null
+++ b/src/com/mbien/opencl/CLEventList.java
@@ -0,0 +1,95 @@
+package com.mbien.opencl;
+
+import com.sun.gluegen.runtime.PointerBuffer;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Fixed size list for storing CLEvents.
+ * The event handles are kept in the direct buffer IDs; size counts how many of
+ * them have been wrapped into CLEvent objects by createEvent.
+ * @author Michael Bien
+ */
+public final class CLEventList implements CLResource, Iterable<CLEvent> {
+
+ private final CLEvent[] events;
+
+ final PointerBuffer IDs;
+ int size;
+
+ /**
+ * Creates an empty list which can hold up to capacity events.
+ */
+ public CLEventList(int capacity) {
+ this.events = new CLEvent[capacity];
+ this.IDs = PointerBuffer.allocateDirect(capacity);
+ }
+
+ /**
+ * Reads the next handle from IDs, wraps it in a new CLEvent stored in the
+ * next free slot and increments size. An event still present in that slot is released first.
+ */
+ void createEvent(CLContext context) {
+
+ if(events[size] != null)
+ events[size].release();
+
+ events[size] = new CLEvent(context, IDs.get());
+ size++;
+ }
+
+ /**
+ * Releases all CLEvents in this list, resets the size to 0 and rewinds the ID buffer.
+ */
+ public void release() {
+ for (int i = 0; i < size; i++) {
+ events[i].release();
+ events[i] = null;
+ }
+ size = 0;
+ IDs.rewind();
+ }
+
+ /**
+ * Returns the event at the given index.
+ * @throws IndexOutOfBoundsException if index is not smaller than the current size.
+ */
+ public CLEvent getEvent(int index) {
+ if(index >= size)
+ throw new IndexOutOfBoundsException("list contains "+size+" events, can not return event with index "+index);
+ return events[index];
+ }
+
+ /**
+ * Returns the current size of this list.
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Returns the maximum size of this list.
+ */
+ public int capacity() {
+ return events.length;
+ }
+
+ /**
+ * Returns an iterator over the events recorded at the time of this call.
+ * The iterator does not support remove().
+ */
+ public Iterator<CLEvent> iterator() {
+ return new EventIterator(events, size);
+ }
+
+ /**
+ * Read-only iterator over the first size entries of the event array.
+ */
+ private static class EventIterator implements Iterator<CLEvent> {
+
+ private final CLEvent[] events;
+ private final int size;
+ private int index;
+
+ private EventIterator(CLEvent[] events, int size) {
+ this.events = events;
+ this.size = size;
+ }
+
+ public boolean hasNext() {
+ return index < size;
+ }
+
+ /**
+ * Returns the next event.
+ * @throws NoSuchElementException if the iteration has no more events.
+ */
+ public CLEvent next() {
+ // the Iterator contract requires an exception here; returning null would make
+ // an exhausted iterator indistinguishable from a list containing null
+ if(!hasNext())
+ throw new NoSuchElementException("list contains "+size+" events, iteration has no more elements");
+ return events[index++];
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException("remove() not supported.");
+ }
+
+ }
+
+}
diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java
index 42e74667..5515cecc 100644
--- a/test/com/mbien/opencl/HighLevelBindingTest.java
+++ b/test/com/mbien/opencl/HighLevelBindingTest.java
@@ -153,7 +153,7 @@ public class HighLevelBindingTest {
// Asynchronous write of data to GPU device, blocking read later
queue.putWriteBuffer(clBufferA, false)
.putWriteBuffer(clBufferB, false)
- .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize })
+ .put1DRangeKernel(vectorAddKernel, 0, globalWorkSize, localWorkSize)
.putReadBuffer(clBufferC, true)
.finish().release();
diff --git a/test/com/mbien/opencl/LowLevelBindingTest.java b/test/com/mbien/opencl/LowLevelBindingTest.java
index 0a600102..94b86e71 100644
--- a/test/com/mbien/opencl/LowLevelBindingTest.java
+++ b/test/com/mbien/opencl/LowLevelBindingTest.java
@@ -1,6 +1,7 @@
package com.mbien.opencl;
import com.sun.gluegen.runtime.CPU;
+import com.sun.gluegen.runtime.PointerBuffer;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
@@ -289,21 +290,19 @@ public class LowLevelBindingTest {
out.println("used device memory: "+ (srcA.capacity()+srcB.capacity()+dest.capacity())/1000000 +"MB");
// Asynchronous write of data to GPU device
- ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcA, CL.CL_FALSE, 0, srcA.capacity(), srcA, 0, null, 0, null, 0);
+ ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcA, CL.CL_FALSE, 0, srcA.capacity(), srcA, 0, null, null);
checkError("on clEnqueueWriteBuffer", ret);
- ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcB, CL.CL_FALSE, 0, srcB.capacity(), srcB, 0, null, 0, null, 0);
+ ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcB, CL.CL_FALSE, 0, srcB.capacity(), srcB, 0, null, null);
checkError("on clEnqueueWriteBuffer", ret);
// Launch kernel
- ret = cl.clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, 0,
- new long[]{ globalWorkSize }, 0,
- new long[]{ localWorkSize }, 0, 0,
- null, 0,
- null, 0);
+ PointerBuffer gWS = PointerBuffer.allocateDirect(1).put(globalWorkSize).rewind();
+ PointerBuffer lWS = PointerBuffer.allocateDirect(1).put(localWorkSize).rewind();
+ ret = cl.clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, gWS, lWS, 0, null, null);
checkError("on clEnqueueNDRangeKernel", ret);
// Synchronous/blocking read of results
- ret = cl.clEnqueueReadBuffer(commandQueue, devDst, CL.CL_TRUE, 0, dest.capacity(), dest, 0, null, 0, null, 0);
+ ret = cl.clEnqueueReadBuffer(commandQueue, devDst, CL.CL_TRUE, 0, dest.capacity(), dest, 0, null, null);
checkError("on clEnqueueReadBuffer", ret);
out.println("a+b=c result snapshot: ");