summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Bien <[email protected]> 2009-10-22 01:26:53 +0200
committer Michael Bien <[email protected]> 2009-10-22 01:26:53 +0200
commit b3881a0924ecbe17cf27cededeae8df40b2d6933 (patch)
tree a2ae7930eca5d771ab89fddd1a9e62802b6ffac9
parent fe1e2739bf7596bc488de977166603edd18c41fb (diff)
api cleanup and refactoring.
-rw-r--r-- src/com/mbien/opencl/CLBuffer.java 84
-rw-r--r-- src/com/mbien/opencl/CLCommandQueue.java 49
-rw-r--r-- src/com/mbien/opencl/CLContext.java 73
-rw-r--r-- src/com/mbien/opencl/CLKernel.java 37
-rw-r--r-- src/com/mbien/opencl/CLProgram.java 34
-rw-r--r-- test/com/mbien/opencl/HighLevelBindingTest.java 28
6 files changed, 242 insertions, 63 deletions
diff --git a/src/com/mbien/opencl/CLBuffer.java b/src/com/mbien/opencl/CLBuffer.java
index f9f3a239..7dcd2928 100644
--- a/src/com/mbien/opencl/CLBuffer.java
+++ b/src/com/mbien/opencl/CLBuffer.java
@@ -9,13 +9,93 @@ import static com.mbien.opencl.CLException.*;
*/
public class CLBuffer {
+    /**
+     * Access flags of a CLBuffer. Each constant wraps the corresponding
+     * CL_MEM_* value of the low level binding.
+     */
+    public enum MEM {
+
+        /**
+         * This flag specifies that the memory object will be read and
+         * written by a kernel.
+         */
+        READ_WRITE(CL.CL_MEM_READ_WRITE),
+
+        /**
+         * This flag specifies that the memory object will be written
+         * but not read by a kernel.
+         * Reading from a buffer or image object created with WRITE_ONLY
+         * inside a kernel is undefined.
+         */
+        WRITE_ONLY(CL.CL_MEM_WRITE_ONLY),
+
+        /**
+         * This flag specifies that the memory object is a read-only memory
+         * object when used inside a kernel. Writing to a buffer or image object
+         * created with READ_ONLY inside a kernel is undefined.
+         */
+        READ_ONLY(CL.CL_MEM_READ_ONLY);
+
+        /**
+         * If specified, it indicates that the application wants the OpenCL
+         * implementation to use memory referenced by host_ptr as the storage
+         * bits for the memory object. OpenCL implementations are allowed
+         * to cache the buffer contents pointed to by host_ptr in device memory.
+         * This cached copy can be used when kernels are executed on a device.
+         */
+//        USE_HOST_PTR(CL.CL_MEM_USE_HOST_PTR),
+
+//        ALLOC_HOST_PTR(CL.CL_MEM_ALLOC_HOST_PTR), // this is the default in java world anyway
+
+        /**
+         * If CL_MEM_COPY_HOST_PTR specified, it indicates that the application
+         * wants the OpenCL implementation to allocate memory for the memory object
+         * and copy the data from memory referenced by host_ptr.<br/>
+         * COPY_HOST_PTR and USE_HOST_PTR are mutually exclusive.
+         */
+//        COPY_HOST_PTR(CL.CL_MEM_COPY_HOST_PTR);
+
+        /**
+         * Value of wrapped OpenCL flag.
+         */
+        public final int CL_FLAG;
+
+        private MEM(int CL_TYPE) {
+            this.CL_FLAG = CL_TYPE;
+        }
+
+        /**
+         * Returns the constant wrapping the given CL_MEM_* value.
+         *
+         * @param bufferFlag a single CL_MEM_* value (not a combination of flags).
+         * @return the matching constant, or null if no constant wraps this value.
+         */
+        public static MEM valueOf(int bufferFlag) {
+            switch(bufferFlag) {
+                case(CL.CL_MEM_READ_WRITE):
+                    return READ_WRITE;
+                case(CL.CL_MEM_WRITE_ONLY):
+                    return WRITE_ONLY;
+                case(CL.CL_MEM_READ_ONLY):
+                    return READ_ONLY;
+//                case(CL.CL_MEM_USE_HOST_PTR):
+//                    return USE_HOST_PTR;
+//                case(CL.CL_MEM_ALLOC_HOST_PTR):
+//                    return ALLOC_HOST_PTR;
+//                case(CL.CL_MEM_COPY_HOST_PTR):
+//                    return COPY_HOST_PTR;
+            }
+            return null;
+        }
+
+        /**
+         * Combines the given flags into a single bit mask.
+         *
+         * @param flags the flags to combine, may be null or empty.
+         * @return the OR-ed CL_FLAG values, or CL_MEM_READ_WRITE if no flag was given.
+         */
+        static int flagsToInt(MEM[] flags) {
+            // Start from an empty mask: seeding it with CL_MEM_READ_WRITE would OR
+            // READ_WRITE into every READ_ONLY / WRITE_ONLY request, and these access
+            // flags must not be combined with each other.
+            int clFlags = 0;
+            if(flags != null) {
+                for (MEM flag : flags) {
+                    clFlags |= flag.CL_FLAG;
+                }
+            }
+            // READ_WRITE is only the default when nothing was specified
+            return clFlags == 0 ? CL.CL_MEM_READ_WRITE : clFlags;
+        }
+
+    }
+
+
public final ByteBuffer buffer;
public final long ID;
private final CLContext context;
private final CL cl;
- CLBuffer(CLContext context, int flags, ByteBuffer directBuffer) {
+ CLBuffer(CLContext context, ByteBuffer directBuffer, int flags) {
if(!directBuffer.isDirect())
throw new IllegalArgumentException("buffer is not a direct buffer");
@@ -34,7 +114,7 @@ public class CLBuffer {
public void release() {
int ret = cl.clReleaseMemObject(ID);
- context.bufferReleased(this);
+ context.onBufferReleased(this);
checkForError(ret, "can not release mem object");
}
diff --git a/src/com/mbien/opencl/CLCommandQueue.java b/src/com/mbien/opencl/CLCommandQueue.java
index 7d1d09f9..5abc5cd5 100644
--- a/src/com/mbien/opencl/CLCommandQueue.java
+++ b/src/com/mbien/opencl/CLCommandQueue.java
@@ -123,10 +123,13 @@ public class CLCommandQueue {
}
*/
-// public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long globalWorkOffset, long globalWorkSize, long localWorkSize) {
-// return this.putNDRangeKernel(kernel, workDimension,
-// new long[] {globalWorkOffset}, new long[] {globalWorkSize}, new long[] {localWorkSize});
-// }
+    /**
+     * Scalar convenience variant of the array based putNDRangeKernel.
+     * Every non-zero value is passed on as a single-element array,
+     * a value of 0 is passed on as null.
+     */
+    public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long globalWorkOffset, long globalWorkSize, long localWorkSize) {
+        long[] offset = null;
+        if (globalWorkOffset != 0) {
+            offset = new long[] { globalWorkOffset };
+        }
+        long[] global = null;
+        if (globalWorkSize != 0) {
+            global = new long[] { globalWorkSize };
+        }
+        long[] local = null;
+        if (localWorkSize != 0) {
+            local = new long[] { localWorkSize };
+        }
+        return this.putNDRangeKernel(kernel, workDimension, offset, global, local);
+    }
public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long[] globalWorkOffset, long[] globalWorkSize, long[] localWorkSize) {
@@ -145,11 +148,47 @@ public class CLCommandQueue {
return this;
}
+    /**
+     * Calls clFinish on this queue and checks the returned error code.
+     *
+     * @return this queue, to allow call chaining.
+     */
+    public CLCommandQueue finish() {
+        checkForError(cl.clFinish(ID), "can not finish command queue");
+        return this;
+    }
+
public void release() {
int ret = cl.clReleaseCommandQueue(ID);
- context.commandQueueReleased(device, this);
+ context.onCommandQueueReleased(device, this);
checkForError(ret, "can not release command queue");
}
+    /**
+     * Two command queues are equal if they are of the same class and
+     * have the same ID, context and device.
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+        final CLCommandQueue that = (CLCommandQueue) obj;
+        // compared in the order ID, context, device; evaluation stops at the first mismatch
+        return this.ID == that.ID
+            && (this.context == that.context || (this.context != null && this.context.equals(that.context)))
+            && (this.device == that.device || (this.device != null && this.device.equals(that.device)));
+    }
+
+    /**
+     * Hash code built from ID, context and device, the same fields equals compares.
+     */
+    @Override
+    public int hashCode() {
+        final int idHash = (int) (this.ID ^ (this.ID >>> 32));
+        final int contextHash = this.context == null ? 0 : this.context.hashCode();
+        final int deviceHash = this.device == null ? 0 : this.device.hashCode();
+
+        int result = 89 * 3 + idHash;
+        result = 89 * result + contextHash;
+        return 89 * result + deviceHash;
+    }
+
}
diff --git a/src/com/mbien/opencl/CLContext.java b/src/com/mbien/opencl/CLContext.java
index 76fbc2ee..7eaada8c 100644
--- a/src/com/mbien/opencl/CLContext.java
+++ b/src/com/mbien/opencl/CLContext.java
@@ -1,5 +1,6 @@
package com.mbien.opencl;
+import com.mbien.opencl.CLBuffer.MEM;
import com.sun.gluegen.runtime.BufferFactory;
import java.io.BufferedReader;
import java.io.IOException;
@@ -99,14 +100,27 @@ public final class CLContext {
return createProgram(sb.toString());
}
- public CLBuffer createBuffer(int flags, ByteBuffer directBuffer) {
- CLBuffer buffer = new CLBuffer(this, flags, directBuffer);
- buffers.add(buffer);
- return buffer;
+    /**
+     * Creates a CLBuffer for the given direct buffer using the specified flags.
+     * Passing no flags creates a MEM.READ_WRITE buffer.
+     */
+    public CLBuffer createBuffer(ByteBuffer directBuffer, MEM... flags) {
+        final int clFlags = MEM.flagsToInt(flags);
+        return createBuffer(directBuffer, clFlags);
+    }
+ /**
+ * Creates a CLBuffer with the specified flags. No flags creates a MEM.READ_WRITE buffer.
+ */
+ public CLBuffer createBuffer(int size, MEM... flags) {
+ return createBuffer(size, MEM.flagsToInt(flags));
}
- public CLBuffer createBuffer(int flags, int size) {
- return createBuffer(flags, BufferFactory.newDirectByteBuffer(size));
+ public CLBuffer createBuffer(int size, int flags) {
+ return createBuffer(BufferFactory.newDirectByteBuffer(size), flags);
+ }
+
+ public CLBuffer createBuffer(ByteBuffer directBuffer, int flags) {
+ CLBuffer buffer = new CLBuffer(this, directBuffer, flags);
+ buffers.add(buffer);
+ return buffer;
}
CLCommandQueue createCommandQueue(CLDevice device, long properties) {
@@ -123,15 +137,15 @@ public final class CLContext {
return queue;
}
- void programReleased(CLProgram program) {
+ void onProgramReleased(CLProgram program) {
programs.remove(program);
}
- void bufferReleased(CLBuffer buffer) {
+ void onBufferReleased(CLBuffer buffer) {
buffers.remove(buffer);
}
- void commandQueueReleased(CLDevice device, CLCommandQueue queue) {
+ void onCommandQueueReleased(CLDevice device, CLCommandQueue queue) {
List<CLCommandQueue> list = queuesMap.get(device);
list.remove(queue);
// remove empty lists from map
@@ -173,39 +187,28 @@ public final class CLContext {
/**
* Gets the device with maximal FLOPS from this context.
*/
- /*
public CLDevice getMaxFlopsDevice() {
- long[] longBuffer = new long[1];
-// ByteBuffer bb = ByteBuffer.allocate(8);
-// bb.order(ByteOrder.nativeOrder());
+ CLDevice[] clDevices = getCLDevices();
+ CLDevice maxFLOPSDevice = null;
- int ret = cl.clGetContextInfo(contextID, CL.CL_CONTEXT_DEVICES, 0, null, longBuffer, 0);
- if(CL.CL_SUCCESS != ret)
- throw new CLException(ret, "can not receive context info");
+ int maxflops = -1;
- System.out.println("#devices: "+longBuffer[0]);
+ for (int i = 0; i < clDevices.length; i++) {
- long[] deviceIDs = new long[(int)longBuffer[0]];
- ret = cl.clGetContextInfo(contextID, CL.CL_CONTEXT_DEVICES, 0, null, deviceIDs, 0);
+ CLDevice device = clDevices[i];
+ int maxComputeUnits = device.getMaxComputeUnits();
+ int maxClockFrequency = device.getMaxClockFrequency();
+ int flops = maxComputeUnits*maxClockFrequency;
- if(CL.CL_SUCCESS != ret)
- throw new CLException(ret, "can not receive context info");
-
- for (int i = 0; i < deviceIDs.length; i++) {
- long l = deviceIDs[i];
- System.out.println("device id"+l);
+ if(flops > maxflops) {
+ maxflops = flops;
+ maxFLOPSDevice = device;
+ }
}
- // get the list of GPU devices associated with context
-// ciErrNum = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &dataBytes);
-// cl_device_id *cdDevices = (cl_device_id *)malloc(dataBytes);
-// ciErrNum |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, dataBytes, cdDevices, NULL);
-// shrCheckError(ciErrNum, CL_SUCCESS);
-
- return null;
+ return maxFLOPSDevice;
}
-*/
/**
* Returns all devices associated with this CLContext.
@@ -214,7 +217,7 @@ public final class CLContext {
if(devices == null) {
- int sizeofDeviceID = 8; // TODO doublechek deviceID size on 32 bit systems
+ int sizeofDeviceID = 8; // TODO doublecheck deviceID size on 32 bit systems
long[] longBuffer = new long[1];
@@ -229,7 +232,7 @@ public final class CLContext {
devices = new CLDevice[deviceIDs.capacity()/sizeofDeviceID];
for (int i = 0; i < devices.length; i++)
- devices[i] = new CLDevice(this, deviceIDs.getLong()); // TODO doublechek deviceID size on 32 bit systems
+ devices[i] = new CLDevice(this, deviceIDs.getLong()); // TODO doublecheck deviceID size on 32 bit systems
}
diff --git a/src/com/mbien/opencl/CLKernel.java b/src/com/mbien/opencl/CLKernel.java
index 9f184ce4..dcf00c9a 100644
--- a/src/com/mbien/opencl/CLKernel.java
+++ b/src/com/mbien/opencl/CLKernel.java
@@ -1,6 +1,7 @@
package com.mbien.opencl;
import com.sun.gluegen.runtime.BufferFactory;
+import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import static com.mbien.opencl.CLException.*;
@@ -36,25 +37,47 @@ public class CLKernel {
}
- public CLKernel setArg(int argumentIndex, int argumentSize, CLBuffer value) {
- int ret = cl.clSetKernelArg(ID, argumentIndex, argumentSize, wrapLong(value.ID));
+ public CLKernel setArg(int argumentIndex, CLBuffer value) {
+ int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value.ID));
checkForError(ret, "error on clSetKernelArg");
return this;
}
- public CLKernel setArg(int argumentIndex, int argumentSize, long value) {
- int ret = cl.clSetKernelArg(ID, argumentIndex, argumentSize, wrapLong(value));
+ public CLKernel setArg(int argumentIndex, int value) {
+ int ret = cl.clSetKernelArg(ID, argumentIndex, 4, wrap(value));
checkForError(ret, "error on clSetKernelArg");
return this;
}
- private final ByteBuffer wrapLong(long value) {
- return (ByteBuffer) BufferFactory.newDirectByteBuffer(8).putLong(value).rewind();
+ public CLKernel setArg(int argumentIndex, long value) {
+ int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value));
+ checkForError(ret, "error on clSetKernelArg");
+ return this;
+ }
+
+    /**
+     * Sets the kernel argument at the given index to a 4 byte float value.
+     *
+     * @param argumentIndex index of the kernel argument.
+     * @param value the float value to pass to the kernel.
+     * @return this kernel, to allow call chaining.
+     */
+    public CLKernel setArg(int argumentIndex, float value) {
+        // Encode the float into its own 4 byte buffer. The visible wrap helpers take
+        // long and double only, so wrap(value) would widen the float to a double and
+        // hand over the first 4 bytes of an 8 byte double encoding, which is not the
+        // bit pattern of the float.
+        Buffer payload = BufferFactory.newDirectByteBuffer(4).putFloat(value).rewind();
+        int ret = cl.clSetKernelArg(ID, argumentIndex, 4, payload);
+        checkForError(ret, "error on clSetKernelArg");
+        return this;
+    }
+
+    /**
+     * Sets the kernel argument at the given index to an 8 byte double value.
+     *
+     * @return this kernel, to allow call chaining.
+     */
+    public CLKernel setArg(int argumentIndex, double value) {
+        final Buffer payload = wrap(value);
+        checkForError(cl.clSetKernelArg(ID, argumentIndex, 8, payload), "error on clSetKernelArg");
+        return this;
+    }
+
+    /**
+     * Writes the value into a new 8 byte direct buffer and returns the rewound buffer.
+     */
+    private final Buffer wrap(double value) {
+        final ByteBuffer bytes = BufferFactory.newDirectByteBuffer(8);
+        bytes.putDouble(value);
+        return bytes.rewind();
+    }
+
+ private final Buffer wrap(long value) {
+ return BufferFactory.newDirectByteBuffer(8).putLong(value).rewind();
}
public void release() {
int ret = cl.clReleaseKernel(ID);
- program.kernelReleased(this);
+ program.onKernelReleased(this);
checkForError(ret, "can not release kernel");
}
diff --git a/src/com/mbien/opencl/CLProgram.java b/src/com/mbien/opencl/CLProgram.java
index 020de17c..566ae6b2 100644
--- a/src/com/mbien/opencl/CLProgram.java
+++ b/src/com/mbien/opencl/CLProgram.java
@@ -123,7 +123,7 @@ public class CLProgram {
return Collections.unmodifiableMap(kernels);
}
- void kernelReleased(CLKernel kernel) {
+ void onKernelReleased(CLKernel kernel) {
this.kernels.remove(kernel.name);
}
@@ -140,7 +140,7 @@ public class CLProgram {
}
int ret = cl.clReleaseProgram(ID);
- context.programReleased(this);
+ context.onProgramReleased(this);
checkForError(ret, "can not release program");
}
@@ -184,7 +184,35 @@ public class CLProgram {
return getProgramInfoString(CL.CL_PROGRAM_SOURCE);
}
- // TODO binaries, serialization, program build options
+    /**
+     * Queries the binaries of this program and returns them keyed by device.
+     * The i-th queried binary is mapped to the i-th device of getCLDevices()
+     * (assumes both use the same device order -- TODO confirm).
+     *
+     * @return a new map from device to the bytes of its program binary.
+     */
+    public Map<CLDevice, byte[]> getBinaries() {
+
+        CLDevice[] devices = getCLDevices();
+
+        // one 8 byte size entry per device
+        ByteBuffer sizes = ByteBuffer.allocate(8*devices.length).order(ByteOrder.nativeOrder());
+        int ret = cl.clGetProgramInfo(ID, CL.CL_PROGRAM_BINARY_SIZES, sizes.capacity(), sizes, null, 0);
+        checkForError(ret, "on clGetProgramInfo");
+
+        int binarySize = 0;
+        while(sizes.remaining() != 0)
+            binarySize += (int)sizes.getLong();
+
+        // The summing loop consumed the whole buffer. Reset the position, otherwise
+        // the per-device reads below throw a BufferUnderflowException.
+        sizes.rewind();
+
+        // NOTE(review): the OpenCL specification describes the CL_PROGRAM_BINARIES value as an
+        // array of per-device pointers, not as one contiguous byte buffer; this may be the reason
+        // for the crash noted below -- confirm against the clGetProgramInfo documentation.
+        ByteBuffer binaries = ByteBuffer.allocate(binarySize).order(ByteOrder.nativeOrder());
+        ret = cl.clGetProgramInfo(ID, CL.CL_PROGRAM_BINARIES, binaries.capacity(), binaries, null, 0); // crash, driver bug?
+        checkForError(ret, "on clGetProgramInfo");
+
+        Map<CLDevice, byte[]> map = new HashMap<CLDevice, byte[]>();
+
+        // slice the concatenated binaries into one array per device
+        for (int i = 0; i < devices.length; i++) {
+            byte[] bytes = new byte[(int)sizes.getLong()];
+            binaries.get(bytes);
+            map.put(devices[i], bytes);
+        }
+
+        return map;
+    }
+
+
+ // TODO serialization, program build options
private final String getBuildInfoString(long device, int flag) {
diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java
index b9643e9b..8ae61b2a 100644
--- a/test/com/mbien/opencl/HighLevelBindingTest.java
+++ b/test/com/mbien/opencl/HighLevelBindingTest.java
@@ -1,5 +1,6 @@
package com.mbien.opencl;
+import com.mbien.opencl.CLBuffer.MEM;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;
@@ -87,6 +88,9 @@ public class HighLevelBindingTest {
assertFalse(source.trim().isEmpty());
// out.println("source:\n"+source);
+// Map<CLDevice, byte[]> binaries = program.getBinaries();
+// assertFalse(binaries.isEmpty());
+
int elementCount = 11444777; // Length of float arrays to process (odd # for illustration)
int localWorkSize = 256; // set and log Global and Local work size dimensions
int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize
@@ -100,9 +104,9 @@ public class HighLevelBindingTest {
fillBuffer(srcA, 23456);
fillBuffer(srcB, 46987);
- CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, srcA);
- CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB);
- CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest);
+ CLBuffer clBufferA = context.createBuffer(srcA, MEM.READ_ONLY);
+ CLBuffer clBufferB = context.createBuffer(srcB, MEM.READ_ONLY);
+ CLBuffer clBufferC = context.createBuffer(dest, MEM.WRITE_ONLY);
Map<String, CLKernel> kernels = program.getCLKernels();
for (CLKernel kernel : kernels.values()) {
@@ -114,10 +118,10 @@ public class HighLevelBindingTest {
CLKernel vectorAddKernel = kernels.get("VectorAddGM");
- vectorAddKernel.setArg(0, SIZEOF_LONG, clBufferA)
- .setArg(1, SIZEOF_LONG, clBufferB)
- .setArg(2, SIZEOF_LONG, clBufferC)
- .setArg(3, SIZEOF_INT, elementCount);
+ vectorAddKernel.setArg(0, clBufferA)
+ .setArg(1, clBufferB)
+ .setArg(2, clBufferC)
+ .setArg(3, elementCount);
CLCommandQueue queue = programDevices[0].createCommandQueue();
@@ -125,7 +129,8 @@ public class HighLevelBindingTest {
queue.putWriteBuffer(clBufferA, false)
.putWriteBuffer(clBufferB, false)
.putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize })
- .putReadBuffer(clBufferC, true).release();
+ .putReadBuffer(clBufferC, true)
+ .finish().release();
out.println("a+b=c result snapshot: ");
for(int i = 0; i < 10; i++)
@@ -164,8 +169,8 @@ public class HighLevelBindingTest {
CLContext context = CLContext.create();
// the CL.MEM_* flag is probably completly irrelevant in our case since we do not use a kernel in this test
- CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, elements*SIZEOF_INT);
- CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, elements*SIZEOF_INT);
+ CLBuffer clBufferA = context.createBuffer(elements*SIZEOF_INT, MEM.READ_ONLY);
+ CLBuffer clBufferB = context.createBuffer(elements*SIZEOF_INT, MEM.READ_ONLY);
// fill only first read buffer -> we will copy the payload to the second later.
fillBuffer(clBufferA.buffer, 12345);
@@ -175,7 +180,8 @@ public class HighLevelBindingTest {
// asynchronous write of data to GPU device, blocking read later to get the computed results back.
queue.putWriteBuffer(clBufferA, false) // write A
.putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B
- .putReadBuffer(clBufferB, true); // read B
+ .putReadBuffer(clBufferB, true) // read B
+ .finish();
context.release();