diff options
author | Michael Bien <[email protected]> | 2009-10-22 01:26:53 +0200 |
---|---|---|
committer | Michael Bien <[email protected]> | 2009-10-22 01:26:53 +0200 |
commit | b3881a0924ecbe17cf27cededeae8df40b2d6933 (patch) | |
tree | a2ae7930eca5d771ab89fddd1a9e62802b6ffac9 | |
parent | fe1e2739bf7596bc488de977166603edd18c41fb (diff) |
api cleanup and refactoring.
-rw-r--r-- | src/com/mbien/opencl/CLBuffer.java | 84 | ||||
-rw-r--r-- | src/com/mbien/opencl/CLCommandQueue.java | 49 | ||||
-rw-r--r-- | src/com/mbien/opencl/CLContext.java | 73 | ||||
-rw-r--r-- | src/com/mbien/opencl/CLKernel.java | 37 | ||||
-rw-r--r-- | src/com/mbien/opencl/CLProgram.java | 34 | ||||
-rw-r--r-- | test/com/mbien/opencl/HighLevelBindingTest.java | 28 |
6 files changed, 242 insertions, 63 deletions
diff --git a/src/com/mbien/opencl/CLBuffer.java b/src/com/mbien/opencl/CLBuffer.java index f9f3a239..7dcd2928 100644 --- a/src/com/mbien/opencl/CLBuffer.java +++ b/src/com/mbien/opencl/CLBuffer.java @@ -9,13 +9,93 @@ import static com.mbien.opencl.CLException.*; */ public class CLBuffer { + public enum MEM { + + /** + * This flag specifies that the memory object will be read and + * written by a kernel. + */ + READ_WRITE(CL.CL_MEM_READ_WRITE), + + /** + * This flags specifies that the memory object will be written + * but not read by a kernel. + * Reading from a buffer or image object created with WRITE_ONLY + * inside a kernel is undefined. + */ + WRITE_ONLY(CL.CL_MEM_WRITE_ONLY), + + /** + * This flag specifies that the memory object is a read-only memory + * object when used inside a kernel. Writing to a buffer or image object + * created withREAD_ONLY inside a kernel is undefined. + */ + READ_ONLY(CL.CL_MEM_READ_ONLY); + + /** + * If specified, it indicates that the application wants the OpenCL + * implementation to use memory referenced by host_ptr as the storage + * bits for the memory object. OpenCL implementations are allowed + * to cache the buffer contents pointed to by host_ptr in device memory. + * This cached copy can be used when kernels are executed on a device. + */ +// USE_HOST_PTR(CL.CL_MEM_USE_HOST_PTR), + +// ALLOC_HOST_PTR(CL.CL_MEM_ALLOC_HOST_PTR), // this is the default in java world anyway + + /** + * If CL_MEM_COPY_HOST_PTR specified, it indicates that the application + * wants the OpenCL implementation to allocate memory for the memory object + * and copy the data from memory referenced by host_ptr.<br/> + * COPY_HOST_PTR and USE_HOST_PTR are mutually exclusive. + */ +// COPY_HOST_PTR(CL.CL_MEM_COPY_HOST_PTR); + + /** + * Value of wrapped OpenCL flag. + */ + public final int CL_FLAG; + + private MEM(int CL_TYPE) { + this.CL_FLAG = CL_TYPE; + } + + public static MEM valueOf(int bufferFlag) { + switch(bufferFlag) { + case(CL.CL_MEM_READ_WRITE): + return READ_WRITE; + case(CL.CL_MEM_READ_ONLY): + return READ_ONLY; +// case(CL.CL_MEM_USE_HOST_PTR): +// return USE_HOST_PTR; +// case(CL.CL_MEM_ALLOC_HOST_PTR): +// return ALLOC_HOST_PTR; +// case(CL.CL_MEM_COPY_HOST_PTR): +// return COPY_HOST_PTR; + } + return null; + } + + static int flagsToInt(MEM[] flags) { + int clFlags = CL.CL_MEM_READ_WRITE; + if(flags != null) { + for (int i = 0; i < flags.length; i++) { + clFlags |= flags[i].CL_FLAG; + } + } + return clFlags; + } + + } + + public final ByteBuffer buffer; public final long ID; private final CLContext context; private final CL cl; - CLBuffer(CLContext context, int flags, ByteBuffer directBuffer) { + CLBuffer(CLContext context, ByteBuffer directBuffer, int flags) { if(!directBuffer.isDirect()) throw new IllegalArgumentException("buffer is not a direct buffer"); @@ -34,7 +114,7 @@ public class CLBuffer { public void release() { int ret = cl.clReleaseMemObject(ID); - context.bufferReleased(this); + context.onBufferReleased(this); checkForError(ret, "can not release mem object"); } diff --git a/src/com/mbien/opencl/CLCommandQueue.java b/src/com/mbien/opencl/CLCommandQueue.java index 7d1d09f9..5abc5cd5 100644 --- a/src/com/mbien/opencl/CLCommandQueue.java +++ b/src/com/mbien/opencl/CLCommandQueue.java @@ -123,10 +123,13 @@ public class CLCommandQueue { } */ -// public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long globalWorkOffset, long globalWorkSize, long localWorkSize) { -// return this.putNDRangeKernel(kernel, workDimension, -// new long[] {globalWorkOffset}, new long[] {globalWorkSize}, new long[] {localWorkSize}); -// } + public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long globalWorkOffset, long globalWorkSize, long localWorkSize) { + return this.putNDRangeKernel( + kernel, workDimension, + globalWorkOffset==0 ? null : new long[] {globalWorkOffset}, + globalWorkSize ==0 ? null : new long[] {globalWorkSize }, + localWorkSize ==0 ? null : new long[] {localWorkSize } ); + } public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long[] globalWorkOffset, long[] globalWorkSize, long[] localWorkSize) { @@ -145,11 +148,47 @@ public class CLCommandQueue { return this; } + public CLCommandQueue finish() { + int ret = cl.clFinish(ID); + checkForError(ret, "can not finish command queue"); + return this; + } + public void release() { int ret = cl.clReleaseCommandQueue(ID); - context.commandQueueReleased(device, this); + context.onCommandQueueReleased(device, this); checkForError(ret, "can not release command queue"); } + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final CLCommandQueue other = (CLCommandQueue) obj; + if (this.ID != other.ID) { + return false; + } + if (this.context != other.context && (this.context == null || !this.context.equals(other.context))) { + return false; + } + if (this.device != other.device && (this.device == null || !this.device.equals(other.device))) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 3; + hash = 89 * hash + (int) (this.ID ^ (this.ID >>> 32)); + hash = 89 * hash + (this.context != null ? this.context.hashCode() : 0); + hash = 89 * hash + (this.device != null ? this.device.hashCode() : 0); + return hash; + } + } diff --git a/src/com/mbien/opencl/CLContext.java b/src/com/mbien/opencl/CLContext.java index 76fbc2ee..7eaada8c 100644 --- a/src/com/mbien/opencl/CLContext.java +++ b/src/com/mbien/opencl/CLContext.java @@ -1,5 +1,6 @@ package com.mbien.opencl; +import com.mbien.opencl.CLBuffer.MEM; import com.sun.gluegen.runtime.BufferFactory; import java.io.BufferedReader; import java.io.IOException; @@ -99,14 +100,27 @@ public final class CLContext { return createProgram(sb.toString()); } - public CLBuffer createBuffer(int flags, ByteBuffer directBuffer) { - CLBuffer buffer = new CLBuffer(this, flags, directBuffer); - buffers.add(buffer); - return buffer; + /** + * Creates a CLBuffer with the specified flags. No flags creates a MEM.READ_WRITE buffer. + */ + public CLBuffer createBuffer(ByteBuffer directBuffer, MEM... flags) { + return createBuffer(directBuffer, MEM.flagsToInt(flags)); + } + /** + * Creates a CLBuffer with the specified flags. No flags creates a MEM.READ_WRITE buffer. + */ + public CLBuffer createBuffer(int size, MEM... flags) { + return createBuffer(size, MEM.flagsToInt(flags)); } - public CLBuffer createBuffer(int flags, int size) { - return createBuffer(flags, BufferFactory.newDirectByteBuffer(size)); + public CLBuffer createBuffer(int size, int flags) { + return createBuffer(BufferFactory.newDirectByteBuffer(size), flags); + } + + public CLBuffer createBuffer(ByteBuffer directBuffer, int flags) { + CLBuffer buffer = new CLBuffer(this, directBuffer, flags); + buffers.add(buffer); + return buffer; } CLCommandQueue createCommandQueue(CLDevice device, long properties) { @@ -123,15 +137,15 @@ public final class CLContext { return queue; } - void programReleased(CLProgram program) { + void onProgramReleased(CLProgram program) { programs.remove(program); } - void bufferReleased(CLBuffer buffer) { + void onBufferReleased(CLBuffer buffer) { buffers.remove(buffer); } - void commandQueueReleased(CLDevice device, CLCommandQueue queue) { + void onCommandQueueReleased(CLDevice device, CLCommandQueue queue) { List<CLCommandQueue> list = queuesMap.get(device); list.remove(queue); // remove empty lists from map @@ -173,39 +187,28 @@ public final class CLContext { /** * Gets the device with maximal FLOPS from this context. */ - /* public CLDevice getMaxFlopsDevice() { - long[] longBuffer = new long[1]; -// ByteBuffer bb = ByteBuffer.allocate(8); -// bb.order(ByteOrder.nativeOrder()); + CLDevice[] clDevices = getCLDevices(); + CLDevice maxFLOPSDevice = null; - int ret = cl.clGetContextInfo(contextID, CL.CL_CONTEXT_DEVICES, 0, null, longBuffer, 0); - if(CL.CL_SUCCESS != ret) - throw new CLException(ret, "can not receive context info"); + int maxflops = -1; - System.out.println("#devices: "+longBuffer[0]); + for (int i = 0; i < clDevices.length; i++) { - long[] deviceIDs = new long[(int)longBuffer[0]]; - ret = cl.clGetContextInfo(contextID, CL.CL_CONTEXT_DEVICES, 0, null, deviceIDs, 0); + CLDevice device = clDevices[i]; + int maxComputeUnits = device.getMaxComputeUnits(); + int maxClockFrequency = device.getMaxClockFrequency(); + int flops = maxComputeUnits*maxClockFrequency; - if(CL.CL_SUCCESS != ret) - throw new CLException(ret, "can not receive context info"); - - for (int i = 0; i < deviceIDs.length; i++) { - long l = deviceIDs[i]; - System.out.println("device id"+l); + if(flops > maxflops) { + maxflops = flops; + maxFLOPSDevice = device; + } } - // get the list of GPU devices associated with context -// ciErrNum = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &dataBytes); -// cl_device_id *cdDevices = (cl_device_id *)malloc(dataBytes); -// ciErrNum |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, dataBytes, cdDevices, NULL); -// shrCheckError(ciErrNum, CL_SUCCESS); - - return null; + return maxFLOPSDevice; } -*/ /** * Returns all devices associated with this CLContext. @@ -214,7 +217,7 @@ public final class CLContext { if(devices == null) { - int sizeofDeviceID = 8; // TODO doublechek deviceID size on 32 bit systems + int sizeofDeviceID = 8; // TODO doublecheck deviceID size on 32 bit systems long[] longBuffer = new long[1]; @@ -229,7 +232,7 @@ public final class CLContext { devices = new CLDevice[deviceIDs.capacity()/sizeofDeviceID]; for (int i = 0; i < devices.length; i++) - devices[i] = new CLDevice(this, deviceIDs.getLong()); // TODO doublechek deviceID size on 32 bit systems + devices[i] = new CLDevice(this, deviceIDs.getLong()); // TODO doublecheck deviceID size on 32 bit systems } diff --git a/src/com/mbien/opencl/CLKernel.java b/src/com/mbien/opencl/CLKernel.java index 9f184ce4..dcf00c9a 100644 --- a/src/com/mbien/opencl/CLKernel.java +++ b/src/com/mbien/opencl/CLKernel.java @@ -1,6 +1,7 @@ package com.mbien.opencl; import com.sun.gluegen.runtime.BufferFactory; +import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.ByteOrder; import static com.mbien.opencl.CLException.*; @@ -36,25 +37,47 @@ public class CLKernel { } - public CLKernel setArg(int argumentIndex, int argumentSize, CLBuffer value) { - int ret = cl.clSetKernelArg(ID, argumentIndex, argumentSize, wrapLong(value.ID)); + public CLKernel setArg(int argumentIndex, CLBuffer value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value.ID)); checkForError(ret, "error on clSetKernelArg"); return this; } - public CLKernel setArg(int argumentIndex, int argumentSize, long value) { - int ret = cl.clSetKernelArg(ID, argumentIndex, argumentSize, wrapLong(value)); + public CLKernel setArg(int argumentIndex, int value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 4, wrap(value)); checkForError(ret, "error on clSetKernelArg"); return this; } - private final ByteBuffer wrapLong(long value) { - return (ByteBuffer) BufferFactory.newDirectByteBuffer(8).putLong(value).rewind(); + public CLKernel setArg(int argumentIndex, long value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value)); + checkForError(ret, "error on clSetKernelArg"); + return this; + } + + public CLKernel setArg(int argumentIndex, float value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 4, wrap(value)); + checkForError(ret, "error on clSetKernelArg"); + return this; + } + + public CLKernel setArg(int argumentIndex, double value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value)); + checkForError(ret, "error on clSetKernelArg"); + return this; + } + + private final Buffer wrap(double value) { + return BufferFactory.newDirectByteBuffer(8).putDouble(value).rewind(); + } + + private final Buffer wrap(long value) { + return BufferFactory.newDirectByteBuffer(8).putLong(value).rewind(); } public void release() { int ret = cl.clReleaseKernel(ID); - program.kernelReleased(this); + program.onKernelReleased(this); checkForError(ret, "can not release kernel"); } diff --git a/src/com/mbien/opencl/CLProgram.java b/src/com/mbien/opencl/CLProgram.java index 020de17c..566ae6b2 100644 --- a/src/com/mbien/opencl/CLProgram.java +++ b/src/com/mbien/opencl/CLProgram.java @@ -123,7 +123,7 @@ public class CLProgram { return Collections.unmodifiableMap(kernels); } - void kernelReleased(CLKernel kernel) { + void onKernelReleased(CLKernel kernel) { this.kernels.remove(kernel.name); } @@ -140,7 +140,7 @@ public class CLProgram { } int ret = cl.clReleaseProgram(ID); - context.programReleased(this); + context.onProgramReleased(this); checkForError(ret, "can not release program"); } @@ -184,7 +184,35 @@ public class CLProgram { return getProgramInfoString(CL.CL_PROGRAM_SOURCE); } - // TODO binaries, serialization, program build options + public Map<CLDevice, byte[]> getBinaries() { + + CLDevice[] devices = getCLDevices(); + + ByteBuffer sizes = ByteBuffer.allocate(8*devices.length).order(ByteOrder.nativeOrder()); + int ret = cl.clGetProgramInfo(ID, CL.CL_PROGRAM_BINARY_SIZES, sizes.capacity(), sizes, null, 0); + checkForError(ret, "on clGetProgramInfo"); + + int binarySize = 0; + while(sizes.remaining() != 0) + binarySize += (int)sizes.getLong(); + + ByteBuffer binaries = ByteBuffer.allocate(binarySize).order(ByteOrder.nativeOrder()); + ret = cl.clGetProgramInfo(ID, CL.CL_PROGRAM_BINARIES, binaries.capacity(), binaries, null, 0); // crash, driver bug? + checkForError(ret, "on clGetProgramInfo"); + + Map<CLDevice, byte[]> map = new HashMap<CLDevice, byte[]>(); + + for (int i = 0; i < devices.length; i++) { + byte[] bytes = new byte[(int)sizes.getLong()]; + binaries.get(bytes); + map.put(devices[i], bytes); + } + + return map; + } + + + // TODO serialization, program build options private final String getBuildInfoString(long device, int flag) { diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java index b9643e9b..8ae61b2a 100644 --- a/test/com/mbien/opencl/HighLevelBindingTest.java +++ b/test/com/mbien/opencl/HighLevelBindingTest.java @@ -1,5 +1,6 @@ package com.mbien.opencl; +import com.mbien.opencl.CLBuffer.MEM; import java.io.IOException; import java.nio.ByteBuffer; import java.util.Map; @@ -87,6 +88,9 @@ public class HighLevelBindingTest { assertFalse(source.trim().isEmpty()); // out.println("source:\n"+source); +// Map<CLDevice, byte[]> binaries = program.getBinaries(); +// assertFalse(binaries.isEmpty()); + int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) int localWorkSize = 256; // set and log Global and Local work size dimensions int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize @@ -100,9 +104,9 @@ public class HighLevelBindingTest { fillBuffer(srcA, 23456); fillBuffer(srcB, 46987); - CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, srcA); - CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB); - CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest); + CLBuffer clBufferA = context.createBuffer(srcA, MEM.READ_ONLY); + CLBuffer clBufferB = context.createBuffer(srcB, MEM.READ_ONLY); + CLBuffer clBufferC = context.createBuffer(dest, MEM.WRITE_ONLY); Map<String, CLKernel> kernels = program.getCLKernels(); for (CLKernel kernel : kernels.values()) { @@ -114,10 +118,10 @@ public class HighLevelBindingTest { CLKernel vectorAddKernel = kernels.get("VectorAddGM"); - vectorAddKernel.setArg(0, SIZEOF_LONG, clBufferA) - .setArg(1, SIZEOF_LONG, clBufferB) - .setArg(2, SIZEOF_LONG, clBufferC) - .setArg(3, SIZEOF_INT, elementCount); + vectorAddKernel.setArg(0, clBufferA) + .setArg(1, clBufferB) + .setArg(2, clBufferC) + .setArg(3, elementCount); CLCommandQueue queue = programDevices[0].createCommandQueue(); @@ -125,7 +129,8 @@ public class HighLevelBindingTest { queue.putWriteBuffer(clBufferA, false) .putWriteBuffer(clBufferB, false) .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize }) - .putReadBuffer(clBufferC, true).release(); + .putReadBuffer(clBufferC, true) + .finish().release(); out.println("a+b=c result snapshot: "); for(int i = 0; i < 10; i++) @@ -164,8 +169,8 @@ public class HighLevelBindingTest { CLContext context = CLContext.create(); // the CL.MEM_* flag is probably completly irrelevant in our case since we do not use a kernel in this test - CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, elements*SIZEOF_INT); - CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, elements*SIZEOF_INT); + CLBuffer clBufferA = context.createBuffer(elements*SIZEOF_INT, MEM.READ_ONLY); + CLBuffer clBufferB = context.createBuffer(elements*SIZEOF_INT, MEM.READ_ONLY); // fill only first read buffer -> we will copy the payload to the second later. fillBuffer(clBufferA.buffer, 12345); @@ -175,7 +180,8 @@ public class HighLevelBindingTest { // asynchronous write of data to GPU device, blocking read later to get the computed results back. queue.putWriteBuffer(clBufferA, false) // write A .putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B - .putReadBuffer(clBufferB, true); // read B + .putReadBuffer(clBufferB, true) // read B + .finish(); context.release(); |