From b3881a0924ecbe17cf27cededeae8df40b2d6933 Mon Sep 17 00:00:00 2001 From: Michael Bien Date: Thu, 22 Oct 2009 01:26:53 +0200 Subject: api cleanup and refactoring. --- src/com/mbien/opencl/CLBuffer.java | 84 +++++++++++++++++++++++++++++++- src/com/mbien/opencl/CLCommandQueue.java | 49 +++++++++++++++++-- src/com/mbien/opencl/CLContext.java | 73 ++++++++++++++------------- src/com/mbien/opencl/CLKernel.java | 37 +++++++++++--- src/com/mbien/opencl/CLProgram.java | 34 +++++++++++-- 5 files changed, 225 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/com/mbien/opencl/CLBuffer.java b/src/com/mbien/opencl/CLBuffer.java index f9f3a239..7dcd2928 100644 --- a/src/com/mbien/opencl/CLBuffer.java +++ b/src/com/mbien/opencl/CLBuffer.java @@ -9,13 +9,93 @@ import static com.mbien.opencl.CLException.*; */ public class CLBuffer { + public enum MEM { + + /** + * This flag specifies that the memory object will be read and + * written by a kernel. + */ + READ_WRITE(CL.CL_MEM_READ_WRITE), + + /** + * This flag specifies that the memory object will be written + * but not read by a kernel. + * Reading from a buffer or image object created with WRITE_ONLY + * inside a kernel is undefined. + */ + WRITE_ONLY(CL.CL_MEM_WRITE_ONLY), + + /** + * This flag specifies that the memory object is a read-only memory + * object when used inside a kernel. Writing to a buffer or image object + * created with READ_ONLY inside a kernel is undefined. + */ + READ_ONLY(CL.CL_MEM_READ_ONLY); + + /** + * If specified, it indicates that the application wants the OpenCL + * implementation to use memory referenced by host_ptr as the storage + * bits for the memory object. OpenCL implementations are allowed + * to cache the buffer contents pointed to by host_ptr in device memory. + * This cached copy can be used when kernels are executed on a device. 
+ */ +// USE_HOST_PTR(CL.CL_MEM_USE_HOST_PTR), + +// ALLOC_HOST_PTR(CL.CL_MEM_ALLOC_HOST_PTR), // this is the default in java world anyway + + /** + * If CL_MEM_COPY_HOST_PTR is specified, it indicates that the application + * wants the OpenCL implementation to allocate memory for the memory object + * and copy the data from memory referenced by host_ptr.
+ * COPY_HOST_PTR and USE_HOST_PTR are mutually exclusive. + */ +// COPY_HOST_PTR(CL.CL_MEM_COPY_HOST_PTR); + + /** + * Value of wrapped OpenCL flag. + */ + public final int CL_FLAG; + + private MEM(int CL_TYPE) { + this.CL_FLAG = CL_TYPE; + } + + public static MEM valueOf(int bufferFlag) { + switch(bufferFlag) { + case(CL.CL_MEM_READ_WRITE): + return READ_WRITE; + case(CL.CL_MEM_READ_ONLY): + return READ_ONLY; +// case(CL.CL_MEM_USE_HOST_PTR): +// return USE_HOST_PTR; +// case(CL.CL_MEM_ALLOC_HOST_PTR): +// return ALLOC_HOST_PTR; +// case(CL.CL_MEM_COPY_HOST_PTR): +// return COPY_HOST_PTR; + } + return null; + } + + static int flagsToInt(MEM[] flags) { + int clFlags = CL.CL_MEM_READ_WRITE; + if(flags != null) { + for (int i = 0; i < flags.length; i++) { + clFlags |= flags[i].CL_FLAG; + } + } + return clFlags; + } + + } + + public final ByteBuffer buffer; public final long ID; private final CLContext context; private final CL cl; - CLBuffer(CLContext context, int flags, ByteBuffer directBuffer) { + CLBuffer(CLContext context, ByteBuffer directBuffer, int flags) { if(!directBuffer.isDirect()) throw new IllegalArgumentException("buffer is not a direct buffer"); @@ -34,7 +114,7 @@ public class CLBuffer { public void release() { int ret = cl.clReleaseMemObject(ID); - context.bufferReleased(this); + context.onBufferReleased(this); checkForError(ret, "can not release mem object"); } diff --git a/src/com/mbien/opencl/CLCommandQueue.java b/src/com/mbien/opencl/CLCommandQueue.java index 7d1d09f9..5abc5cd5 100644 --- a/src/com/mbien/opencl/CLCommandQueue.java +++ b/src/com/mbien/opencl/CLCommandQueue.java @@ -123,10 +123,13 @@ public class CLCommandQueue { } */ -// public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long globalWorkOffset, long globalWorkSize, long localWorkSize) { -// return this.putNDRangeKernel(kernel, workDimension, -// new long[] {globalWorkOffset}, new long[] {globalWorkSize}, new long[] {localWorkSize}); -// } + public 
CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long globalWorkOffset, long globalWorkSize, long localWorkSize) { + return this.putNDRangeKernel( + kernel, workDimension, + globalWorkOffset==0 ? null : new long[] {globalWorkOffset}, + globalWorkSize ==0 ? null : new long[] {globalWorkSize }, + localWorkSize ==0 ? null : new long[] {localWorkSize } ); + } public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long[] globalWorkOffset, long[] globalWorkSize, long[] localWorkSize) { @@ -145,11 +148,47 @@ public class CLCommandQueue { return this; } + public CLCommandQueue finish() { + int ret = cl.clFinish(ID); + checkForError(ret, "can not finish command queue"); + return this; + } + public void release() { int ret = cl.clReleaseCommandQueue(ID); - context.commandQueueReleased(device, this); + context.onCommandQueueReleased(device, this); checkForError(ret, "can not release command queue"); } + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final CLCommandQueue other = (CLCommandQueue) obj; + if (this.ID != other.ID) { + return false; + } + if (this.context != other.context && (this.context == null || !this.context.equals(other.context))) { + return false; + } + if (this.device != other.device && (this.device == null || !this.device.equals(other.device))) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 3; + hash = 89 * hash + (int) (this.ID ^ (this.ID >>> 32)); + hash = 89 * hash + (this.context != null ? this.context.hashCode() : 0); + hash = 89 * hash + (this.device != null ? 
this.device.hashCode() : 0); + return hash; + } + } diff --git a/src/com/mbien/opencl/CLContext.java b/src/com/mbien/opencl/CLContext.java index 76fbc2ee..7eaada8c 100644 --- a/src/com/mbien/opencl/CLContext.java +++ b/src/com/mbien/opencl/CLContext.java @@ -1,5 +1,6 @@ package com.mbien.opencl; +import com.mbien.opencl.CLBuffer.MEM; import com.sun.gluegen.runtime.BufferFactory; import java.io.BufferedReader; import java.io.IOException; @@ -99,14 +100,27 @@ public final class CLContext { return createProgram(sb.toString()); } - public CLBuffer createBuffer(int flags, ByteBuffer directBuffer) { - CLBuffer buffer = new CLBuffer(this, flags, directBuffer); - buffers.add(buffer); - return buffer; + /** + * Creates a CLBuffer with the specified flags. No flags creates a MEM.READ_WRITE buffer. + */ + public CLBuffer createBuffer(ByteBuffer directBuffer, MEM... flags) { + return createBuffer(directBuffer, MEM.flagsToInt(flags)); + } + /** + * Creates a CLBuffer with the specified flags. No flags creates a MEM.READ_WRITE buffer. + */ + public CLBuffer createBuffer(int size, MEM... 
flags) { + return createBuffer(size, MEM.flagsToInt(flags)); } - public CLBuffer createBuffer(int flags, int size) { - return createBuffer(flags, BufferFactory.newDirectByteBuffer(size)); + public CLBuffer createBuffer(int size, int flags) { + return createBuffer(BufferFactory.newDirectByteBuffer(size), flags); + } + + public CLBuffer createBuffer(ByteBuffer directBuffer, int flags) { + CLBuffer buffer = new CLBuffer(this, directBuffer, flags); + buffers.add(buffer); + return buffer; } CLCommandQueue createCommandQueue(CLDevice device, long properties) { @@ -123,15 +137,15 @@ public final class CLContext { return queue; } - void programReleased(CLProgram program) { + void onProgramReleased(CLProgram program) { programs.remove(program); } - void bufferReleased(CLBuffer buffer) { + void onBufferReleased(CLBuffer buffer) { buffers.remove(buffer); } - void commandQueueReleased(CLDevice device, CLCommandQueue queue) { + void onCommandQueueReleased(CLDevice device, CLCommandQueue queue) { List list = queuesMap.get(device); list.remove(queue); // remove empty lists from map @@ -173,39 +187,28 @@ public final class CLContext { /** * Gets the device with maximal FLOPS from this context. 
*/ - /* public CLDevice getMaxFlopsDevice() { - long[] longBuffer = new long[1]; -// ByteBuffer bb = ByteBuffer.allocate(8); -// bb.order(ByteOrder.nativeOrder()); + CLDevice[] clDevices = getCLDevices(); + CLDevice maxFLOPSDevice = null; - int ret = cl.clGetContextInfo(contextID, CL.CL_CONTEXT_DEVICES, 0, null, longBuffer, 0); - if(CL.CL_SUCCESS != ret) - throw new CLException(ret, "can not receive context info"); + int maxflops = -1; - System.out.println("#devices: "+longBuffer[0]); + for (int i = 0; i < clDevices.length; i++) { - long[] deviceIDs = new long[(int)longBuffer[0]]; - ret = cl.clGetContextInfo(contextID, CL.CL_CONTEXT_DEVICES, 0, null, deviceIDs, 0); + CLDevice device = clDevices[i]; + int maxComputeUnits = device.getMaxComputeUnits(); + int maxClockFrequency = device.getMaxClockFrequency(); + int flops = maxComputeUnits*maxClockFrequency; - if(CL.CL_SUCCESS != ret) - throw new CLException(ret, "can not receive context info"); - - for (int i = 0; i < deviceIDs.length; i++) { - long l = deviceIDs[i]; - System.out.println("device id"+l); + if(flops > maxflops) { + maxflops = flops; + maxFLOPSDevice = device; + } } - // get the list of GPU devices associated with context -// ciErrNum = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &dataBytes); -// cl_device_id *cdDevices = (cl_device_id *)malloc(dataBytes); -// ciErrNum |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, dataBytes, cdDevices, NULL); -// shrCheckError(ciErrNum, CL_SUCCESS); - - return null; + return maxFLOPSDevice; } -*/ /** * Returns all devices associated with this CLContext. 
@@ -214,7 +217,7 @@ public final class CLContext { if(devices == null) { - int sizeofDeviceID = 8; // TODO doublechek deviceID size on 32 bit systems + int sizeofDeviceID = 8; // TODO doublecheck deviceID size on 32 bit systems long[] longBuffer = new long[1]; @@ -229,7 +232,7 @@ public final class CLContext { devices = new CLDevice[deviceIDs.capacity()/sizeofDeviceID]; for (int i = 0; i < devices.length; i++) - devices[i] = new CLDevice(this, deviceIDs.getLong()); // TODO doublechek deviceID size on 32 bit systems + devices[i] = new CLDevice(this, deviceIDs.getLong()); // TODO doublecheck deviceID size on 32 bit systems } diff --git a/src/com/mbien/opencl/CLKernel.java b/src/com/mbien/opencl/CLKernel.java index 9f184ce4..dcf00c9a 100644 --- a/src/com/mbien/opencl/CLKernel.java +++ b/src/com/mbien/opencl/CLKernel.java @@ -1,6 +1,7 @@ package com.mbien.opencl; import com.sun.gluegen.runtime.BufferFactory; +import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.ByteOrder; import static com.mbien.opencl.CLException.*; @@ -36,25 +37,47 @@ public class CLKernel { } - public CLKernel setArg(int argumentIndex, int argumentSize, CLBuffer value) { - int ret = cl.clSetKernelArg(ID, argumentIndex, argumentSize, wrapLong(value.ID)); + public CLKernel setArg(int argumentIndex, CLBuffer value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value.ID)); checkForError(ret, "error on clSetKernelArg"); return this; } - public CLKernel setArg(int argumentIndex, int argumentSize, long value) { - int ret = cl.clSetKernelArg(ID, argumentIndex, argumentSize, wrapLong(value)); + public CLKernel setArg(int argumentIndex, int value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 4, wrap(value)); checkForError(ret, "error on clSetKernelArg"); return this; } - private final ByteBuffer wrapLong(long value) { - return (ByteBuffer) BufferFactory.newDirectByteBuffer(8).putLong(value).rewind(); + public CLKernel setArg(int argumentIndex, long value) { + int ret = 
cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value)); + checkForError(ret, "error on clSetKernelArg"); + return this; + } + + public CLKernel setArg(int argumentIndex, float value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 4, wrap(value)); + checkForError(ret, "error on clSetKernelArg"); + return this; + } + + public CLKernel setArg(int argumentIndex, double value) { + int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value)); + checkForError(ret, "error on clSetKernelArg"); + return this; + } + + private final Buffer wrap(double value) { + return BufferFactory.newDirectByteBuffer(8).putDouble(value).rewind(); + } + + private final Buffer wrap(long value) { + return BufferFactory.newDirectByteBuffer(8).putLong(value).rewind(); } public void release() { int ret = cl.clReleaseKernel(ID); - program.kernelReleased(this); + program.onKernelReleased(this); checkForError(ret, "can not release kernel"); } diff --git a/src/com/mbien/opencl/CLProgram.java b/src/com/mbien/opencl/CLProgram.java index 020de17c..566ae6b2 100644 --- a/src/com/mbien/opencl/CLProgram.java +++ b/src/com/mbien/opencl/CLProgram.java @@ -123,7 +123,7 @@ public class CLProgram { return Collections.unmodifiableMap(kernels); } - void kernelReleased(CLKernel kernel) { + void onKernelReleased(CLKernel kernel) { this.kernels.remove(kernel.name); } @@ -140,7 +140,7 @@ public class CLProgram { } int ret = cl.clReleaseProgram(ID); - context.programReleased(this); + context.onProgramReleased(this); checkForError(ret, "can not release program"); } @@ -184,7 +184,35 @@ public class CLProgram { return getProgramInfoString(CL.CL_PROGRAM_SOURCE); } - // TODO binaries, serialization, program build options + public Map getBinaries() { + + CLDevice[] devices = getCLDevices(); + + ByteBuffer sizes = ByteBuffer.allocate(8*devices.length).order(ByteOrder.nativeOrder()); + int ret = cl.clGetProgramInfo(ID, CL.CL_PROGRAM_BINARY_SIZES, sizes.capacity(), sizes, null, 0); + checkForError(ret, "on 
clGetProgramInfo"); + + int binarySize = 0; + while(sizes.remaining() != 0) + binarySize += (int)sizes.getLong(); + + ByteBuffer binaries = ByteBuffer.allocate(binarySize).order(ByteOrder.nativeOrder()); + ret = cl.clGetProgramInfo(ID, CL.CL_PROGRAM_BINARIES, binaries.capacity(), binaries, null, 0); // crash, driver bug? + checkForError(ret, "on clGetProgramInfo"); + + Map map = new HashMap(); + + for (int i = 0; i < devices.length; i++) { + byte[] bytes = new byte[(int)sizes.getLong()]; + binaries.get(bytes); + map.put(devices[i], bytes); + } + + return map; + } + + + // TODO serialization, program build options private final String getBuildInfoString(long device, int flag) { -- cgit v1.2.3