diff options
-rw-r--r-- | resources/clImplCustomCode.c | 8 |
-rw-r--r-- | resources/clImplCustomCode.java | 21 |
-rw-r--r-- | src/com/mbien/opencl/CLException.java | 2 |
-rw-r--r-- | test/com/mbien/opencl/JOCLTest.java | 110 |
4 files changed, 121 insertions, 20 deletions
diff --git a/resources/clImplCustomCode.c b/resources/clImplCustomCode.c index 9c87c24a..41e9f159 100644 --- a/resources/clImplCustomCode.c +++ b/resources/clImplCustomCode.c @@ -110,6 +110,14 @@ Java_com_mbien_opencl_impl_CLImpl_clBuildProgram0(JNIEnv *env, jobject _unused, if (deviceList != NULL) { _deviceListPtr = (void *) (((char*) (*env)->GetPrimitiveArrayCritical(env, deviceList, NULL)) + offset); } + +/* + printf("---------------------------------------------------------------------------\n"); + printf("deviceList: %d\n", _deviceListPtr); + printf("_strchars_options: %d\n", _strchars_options); + printf("deviceCount: %d\n", deviceCount); + printf("---------------------------------------------------------------------------\n"); +*/ // TODO payload, callback... _res = clBuildProgram((cl_program)program, (cl_uint)deviceCount, _deviceListPtr, _strchars_options, NULL, NULL); diff --git a/resources/clImplCustomCode.java b/resources/clImplCustomCode.java index 618b03c2..3c5e1f07 100644 --- a/resources/clImplCustomCode.java +++ b/resources/clImplCustomCode.java @@ -1,5 +1,6 @@ public long clCreateContext(IntBuffer properties, int offset1, long[] devices, CreateContextCallback cb, Object userData, IntBuffer errcode_ret, int offset2) { + throw new RuntimeException("not yet implemented, use clCreateContextFromType instead"); // return this.clCreateContext0(properties, offset1, devices, cb, null, errcode_ret, offset2); } @@ -7,10 +8,13 @@ public long clCreateContextFromType(IntBuffer properties, int offset1, long device_type, CreateContextCallback pfn_notify, Object userData, IntBuffer errcode_ret, int offset2) { + if(pfn_notify != null) throw new RuntimeException("asynchronous execution with callback is not yet implemented, pass null through this method to block until complete."); + if(userData != null) System.out.println("WARNING: userData not yet implemented... 
ignoring"); + return this.clCreateContextFromType0(properties, offset1, device_type, pfn_notify, null, errcode_ret, offset2); } private native long clCreateContextFromType0(IntBuffer properties, int size, long device_type, CreateContextCallback pfn_notify, Object userData, IntBuffer errcode_ret, int size2); @@ -18,11 +22,18 @@ /** Interface to C language function: <br> <code> int32_t clBuildProgram(cl_program, uint32_t, cl_device_id * , const char * , void * ); </code> */ public int clBuildProgram(long program, long[] deviceList, String options, BuildProgramCallback cb, Object userData) { - if(cb != null) - throw new RuntimeException("asynchronous execution with callback is not yet implemented, pass null through this method to block until complete."); - if(userData != null) - System.out.println("WARNING: userData not yet implemented... ignoring"); - return clBuildProgram0(program, deviceList.length, deviceList, 0, options, cb, userData); + + if(cb != null) + throw new RuntimeException("asynchronous execution with callback is not yet implemented, pass null through this method to block until complete."); + + if(userData != null) + System.out.println("WARNING: userData not yet implemented... 
ignoring"); + + int listLength = 0; + if(deviceList != null) + listLength = deviceList.length; + + return clBuildProgram0(program, listLength, deviceList, 0, options, cb, userData); } /** Entry point to C language function: <code> int32_t clBuildProgram(cl_program, uint32_t, cl_device_id * , const char * , void * ); </code> */ private native int clBuildProgram0(long program, int devices, Object deviceList, int arg2_byte_offset, String options, BuildProgramCallback cb, Object userData); diff --git a/src/com/mbien/opencl/CLException.java b/src/com/mbien/opencl/CLException.java index 4539046d..1f8f16d5 100644 --- a/src/com/mbien/opencl/CLException.java +++ b/src/com/mbien/opencl/CLException.java @@ -125,7 +125,7 @@ public class CLException extends RuntimeException { return "CL_INVALID_MIP_LEVEL"; default: - return "unknown cause: error "+error; + return "unknown cause: error " + error; } } diff --git a/test/com/mbien/opencl/JOCLTest.java b/test/com/mbien/opencl/JOCLTest.java index 3ed51cbf..6da063ca 100644 --- a/test/com/mbien/opencl/JOCLTest.java +++ b/test/com/mbien/opencl/JOCLTest.java @@ -1,8 +1,10 @@ package com.mbien.opencl; +import com.sun.gluegen.runtime.BufferFactory; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; +import java.util.Random; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; @@ -107,8 +109,7 @@ public class JOCLTest { // }; long[] longBuffer = new long[1]; - ByteBuffer bb = ByteBuffer.allocate(1024); - bb.order(ByteOrder.nativeOrder()); + ByteBuffer bb = ByteBuffer.allocate(4096).order(ByteOrder.nativeOrder()); CL cl = CLContext.getLowLevelBinding(); @@ -138,20 +139,20 @@ public class JOCLTest { long commandQueue = cl.clCreateCommandQueue(context, firstDeviceID, 0, intArray, 0); checkError("on clCreateCommandQueue", intArray[0]); - int iNumElements = 11444777; // Length of float arrays to process (odd # for illustration) - int szLocalWorkSize = 256; // set and log Global and 
Local work size dimensions - int szGlobalWorkSize = roundUp(szLocalWorkSize, iNumElements); // rounded up to the nearest multiple of the LocalWorkSize + int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) + int localWorkSize = 256; // set and log Global and Local work size dimensions + int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize int sizeofFloat = 4; // TODO sizeof float ... - + // Allocate the OpenCL buffer memory objects for source and result on the device GMEM - long cmDevSrcA = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, sizeofFloat * szGlobalWorkSize, null, intArray, 0); + long devSrcA = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, sizeofFloat * globalWorkSize, null, intArray, 0); checkError("on clCreateBuffer", intArray[0]); - long cmDevSrcB = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, sizeofFloat * szGlobalWorkSize, null, intArray, 0); + long devSrcB = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, sizeofFloat * globalWorkSize, null, intArray, 0); checkError("on clCreateBuffer", intArray[0]); - long cmDevDst = cl.clCreateBuffer(context, CL.CL_MEM_WRITE_ONLY, sizeofFloat * szGlobalWorkSize, null, intArray, 0); + long devDst = cl.clCreateBuffer(context, CL.CL_MEM_WRITE_ONLY, sizeofFloat * globalWorkSize, null, intArray, 0); checkError("on clCreateBuffer", intArray[0]); - - String src = + + String src = " // OpenCL Kernel Function for element by element vector addition \n" + "__kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int iNumElements) { \n" + " // get index into global data array \n" @@ -168,11 +169,27 @@ public class JOCLTest { // Create the program long program = cl.clCreateProgramWithSource(context, 1, new String[] {src}, new long[]{src.length()}, 0, intArray, 0); checkError("on clCreateProgramWithSource", intArray[0]); - + // Build the program - ret = cl.clBuildProgram(program, new 
long[] { firstDeviceID }, null, null, null); + ret = cl.clBuildProgram(program, null, null, null, null); checkError("on clBuildProgram", ret); + // Read program infos + bb.rewind(); + ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_NUM_DEVICES, bb.capacity(), bb, null, 0); + checkError("on clGetProgramInfo1", ret); + out.println("program associated with "+bb.getInt(0)+" device(s)"); + + ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_SOURCE, 0, bb, longBuffer, 0); + checkError("on clGetProgramInfo CL_PROGRAM_SOURCE", ret); + out.println("program source length (cl): "+longBuffer[0]); + out.println("program source length (java): "+src.length()); + + bb.rewind(); + ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_SOURCE, bb.capacity(), bb, null, 0); + checkError("on clGetProgramInfo CL_PROGRAM_SOURCE", ret); + out.println("program source:\n"+new String(bb.array(), 0, (int)longBuffer[0])); + // Check program status Arrays.fill(longBuffer, 42); bb.rewind(); @@ -194,14 +211,79 @@ public class JOCLTest { out.println("log:\n" + new String(bb.array(), 0, (int)longBuffer[0])); // Create the kernel + Arrays.fill(intArray, 42); long kernel = cl.clCreateKernel(program, "VectorAdd", intArray, 0); checkError("on clCreateKernel", intArray[0]); - + + + ByteBuffer srcA = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_FLOAT); + ByteBuffer srcB = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_FLOAT); + ByteBuffer dst = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_FLOAT); + ByteBuffer elementCountBuffer = BufferFactory.newDirectByteBuffer(BufferFactory.SIZEOF_INT); + elementCountBuffer.putInt(elementCount); + + srcA.limit(elementCount*BufferFactory.SIZEOF_FLOAT); + srcB.limit(elementCount*BufferFactory.SIZEOF_FLOAT); + + fillBuffer(srcA, 23456); + fillBuffer(srcB, 46987); + + // Set the Argument values + ret = cl.clSetKernelArg(kernel, 0, BufferFactory.SIZEOF_LONG, wrap(devSrcA)); checkError("on clSetKernelArg0", 
ret); + ret = cl.clSetKernelArg(kernel, 1, BufferFactory.SIZEOF_LONG, wrap(devSrcB)); checkError("on clSetKernelArg1", ret); + ret = cl.clSetKernelArg(kernel, 2, BufferFactory.SIZEOF_LONG, wrap(devDst)); checkError("on clSetKernelArg2", ret); + ret = cl.clSetKernelArg(kernel, 3, BufferFactory.SIZEOF_INT, elementCountBuffer); checkError("on clSetKernelArg3", ret); + + // Asynchronous write of data to GPU device + ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcA, CL.CL_FALSE, 0, srcA.capacity(), srcA, 0, null, 0, null, 0); + checkError("on clEnqueueWriteBuffer", ret); + ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcB, CL.CL_FALSE, 0, srcB.capacity(), srcB, 0, null, 0, null, 0); + checkError("on clEnqueueWriteBuffer", ret); + + // Launch kernel + ret = cl.clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, 0, + new long[]{ globalWorkSize }, 0, + new long[]{ localWorkSize }, 0, 0, + null, 0, + null, 0); + checkError("on clEnqueueNDRangeKernel", ret); + + // Synchronous/blocking read of results + ret = cl.clEnqueueReadBuffer(commandQueue, devDst, CL.CL_TRUE, 0, BufferFactory.SIZEOF_FLOAT * globalWorkSize, dst, 0, null, 0, null, 0); + checkError("on clEnqueueReadBuffer", ret); + +// for(int i = 0; i < 50; i++) +// System.out.println(dst.getFloat()); + + // cleanup + ret = cl.clReleaseProgram(program); + checkError("on clReleaseProgram", ret); + + ret = cl.clReleaseKernel(kernel); + checkError("on clReleaseKernel", ret); + + ret = cl.clUnloadCompiler(); + checkError("on clUnloadCompiler", ret); + ret = cl.clReleaseContext(context); checkError("on clReleaseContext", ret); } + private void fillBuffer(ByteBuffer buffer, int seed) { + + Random rnd = new Random(seed); + + while(buffer.remaining() != 0) + buffer.putFloat(rnd.nextFloat()); + + buffer.rewind(); + } + + private ByteBuffer wrap(long value) { + return (ByteBuffer)ByteBuffer.allocateDirect(8).order(ByteOrder.nativeOrder()).putLong(value).rewind(); + } + private String getBuildStatus(int status) { 
switch(status) { case CL.CL_BUILD_SUCCESS: |