api cleanup and refactoring.

author: Michael Bien <[email protected]> 2009-10-22 01:26:53 +0200
committer: Michael Bien <[email protected]> 2009-10-22 01:26:53 +0200
commit: b3881a0924ecbe17cf27cededeae8df40b2d6933 (patch)
tree: a2ae7930eca5d771ab89fddd1a9e62802b6ffac9
parent: fe1e2739bf7596bc488de977166603edd18c41fb (diff)
6 files changed, 242 insertions, 63 deletions
diff --git a/src/com/mbien/opencl/CLBuffer.java b/src/com/mbien/opencl/CLBuffer.java
index f9f3a239..7dcd2928 100644
--- a/src/com/mbien/opencl/CLBuffer.java
+++ b/src/com/mbien/opencl/CLBuffer.java
@@ -9,13 +9,93 @@ import static com.mbien.opencl.CLException.*;
  */
 public class CLBuffer {
 
+    public enum MEM {
+
+        /**
+         * This flag specifies that the memory object will be read and
+         * written by a kernel.
+         */
+        READ_WRITE(CL.CL_MEM_READ_WRITE),
+
+        /**
+         * This flag specifies that the memory object will be written
+         * but not read by a kernel.
+         * Reading from a buffer or image object created with WRITE_ONLY
+         * inside a kernel is undefined.
+         */
+        WRITE_ONLY(CL.CL_MEM_WRITE_ONLY),
+
+        /**
+         * This flag specifies that the memory object is a read-only memory
+         * object when used inside a kernel. Writing to a buffer or image object
+         * created with READ_ONLY inside a kernel is undefined.
+         */
+        READ_ONLY(CL.CL_MEM_READ_ONLY);
+
+        /**
+         * If specified, it indicates that the application wants the OpenCL
+         * implementation to use memory referenced by host_ptr as the storage
+         * bits for the memory object. OpenCL implementations are allowed
+         * to cache the buffer contents pointed to by host_ptr in device memory.
+         * This cached copy can be used when kernels are executed on a device.
+         */
+//        USE_HOST_PTR(CL.CL_MEM_USE_HOST_PTR),
+
+//        ALLOC_HOST_PTR(CL.CL_MEM_ALLOC_HOST_PTR), // this is the default in java world anyway
+
+        /**
+         * If CL_MEM_COPY_HOST_PTR specified, it indicates that the application
+         * wants the OpenCL implementation to allocate memory for the memory object
+         * and copy the data from memory referenced by host_ptr.<br/>
+         * COPY_HOST_PTR and USE_HOST_PTR are mutually exclusive.
+         */
+//        COPY_HOST_PTR(CL.CL_MEM_COPY_HOST_PTR);
+
+        /**
+         * Value of wrapped OpenCL flag.
+         */
+        public final int CL_FLAG;
+
+        private MEM(int CL_TYPE) {
+            this.CL_FLAG = CL_TYPE;
+        }
+
+        public static MEM valueOf(int bufferFlag) {
+            switch(bufferFlag) {
+                case(CL.CL_MEM_READ_WRITE):
+                    return READ_WRITE;
+                case(CL.CL_MEM_READ_ONLY):
+                    return READ_ONLY;
+//                case(CL.CL_MEM_USE_HOST_PTR):
+//                    return USE_HOST_PTR;
+//                case(CL.CL_MEM_ALLOC_HOST_PTR):
+//                    return ALLOC_HOST_PTR;
+//                case(CL.CL_MEM_COPY_HOST_PTR):
+//                    return COPY_HOST_PTR;
+            }
+            return null;
+        }
+
+        static int flagsToInt(MEM[] flags) {
+            int clFlags = CL.CL_MEM_READ_WRITE;
+            if(flags != null) {
+                for (int i = 0; i < flags.length; i++) {
+                    clFlags |= flags[i].CL_FLAG;
+                }
+            }
+            return clFlags;
+        }
+
+    }
+
+
     public final ByteBuffer buffer;
     public final long ID;
     
     private final CLContext context;
     private final CL cl;
 
-    CLBuffer(CLContext context, int flags, ByteBuffer directBuffer) {
+    CLBuffer(CLContext context, ByteBuffer directBuffer, int flags) {
         
         if(!directBuffer.isDirect())
             throw new IllegalArgumentException("buffer is not a direct buffer");
@@ -34,7 +114,7 @@ public class CLBuffer {
 
     public void release() {
         int ret = cl.clReleaseMemObject(ID);
-        context.bufferReleased(this);
+        context.onBufferReleased(this);
         checkForError(ret, "can not release mem object");
     }
 
diff --git a/src/com/mbien/opencl/CLCommandQueue.java b/src/com/mbien/opencl/CLCommandQueue.java
index 7d1d09f9..5abc5cd5 100644
--- a/src/com/mbien/opencl/CLCommandQueue.java
+++ b/src/com/mbien/opencl/CLCommandQueue.java
@@ -123,10 +123,13 @@ public class CLCommandQueue {
     }
 */
     
-//    public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long globalWorkOffset, long globalWorkSize, long localWorkSize) {
-//        return this.putNDRangeKernel(kernel, workDimension,
-//                new long[] {globalWorkOffset}, new long[] {globalWorkSize}, new long[] {localWorkSize});
-//    }
+    public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long globalWorkOffset, long globalWorkSize, long localWorkSize) {
+        return this.putNDRangeKernel(
+                kernel, workDimension,
+                globalWorkOffset==0 ? null : new long[] {globalWorkOffset},
+                globalWorkSize  ==0 ? null : new long[] {globalWorkSize  },
+                localWorkSize   ==0 ? null : new long[] {localWorkSize   }  );
+    }
 
     public CLCommandQueue putNDRangeKernel(CLKernel kernel, int workDimension, long[] globalWorkOffset, long[] globalWorkSize, long[] localWorkSize) {
 
@@ -145,11 +148,47 @@ public class CLCommandQueue {
         return this;
     }
 
+    public CLCommandQueue finish() {
+        int ret = cl.clFinish(ID);
+        checkForError(ret, "can not finish command queue");
+        return this;
+    }
+
     public void release() {
         int ret = cl.clReleaseCommandQueue(ID);
-        context.commandQueueReleased(device, this);
+        context.onCommandQueueReleased(device, this);
         checkForError(ret, "can not release command queue");
     }
 
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null) {
+            return false;
+        }
+        if (getClass() != obj.getClass()) {
+            return false;
+        }
+        final CLCommandQueue other = (CLCommandQueue) obj;
+        if (this.ID != other.ID) {
+            return false;
+        }
+        if (this.context != other.context && (this.context == null || !this.context.equals(other.context))) {
+            return false;
+        }
+        if (this.device != other.device && (this.device == null || !this.device.equals(other.device))) {
+            return false;
+        }
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int hash = 3;
+        hash = 89 * hash + (int) (this.ID ^ (this.ID >>> 32));
+        hash = 89 * hash + (this.context != null ? this.context.hashCode() : 0);
+        hash = 89 * hash + (this.device != null ? this.device.hashCode() : 0);
+        return hash;
+    }
+
 
 }
diff --git a/src/com/mbien/opencl/CLContext.java b/src/com/mbien/opencl/CLContext.java
index 76fbc2ee..7eaada8c 100644
--- a/src/com/mbien/opencl/CLContext.java
+++ b/src/com/mbien/opencl/CLContext.java
@@ -1,5 +1,6 @@
 package com.mbien.opencl;
 
+import com.mbien.opencl.CLBuffer.MEM;
 import com.sun.gluegen.runtime.BufferFactory;
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -99,14 +100,27 @@ public final class CLContext {
         return createProgram(sb.toString());
     }
 
-    public CLBuffer createBuffer(int flags, ByteBuffer directBuffer) {
-        CLBuffer buffer = new CLBuffer(this, flags, directBuffer);
-        buffers.add(buffer);
-        return buffer;
+    /**
+     * Creates a CLBuffer with the specified flags. No flags creates a MEM.READ_WRITE buffer.
+     */
+    public CLBuffer createBuffer(ByteBuffer directBuffer, MEM... flags) {
+        return createBuffer(directBuffer, MEM.flagsToInt(flags));
+    }
+    /**
+     * Creates a CLBuffer with the specified flags. No flags creates a MEM.READ_WRITE buffer.
+     */
+    public CLBuffer createBuffer(int size, MEM... flags) {
+        return createBuffer(size, MEM.flagsToInt(flags));
     }
 
-    public CLBuffer createBuffer(int flags, int size) {
-        return createBuffer(flags, BufferFactory.newDirectByteBuffer(size));
+    public CLBuffer createBuffer(int size, int flags) {
+        return createBuffer(BufferFactory.newDirectByteBuffer(size), flags);
+    }
+
+    public CLBuffer createBuffer(ByteBuffer directBuffer, int flags) {
+        CLBuffer buffer = new CLBuffer(this, directBuffer, flags);
+        buffers.add(buffer);
+        return buffer;
     }
 
     CLCommandQueue createCommandQueue(CLDevice device, long properties) {
@@ -123,15 +137,15 @@ public final class CLContext {
         return queue;
     }
 
-    void programReleased(CLProgram program) {
+    void onProgramReleased(CLProgram program) {
         programs.remove(program);
     }
 
-    void bufferReleased(CLBuffer buffer) {
+    void onBufferReleased(CLBuffer buffer) {
         buffers.remove(buffer);
     }
 
-    void commandQueueReleased(CLDevice device, CLCommandQueue queue) {
+    void onCommandQueueReleased(CLDevice device, CLCommandQueue queue) {
         List<CLCommandQueue> list = queuesMap.get(device);
         list.remove(queue);
         // remove empty lists from map
@@ -173,39 +187,28 @@ public final class CLContext {
     /**
      * Gets the device with maximal FLOPS from this context.
      */
-    /*
     public CLDevice getMaxFlopsDevice() {
 
-        long[] longBuffer = new long[1];
-//        ByteBuffer bb = ByteBuffer.allocate(8);
-//        bb.order(ByteOrder.nativeOrder());
+        CLDevice[] clDevices = getCLDevices();
+        CLDevice maxFLOPSDevice = null;
 
-        int ret = cl.clGetContextInfo(contextID, CL.CL_CONTEXT_DEVICES, 0, null, longBuffer, 0);
-        if(CL.CL_SUCCESS != ret)
-            throw new CLException(ret, "can not receive context info");
+        int maxflops = -1;
 
-        System.out.println("#devices: "+longBuffer[0]);
+        for (int i = 0; i < clDevices.length; i++) {
 
-        long[] deviceIDs = new long[(int)longBuffer[0]];
-        ret = cl.clGetContextInfo(contextID, CL.CL_CONTEXT_DEVICES, 0, null, deviceIDs, 0);
+            CLDevice device = clDevices[i];
+            int maxComputeUnits     = device.getMaxComputeUnits();
+            int maxClockFrequency   = device.getMaxClockFrequency();
+            int flops = maxComputeUnits*maxClockFrequency;
 
-        if(CL.CL_SUCCESS != ret)
-            throw new CLException(ret, "can not receive context info");
-
-        for (int i = 0; i < deviceIDs.length; i++) {
-            long l = deviceIDs[i];
-            System.out.println("device id"+l);
+            if(flops > maxflops) {
+                maxflops = flops;
+                maxFLOPSDevice = device;
+            }
         }
 
-            // get the list of GPU devices associated with context
-//        ciErrNum = clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &dataBytes);
-//        cl_device_id *cdDevices = (cl_device_id *)malloc(dataBytes);
-//        ciErrNum |= clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, dataBytes, cdDevices, NULL);
-//        shrCheckError(ciErrNum, CL_SUCCESS);
-
-        return null;
+        return maxFLOPSDevice;
     }
-*/
 
     /**
      * Returns all devices associated with this CLContext.
@@ -214,7 +217,7 @@ public final class CLContext {
 
         if(devices == null) {
 
-            int sizeofDeviceID = 8; // TODO doublechek deviceID size on 32 bit systems
+            int sizeofDeviceID = 8; // TODO doublecheck deviceID size on 32 bit systems
 
             long[] longBuffer = new long[1];
 
@@ -229,7 +232,7 @@ public final class CLContext {
 
             devices = new CLDevice[deviceIDs.capacity()/sizeofDeviceID];
             for (int i = 0; i < devices.length; i++)
-                devices[i] = new CLDevice(this, deviceIDs.getLong()); // TODO doublechek deviceID size on 32 bit systems
+                devices[i] = new CLDevice(this, deviceIDs.getLong()); // TODO doublecheck deviceID size on 32 bit systems
 
         }
 
diff --git a/src/com/mbien/opencl/CLKernel.java b/src/com/mbien/opencl/CLKernel.java
index 9f184ce4..dcf00c9a 100644
--- a/src/com/mbien/opencl/CLKernel.java
+++ b/src/com/mbien/opencl/CLKernel.java
@@ -1,6 +1,7 @@
 package com.mbien.opencl;
 
 import com.sun.gluegen.runtime.BufferFactory;
+import java.nio.Buffer;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import static com.mbien.opencl.CLException.*;
@@ -36,25 +37,47 @@ public class CLKernel {
 
     }
 
-    public CLKernel setArg(int argumentIndex, int argumentSize, CLBuffer value) {
-        int ret = cl.clSetKernelArg(ID, argumentIndex, argumentSize, wrapLong(value.ID));
+    public CLKernel setArg(int argumentIndex, CLBuffer value) {
+        int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value.ID));
         checkForError(ret, "error on clSetKernelArg");
         return this;
     }
 
-    public CLKernel setArg(int argumentIndex, int argumentSize, long value) {
-        int ret = cl.clSetKernelArg(ID, argumentIndex, argumentSize, wrapLong(value));
+    public CLKernel setArg(int argumentIndex, int value) {
+        int ret = cl.clSetKernelArg(ID, argumentIndex, 4, wrap(value));
         checkForError(ret, "error on clSetKernelArg");
         return this;
     }
 
-    private final ByteBuffer wrapLong(long value) {
-        return (ByteBuffer) BufferFactory.newDirectByteBuffer(8).putLong(value).rewind();
+    public CLKernel setArg(int argumentIndex, long value) {
+        int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value));
+        checkForError(ret, "error on clSetKernelArg");
+        return this;
+    }
+
+    public CLKernel setArg(int argumentIndex, float value) {
+        int ret = cl.clSetKernelArg(ID, argumentIndex, 4, wrap(value));
+        checkForError(ret, "error on clSetKernelArg");
+        return this;
+    }
+
+    public CLKernel setArg(int argumentIndex, double value) {
+        int ret = cl.clSetKernelArg(ID, argumentIndex, 8, wrap(value));
+        checkForError(ret, "error on clSetKernelArg");
+        return this;
+    }
+
+    private final Buffer wrap(double value) {
+        return BufferFactory.newDirectByteBuffer(8).putDouble(value).rewind();
+    }
+
+    private final Buffer wrap(long value) {
+        return BufferFactory.newDirectByteBuffer(8).putLong(value).rewind();
     }
 
     public void release() {
         int ret = cl.clReleaseKernel(ID);
-        program.kernelReleased(this);
+        program.onKernelReleased(this);
         checkForError(ret, "can not release kernel");
     }
 
diff --git a/src/com/mbien/opencl/CLProgram.java b/src/com/mbien/opencl/CLProgram.java
index 020de17c..566ae6b2 100644
--- a/src/com/mbien/opencl/CLProgram.java
+++ b/src/com/mbien/opencl/CLProgram.java
@@ -123,7 +123,7 @@ public class CLProgram {
         return Collections.unmodifiableMap(kernels);
     }
 
-    void kernelReleased(CLKernel kernel) {
+    void onKernelReleased(CLKernel kernel) {
         this.kernels.remove(kernel.name);
     }
 
@@ -140,7 +140,7 @@ public class CLProgram {
         }
 
         int ret = cl.clReleaseProgram(ID);
-        context.programReleased(this);
+        context.onProgramReleased(this);
         checkForError(ret, "can not release program");
         
     }
@@ -184,7 +184,35 @@ public class CLProgram {
         return getProgramInfoString(CL.CL_PROGRAM_SOURCE);
     }
 
-    // TODO binaries, serialization, program build options
+    public Map<CLDevice, byte[]> getBinaries() {
+
+        CLDevice[] devices = getCLDevices();
+
+        ByteBuffer sizes = ByteBuffer.allocate(8*devices.length).order(ByteOrder.nativeOrder());
+        int ret = cl.clGetProgramInfo(ID, CL.CL_PROGRAM_BINARY_SIZES, sizes.capacity(), sizes, null, 0);
+        checkForError(ret, "on clGetProgramInfo");
+
+        int binarySize = 0;
+        while(sizes.remaining() != 0)
+            binarySize += (int)sizes.getLong();
+
+        ByteBuffer binaries = ByteBuffer.allocate(binarySize).order(ByteOrder.nativeOrder());
+        ret = cl.clGetProgramInfo(ID, CL.CL_PROGRAM_BINARIES, binaries.capacity(), binaries, null, 0); // crash, driver bug?
+        checkForError(ret, "on clGetProgramInfo");
+
+        Map<CLDevice, byte[]> map = new HashMap<CLDevice, byte[]>();
+
+        for (int i = 0; i < devices.length; i++) {
+            byte[] bytes = new byte[(int)sizes.getLong()];
+            binaries.get(bytes);
+            map.put(devices[i], bytes);
+        }
+
+        return map;
+    }
+
+
+    // TODO serialization, program build options
 
     private final String getBuildInfoString(long device, int flag) {
 
diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java
index b9643e9b..8ae61b2a 100644
--- a/test/com/mbien/opencl/HighLevelBindingTest.java
+++ b/test/com/mbien/opencl/HighLevelBindingTest.java
@@ -1,5 +1,6 @@
 package com.mbien.opencl;
 
+import com.mbien.opencl.CLBuffer.MEM;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Map;
@@ -87,6 +88,9 @@ public class HighLevelBindingTest {
         assertFalse(source.trim().isEmpty());
 //        out.println("source:\n"+source);
 
+//        Map<CLDevice, byte[]> binaries = program.getBinaries();
+//        assertFalse(binaries.isEmpty());
+
         int elementCount = 11444777;	// Length of float arrays to process (odd # for illustration)
         int localWorkSize = 256;      // set and log Global and Local work size dimensions
         int globalWorkSize = roundUp(localWorkSize, elementCount);  // rounded up to the nearest multiple of the LocalWorkSize
@@ -100,9 +104,9 @@ public class HighLevelBindingTest {
         fillBuffer(srcA, 23456);
         fillBuffer(srcB, 46987);
 
-        CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, srcA);
-        CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB);
-        CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest);
+        CLBuffer clBufferA = context.createBuffer(srcA, MEM.READ_ONLY);
+        CLBuffer clBufferB = context.createBuffer(srcB, MEM.READ_ONLY);
+        CLBuffer clBufferC = context.createBuffer(dest, MEM.WRITE_ONLY);
 
         Map<String, CLKernel> kernels = program.getCLKernels();
         for (CLKernel kernel : kernels.values()) {
@@ -114,10 +118,10 @@ public class HighLevelBindingTest {
 
         CLKernel vectorAddKernel = kernels.get("VectorAddGM");
 
-        vectorAddKernel.setArg(0, SIZEOF_LONG, clBufferA)
-                       .setArg(1, SIZEOF_LONG, clBufferB)
-                       .setArg(2, SIZEOF_LONG, clBufferC)
-                       .setArg(3, SIZEOF_INT, elementCount);
+        vectorAddKernel.setArg(0, clBufferA)
+                       .setArg(1, clBufferB)
+                       .setArg(2, clBufferC)
+                       .setArg(3, elementCount);
 
         CLCommandQueue queue = programDevices[0].createCommandQueue();
 
@@ -125,7 +129,8 @@ public class HighLevelBindingTest {
         queue.putWriteBuffer(clBufferA, false)
              .putWriteBuffer(clBufferB, false)
              .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize })
-             .putReadBuffer(clBufferC, true).release();
+             .putReadBuffer(clBufferC, true)
+             .finish().release();
 
         out.println("a+b=c result snapshot: ");
         for(int i = 0; i < 10; i++)
@@ -164,8 +169,8 @@ public class HighLevelBindingTest {
         CLContext context = CLContext.create();
 
          // the CL.MEM_* flag is probably completly irrelevant in our case since we do not use a kernel in this test
-        CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY,  elements*SIZEOF_INT);
-        CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY,  elements*SIZEOF_INT);
+        CLBuffer clBufferA = context.createBuffer(elements*SIZEOF_INT, MEM.READ_ONLY);
+        CLBuffer clBufferB = context.createBuffer(elements*SIZEOF_INT, MEM.READ_ONLY);
 
         // fill only first read buffer -> we will copy the payload to the second later.
         fillBuffer(clBufferA.buffer, 12345);
@@ -175,7 +180,8 @@ public class HighLevelBindingTest {
         // asynchronous write of data to GPU device, blocking read later to get the computed results back.
         queue.putWriteBuffer(clBufferA, false)                                 // write A
              .putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B
-             .putReadBuffer(clBufferB, true);                                  // read B
+             .putReadBuffer(clBufferB, true)                                   // read B
+             .finish();
 
         context.release();
author	Michael Bien <[email protected]>	2009-10-22 01:26:53 +0200
committer	Michael Bien <[email protected]>	2009-10-22 01:26:53 +0200
commit	b3881a0924ecbe17cf27cededeae8df40b2d6933 (patch)
tree	a2ae7930eca5d771ab89fddd1a9e62802b6ffac9
parent	fe1e2739bf7596bc488de977166603edd18c41fb (diff)