2 files changed, 24 insertions, 23 deletions
diff --git a/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java b/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java
index 31cbdef..b9b07c2 100644
--- a/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java
+++ b/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java
@@ -1,7 +1,7 @@
 package com.mbien.opencl.demos.hellojocl;
 
-import com.mbien.opencl.CL;
 import com.mbien.opencl.CLBuffer;
+import com.mbien.opencl.CLBuffer.MEM;
 import com.mbien.opencl.CLCommandQueue;
 import com.mbien.opencl.CLContext;
 import com.mbien.opencl.CLKernel;
@@ -24,8 +24,8 @@ public class HelloJOCL {
 
     public static void main(String[] args) throws IOException {
         
-        int elementCount = 11444777;                                // Length of float arrays to process
-        int localWorkSize = 256;                                    // set and log Global and Local work size dimensions
+        int elementCount = 11444777;                                // Length of arrays to process
+        int localWorkSize = 256;                                    // Local work size dimensions
         int globalWorkSize = roundUp(localWorkSize, elementCount);  // rounded up to the nearest multiple of the localWorkSize
 
         // set up
@@ -33,23 +33,23 @@ public class HelloJOCL {
 
         CLProgram program = context.createProgram(HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build();
 
-        CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY,  globalWorkSize*SIZEOF_INT);
-        CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY,  globalWorkSize*SIZEOF_INT);
-        CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, globalWorkSize*SIZEOF_INT);
+        CLBuffer clBufferA = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.READ_ONLY);
+        CLBuffer clBufferB = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.READ_ONLY);
+        CLBuffer clBufferC = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.WRITE_ONLY);
 
         out.println("used device memory: "
             + (clBufferA.buffer.capacity()+clBufferB.buffer.capacity()+clBufferC.buffer.capacity())/1000000 +"MB");
 
-        // fill read buffers with random numbers (just to have test data; seed is fixed -> results will not change between).
+        // fill read buffers with random numbers (just to have test data; seed is fixed -> results will not change between runs).
         fillBuffer(clBufferA.buffer, 12345);
         fillBuffer(clBufferB.buffer, 67890);
 
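-        // get a reference to the kernel functon with the name 'VectorAdd' and map the buffers to its input parameters.
+        // get a reference to the kernel function with the name 'VectorAdd' and map the buffers to its input parameters.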
         CLKernel kernel = program.getCLKernels().get("VectorAdd");
-        kernel.setArg(0, SIZEOF_LONG, clBufferA)
-              .setArg(1, SIZEOF_LONG, clBufferB)
-              .setArg(2, SIZEOF_LONG, clBufferC)
-              .setArg(3, SIZEOF_INT, elementCount);
+        kernel.setArg(0, clBufferA)
+              .setArg(1, clBufferB)
+              .setArg(2, clBufferC)
+              .setArg(3, elementCount);
 
         // create command queue on first device.
         CLCommandQueue queue = context.getCLDevices()[0].createCommandQueue();
@@ -58,8 +58,9 @@ public class HelloJOCL {
         long time = nanoTime();
         queue.putWriteBuffer(clBufferA, false)
              .putWriteBuffer(clBufferB, false)
-             .putNDRangeKernel(kernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize })
-             .putReadBuffer(clBufferC, true);
+             .putNDRangeKernel(kernel, 1, 0, globalWorkSize, localWorkSize)
+             .putReadBuffer(clBufferC, true)
+             .finish();
         time = nanoTime() - time;
 
         // cleanup all resources associated with this context.
@@ -68,24 +69,21 @@ public class HelloJOCL {
         // print first few elements of the resulting buffer to the console.
         out.println("a+b=c results snapshot: ");
         for(int i = 0; i < 10; i++)
-            out.print(clBufferC.buffer.getInt() + ", ");
-        out.println("...; " + clBufferC.buffer.remaining()/SIZEOF_INT + " more");
+            out.print(clBufferC.buffer.getFloat() + ", ");
+        out.println("...; " + clBufferC.buffer.remaining()/SIZEOF_FLOAT + " more");
         
         System.out.println("computation took: "+(time/1000000)+"ms");
 
     }
 
-    public static final void fillBuffer(ByteBuffer buffer, int seed) {
-
+    private static final void fillBuffer(ByteBuffer buffer, int seed) {
         Random rnd = new Random(seed);
-
         while(buffer.remaining() != 0)
-            buffer.putInt(rnd.nextInt());
-
+            buffer.putFloat(rnd.nextFloat()*100);
         buffer.rewind();
     }
 
-    public static final int roundUp(int groupSize, int globalSize) {
+    private static final int roundUp(int groupSize, int globalSize) {
         int r = globalSize % groupSize;
         if (r == 0) {
             return globalSize;
diff --git a/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl b/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl
index b53fc41..f9b4f32 100644
--- a/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl
+++ b/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl
@@ -1,12 +1,15 @@
 
     // OpenCL Kernel Function for element by element vector addition
-    __kernel void VectorAdd(__global const int* a, __global const int* b, __global int* c, int iNumElements) {
+    __kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int numElements) {
+
         // get index into global data array
         int iGID = get_global_id(0);
+
-        // bound check (equivalent to the limit on a 'for' loop for standard/serial C code
+        // bound check (equivalent to the limit on a 'for' loop for standard/serial C code)
-        if (iGID >= iNumElements)  {
+        if (iGID >= numElements)  {
             return;
         }
+
         // add the vector elements
         c[iGID] = a[iGID] + b[iGID];
     }