diff options
author | Michael Bien <[email protected]> | 2009-10-22 01:22:34 +0200 |
---|---|---|
committer | Michael Bien <[email protected]> | 2009-10-22 01:22:34 +0200 |
commit | 3b01a2b95fb27293e8959f2a85870204be02fdad (patch) | |
tree | b2994587692f510f2efd2dc59233824dc193e151 /src/com/mbien/opencl | |
parent | 84cc0fddb4da1add636978afecf58df01fb3c35b (diff) |
cleanup
Diffstat (limited to 'src/com/mbien/opencl')
-rw-r--r-- | src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java | 40 | ||||
-rw-r--r-- | src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl | 7 |
2 files changed, 24 insertions, 23 deletions
diff --git a/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java b/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java index 31cbdef..b9b07c2 100644 --- a/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java +++ b/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java @@ -1,7 +1,7 @@ package com.mbien.opencl.demos.hellojocl; -import com.mbien.opencl.CL; import com.mbien.opencl.CLBuffer; +import com.mbien.opencl.CLBuffer.MEM; import com.mbien.opencl.CLCommandQueue; import com.mbien.opencl.CLContext; import com.mbien.opencl.CLKernel; @@ -24,8 +24,8 @@ public class HelloJOCL { public static void main(String[] args) throws IOException { - int elementCount = 11444777; // Length of float arrays to process - int localWorkSize = 256; // set and log Global and Local work size dimensions + int elementCount = 11444777; // Length of arrays to process + int localWorkSize = 256; // Local work size dimensions int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize // set up @@ -33,23 +33,23 @@ public class HelloJOCL { CLProgram program = context.createProgram(HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build(); - CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, globalWorkSize*SIZEOF_INT); - CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, globalWorkSize*SIZEOF_INT); - CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, globalWorkSize*SIZEOF_INT); + CLBuffer clBufferA = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.READ_ONLY); + CLBuffer clBufferB = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.READ_ONLY); + CLBuffer clBufferC = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.WRITE_ONLY); out.println("used device memory: " + (clBufferA.buffer.capacity()+clBufferB.buffer.capacity()+clBufferC.buffer.capacity())/1000000 +"MB"); - // fill read buffers with random numbers (just to have test data; seed is fixed -> results will not change between). + // fill read buffers with random numbers (just to have test data; seed is fixed -> results will not change between runs). fillBuffer(clBufferA.buffer, 12345); fillBuffer(clBufferB.buffer, 67890); // get a reference to the kernel functon with the name 'VectorAdd' and map the buffers to its input parameters. CLKernel kernel = program.getCLKernels().get("VectorAdd"); - kernel.setArg(0, SIZEOF_LONG, clBufferA) - .setArg(1, SIZEOF_LONG, clBufferB) - .setArg(2, SIZEOF_LONG, clBufferC) - .setArg(3, SIZEOF_INT, elementCount); + kernel.setArg(0, clBufferA) + .setArg(1, clBufferB) + .setArg(2, clBufferC) + .setArg(3, elementCount); // create command queue on first device. CLCommandQueue queue = context.getCLDevices()[0].createCommandQueue(); @@ -58,8 +58,9 @@ public class HelloJOCL { long time = nanoTime(); queue.putWriteBuffer(clBufferA, false) .putWriteBuffer(clBufferB, false) - .putNDRangeKernel(kernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize }) - .putReadBuffer(clBufferC, true); + .putNDRangeKernel(kernel, 1, 0, globalWorkSize, localWorkSize) + .putReadBuffer(clBufferC, true) + .finish(); time = nanoTime() - time; // cleanup all resources associated with this context. @@ -68,24 +69,21 @@ public class HelloJOCL { // print first few elements of the resulting buffer to the console. out.println("a+b=c results snapshot: "); for(int i = 0; i < 10; i++) - out.print(clBufferC.buffer.getInt() + ", "); - out.println("...; " + clBufferC.buffer.remaining()/SIZEOF_INT + " more"); + out.print(clBufferC.buffer.getFloat() + ", "); + out.println("...; " + clBufferC.buffer.remaining()/SIZEOF_FLOAT + " more"); System.out.println("computation took: "+(time/1000000)+"ms"); } - public static final void fillBuffer(ByteBuffer buffer, int seed) { - + private static final void fillBuffer(ByteBuffer buffer, int seed) { Random rnd = new Random(seed); - while(buffer.remaining() != 0) - buffer.putInt(rnd.nextInt()); - + buffer.putFloat(rnd.nextFloat()*100); buffer.rewind(); } - public static final int roundUp(int groupSize, int globalSize) { + private static final int roundUp(int groupSize, int globalSize) { int r = globalSize % groupSize; if (r == 0) { return globalSize; diff --git a/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl b/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl index b53fc41..f9b4f32 100644 --- a/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl +++ b/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl @@ -1,12 +1,15 @@ // OpenCL Kernel Function for element by element vector addition - __kernel void VectorAdd(__global const int* a, __global const int* b, __global int* c, int iNumElements) { + __kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int numElements) { + // get index into global data array int iGID = get_global_id(0); + // bound check (equivalent to the limit on a 'for' loop for standard/serial C code - if (iGID >= iNumElements) { + if (iGID >= numElements) { return; } + // add the vector elements c[iGID] = a[iGID] + b[iGID]; } |