summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java40
-rw-r--r--src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl7
2 files changed, 24 insertions, 23 deletions
diff --git a/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java b/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java
index 31cbdef..b9b07c2 100644
--- a/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java
+++ b/src/com/mbien/opencl/demos/hellojocl/HelloJOCL.java
@@ -1,7 +1,7 @@
package com.mbien.opencl.demos.hellojocl;
-import com.mbien.opencl.CL;
import com.mbien.opencl.CLBuffer;
+import com.mbien.opencl.CLBuffer.MEM;
import com.mbien.opencl.CLCommandQueue;
import com.mbien.opencl.CLContext;
import com.mbien.opencl.CLKernel;
@@ -24,8 +24,8 @@ public class HelloJOCL {
public static void main(String[] args) throws IOException {
- int elementCount = 11444777; // Length of float arrays to process
- int localWorkSize = 256; // set and log Global and Local work size dimensions
+ int elementCount = 11444777; // Length of arrays to process
+ int localWorkSize = 256; // Local work size dimensions
int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize
// set up
@@ -33,23 +33,23 @@ public class HelloJOCL {
CLProgram program = context.createProgram(HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build();
- CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, globalWorkSize*SIZEOF_INT);
- CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, globalWorkSize*SIZEOF_INT);
- CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, globalWorkSize*SIZEOF_INT);
+ CLBuffer clBufferA = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.READ_ONLY);
+ CLBuffer clBufferB = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.READ_ONLY);
+ CLBuffer clBufferC = context.createBuffer(globalWorkSize*SIZEOF_FLOAT, MEM.WRITE_ONLY);
out.println("used device memory: "
+ (clBufferA.buffer.capacity()+clBufferB.buffer.capacity()+clBufferC.buffer.capacity())/1000000 +"MB");
- // fill read buffers with random numbers (just to have test data; seed is fixed -> results will not change between).
+ // fill read buffers with random numbers (just to have test data; seed is fixed -> results will not change between runs).
fillBuffer(clBufferA.buffer, 12345);
fillBuffer(clBufferB.buffer, 67890);
// get a reference to the kernel function with the name 'VectorAdd' and map the buffers to its input parameters.
CLKernel kernel = program.getCLKernels().get("VectorAdd");
- kernel.setArg(0, SIZEOF_LONG, clBufferA)
- .setArg(1, SIZEOF_LONG, clBufferB)
- .setArg(2, SIZEOF_LONG, clBufferC)
- .setArg(3, SIZEOF_INT, elementCount);
+ kernel.setArg(0, clBufferA)
+ .setArg(1, clBufferB)
+ .setArg(2, clBufferC)
+ .setArg(3, elementCount);
// create command queue on first device.
CLCommandQueue queue = context.getCLDevices()[0].createCommandQueue();
@@ -58,8 +58,9 @@ public class HelloJOCL {
long time = nanoTime();
queue.putWriteBuffer(clBufferA, false)
.putWriteBuffer(clBufferB, false)
- .putNDRangeKernel(kernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize })
- .putReadBuffer(clBufferC, true);
+ .putNDRangeKernel(kernel, 1, 0, globalWorkSize, localWorkSize)
+ .putReadBuffer(clBufferC, true)
+ .finish();
time = nanoTime() - time;
// cleanup all resources associated with this context.
@@ -68,24 +69,21 @@ public class HelloJOCL {
// print first few elements of the resulting buffer to the console.
out.println("a+b=c results snapshot: ");
for(int i = 0; i < 10; i++)
- out.print(clBufferC.buffer.getInt() + ", ");
- out.println("...; " + clBufferC.buffer.remaining()/SIZEOF_INT + " more");
+ out.print(clBufferC.buffer.getFloat() + ", ");
+ out.println("...; " + clBufferC.buffer.remaining()/SIZEOF_FLOAT + " more");
System.out.println("computation took: "+(time/1000000)+"ms");
}
- public static final void fillBuffer(ByteBuffer buffer, int seed) {
-
+ private static final void fillBuffer(ByteBuffer buffer, int seed) {
Random rnd = new Random(seed);
-
while(buffer.remaining() != 0)
- buffer.putInt(rnd.nextInt());
-
+ buffer.putFloat(rnd.nextFloat()*100);
buffer.rewind();
}
- public static final int roundUp(int groupSize, int globalSize) {
+ private static final int roundUp(int groupSize, int globalSize) {
int r = globalSize % groupSize;
if (r == 0) {
return globalSize;
diff --git a/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl b/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl
index b53fc41..f9b4f32 100644
--- a/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl
+++ b/src/com/mbien/opencl/demos/hellojocl/VectorAdd.cl
@@ -1,12 +1,15 @@
// OpenCL Kernel Function for element by element vector addition
- __kernel void VectorAdd(__global const int* a, __global const int* b, __global int* c, int iNumElements) {
+ __kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int numElements) {
+
// get index into global data array
int iGID = get_global_id(0);
+
// bound check (equivalent to the limit on a 'for' loop for standard/serial C code)
- if (iGID >= iNumElements) {
+ if (iGID >= numElements) {
return;
}
+
// add the vector elements
c[iGID] = a[iGID] + b[iGID];
}