diff options
author | Michael Bien <[email protected]> | 2010-04-12 22:27:03 +0200 |
---|---|---|
committer | Michael Bien <[email protected]> | 2010-04-12 22:27:03 +0200 |
commit | 2c85c416d85205ab98b33e1a0b0daab32d4d81ff (patch) | |
tree | 4187ba06dec81da46495a300bb4f914e931f0c58 /src/com/jogamp/opencl/demos/hellojocl | |
parent | b51f2e1c254cdd74c9e43904c62694f64e6ae7e6 (diff) |
changes due to package renaming in jocl.
Diffstat (limited to 'src/com/jogamp/opencl/demos/hellojocl')
-rw-r--r-- | src/com/jogamp/opencl/demos/hellojocl/HelloJOCL.java | 91 | ||||
-rw-r--r-- | src/com/jogamp/opencl/demos/hellojocl/VectorAdd.cl | 15 |
2 files changed, 106 insertions, 0 deletions
diff --git a/src/com/jogamp/opencl/demos/hellojocl/HelloJOCL.java b/src/com/jogamp/opencl/demos/hellojocl/HelloJOCL.java
new file mode 100644
index 0000000..31fabab
--- /dev/null
+++ b/src/com/jogamp/opencl/demos/hellojocl/HelloJOCL.java
@@ -0,0 +1,91 @@
+package com.jogamp.opencl.demos.hellojocl;
+
+import com.jogamp.opencl.CLBuffer;
+import com.jogamp.opencl.CLCommandQueue;
+import com.jogamp.opencl.CLContext;
+import com.jogamp.opencl.CLKernel;
+import com.jogamp.opencl.CLProgram;
+import java.io.IOException;
+import java.nio.FloatBuffer;
+import java.util.Random;
+
+import static java.lang.System.*;
+import static com.jogamp.opencl.CLMemory.Mem.*;
+
+/**
+ * Hello Java OpenCL example. Adds all elements of buffer A to buffer B
+ * and stores the result in buffer C.<br/>
+ * Sample was inspired by the Nvidia VectorAdd example written in C/C++
+ * which is bundled in the Nvidia OpenCL SDK.
+ * @author Michael Bien
+ */
+public class HelloJOCL {
+
+    public static void main(String[] args) throws IOException {
+
+        int elementCount = 11444777; // Length of arrays to process
+        int localWorkSize = 256; // Local work size dimensions
+        int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize
+
+        // set up
+        CLContext context = CLContext.create();
+
+        CLProgram program = context.createProgram(HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build();
+
+        CLBuffer<FloatBuffer> clBufferA = context.createFloatBuffer(globalWorkSize, READ_ONLY);
+        CLBuffer<FloatBuffer> clBufferB = context.createFloatBuffer(globalWorkSize, READ_ONLY);
+        CLBuffer<FloatBuffer> clBufferC = context.createFloatBuffer(globalWorkSize, WRITE_ONLY);
+
+        out.println("used device memory: "
+            + (clBufferA.getSize()+clBufferB.getSize()+clBufferC.getSize())/1000000 +"MB");
+
+        // fill read buffers with random numbers (just to have test data; seed is fixed -> results will not change between runs).
+        fillBuffer(clBufferA.getBuffer(), 12345);
+        fillBuffer(clBufferB.getBuffer(), 67890);
+
+        // get a reference to the kernel function with the name 'VectorAdd'
+        // and map the buffers to its input parameters.
+        CLKernel kernel = program.createCLKernel("VectorAdd");
+        kernel.putArgs(clBufferA, clBufferB, clBufferC).putArg(elementCount);
+
+        // create command queue on fastest device.
+        CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue();
+
+        // asynchronous write of data to GPU device, blocking read later to get the computed results back.
+        long time = nanoTime();
+        queue.putWriteBuffer(clBufferA, false)
+             .putWriteBuffer(clBufferB, false)
+             .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize)
+             .putReadBuffer(clBufferC, true);
+        time = nanoTime() - time;
+
+        // cleanup all resources associated with this context.
+        context.release();
+
+        // print first few elements of the resulting buffer to the console.
+        out.println("a+b=c results snapshot: ");
+        for(int i = 0; i < 10; i++)
+            out.print(clBufferC.getBuffer().get() + ", ");
+        out.println("...; " + clBufferC.getBuffer().remaining() + " more");
+
+        out.println("computation took: "+(time/1000000)+"ms");
+
+    }
+
+    private static final void fillBuffer(FloatBuffer buffer, int seed) {
+        Random rnd = new Random(seed);
+        while(buffer.remaining() != 0)
+            buffer.put(rnd.nextFloat()*100);
+        buffer.rewind();
+    }
+
+    private static final int roundUp(int groupSize, int globalSize) {
+        int r = globalSize % groupSize;
+        if (r == 0) {
+            return globalSize;
+        } else {
+            return globalSize + groupSize - r;
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/com/jogamp/opencl/demos/hellojocl/VectorAdd.cl b/src/com/jogamp/opencl/demos/hellojocl/VectorAdd.cl
new file mode 100644
index 0000000..ac9dde2
--- /dev/null
+++ b/src/com/jogamp/opencl/demos/hellojocl/VectorAdd.cl
@@ -0,0 +1,15 @@
+
+    // OpenCL Kernel Function for element by element vector addition
+    kernel void VectorAdd(global const float* a, global const float* b, global float* c, int numElements) {
+
+        // get index into global data array
+        int iGID = get_global_id(0);
+
+        // bound check (equivalent to the limit on a 'for' loop for standard/serial C code)
+        if (iGID >= numElements)  {
+            return;
+        }
+
+        // add the vector elements
+        c[iGID] = a[iGID] + b[iGID];
+    }
\ No newline at end of file