JOCL FAQ
Get the Source Code
Create a local copy/branch of the git repository, either anonymously:
- git clone git://github.com/mbien/gluegen.git gluegen
- git clone git://github.com/mbien/jocl.git jocl
- git clone git://github.com/mbien/jocl-demos.git jocl-demos
- git clone git://github.com/mbien/jogl.git jogl
or via SSH with your user credentials, so you can easily push your changes back to the GitHub server:
- git clone git@github.com:username/gluegen.git gluegen
- git clone git@github.com:username/jocl.git jocl
- git clone git@github.com:username/jocl-demos.git jocl-demos
- git clone git@github.com:username/jogl.git jogl
Getting Started
Hello JOCL host program:
import com.mbien.opencl.*; import java.io.IOException; import java.nio.FloatBuffer; import java.util.Random; import static java.lang.System.*; import static com.mbien.opencl.CLMemory.Mem.*; /** * Hello Java OpenCL example. Adds all elements of buffer A to buffer B * and stores the result in buffer C.<br/> * Sample was inspired by the Nvidia VectorAdd example written in C/C++ * which is bundled in the Nvidia OpenCL SDK. * @author Michael Bien */ public class HelloJOCL { public static void main(String[] args) throws IOException { int elementCount = 11444777; // Length of arrays to process int localWorkSize = 256; // Local work size int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize // set up CLContext context = CLContext.create(); CLProgram program = context.createProgram(HelloJOCL.class.getResourceAsStream("VectorAdd.cl")).build(); CLBuffer<FloatBuffer> clBufferA = context.createFloatBuffer(globalWorkSize, READ_ONLY); CLBuffer<FloatBuffer> clBufferB = context.createFloatBuffer(globalWorkSize, READ_ONLY); CLBuffer<FloatBuffer> clBufferC = context.createFloatBuffer(globalWorkSize, WRITE_ONLY); // fill read buffers with random numbers. fillBuffer(clBufferA.getBuffer(), 12345); fillBuffer(clBufferB.getBuffer(), 67890); // get a reference to the kernel functon with the name 'VectorAdd' // and map the buffers to its input parameters. CLKernel kernel = program.createCLKernel("VectorAdd"); kernel.putArgs(clBufferA, clBufferB, clBufferC).putArg(elementCount); // create command queue on fastest device. CLCommandQueue queue = context.getMaxFlopsDevice().createCommandQueue(); // asynchronous write of data to GPU device, // blocking read later to get the computed results back. 
long time = nanoTime(); queue.putWriteBuffer(clBufferA, false) .putWriteBuffer(clBufferB, false) .put1DRangeKernel(kernel, 0, globalWorkSize, localWorkSize) .putReadBuffer(clBufferC, true); time = nanoTime() - time; // cleanup all resources associated with this context. context.release(); // print first few elements of the resulting buffer to the console. out.println("a+b=c results snapshot: "); for(int i = 0; i < 10; i++) out.print(clBufferC.getBuffer().get() + ", "); out.println("...; " + clBufferC.getBuffer().remaining() + " more"); out.println("computation took: "+(time/1000000)+"ms"); } /* utilities */ private static void fillBuffer(FloatBuffer buffer, int seed) { Random rnd = new Random(seed); while(buffer.remaining() != 0) buffer.put(rnd.nextFloat()*100); buffer.rewind(); } private static int roundUp(int groupSize, int globalSize) { int r = globalSize % groupSize; if (r == 0) return globalSize; else return globalSize + groupSize - r; } }
Hello JOCL Kernel:
// OpenCL kernel: element-by-element vector addition, c[i] = a[i] + b[i].
kernel void VectorAdd(global const float* a, global const float* b, global float* c, int numElements) {

    // position of this work-item in the global data array
    const int gid = get_global_id(0);

    // bound check, equivalent to the limit on a 'for' loop:
    // work-items at or beyond numElements write nothing
    if (gid < numElements) {
        // add the vector elements
        c[gid] = a[gid] + b[gid];
    }
}