aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Bien <[email protected]>2009-10-20 22:06:10 +0200
committerMichael Bien <[email protected]>2009-10-20 22:06:10 +0200
commitabe0135b4457d4c4ff722b0f39a47cad6c178f7e (patch)
treecba794c54c5cc0f9d005b8ab2d7781f739c01d07
parent7f2db980b303fa75f3830679ce65fe4ae41c30dc (diff)
refactored JOCLTest into LowLevelBindingTest and HighLevelBindingTest.
moved listCLPlatforms() and getLowLevelBinding() from CLContext to CLPlatform. added method to create CLPrograms from InputStreams and updated test.
-rw-r--r--src/com/mbien/opencl/CLContext.java76
-rw-r--r--src/com/mbien/opencl/CLKernel.java2
-rw-r--r--src/com/mbien/opencl/CLPlatform.java43
-rw-r--r--test/com/mbien/opencl/HighLevelBindingTest.java156
-rw-r--r--test/com/mbien/opencl/LowLevelBindingTest.java (renamed from test/com/mbien/opencl/JOCLTest.java)166
-rw-r--r--test/com/mbien/opencl/TestUtils.java29
-rw-r--r--test/com/mbien/opencl/testkernels.cl22
7 files changed, 286 insertions, 208 deletions
diff --git a/src/com/mbien/opencl/CLContext.java b/src/com/mbien/opencl/CLContext.java
index dda8eb05..db32a446 100644
--- a/src/com/mbien/opencl/CLContext.java
+++ b/src/com/mbien/opencl/CLContext.java
@@ -1,6 +1,9 @@
package com.mbien.opencl;
-import com.mbien.opencl.impl.CLImpl;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
@@ -18,7 +21,7 @@ import static com.mbien.opencl.CLException.*;
*/
public final class CLContext {
- final static CL cl;
+ final CL cl;
public final long ID;
private CLDevice[] devices;
@@ -27,13 +30,9 @@ public final class CLContext {
private final List<CLBuffer> buffers;
private final Map<CLDevice, List<CLCommandQueue>> queuesMap;
- static{
- System.loadLibrary("gluegen-rt");
- System.loadLibrary("jocl");
- cl = new CLImpl();
- }
private CLContext(long contextID) {
+ this.cl = CLPlatform.getLowLevelBinding();
this.ID = contextID;
this.programs = new ArrayList<CLProgram>();
this.buffers = new ArrayList<CLBuffer>();
@@ -62,20 +61,43 @@ public final class CLContext {
private static final CLContext createContext(long deviceType) {
- IntBuffer error = IntBuffer.allocate(1);
- long context = cl.clCreateContextFromType(null, 0, deviceType, null, null, error, 0);
+ IntBuffer status = IntBuffer.allocate(1);
+ long context = CLPlatform.getLowLevelBinding().clCreateContextFromType(null, 0, deviceType, null, null, status, 0);
- checkForError(error.get(), "can not create CL context");
+ checkForError(status.get(), "can not create CL context");
return new CLContext(context);
}
-
+
+ /**
+ * Creates a program from the given source; the program is not built yet.
+ */
public CLProgram createProgram(String src) {
CLProgram program = new CLProgram(this, src, ID);
programs.add(program);
return program;
}
+    /**
+     * Creates a program from OpenCL source text read from the given stream; the program is not built yet.
+     * The stream is read line by line using the platform default charset, every line is
+     * terminated with a single '\n' and the stream is closed before this method returns.
+     * @param sources stream providing the program source, must not be null
+     *        (note that Class.getResourceAsStream returns null for a missing resource).
+     * @return the new program, created via {@link #createProgram(String)}.
+     * @throws IOException when an IOException occurred while reading or closing the stream.
+     * @throws NullPointerException if sources is null.
+     */
+    public CLProgram createProgram(InputStream sources) throws IOException {
+
+        // fail with a descriptive message instead of the anonymous NPE raised by the reader constructor
+        if (sources == null)
+            throw new NullPointerException("sources stream is null (resource not found?)");
+
+        BufferedReader reader = new BufferedReader(new InputStreamReader(sources));
+        StringBuilder sb = new StringBuilder();
+
+        try {
+            String line;
+            while ((line = reader.readLine()) != null)
+                sb.append(line).append("\n");
+        } finally {
+            // closing the outermost reader also closes the wrapped stream
+            reader.close();
+        }
+
+        return createProgram(sb.toString());
+    }
+
public CLBuffer createBuffer(int flags, ByteBuffer directBuffer) {
CLBuffer buffer = new CLBuffer(this, flags, directBuffer);
buffers.add(buffer);
@@ -218,38 +240,6 @@ public final class CLContext {
return null;
}
- /**
- * Lists all available OpenCL implementaitons.
- * @throws CLException if something went wrong initializing OpenCL
- */
- public static CLPlatform[] listCLPlatforms() {
-
- int[] intBuffer = new int[1];
- // find all available OpenCL platforms
- int ret = cl.clGetPlatformIDs(0, null, 0, intBuffer, 0);
- checkForError(ret, "can not enumerate platforms");
-
- // receive platform ids
- long[] platformId = new long[intBuffer[0]];
- ret = cl.clGetPlatformIDs(platformId.length, platformId, 0, null, 0);
- checkForError(ret, "can not enumerate platforms");
-
- CLPlatform[] platforms = new CLPlatform[platformId.length];
-
- for (int i = 0; i < platformId.length; i++)
- platforms[i] = new CLPlatform(cl, platformId[i]);
-
- return platforms;
- }
-
- /**
- * Returns the low level binding interface to the OpenCL APIs.
- */
- public static CL getLowLevelBinding() {
- return cl;
- }
-
-
@Override
public String toString() {
return "CLContext [id: " + ID
diff --git a/src/com/mbien/opencl/CLKernel.java b/src/com/mbien/opencl/CLKernel.java
index 1db3da38..9f184ce4 100644
--- a/src/com/mbien/opencl/CLKernel.java
+++ b/src/com/mbien/opencl/CLKernel.java
@@ -32,7 +32,7 @@ public class CLKernel {
ret = cl.clGetKernelInfo(ID, CL.CL_KERNEL_FUNCTION_NAME, bb.capacity(), bb, null, 0);
checkForError(ret, "error while asking for kernel function name");
- this.name = new String(bb.array(), 0, (int)longArray[0]).trim();
+ this.name = new String(bb.array(), 0, bb.capacity()).trim();
}
diff --git a/src/com/mbien/opencl/CLPlatform.java b/src/com/mbien/opencl/CLPlatform.java
index dde9994b..56ef3713 100644
--- a/src/com/mbien/opencl/CLPlatform.java
+++ b/src/com/mbien/opencl/CLPlatform.java
@@ -1,5 +1,6 @@
package com.mbien.opencl;
+import com.mbien.opencl.impl.CLImpl;
import java.nio.ByteBuffer;
import static com.mbien.opencl.CLException.*;
/**
@@ -13,11 +14,47 @@ public final class CLPlatform {
*/
public final long ID;
- private final CL cl;
+ private static final CL cl;
- CLPlatform(CL cl, long id) {
+ // Runs once when CLPlatform is initialized: loads the gluegen-rt and jocl native
+ // libraries, then creates the CLImpl instance assigned to the static cl field.
+ static{
+ System.loadLibrary("gluegen-rt");
+ System.loadLibrary("jocl");
+ cl = new CLImpl();
+ }
+
+ CLPlatform(long id) {
this.ID = id;
- this.cl = cl;
+ }
+
+    /**
+     * Lists all available OpenCL implementations.
+     * The number of platforms is queried first, afterwards the platform ids are fetched
+     * and each id is wrapped in a CLPlatform.
+     * @return one CLPlatform per reported platform id, an empty array if no platform was reported.
+     * @throws CLException if something went wrong initializing OpenCL
+     */
+    public static CLPlatform[] listCLPlatforms() {
+
+        int[] platformCount = new int[1];
+        // find all available OpenCL platforms
+        int ret = cl.clGetPlatformIDs(0, null, 0, platformCount, 0);
+        checkForError(ret, "can not enumerate platforms");
+
+        // asking for zero entries with a non-null id array is rejected with CL_INVALID_VALUE,
+        // so there is nothing to fetch when no platform was reported
+        if (platformCount[0] <= 0)
+            return new CLPlatform[0];
+
+        // receive platform ids
+        long[] platformId = new long[platformCount[0]];
+        ret = cl.clGetPlatformIDs(platformId.length, platformId, 0, null, 0);
+        checkForError(ret, "can not enumerate platforms");
+
+        CLPlatform[] platforms = new CLPlatform[platformId.length];
+
+        for (int i = 0; i < platformId.length; i++)
+            platforms[i] = new CLPlatform(platformId[i]);
+
+        return platforms;
+    }
+
+ /**
+ * Returns the low level binding interface to the OpenCL APIs.
+ * @return the CL instance held in the static cl field of this class.
+ */
+ public static CL getLowLevelBinding() {
+ return cl;
}
/**
diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java
new file mode 100644
index 00000000..a2bfce91
--- /dev/null
+++ b/test/com/mbien/opencl/HighLevelBindingTest.java
@@ -0,0 +1,156 @@
+package com.mbien.opencl;
+
+import com.sun.gluegen.runtime.BufferFactory;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Map;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import static org.junit.Assert.*;
+import static java.lang.System.*;
+import static com.mbien.opencl.TestUtils.*;
+
+/**
+ * Test testing the high level bindings.
+ * @author Michael Bien
+ */
+public class HighLevelBindingTest {
+
+    /**
+     * Prints the operating system and VM name once before the tests of this class run.
+     */
+    @BeforeClass
+    public static void setUpClass() throws Exception {
+        out.println("OS: " + System.getProperty("os.name"));
+        out.println("VM: " + System.getProperty("java.vm.name"));
+    }
+
+    /**
+     * Lists all platforms and their devices without creating a context and prints their properties.
+     * Contains no assertions; it only fails when one of the queries throws.
+     */
+    @Test
+    public void contextlessTest() {
+
+        out.println(" - - - highLevelTest; contextless - - - ");
+
+        CLPlatform[] clPlatforms = CLPlatform.listCLPlatforms();
+
+        for (CLPlatform platform : clPlatforms) {
+
+            out.println("platform info:");
+            out.println(" name: "+platform.getName());
+            out.println(" profile: "+platform.getProfile());
+            out.println(" version: "+platform.getVersion());
+            out.println(" vendor: "+platform.getVendor());
+
+            CLDevice[] clDevices = platform.listCLDevices();
+            for (CLDevice device : clDevices) {
+                out.println("device info:");
+                out.println(" name: "+device.getName());
+                out.println(" profile: "+device.getProfile());
+                out.println(" vendor: "+device.getVendor());
+                out.println(" type: "+device.getType());
+                out.println(" global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB");
+                out.println(" local mem: "+device.getLocalMemSize()/1024+" KB");
+                out.println(" clock: "+device.getMaxClockFrequency()+" MHz");
+                out.println(" max work group size: "+device.getMaxWorkGroupSize());
+                out.println(" max compute units: "+device.getMaxComputeUnits());
+                out.println(" extensions: "+device.getExtensions());
+            }
+        }
+
+    }
+
+    /**
+     * Builds the program from testkernels.cl, enqueues the VectorAddGM kernel on the first
+     * program device and checks that released buffers and programs are removed from the context.
+     * The computed sums are only printed as a snapshot, they are not verified.
+     * @throws IOException if the kernel source could not be read.
+     */
+    @Test
+    public void vectorAddGMTest() throws IOException {
+
+        out.println(" - - - highLevelTest; global memory kernel - - - ");
+
+        CLContext context = CLContext.create();
+
+        CLDevice[] contextDevices = context.getCLDevices();
+
+        out.println("context devices:");
+        for (CLDevice device : contextDevices) {
+            out.println(" "+device.toString());
+        }
+
+        CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build();
+
+        CLDevice[] programDevices = program.getCLDevices();
+
+        assertEquals(contextDevices.length, programDevices.length);
+
+        out.println("program devices:");
+        for (CLDevice device : programDevices) {
+            out.println(" "+device.toString());
+            out.println(" build log: "+program.getBuildLog(device));
+            out.println(" build status: "+program.getBuildStatus(device));
+        }
+
+        String source = program.getSource();
+        assertFalse(source.trim().isEmpty());
+//        out.println("source:\n"+source);
+
+        int elementCount = 11444777; // number of ints to process (deliberately not a multiple of the local work size)
+        int localWorkSize = 256; // local work size dimension
+        int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the localWorkSize
+
+        out.println("allocateing buffers of size: "+globalWorkSize);
+
+        ByteBuffer srcA = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT);
+        ByteBuffer srcB = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT);
+        ByteBuffer dest = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT);
+
+        fillBuffer(srcA, 23456);
+        fillBuffer(srcB, 46987);
+
+        CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, srcA);
+        CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB);
+        CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest);
+
+        Map<String, CLKernel> kernels = program.getCLKernels();
+        for (CLKernel kernel : kernels.values()) {
+            out.println("kernel: "+kernel.toString());
+        }
+
+        // both kernels declared in testkernels.cl must be found by name
+        assertNotNull(kernels.get("VectorAddGM"));
+        assertNotNull(kernels.get("Test"));
+
+        CLKernel vectorAddKernel = kernels.get("VectorAddGM");
+
+        // the three buffers are passed with the size of a long, the element count with the size of an int
+        vectorAddKernel.setArg(0, BufferFactory.SIZEOF_LONG, clBufferA)
+                       .setArg(1, BufferFactory.SIZEOF_LONG, clBufferB)
+                       .setArg(2, BufferFactory.SIZEOF_LONG, clBufferC)
+                       .setArg(3, BufferFactory.SIZEOF_INT, elementCount);
+
+        CLCommandQueue queue = programDevices[0].createCommandQueue();
+
+        // Asynchronous write of data to GPU device, blocking read later
+        queue.putWriteBuffer(clBufferA, false)
+             .putWriteBuffer(clBufferB, false)
+             .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize })
+             .putReadBuffer(clBufferC, true).release();
+
+        out.println("a+b=c result snapshot: ");
+        for(int i = 0; i < 10; i++)
+            out.print(dest.getInt()+", ");
+        out.println("...; "+dest.remaining()/BufferFactory.SIZEOF_INT + " more");
+
+        // every release must remove the buffer from the context;
+        // assertEquals reports the actual count when the check fails
+        assertEquals(3, context.getCLBuffers().size());
+        clBufferA.release();
+        assertEquals(2, context.getCLBuffers().size());
+
+        clBufferB.release();
+        assertEquals(1, context.getCLBuffers().size());
+
+        clBufferC.release();
+        assertEquals(0, context.getCLBuffers().size());
+
+
+        // the same bookkeeping is expected for programs
+        assertEquals(1, context.getCLPrograms().size());
+        program.release();
+        assertEquals(0, context.getCLPrograms().size());
+
+//        CLDevice device = ctx.getMaxFlopsDevice();
+//        out.println("max FLOPS device: " + device);
+        context.release();
+    }
+
+}
diff --git a/test/com/mbien/opencl/JOCLTest.java b/test/com/mbien/opencl/LowLevelBindingTest.java
index 224a6768..e5175129 100644
--- a/test/com/mbien/opencl/JOCLTest.java
+++ b/test/com/mbien/opencl/LowLevelBindingTest.java
@@ -4,18 +4,17 @@ import com.sun.gluegen.runtime.BufferFactory;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
-import java.util.Map;
-import java.util.Random;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import static java.lang.System.*;
+import static com.mbien.opencl.TestUtils.*;
/**
- * Test for testing basic functionality.
+ * Test testing the low level bindings.
* @author Michael Bien
*/
-public class JOCLTest {
+public class LowLevelBindingTest {
private final static String programSource =
" // OpenCL Kernel Function for element by element vector addition \n"
@@ -39,8 +38,6 @@ public class JOCLTest {
+ " c[iGID] = iGID; \n"
+ "} \n";
- public JOCLTest() {
- }
@BeforeClass
public static void setUpClass() throws Exception {
@@ -55,7 +52,7 @@ public class JOCLTest {
int ret = CL.CL_SUCCESS;
- CL cl = CLContext.getLowLevelBinding();
+ CL cl = CLPlatform.getLowLevelBinding();
int[] intBuffer = new int[1];
// find all available OpenCL platforms
@@ -134,7 +131,7 @@ public class JOCLTest {
long[] longArray = new long[1];
ByteBuffer bb = ByteBuffer.allocate(4096).order(ByteOrder.nativeOrder());
- CL cl = CLContext.getLowLevelBinding();
+ CL cl = CLPlatform.getLowLevelBinding();
int ret = CL.CL_SUCCESS;
int[] intArray = new int[1];
@@ -302,163 +299,10 @@ public class JOCLTest {
}
}
- private void fillBuffer(ByteBuffer buffer, int seed) {
-
- Random rnd = new Random(seed);
-
- while(buffer.remaining() != 0)
- buffer.putInt(rnd.nextInt());
-
- buffer.rewind();
- }
-
private ByteBuffer wrap(long value) {
return (ByteBuffer) BufferFactory.newDirectByteBuffer(8).putLong(value).rewind();
}
- @Test
- public void highLevelTest1() {
-
- out.println(" - - - highLevelTest; contextless - - - ");
-
- CLPlatform[] clPlatforms = CLContext.listCLPlatforms();
-
- for (CLPlatform platform : clPlatforms) {
-
- out.println("platform info:");
- out.println(" name: "+platform.getName());
- out.println(" profile: "+platform.getProfile());
- out.println(" version: "+platform.getVersion());
- out.println(" vendor: "+platform.getVendor());
-
- CLDevice[] clDevices = platform.listCLDevices();
- for (CLDevice device : clDevices) {
- out.println("device info:");
- out.println(" name: "+device.getName());
- out.println(" profile: "+device.getProfile());
- out.println(" vendor: "+device.getVendor());
- out.println(" type: "+device.getType());
- out.println(" global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB");
- out.println(" local mem: "+device.getLocalMemSize()/1024+" KB");
- out.println(" clock: "+device.getMaxClockFrequency()+" MHz");
- out.println(" max work group size: "+device.getMaxWorkGroupSize());
- out.println(" max compute units: "+device.getMaxComputeUnits());
- out.println(" extensions: "+device.getExtensions());
- }
- }
-
-
- }
-
-
- @Test
- public void highLevelTest2() {
-
- out.println(" - - - highLevelTest - - - ");
-
- CLContext context = CLContext.create();
-
- CLDevice[] contextDevices = context.getCLDevices();
-
- out.println("context devices:");
- for (CLDevice device : contextDevices) {
- out.println(" "+device.toString());
- }
-
- CLProgram program = context.createProgram(programSource).build();
-
- CLDevice[] programDevices = program.getCLDevices();
-
- assertEquals(contextDevices.length, programDevices.length);
-
- out.println("program devices:");
- for (CLDevice device : programDevices) {
- out.println(" "+device.toString());
- out.println(" build log: "+program.getBuildLog(device));
- out.println(" build status: "+program.getBuildStatus(device));
- }
-
- String source = program.getSource();
- assertFalse(source.trim().isEmpty());
-// out.println("source:\n"+source);
-
- int elementCount = 11444777; // Length of float arrays to process (odd # for illustration)
- int localWorkSize = 256; // set and log Global and Local work size dimensions
- int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize
-
- out.println("allocateing buffers of size: "+globalWorkSize);
-
- ByteBuffer srcA = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT);
- ByteBuffer srcB = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT);
- ByteBuffer dest = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT);
-
- fillBuffer(srcA, 23456);
- fillBuffer(srcB, 46987);
-
- CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, srcA);
- CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB);
- CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest);
-
- Map<String, CLKernel> kernels = program.getCLKernels();
- for (CLKernel kernel : kernels.values()) {
- out.println("kernel: "+kernel.toString());
- }
-
- assertNotNull(kernels.get("VectorAdd"));
- assertNotNull(kernels.get("Test"));
-
- CLKernel vectorAddKernel = kernels.get("VectorAdd");
-
- vectorAddKernel.setArg(0, BufferFactory.SIZEOF_LONG, clBufferA)
- .setArg(1, BufferFactory.SIZEOF_LONG, clBufferB)
- .setArg(2, BufferFactory.SIZEOF_LONG, clBufferC)
- .setArg(3, BufferFactory.SIZEOF_INT, elementCount);
-
- CLCommandQueue queue = programDevices[0].createCommandQueue();
-
- // Asynchronous write of data to GPU device, blocking read later
- queue.putWriteBuffer(clBufferA, false)
- .putWriteBuffer(clBufferB, false)
- .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize })
- .putReadBuffer(clBufferC, true).release();
-
- out.println("a+b=c result snapshot: ");
- for(int i = 0; i < 10; i++)
- out.print(dest.getInt()+", ");
- out.println("...; "+dest.remaining()/BufferFactory.SIZEOF_INT + " more");
-
- assertTrue(3 == context.getCLBuffers().size());
- clBufferA.release();
- assertTrue(2 == context.getCLBuffers().size());
-
- assertTrue(2 == context.getCLBuffers().size());
- clBufferB.release();
- assertTrue(1 == context.getCLBuffers().size());
-
- assertTrue(1 == context.getCLBuffers().size());
- clBufferC.release();
- assertTrue(0 == context.getCLBuffers().size());
-
-
- assertTrue(1 == context.getCLPrograms().size());
- program.release();
- assertTrue(0 == context.getCLPrograms().size());
-
-// CLDevice device = ctx.getMaxFlopsDevice();
-// out.println("max FLOPS device: " + device);
- context.release();
- }
-
-
- private final int roundUp(int groupSize, int globalSize) {
- int r = globalSize % groupSize;
- if (r == 0) {
- return globalSize;
- } else {
- return globalSize + groupSize - r;
- }
- }
-
private final void checkForError(int ret) {
this.checkError("", ret);
}
diff --git a/test/com/mbien/opencl/TestUtils.java b/test/com/mbien/opencl/TestUtils.java
new file mode 100644
index 00000000..70bade8a
--- /dev/null
+++ b/test/com/mbien/opencl/TestUtils.java
@@ -0,0 +1,29 @@
+package com.mbien.opencl;
+
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+/**
+ * Static helpers shared by the low level and high level binding tests.
+ * @author Michael Bien
+ */
+public class TestUtils {
+
+    /**
+     * Fills the buffer from its current position with pseudo random ints and rewinds it.
+     * The values depend only on the seed, so equal seeds produce equal content.
+     * Trailing bytes which can not hold a whole int (remaining() % 4 != 0) are left untouched
+     * instead of failing with a BufferOverflowException before the buffer was rewound.
+     * @param buffer the buffer to fill, written using its current byte order.
+     * @param seed the seed of the random number generator.
+     */
+    public static final void fillBuffer(ByteBuffer buffer, int seed) {
+
+        final int sizeofInt = 4; // number of bytes written by one putInt call
+        Random rnd = new Random(seed);
+
+        while(buffer.remaining() >= sizeofInt)
+            buffer.putInt(rnd.nextInt());
+
+        buffer.rewind();
+    }
+
+    /**
+     * Rounds globalSize up to a multiple of groupSize.
+     * Intended for a positive groupSize and a non-negative globalSize; the result is not
+     * checked for int overflow.
+     * @param groupSize the group size, must not be 0 (division by zero otherwise).
+     * @param globalSize the value to round up.
+     * @return globalSize if it is already a multiple of groupSize, otherwise the next larger multiple.
+     */
+    public static final int roundUp(int groupSize, int globalSize) {
+        int r = globalSize % groupSize;
+        if (r == 0) {
+            return globalSize;
+        } else {
+            return globalSize + groupSize - r;
+        }
+    }
+}
diff --git a/test/com/mbien/opencl/testkernels.cl b/test/com/mbien/opencl/testkernels.cl
new file mode 100644
index 00000000..0790cb32
--- /dev/null
+++ b/test/com/mbien/opencl/testkernels.cl
@@ -0,0 +1,22 @@
+
+    // Element by element vector addition on global memory: c[i] = a[i] + b[i]
+    __kernel void VectorAddGM(__global const int* a, __global const int* b, __global int* c, int iNumElements) {
+        // each work item handles the element addressed by its global id in dimension 0
+        const int gid = get_global_id(0);
+        // work items at or beyond iNumElements do nothing
+        if (gid < iNumElements) {
+            c[gid] = a[gid] + b[gid];
+        }
+    }
+
+    // Stores the global id of every work item in c; a and b are declared but never accessed
+    __kernel void Test(__global const int* a, __global const int* b, __global int* c, int iNumElements) {
+        // each work item handles the element addressed by its global id in dimension 0
+        const int gid = get_global_id(0);
+        // work items at or beyond iNumElements do nothing
+        if (gid < iNumElements) {
+            c[gid] = gid;
+        }
+    }