diff options
author | Michael Bien <[email protected]> | 2009-10-20 22:06:10 +0200 |
---|---|---|
committer | Michael Bien <[email protected]> | 2009-10-20 22:06:10 +0200 |
commit | abe0135b4457d4c4ff722b0f39a47cad6c178f7e (patch) | |
tree | cba794c54c5cc0f9d005b8ab2d7781f739c01d07 | |
parent | 7f2db980b303fa75f3830679ce65fe4ae41c30dc (diff) |
Refactored JOCLTest into LowLevelBindingTest and HighLevelBindingTest.
Moved listCLPlatforms() and getLowLevelBinding() from CLContext to CLPlatform.
Added a method to create CLPrograms from InputStreams and updated the test.
-rw-r--r-- | src/com/mbien/opencl/CLContext.java | 76 | ||||
-rw-r--r-- | src/com/mbien/opencl/CLKernel.java | 2 | ||||
-rw-r--r-- | src/com/mbien/opencl/CLPlatform.java | 43 | ||||
-rw-r--r-- | test/com/mbien/opencl/HighLevelBindingTest.java | 156 | ||||
-rw-r--r-- | test/com/mbien/opencl/LowLevelBindingTest.java (renamed from test/com/mbien/opencl/JOCLTest.java) | 166 | ||||
-rw-r--r-- | test/com/mbien/opencl/TestUtils.java | 29 | ||||
-rw-r--r-- | test/com/mbien/opencl/testkernels.cl | 22 |
7 files changed, 286 insertions, 208 deletions
diff --git a/src/com/mbien/opencl/CLContext.java b/src/com/mbien/opencl/CLContext.java index dda8eb05..db32a446 100644 --- a/src/com/mbien/opencl/CLContext.java +++ b/src/com/mbien/opencl/CLContext.java @@ -1,6 +1,9 @@ package com.mbien.opencl; -import com.mbien.opencl.impl.CLImpl; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.IntBuffer; @@ -18,7 +21,7 @@ import static com.mbien.opencl.CLException.*; */ public final class CLContext { - final static CL cl; + final CL cl; public final long ID; private CLDevice[] devices; @@ -27,13 +30,9 @@ public final class CLContext { private final List<CLBuffer> buffers; private final Map<CLDevice, List<CLCommandQueue>> queuesMap; - static{ - System.loadLibrary("gluegen-rt"); - System.loadLibrary("jocl"); - cl = new CLImpl(); - } private CLContext(long contextID) { + this.cl = CLPlatform.getLowLevelBinding(); this.ID = contextID; this.programs = new ArrayList<CLProgram>(); this.buffers = new ArrayList<CLBuffer>(); @@ -62,20 +61,43 @@ public final class CLContext { private static final CLContext createContext(long deviceType) { - IntBuffer error = IntBuffer.allocate(1); - long context = cl.clCreateContextFromType(null, 0, deviceType, null, null, error, 0); + IntBuffer status = IntBuffer.allocate(1); + long context = CLPlatform.getLowLevelBinding().clCreateContextFromType(null, 0, deviceType, null, null, status, 0); - checkForError(error.get(), "can not create CL context"); + checkForError(status.get(), "can not create CL context"); return new CLContext(context); } - + + /** + * Creates a program from the given sources, the program is not build yet. + */ public CLProgram createProgram(String src) { CLProgram program = new CLProgram(this, src, ID); programs.add(program); return program; } + /** + * Creates a program and reads the sources from stream, the program is not build yet. 
+ * @throws IOException when a IOException occurred while reading or closing the stream. + */ + public CLProgram createProgram(InputStream sources) throws IOException { + + BufferedReader reader = new BufferedReader(new InputStreamReader(sources)); + StringBuilder sb = new StringBuilder(); + + String line = null; + try { + while ((line = reader.readLine()) != null) + sb.append(line).append("\n"); + } finally { + sources.close(); + } + + return createProgram(sb.toString()); + } + public CLBuffer createBuffer(int flags, ByteBuffer directBuffer) { CLBuffer buffer = new CLBuffer(this, flags, directBuffer); buffers.add(buffer); @@ -218,38 +240,6 @@ public final class CLContext { return null; } - /** - * Lists all available OpenCL implementaitons. - * @throws CLException if something went wrong initializing OpenCL - */ - public static CLPlatform[] listCLPlatforms() { - - int[] intBuffer = new int[1]; - // find all available OpenCL platforms - int ret = cl.clGetPlatformIDs(0, null, 0, intBuffer, 0); - checkForError(ret, "can not enumerate platforms"); - - // receive platform ids - long[] platformId = new long[intBuffer[0]]; - ret = cl.clGetPlatformIDs(platformId.length, platformId, 0, null, 0); - checkForError(ret, "can not enumerate platforms"); - - CLPlatform[] platforms = new CLPlatform[platformId.length]; - - for (int i = 0; i < platformId.length; i++) - platforms[i] = new CLPlatform(cl, platformId[i]); - - return platforms; - } - - /** - * Returns the low level binding interface to the OpenCL APIs. 
- */ - public static CL getLowLevelBinding() { - return cl; - } - - @Override public String toString() { return "CLContext [id: " + ID diff --git a/src/com/mbien/opencl/CLKernel.java b/src/com/mbien/opencl/CLKernel.java index 1db3da38..9f184ce4 100644 --- a/src/com/mbien/opencl/CLKernel.java +++ b/src/com/mbien/opencl/CLKernel.java @@ -32,7 +32,7 @@ public class CLKernel { ret = cl.clGetKernelInfo(ID, CL.CL_KERNEL_FUNCTION_NAME, bb.capacity(), bb, null, 0); checkForError(ret, "error while asking for kernel function name"); - this.name = new String(bb.array(), 0, (int)longArray[0]).trim(); + this.name = new String(bb.array(), 0, bb.capacity()).trim(); } diff --git a/src/com/mbien/opencl/CLPlatform.java b/src/com/mbien/opencl/CLPlatform.java index dde9994b..56ef3713 100644 --- a/src/com/mbien/opencl/CLPlatform.java +++ b/src/com/mbien/opencl/CLPlatform.java @@ -1,5 +1,6 @@ package com.mbien.opencl; +import com.mbien.opencl.impl.CLImpl; import java.nio.ByteBuffer; import static com.mbien.opencl.CLException.*; /** @@ -13,11 +14,47 @@ public final class CLPlatform { */ public final long ID; - private final CL cl; + private static final CL cl; - CLPlatform(CL cl, long id) { + static{ + System.loadLibrary("gluegen-rt"); + System.loadLibrary("jocl"); + cl = new CLImpl(); + } + + CLPlatform(long id) { this.ID = id; - this.cl = cl; + } + + /** + * Lists all available OpenCL implementaitons. 
+ * @throws CLException if something went wrong initializing OpenCL + */ + public static CLPlatform[] listCLPlatforms() { + + int[] intBuffer = new int[1]; + // find all available OpenCL platforms + int ret = cl.clGetPlatformIDs(0, null, 0, intBuffer, 0); + checkForError(ret, "can not enumerate platforms"); + + // receive platform ids + long[] platformId = new long[intBuffer[0]]; + ret = cl.clGetPlatformIDs(platformId.length, platformId, 0, null, 0); + checkForError(ret, "can not enumerate platforms"); + + CLPlatform[] platforms = new CLPlatform[platformId.length]; + + for (int i = 0; i < platformId.length; i++) + platforms[i] = new CLPlatform(platformId[i]); + + return platforms; + } + + /** + * Returns the low level binding interface to the OpenCL APIs. + */ + public static CL getLowLevelBinding() { + return cl; } /** diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java new file mode 100644 index 00000000..a2bfce91 --- /dev/null +++ b/test/com/mbien/opencl/HighLevelBindingTest.java @@ -0,0 +1,156 @@ +package com.mbien.opencl; + +import com.sun.gluegen.runtime.BufferFactory; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Map; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.*; +import static java.lang.System.*; +import static com.mbien.opencl.TestUtils.*; + +/** + * Test testing the high level bindings. 
+ * @author Michael Bien + */ +public class HighLevelBindingTest { + + @BeforeClass + public static void setUpClass() throws Exception { + out.println("OS: " + System.getProperty("os.name")); + out.println("VM: " + System.getProperty("java.vm.name")); + } + + @Test + public void contextlessTest() { + + out.println(" - - - highLevelTest; contextless - - - "); + + CLPlatform[] clPlatforms = CLPlatform.listCLPlatforms(); + + for (CLPlatform platform : clPlatforms) { + + out.println("platform info:"); + out.println(" name: "+platform.getName()); + out.println(" profile: "+platform.getProfile()); + out.println(" version: "+platform.getVersion()); + out.println(" vendor: "+platform.getVendor()); + + CLDevice[] clDevices = platform.listCLDevices(); + for (CLDevice device : clDevices) { + out.println("device info:"); + out.println(" name: "+device.getName()); + out.println(" profile: "+device.getProfile()); + out.println(" vendor: "+device.getVendor()); + out.println(" type: "+device.getType()); + out.println(" global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB"); + out.println(" local mem: "+device.getLocalMemSize()/1024+" KB"); + out.println(" clock: "+device.getMaxClockFrequency()+" MHz"); + out.println(" max work group size: "+device.getMaxWorkGroupSize()); + out.println(" max compute units: "+device.getMaxComputeUnits()); + out.println(" extensions: "+device.getExtensions()); + } + } + + } + + @Test + public void vectorAddGMTest() throws IOException { + + out.println(" - - - highLevelTest; global memory kernel - - - "); + + CLContext context = CLContext.create(); + + CLDevice[] contextDevices = context.getCLDevices(); + + out.println("context devices:"); + for (CLDevice device : contextDevices) { + out.println(" "+device.toString()); + } + + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); + + CLDevice[] programDevices = program.getCLDevices(); + + assertEquals(contextDevices.length, programDevices.length); + + 
out.println("program devices:"); + for (CLDevice device : programDevices) { + out.println(" "+device.toString()); + out.println(" build log: "+program.getBuildLog(device)); + out.println(" build status: "+program.getBuildStatus(device)); + } + + String source = program.getSource(); + assertFalse(source.trim().isEmpty()); +// out.println("source:\n"+source); + + int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) + int localWorkSize = 256; // set and log Global and Local work size dimensions + int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize + + out.println("allocateing buffers of size: "+globalWorkSize); + + ByteBuffer srcA = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); + ByteBuffer srcB = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); + ByteBuffer dest = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); + + fillBuffer(srcA, 23456); + fillBuffer(srcB, 46987); + + CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, srcA); + CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB); + CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest); + + Map<String, CLKernel> kernels = program.getCLKernels(); + for (CLKernel kernel : kernels.values()) { + out.println("kernel: "+kernel.toString()); + } + + assertNotNull(kernels.get("VectorAddGM")); + assertNotNull(kernels.get("Test")); + + CLKernel vectorAddKernel = kernels.get("VectorAddGM"); + + vectorAddKernel.setArg(0, BufferFactory.SIZEOF_LONG, clBufferA) + .setArg(1, BufferFactory.SIZEOF_LONG, clBufferB) + .setArg(2, BufferFactory.SIZEOF_LONG, clBufferC) + .setArg(3, BufferFactory.SIZEOF_INT, elementCount); + + CLCommandQueue queue = programDevices[0].createCommandQueue(); + + // Asynchronous write of data to GPU device, blocking read later + queue.putWriteBuffer(clBufferA, false) + 
.putWriteBuffer(clBufferB, false) + .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize }) + .putReadBuffer(clBufferC, true).release(); + + out.println("a+b=c result snapshot: "); + for(int i = 0; i < 10; i++) + out.print(dest.getInt()+", "); + out.println("...; "+dest.remaining()/BufferFactory.SIZEOF_INT + " more"); + + assertTrue(3 == context.getCLBuffers().size()); + clBufferA.release(); + assertTrue(2 == context.getCLBuffers().size()); + + assertTrue(2 == context.getCLBuffers().size()); + clBufferB.release(); + assertTrue(1 == context.getCLBuffers().size()); + + assertTrue(1 == context.getCLBuffers().size()); + clBufferC.release(); + assertTrue(0 == context.getCLBuffers().size()); + + + assertTrue(1 == context.getCLPrograms().size()); + program.release(); + assertTrue(0 == context.getCLPrograms().size()); + +// CLDevice device = ctx.getMaxFlopsDevice(); +// out.println("max FLOPS device: " + device); + context.release(); + } + +} diff --git a/test/com/mbien/opencl/JOCLTest.java b/test/com/mbien/opencl/LowLevelBindingTest.java index 224a6768..e5175129 100644 --- a/test/com/mbien/opencl/JOCLTest.java +++ b/test/com/mbien/opencl/LowLevelBindingTest.java @@ -4,18 +4,17 @@ import com.sun.gluegen.runtime.BufferFactory; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; -import java.util.Map; -import java.util.Random; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; import static java.lang.System.*; +import static com.mbien.opencl.TestUtils.*; /** - * Test for testing basic functionality. + * Test testing the low level bindings. 
* @author Michael Bien */ -public class JOCLTest { +public class LowLevelBindingTest { private final static String programSource = " // OpenCL Kernel Function for element by element vector addition \n" @@ -39,8 +38,6 @@ public class JOCLTest { + " c[iGID] = iGID; \n" + "} \n"; - public JOCLTest() { - } @BeforeClass public static void setUpClass() throws Exception { @@ -55,7 +52,7 @@ public class JOCLTest { int ret = CL.CL_SUCCESS; - CL cl = CLContext.getLowLevelBinding(); + CL cl = CLPlatform.getLowLevelBinding(); int[] intBuffer = new int[1]; // find all available OpenCL platforms @@ -134,7 +131,7 @@ public class JOCLTest { long[] longArray = new long[1]; ByteBuffer bb = ByteBuffer.allocate(4096).order(ByteOrder.nativeOrder()); - CL cl = CLContext.getLowLevelBinding(); + CL cl = CLPlatform.getLowLevelBinding(); int ret = CL.CL_SUCCESS; int[] intArray = new int[1]; @@ -302,163 +299,10 @@ public class JOCLTest { } } - private void fillBuffer(ByteBuffer buffer, int seed) { - - Random rnd = new Random(seed); - - while(buffer.remaining() != 0) - buffer.putInt(rnd.nextInt()); - - buffer.rewind(); - } - private ByteBuffer wrap(long value) { return (ByteBuffer) BufferFactory.newDirectByteBuffer(8).putLong(value).rewind(); } - @Test - public void highLevelTest1() { - - out.println(" - - - highLevelTest; contextless - - - "); - - CLPlatform[] clPlatforms = CLContext.listCLPlatforms(); - - for (CLPlatform platform : clPlatforms) { - - out.println("platform info:"); - out.println(" name: "+platform.getName()); - out.println(" profile: "+platform.getProfile()); - out.println(" version: "+platform.getVersion()); - out.println(" vendor: "+platform.getVendor()); - - CLDevice[] clDevices = platform.listCLDevices(); - for (CLDevice device : clDevices) { - out.println("device info:"); - out.println(" name: "+device.getName()); - out.println(" profile: "+device.getProfile()); - out.println(" vendor: "+device.getVendor()); - out.println(" type: "+device.getType()); - out.println(" 
global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB"); - out.println(" local mem: "+device.getLocalMemSize()/1024+" KB"); - out.println(" clock: "+device.getMaxClockFrequency()+" MHz"); - out.println(" max work group size: "+device.getMaxWorkGroupSize()); - out.println(" max compute units: "+device.getMaxComputeUnits()); - out.println(" extensions: "+device.getExtensions()); - } - } - - - } - - - @Test - public void highLevelTest2() { - - out.println(" - - - highLevelTest - - - "); - - CLContext context = CLContext.create(); - - CLDevice[] contextDevices = context.getCLDevices(); - - out.println("context devices:"); - for (CLDevice device : contextDevices) { - out.println(" "+device.toString()); - } - - CLProgram program = context.createProgram(programSource).build(); - - CLDevice[] programDevices = program.getCLDevices(); - - assertEquals(contextDevices.length, programDevices.length); - - out.println("program devices:"); - for (CLDevice device : programDevices) { - out.println(" "+device.toString()); - out.println(" build log: "+program.getBuildLog(device)); - out.println(" build status: "+program.getBuildStatus(device)); - } - - String source = program.getSource(); - assertFalse(source.trim().isEmpty()); -// out.println("source:\n"+source); - - int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) - int localWorkSize = 256; // set and log Global and Local work size dimensions - int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize - - out.println("allocateing buffers of size: "+globalWorkSize); - - ByteBuffer srcA = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); - ByteBuffer srcB = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); - ByteBuffer dest = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); - - fillBuffer(srcA, 23456); - fillBuffer(srcB, 46987); - - CLBuffer clBufferA = 
context.createBuffer(CL.CL_MEM_READ_ONLY, srcA); - CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB); - CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest); - - Map<String, CLKernel> kernels = program.getCLKernels(); - for (CLKernel kernel : kernels.values()) { - out.println("kernel: "+kernel.toString()); - } - - assertNotNull(kernels.get("VectorAdd")); - assertNotNull(kernels.get("Test")); - - CLKernel vectorAddKernel = kernels.get("VectorAdd"); - - vectorAddKernel.setArg(0, BufferFactory.SIZEOF_LONG, clBufferA) - .setArg(1, BufferFactory.SIZEOF_LONG, clBufferB) - .setArg(2, BufferFactory.SIZEOF_LONG, clBufferC) - .setArg(3, BufferFactory.SIZEOF_INT, elementCount); - - CLCommandQueue queue = programDevices[0].createCommandQueue(); - - // Asynchronous write of data to GPU device, blocking read later - queue.putWriteBuffer(clBufferA, false) - .putWriteBuffer(clBufferB, false) - .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize }) - .putReadBuffer(clBufferC, true).release(); - - out.println("a+b=c result snapshot: "); - for(int i = 0; i < 10; i++) - out.print(dest.getInt()+", "); - out.println("...; "+dest.remaining()/BufferFactory.SIZEOF_INT + " more"); - - assertTrue(3 == context.getCLBuffers().size()); - clBufferA.release(); - assertTrue(2 == context.getCLBuffers().size()); - - assertTrue(2 == context.getCLBuffers().size()); - clBufferB.release(); - assertTrue(1 == context.getCLBuffers().size()); - - assertTrue(1 == context.getCLBuffers().size()); - clBufferC.release(); - assertTrue(0 == context.getCLBuffers().size()); - - - assertTrue(1 == context.getCLPrograms().size()); - program.release(); - assertTrue(0 == context.getCLPrograms().size()); - -// CLDevice device = ctx.getMaxFlopsDevice(); -// out.println("max FLOPS device: " + device); - context.release(); - } - - - private final int roundUp(int groupSize, int globalSize) { - int r = globalSize % groupSize; - if (r == 0) { - 
return globalSize; - } else { - return globalSize + groupSize - r; - } - } - private final void checkForError(int ret) { this.checkError("", ret); } diff --git a/test/com/mbien/opencl/TestUtils.java b/test/com/mbien/opencl/TestUtils.java new file mode 100644 index 00000000..70bade8a --- /dev/null +++ b/test/com/mbien/opencl/TestUtils.java @@ -0,0 +1,29 @@ +package com.mbien.opencl; + +import java.nio.ByteBuffer; +import java.util.Random; + +/** + * @author Michael Bien + */ +public class TestUtils { + + public static final void fillBuffer(ByteBuffer buffer, int seed) { + + Random rnd = new Random(seed); + + while(buffer.remaining() != 0) + buffer.putInt(rnd.nextInt()); + + buffer.rewind(); + } + + public static final int roundUp(int groupSize, int globalSize) { + int r = globalSize % groupSize; + if (r == 0) { + return globalSize; + } else { + return globalSize + groupSize - r; + } + } +} diff --git a/test/com/mbien/opencl/testkernels.cl b/test/com/mbien/opencl/testkernels.cl new file mode 100644 index 00000000..0790cb32 --- /dev/null +++ b/test/com/mbien/opencl/testkernels.cl @@ -0,0 +1,22 @@ + + // OpenCL Kernel Function for element by element vector addition + __kernel void VectorAddGM(__global const int* a, __global const int* b, __global int* c, int iNumElements) { + // get index into global data array + int iGID = get_global_id(0); + // bound check (equivalent to the limit on a 'for' loop for standard/serial C code + if (iGID >= iNumElements) { + return; + } + // add the vector elements + c[iGID] = a[iGID] + b[iGID]; + } + + __kernel void Test(__global const int* a, __global const int* b, __global int* c, int iNumElements) { + // get index into global data array + int iGID = get_global_id(0); + // bound check (equivalent to the limit on a 'for' loop for standard/serial C code + if (iGID >= iNumElements) { + return; + } + c[iGID] = iGID; + } |