diff options
Diffstat (limited to 'test/com/mbien')
-rw-r--r-- | test/com/mbien/opencl/HighLevelBindingTest.java | 156 | ||||
-rw-r--r-- | test/com/mbien/opencl/LowLevelBindingTest.java (renamed from test/com/mbien/opencl/JOCLTest.java) | 166 | ||||
-rw-r--r-- | test/com/mbien/opencl/TestUtils.java | 29 | ||||
-rw-r--r-- | test/com/mbien/opencl/testkernels.cl | 22 |
4 files changed, 212 insertions, 161 deletions
diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java new file mode 100644 index 00000000..a2bfce91 --- /dev/null +++ b/test/com/mbien/opencl/HighLevelBindingTest.java @@ -0,0 +1,156 @@ +package com.mbien.opencl; + +import com.sun.gluegen.runtime.BufferFactory; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Map; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.*; +import static java.lang.System.*; +import static com.mbien.opencl.TestUtils.*; + +/** + * Test testing the high level bindings. + * @author Michael Bien + */ +public class HighLevelBindingTest { + + @BeforeClass + public static void setUpClass() throws Exception { + out.println("OS: " + System.getProperty("os.name")); + out.println("VM: " + System.getProperty("java.vm.name")); + } + + @Test + public void contextlessTest() { + + out.println(" - - - highLevelTest; contextless - - - "); + + CLPlatform[] clPlatforms = CLPlatform.listCLPlatforms(); + + for (CLPlatform platform : clPlatforms) { + + out.println("platform info:"); + out.println(" name: "+platform.getName()); + out.println(" profile: "+platform.getProfile()); + out.println(" version: "+platform.getVersion()); + out.println(" vendor: "+platform.getVendor()); + + CLDevice[] clDevices = platform.listCLDevices(); + for (CLDevice device : clDevices) { + out.println("device info:"); + out.println(" name: "+device.getName()); + out.println(" profile: "+device.getProfile()); + out.println(" vendor: "+device.getVendor()); + out.println(" type: "+device.getType()); + out.println(" global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB"); + out.println(" local mem: "+device.getLocalMemSize()/1024+" KB"); + out.println(" clock: "+device.getMaxClockFrequency()+" MHz"); + out.println(" max work group size: "+device.getMaxWorkGroupSize()); + out.println(" max compute units: "+device.getMaxComputeUnits()); + out.println(" extensions: "+device.getExtensions()); + } + } + + } + + @Test + public void vectorAddGMTest() throws IOException { + + out.println(" - - - highLevelTest; global memory kernel - - - "); + + CLContext context = CLContext.create(); + + CLDevice[] contextDevices = context.getCLDevices(); + + out.println("context devices:"); + for (CLDevice device : contextDevices) { + out.println(" "+device.toString()); + } + + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); + + CLDevice[] programDevices = program.getCLDevices(); + + assertEquals(contextDevices.length, programDevices.length); + + out.println("program devices:"); + for (CLDevice device : programDevices) { + out.println(" "+device.toString()); + out.println(" build log: "+program.getBuildLog(device)); + out.println(" build status: "+program.getBuildStatus(device)); + } + + String source = program.getSource(); + assertFalse(source.trim().isEmpty()); +// out.println("source:\n"+source); + + int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) + int localWorkSize = 256; // set and log Global and Local work size dimensions + int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize + + out.println("allocateing buffers of size: "+globalWorkSize); + + ByteBuffer srcA = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); + ByteBuffer srcB = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); + ByteBuffer dest = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); + + fillBuffer(srcA, 23456); + fillBuffer(srcB, 46987); + + CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, srcA); + CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB); + CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest); + + Map<String, CLKernel> kernels = program.getCLKernels(); + for (CLKernel kernel : kernels.values()) { + out.println("kernel: "+kernel.toString()); + } + + assertNotNull(kernels.get("VectorAddGM")); + assertNotNull(kernels.get("Test")); + + CLKernel vectorAddKernel = kernels.get("VectorAddGM"); + + vectorAddKernel.setArg(0, BufferFactory.SIZEOF_LONG, clBufferA) + .setArg(1, BufferFactory.SIZEOF_LONG, clBufferB) + .setArg(2, BufferFactory.SIZEOF_LONG, clBufferC) + .setArg(3, BufferFactory.SIZEOF_INT, elementCount); + + CLCommandQueue queue = programDevices[0].createCommandQueue(); + + // Asynchronous write of data to GPU device, blocking read later + queue.putWriteBuffer(clBufferA, false) + .putWriteBuffer(clBufferB, false) + .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize }) + .putReadBuffer(clBufferC, true).release(); + + out.println("a+b=c result snapshot: "); + for(int i = 0; i < 10; i++) + out.print(dest.getInt()+", "); + out.println("...; "+dest.remaining()/BufferFactory.SIZEOF_INT + " more"); + + assertTrue(3 == context.getCLBuffers().size()); + clBufferA.release(); + assertTrue(2 == context.getCLBuffers().size()); + + assertTrue(2 == context.getCLBuffers().size()); + clBufferB.release(); + assertTrue(1 == context.getCLBuffers().size()); + + assertTrue(1 == context.getCLBuffers().size()); + clBufferC.release(); + assertTrue(0 == context.getCLBuffers().size()); + + + assertTrue(1 == context.getCLPrograms().size()); + program.release(); + assertTrue(0 == context.getCLPrograms().size()); + +// CLDevice device = ctx.getMaxFlopsDevice(); +// out.println("max FLOPS device: " + device); + context.release(); + } + +} diff --git a/test/com/mbien/opencl/JOCLTest.java b/test/com/mbien/opencl/LowLevelBindingTest.java index 224a6768..e5175129 100644 --- a/test/com/mbien/opencl/JOCLTest.java +++ b/test/com/mbien/opencl/LowLevelBindingTest.java @@ -4,18 +4,17 @@ import com.sun.gluegen.runtime.BufferFactory; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; -import java.util.Map; -import java.util.Random; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.*; import static java.lang.System.*; +import static com.mbien.opencl.TestUtils.*; /** - * Test for testing basic functionality. + * Test testing the low level bindings. * @author Michael Bien */ -public class JOCLTest { +public class LowLevelBindingTest { private final static String programSource = " // OpenCL Kernel Function for element by element vector addition \n" @@ -39,8 +38,6 @@ public class JOCLTest { + " c[iGID] = iGID; \n" + "} \n"; - public JOCLTest() { - } @BeforeClass public static void setUpClass() throws Exception { @@ -55,7 +52,7 @@ public class JOCLTest { int ret = CL.CL_SUCCESS; - CL cl = CLContext.getLowLevelBinding(); + CL cl = CLPlatform.getLowLevelBinding(); int[] intBuffer = new int[1]; // find all available OpenCL platforms @@ -134,7 +131,7 @@ public class JOCLTest { long[] longArray = new long[1]; ByteBuffer bb = ByteBuffer.allocate(4096).order(ByteOrder.nativeOrder()); - CL cl = CLContext.getLowLevelBinding(); + CL cl = CLPlatform.getLowLevelBinding(); int ret = CL.CL_SUCCESS; int[] intArray = new int[1]; @@ -302,163 +299,10 @@ public class JOCLTest { } } - private void fillBuffer(ByteBuffer buffer, int seed) { - - Random rnd = new Random(seed); - - while(buffer.remaining() != 0) - buffer.putInt(rnd.nextInt()); - - buffer.rewind(); - } - private ByteBuffer wrap(long value) { return (ByteBuffer) BufferFactory.newDirectByteBuffer(8).putLong(value).rewind(); } - @Test - public void highLevelTest1() { - - out.println(" - - - highLevelTest; contextless - - - "); - - CLPlatform[] clPlatforms = CLContext.listCLPlatforms(); - - for (CLPlatform platform : clPlatforms) { - - out.println("platform info:"); - out.println(" name: "+platform.getName()); - out.println(" profile: "+platform.getProfile()); - out.println(" version: "+platform.getVersion()); - out.println(" vendor: "+platform.getVendor()); - - CLDevice[] clDevices = platform.listCLDevices(); - for (CLDevice device : clDevices) { - out.println("device info:"); - out.println(" name: "+device.getName()); - out.println(" profile: "+device.getProfile()); - out.println(" vendor: "+device.getVendor()); - out.println(" type: "+device.getType()); - out.println(" global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB"); - out.println(" local mem: "+device.getLocalMemSize()/1024+" KB"); - out.println(" clock: "+device.getMaxClockFrequency()+" MHz"); - out.println(" max work group size: "+device.getMaxWorkGroupSize()); - out.println(" max compute units: "+device.getMaxComputeUnits()); - out.println(" extensions: "+device.getExtensions()); - } - } - - - } - - - @Test - public void highLevelTest2() { - - out.println(" - - - highLevelTest - - - "); - - CLContext context = CLContext.create(); - - CLDevice[] contextDevices = context.getCLDevices(); - - out.println("context devices:"); - for (CLDevice device : contextDevices) { - out.println(" "+device.toString()); - } - - CLProgram program = context.createProgram(programSource).build(); - - CLDevice[] programDevices = program.getCLDevices(); - - assertEquals(contextDevices.length, programDevices.length); - - out.println("program devices:"); - for (CLDevice device : programDevices) { - out.println(" "+device.toString()); - out.println(" build log: "+program.getBuildLog(device)); - out.println(" build status: "+program.getBuildStatus(device)); - } - - String source = program.getSource(); - assertFalse(source.trim().isEmpty()); -// out.println("source:\n"+source); - - int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) - int localWorkSize = 256; // set and log Global and Local work size dimensions - int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize - - out.println("allocateing buffers of size: "+globalWorkSize); - - ByteBuffer srcA = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); - ByteBuffer srcB = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); - ByteBuffer dest = BufferFactory.newDirectByteBuffer(globalWorkSize*BufferFactory.SIZEOF_INT); - - fillBuffer(srcA, 23456); - fillBuffer(srcB, 46987); - - CLBuffer clBufferA = context.createBuffer(CL.CL_MEM_READ_ONLY, srcA); - CLBuffer clBufferB = context.createBuffer(CL.CL_MEM_READ_ONLY, srcB); - CLBuffer clBufferC = context.createBuffer(CL.CL_MEM_WRITE_ONLY, dest); - - Map<String, CLKernel> kernels = program.getCLKernels(); - for (CLKernel kernel : kernels.values()) { - out.println("kernel: "+kernel.toString()); - } - - assertNotNull(kernels.get("VectorAdd")); - assertNotNull(kernels.get("Test")); - - CLKernel vectorAddKernel = kernels.get("VectorAdd"); - - vectorAddKernel.setArg(0, BufferFactory.SIZEOF_LONG, clBufferA) - .setArg(1, BufferFactory.SIZEOF_LONG, clBufferB) - .setArg(2, BufferFactory.SIZEOF_LONG, clBufferC) - .setArg(3, BufferFactory.SIZEOF_INT, elementCount); - - CLCommandQueue queue = programDevices[0].createCommandQueue(); - - // Asynchronous write of data to GPU device, blocking read later - queue.putWriteBuffer(clBufferA, false) - .putWriteBuffer(clBufferB, false) - .putNDRangeKernel(vectorAddKernel, 1, null, new long[]{ globalWorkSize }, new long[]{ localWorkSize }) - .putReadBuffer(clBufferC, true).release(); - - out.println("a+b=c result snapshot: "); - for(int i = 0; i < 10; i++) - out.print(dest.getInt()+", "); - out.println("...; "+dest.remaining()/BufferFactory.SIZEOF_INT + " more"); - - assertTrue(3 == context.getCLBuffers().size()); - clBufferA.release(); - assertTrue(2 == context.getCLBuffers().size()); - - assertTrue(2 == context.getCLBuffers().size()); - clBufferB.release(); - assertTrue(1 == context.getCLBuffers().size()); - - assertTrue(1 == context.getCLBuffers().size()); - clBufferC.release(); - assertTrue(0 == context.getCLBuffers().size()); - - - assertTrue(1 == context.getCLPrograms().size()); - program.release(); - assertTrue(0 == context.getCLPrograms().size()); - -// CLDevice device = ctx.getMaxFlopsDevice(); -// out.println("max FLOPS device: " + device); - context.release(); - } - - - private final int roundUp(int groupSize, int globalSize) { - int r = globalSize % groupSize; - if (r == 0) { - return globalSize; - } else { - return globalSize + groupSize - r; - } - } - private final void checkForError(int ret) { this.checkError("", ret); } diff --git a/test/com/mbien/opencl/TestUtils.java b/test/com/mbien/opencl/TestUtils.java new file mode 100644 index 00000000..70bade8a --- /dev/null +++ b/test/com/mbien/opencl/TestUtils.java @@ -0,0 +1,29 @@ +package com.mbien.opencl; + +import java.nio.ByteBuffer; +import java.util.Random; + +/** + * @author Michael Bien + */ +public class TestUtils { + + public static final void fillBuffer(ByteBuffer buffer, int seed) { + + Random rnd = new Random(seed); + + while(buffer.remaining() != 0) + buffer.putInt(rnd.nextInt()); + + buffer.rewind(); + } + + public static final int roundUp(int groupSize, int globalSize) { + int r = globalSize % groupSize; + if (r == 0) { + return globalSize; + } else { + return globalSize + groupSize - r; + } + } +} diff --git a/test/com/mbien/opencl/testkernels.cl b/test/com/mbien/opencl/testkernels.cl new file mode 100644 index 00000000..0790cb32 --- /dev/null +++ b/test/com/mbien/opencl/testkernels.cl @@ -0,0 +1,22 @@ + + // OpenCL Kernel Function for element by element vector addition + __kernel void VectorAddGM(__global const int* a, __global const int* b, __global int* c, int iNumElements) { + // get index into global data array + int iGID = get_global_id(0); + // bound check (equivalent to the limit on a 'for' loop for standard/serial C code + if (iGID >= iNumElements) { + return; + } + // add the vector elements + c[iGID] = a[iGID] + b[iGID]; + } + + __kernel void Test(__global const int* a, __global const int* b, __global int* c, int iNumElements) { + // get index into global data array + int iGID = get_global_id(0); + // bound check (equivalent to the limit on a 'for' loop for standard/serial C code + if (iGID >= iNumElements) { + return; + } + c[iGID] = iGID; + } |