From bf07b44ed6a8958dd321cc4c08fd2bdd08299611 Mon Sep 17 00:00:00 2001 From: Michael Bien Date: Mon, 12 Apr 2010 22:18:39 +0200 Subject: renamed package com.mbien.* in com.jogamp.* JOCL is now officially a JogAmp team player ;). --- test/com/jogamp/opencl/CLBufferTest.java | 149 +++++++++ test/com/jogamp/opencl/CLCommandQueueTest.java | 266 ++++++++++++++++ test/com/jogamp/opencl/CLProgramTest.java | 224 ++++++++++++++ test/com/jogamp/opencl/HighLevelBindingTest.java | 305 +++++++++++++++++++ test/com/jogamp/opencl/LowLevelBindingTest.java | 364 ++++++++++++++++++++++ test/com/jogamp/opencl/TestUtils.java | 52 ++++ test/com/jogamp/opencl/testkernels.cl | 22 ++ test/com/mbien/opencl/CLBufferTest.java | 149 --------- test/com/mbien/opencl/CLCommandQueueTest.java | 266 ---------------- test/com/mbien/opencl/CLProgramTest.java | 224 -------------- test/com/mbien/opencl/HighLevelBindingTest.java | 305 ------------------- test/com/mbien/opencl/LowLevelBindingTest.java | 368 ----------------------- test/com/mbien/opencl/TestUtils.java | 52 ---- test/com/mbien/opencl/testkernels.cl | 22 -- 14 files changed, 1382 insertions(+), 1386 deletions(-) create mode 100644 test/com/jogamp/opencl/CLBufferTest.java create mode 100644 test/com/jogamp/opencl/CLCommandQueueTest.java create mode 100644 test/com/jogamp/opencl/CLProgramTest.java create mode 100644 test/com/jogamp/opencl/HighLevelBindingTest.java create mode 100644 test/com/jogamp/opencl/LowLevelBindingTest.java create mode 100644 test/com/jogamp/opencl/TestUtils.java create mode 100644 test/com/jogamp/opencl/testkernels.cl delete mode 100644 test/com/mbien/opencl/CLBufferTest.java delete mode 100644 test/com/mbien/opencl/CLCommandQueueTest.java delete mode 100644 test/com/mbien/opencl/CLProgramTest.java delete mode 100644 test/com/mbien/opencl/HighLevelBindingTest.java delete mode 100644 test/com/mbien/opencl/LowLevelBindingTest.java delete mode 100644 test/com/mbien/opencl/TestUtils.java delete mode 100644 
test/com/mbien/opencl/testkernels.cl (limited to 'test/com') diff --git a/test/com/jogamp/opencl/CLBufferTest.java b/test/com/jogamp/opencl/CLBufferTest.java new file mode 100644 index 00000000..d0c8c2f9 --- /dev/null +++ b/test/com/jogamp/opencl/CLBufferTest.java @@ -0,0 +1,149 @@ +package com.jogamp.opencl; + +import com.jogamp.opencl.CLMemory.Mem; +import com.jogamp.opencl.CLMemory.Map; +import com.jogamp.common.nio.Buffers; +import java.nio.ByteBuffer; +import org.junit.Test; + +import static org.junit.Assert.*; +import static java.lang.System.*; +import static com.jogamp.opencl.TestUtils.*; +import static com.jogamp.common.nio.Buffers.*; + +/** + * + * @author Michael Bien + */ +public class CLBufferTest { + + @Test + public void writeCopyReadBufferTest() { + + out.println(" - - - highLevelTest; copy buffer test - - - "); + + final int elements = NUM_ELEMENTS; + + CLContext context = CLContext.create(); + + // the CL.MEM_* flag is probably completely irrelevant in our case since we do not use a kernel in this test + CLBuffer clBufferA = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + CLBuffer clBufferB = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + + // fill only first read buffer -> we will copy the payload to the second later. + fillBuffer(clBufferA.buffer, 12345); + + CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); + + // asynchronous write of data to GPU device, blocking read later to get the computed results back. 
+ queue.putWriteBuffer(clBufferA, false) // write A + .putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B + .putReadBuffer(clBufferB, true) // read B + .finish(); + + context.release(); + + out.println("validating computed results..."); + checkIfEqual(clBufferA.buffer, clBufferB.buffer, elements); + out.println("results are valid"); + + } + + @Test + public void bufferWithHostPointerTest() { + + out.println(" - - - highLevelTest; host pointer test - - - "); + + final int elements = NUM_ELEMENTS; + + CLContext context = CLContext.create(); + + ByteBuffer buffer = Buffers.newDirectByteBuffer(elements*SIZEOF_INT); + // fill only first read buffer -> we will copy the payload to the second later. + fillBuffer(buffer, 12345); + + CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); + + Mem[] bufferConfig = new Mem[] {Mem.COPY_BUFFER, Mem.USE_BUFFER}; + + for(int i = 0; i < bufferConfig.length; i++) { + + out.println("testing with "+bufferConfig[i] + " config"); + + CLBuffer clBufferA = context.createBuffer(buffer, Mem.READ_ONLY, bufferConfig[i]); + CLBuffer clBufferB = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); + + // no explicit write is enqueued here: A was created from the filled host buffer; blocking read later to get the copied data back. + queue.putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B + .putReadBuffer(clBufferB, true) // read B + .finish(); + + assertEquals(2, context.getMemoryObjects().size()); + clBufferA.release(); + assertEquals(1, context.getMemoryObjects().size()); + clBufferB.release(); + assertEquals(0, context.getMemoryObjects().size()); + + // uploading worked when a==b. 
+ out.println("validating computed results..."); + checkIfEqual(clBufferA.buffer, clBufferB.buffer, elements); + out.println("results are valid"); + } + + context.release(); + } + + @Test + public void mapBufferTest() { + + out.println(" - - - highLevelTest; map buffer test - - - "); + + final int elements = NUM_ELEMENTS; + final int sizeInBytes = elements*SIZEOF_INT; + + CLContext context; + CLBuffer clBufferA; + CLBuffer clBufferB; + + // We will have to allocate mappable NIO memory on non CPU contexts + // since we can't map e.g GPU memory. + if(CLPlatform.getDefault().listCLDevices(CLDevice.Type.CPU).length > 0) { + + context = CLContext.create(CLDevice.Type.CPU); + + clBufferA = context.createBuffer(sizeInBytes, Mem.READ_WRITE); + clBufferB = context.createBuffer(sizeInBytes, Mem.READ_WRITE); + }else{ + + context = CLContext.create(); + + clBufferA = context.createByteBuffer(sizeInBytes, Mem.READ_WRITE, Mem.USE_BUFFER); + clBufferB = context.createByteBuffer(sizeInBytes, Mem.READ_WRITE, Mem.USE_BUFFER); + } + + CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); + + // fill only first buffer -> we will copy the payload to the second later. + ByteBuffer mappedBufferA = queue.putMapBuffer(clBufferA, Map.READ_WRITE, true); + assertEquals(sizeInBytes, mappedBufferA.capacity()); + + fillBuffer(mappedBufferA, 12345); // write to A + + queue.putUnmapMemory(clBufferA) // unmap A + .putCopyBuffer(clBufferA, clBufferB); // copy A -> B + + // map B for read operations + ByteBuffer mappedBufferB = queue.putMapBuffer(clBufferB, Map.READ, true); + assertEquals(sizeInBytes, mappedBufferB.capacity()); + + out.println("validating computed results..."); + checkIfEqual(mappedBufferA, mappedBufferB, elements); // A == B ? 
+ out.println("results are valid"); + + queue.putUnmapMemory(clBufferB); // unmap B + + context.release(); + + } + +} diff --git a/test/com/jogamp/opencl/CLCommandQueueTest.java b/test/com/jogamp/opencl/CLCommandQueueTest.java new file mode 100644 index 00000000..a5d7afb1 --- /dev/null +++ b/test/com/jogamp/opencl/CLCommandQueueTest.java @@ -0,0 +1,266 @@ +package com.jogamp.opencl; + +import com.jogamp.opencl.util.MultiQueueBarrier; +import com.jogamp.opencl.CLCommandQueue.Mode; +import com.jogamp.opencl.CLMemory.Mem; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.EnumSet; +import org.junit.Test; + +import static org.junit.Assert.*; +import static java.lang.System.*; +import static com.jogamp.opencl.TestUtils.*; +import static com.jogamp.opencl.CLEvent.*; +import static com.jogamp.common.nio.Buffers.*; + +/** + * + * @author Michael Bien + */ +public class CLCommandQueueTest { + + private final int groupSize = 256; + + @Test + public void enumsTest() { + + //CLCommandQueueEnums + EnumSet queueMode = Mode.valuesOf(CL.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL.CL_QUEUE_PROFILING_ENABLE); + assertTrue(queueMode.contains(Mode.OUT_OF_ORDER_MODE)); + assertTrue(queueMode.contains(Mode.PROFILING_MODE)); + + assertNotNull(Mode.valuesOf(0)); + assertEquals(0, Mode.valuesOf(0).size()); + for (Mode mode : Mode.values()) { + assertEquals(mode, Mode.valueOf(mode.QUEUE_MODE)); + } + + // CLEvent enums + for (ProfilingCommand cmd : ProfilingCommand.values()) { + assertEquals(cmd, ProfilingCommand.valueOf(cmd.COMMAND)); + } + + for (CommandType type : CommandType.values()) { + assertEquals(type, CommandType.valueOf(type.TYPE)); + } + + for (ExecutionStatus status : ExecutionStatus.values()) { + assertEquals(status, ExecutionStatus.valueOf(status.STATUS)); + } + + } + + @Test + public void eventsTest() throws IOException { + + out.println(" - - - event synchronization test - - - "); + + final int elements = roundUp(groupSize, ONE_MB / SIZEOF_INT * 5); 
// 5MB per buffer + + CLContext context = CLContext.create(); + + CLBuffer clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + CLBuffer clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + CLBuffer clBufferD = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + + fillBuffer(clBufferA.buffer, 12345); + fillBuffer(clBufferB.buffer, 67890); + + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); + CLKernel vectorAddKernel = program.createCLKernel("VectorAddGM").setArg(3, elements); + CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); + + final CLEventList events = new CLEventList(2); + + assertEquals(0, events.size()); + + queue.putWriteBuffer(clBufferA, false, events) // write A + .putWriteBuffer(clBufferB, false, events);// write B + + assertEquals(2, events.size()); + queue.putWaitForEvents(events, true); + + events.release(); + assertEquals(0, events.size()); + + vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B + queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); + + vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferD); // D = A+B + queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); + + assertEquals(2, events.size()); + queue.putWaitForEvent(events, 0, false) + .putWaitForEvent(events, 1, true); + + queue.putReadBuffer(clBufferC, false) + .putReadBuffer(clBufferD, true); + + events.release(); + + checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements); + + + context.release(); + + + out.println("results are valid"); + + } + @Test + public void profilingEventsTest() throws IOException { + + out.println(" - - - event synchronization test - - - "); + + final int elements = roundUp(groupSize, ONE_MB / SIZEOF_INT * 5); // 5MB per buffer + + CLContext context = CLContext.create(); + + 
CLBuffer clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + CLBuffer clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + + fillBuffer(clBufferA.buffer, 12345); + fillBuffer(clBufferB.buffer, 67890); + + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); + CLKernel vectorAddKernel = program.createCLKernel("VectorAddGM").setArg(3, elements); + CLCommandQueue queue = context.getDevices()[0].createCommandQueue(Mode.PROFILING_MODE); + + queue.putWriteBuffer(clBufferA, true) // write A + .putWriteBuffer(clBufferB, true);// write B + + final CLEventList events = new CLEventList(1); + + assertEquals(0, events.size()); + + vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B + queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); + + assertEquals(1, events.size()); + CLEvent probe = events.getEvent(0); + out.println(probe); + + queue.putWaitForEvents(events, true); + assertEquals(CLEvent.ExecutionStatus.COMPLETE, probe.getStatus()); + + out.println(probe); + long time = probe.getProfilingInfo(CLEvent.ProfilingCommand.END) + - probe.getProfilingInfo(CLEvent.ProfilingCommand.START); + out.println("time: "+time); + assertTrue(time > 0); + + events.release(); + context.release(); + + } + + @Test + public void concurrencyTest() throws IOException, InterruptedException { + + out.println(" - - - QueueBarrier test - - - "); + + final int elements = ONE_MB / SIZEOF_INT * 10; // 10MB per buffer + + CLContext context = CLContext.create(); + + CLDevice[] devices = context.getDevices(); + + if (devices.length < 2) { + out.println("aborting test... 
need at least 2 devices"); + context.release(); + return; + } + + final CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + final CLBuffer clBufferD = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + + final CLBuffer clBufferA1 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + final CLBuffer clBufferB1 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + final CLBuffer clBufferA2 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + final CLBuffer clBufferB2 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); + + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); + + final CLKernel vectorAddKernel1 = program.createCLKernel("VectorAddGM").setArg(3, elements); + final CLKernel vectorAddKernel2 = program.createCLKernel("VectorAddGM").setArg(3, elements); + + int secondDevice = devices.length > 1 ? 1 : 0; + + final CLCommandQueue queue1 = devices[0 ].createCommandQueue(); + final CLCommandQueue queue2 = devices[secondDevice].createCommandQueue(); + + fillBuffer(clBufferC.buffer, 12345); + + if (secondDevice > 0) { + System.out.println("using two devices"); + } + + final MultiQueueBarrier barrier = new MultiQueueBarrier(2); + + Thread thread1 = new Thread("C") { + + @Override + public void run() { + + fillBuffer(clBufferA1.buffer, 12345); + fillBuffer(clBufferB1.buffer, 67890); + +// System.out.println("C buffer"); + queue1.putWriteBuffer(clBufferA1, false) // write A + .putWriteBuffer(clBufferB1, false); // write B + +// System.out.println("C args"); + vectorAddKernel1.setArgs(clBufferA1, clBufferB1, clBufferC); // C = A+B + +// System.out.println("C kernels"); + CLEventList events1 = new CLEventList(2); + queue1.put1DRangeKernel(vectorAddKernel1, 0, elements, groupSize, events1) + .putReadBuffer(clBufferC, false, events1); + + barrier.waitFor(queue1, events1); + + } + }; + + Thread thread2 = new Thread("D") 
{ + + @Override + public void run() { + + fillBuffer(clBufferA2.buffer, 12345); + fillBuffer(clBufferB2.buffer, 67890); + +// System.out.println("D buffer"); + queue2.putWriteBuffer(clBufferA2, false) // write A + .putWriteBuffer(clBufferB2, false); // write B + +// System.out.println("D args"); + vectorAddKernel2.setArgs(clBufferA2, clBufferB2, clBufferD); // D = A+B + +// System.out.println("D kernels"); + CLEventList events2 = new CLEventList(2); + queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, groupSize, events2) + .putReadBuffer(clBufferD, false, events2); + + barrier.waitFor(queue2, events2); + + } + }; + + out.println("starting threads"); + thread1.start(); + thread2.start(); + barrier.await(); + out.println("done"); + + checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements); + + context.release(); + + out.println("results are valid"); + + } +} diff --git a/test/com/jogamp/opencl/CLProgramTest.java b/test/com/jogamp/opencl/CLProgramTest.java new file mode 100644 index 00000000..8b5d4362 --- /dev/null +++ b/test/com/jogamp/opencl/CLProgramTest.java @@ -0,0 +1,224 @@ +package com.jogamp.opencl; + +import com.jogamp.opencl.util.CLBuildConfiguration; +import com.jogamp.opencl.util.CLProgramConfiguration; +import com.jogamp.opencl.CLProgram.Status; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.Map; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import static org.junit.Assert.*; +import static java.lang.System.*; +import static com.jogamp.opencl.CLProgram.CompilerOptions.*; + +/** + * + * @author Michael Bien + */ +public class CLProgramTest { + + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + + @Test + public void enumsTest() { + + // CLProgram enums + for (Status e : Status.values()) { + assertEquals(e, 
Status.valueOf(e.STATUS)); + } + } + + @Test + public void rebuildProgramTest() throws IOException { + + out.println(" - - - CLProgramTest; rebuild program test - - - "); + + CLContext context = CLContext.create(); + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")); + + try{ + program.createCLKernels(); + fail("expected exception but got none :("); + }catch(CLException ex) { + out.println("got expected exception: "+ex.getCLErrorString()); + assertEquals(ex.errorcode, CL.CL_INVALID_PROGRAM_EXECUTABLE); + } + + out.println(program.getBuildStatus()); + program.build(); + out.println(program.getBuildStatus()); + + assertTrue(program.isExecutable()); + + Map kernels = program.createCLKernels(); + assertNotNull(kernels); + assertTrue("kernel map is empty", kernels.size() > 0); + + // rebuild + // 1. release kernels (internally) + // 2. build program + program.build(); + assertTrue(program.isExecutable()); + out.println(program.getBuildStatus()); + + // try again with rebuilt program + kernels = program.createCLKernels(); + assertNotNull(kernels); + assertTrue("kernel map is empty", kernels.size() > 0); + assertTrue(kernels.size() > 0); + + context.release(); + } + + @Test + public void programBinariesTest() throws IOException { + + out.println(" - - - CLProgramTest; down-/upload binaries test - - - "); + + CLContext context = CLContext.create(); + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")) + .build(ENABLE_MAD, WARNINGS_ARE_ERRORS); + + // obtain binaries + Map binaries = program.getBinaries(); + assertFalse(binaries.isEmpty()); + + CLDevice[] devices = program.getCLDevices(); + for (CLDevice device : devices) { + assertTrue(binaries.containsKey(device)); + } + + // 1. release program + // 2. 
re-create program with old binaries + program.release(); + + assertFalse(program.isExecutable()); + + assertNotNull(program.getBinaries()); + assertEquals(program.getBinaries().size(), 0); + + assertNotNull(program.getBuildLog()); + assertEquals(program.getBuildLog().length(), 0); + + assertNotNull(program.getSource()); + assertEquals(program.getSource().length(), 0); + + assertNotNull(program.getCLDevices()); + assertEquals(program.getCLDevices().length, 0); + + { + Map kernels = program.createCLKernels(); + assertNotNull(kernels); + assertEquals(kernels.size(), 0); + } + assertNull(program.createCLKernel("foo")); + + program = context.createProgram(binaries); + + assertFalse(program.isExecutable()); + + assertNotNull(program.getCLDevices()); + assertTrue(program.getCLDevices().length != 0); + + assertNotNull(program.getBinaries()); + assertEquals(program.getBinaries().size(), 0); + + assertNotNull(program.getBuildLog()); + assertTrue(program.getBuildLog().length() != 0); + + assertNotNull(program.getSource()); + assertEquals(program.getSource().length(), 0); + + try{ + Map kernels = program.createCLKernels(); + fail("expected an exception from createCLKernels but got: "+kernels); + }catch(CLException ex) { + // expected, not built yet + } + + out.println(program.getBuildStatus()); + program.build(); + out.println(program.getBuildStatus()); + + assertNotNull(program.createCLKernel("Test")); + + assertTrue(program.isExecutable()); + + } + + @Test + public void builderTest() throws IOException, ClassNotFoundException { + out.println(" - - - CLProgramTest; program builder test - - - "); + + CLContext context = CLContext.create(); + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")); + + // same as program.build() + program.prepare().build(); + + assertTrue(program.isExecutable()); + + + // complex build + program.prepare().withOption(ENABLE_MAD) + .forDevice(context.getMaxFlopsDevice()) + .withDefine("RADIUS", 5) + 
.withDefine("ENABLE_FOOBAR") + .build(); + + assertTrue(program.isExecutable()); + + // reusable builder + CLBuildConfiguration builder = CLProgramBuilder.createConfiguration() + .withOption(ENABLE_MAD) + .forDevices(context.getDevices()) + .withDefine("RADIUS", 5) + .withDefine("ENABLE_FOOBAR"); + + out.println(builder); + + builder.setProgram(program).build(); + assertTrue(program.isExecutable()); + + // serialization test + File file = tmpFolder.newFile("foobar.builder"); + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(file)); + builder.save(oos); + oos.close(); + + // build configuration + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); + CLBuildConfiguration buildConfig = CLProgramBuilder.loadConfiguration(ois); + ois.close(); + + assertEquals(builder, buildConfig); + + buildConfig.build(program); + assertTrue(program.isExecutable()); + + // program configuration + ois = new ObjectInputStream(new FileInputStream(file)); + CLProgramConfiguration programConfig = CLProgramBuilder.loadConfiguration(ois, context); + assertNotNull(programConfig.getProgram()); + ois.close(); + program = programConfig.build(); + assertTrue(program.isExecutable()); + + + // cloning + assertEquals(builder, builder.clone()); + + } + + + +} diff --git a/test/com/jogamp/opencl/HighLevelBindingTest.java b/test/com/jogamp/opencl/HighLevelBindingTest.java new file mode 100644 index 00000000..33cce0b5 --- /dev/null +++ b/test/com/jogamp/opencl/HighLevelBindingTest.java @@ -0,0 +1,305 @@ +package com.jogamp.opencl; + +import com.jogamp.opencl.CLMemory.Mem; +import com.jogamp.opencl.CLMemory.GLObjectType; +import com.jogamp.opencl.CLSampler.AddressingMode; +import com.jogamp.opencl.CLSampler.FilteringMode; +import com.jogamp.opencl.CLImageFormat.ChannelOrder; +import com.jogamp.opencl.CLImageFormat.ChannelType; +import com.jogamp.opencl.CLDevice.FPConfig; +import com.jogamp.opencl.CLDevice.GlobalMemCacheType; +import 
com.jogamp.opencl.CLDevice.LocalMemType; +import com.jogamp.opencl.CLDevice.Type; +import com.jogamp.opencl.CLDevice.Capabilities; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.Map; +import org.junit.BeforeClass; +import org.junit.Test; + +import static org.junit.Assert.*; +import static java.lang.System.*; +import static com.jogamp.opencl.TestUtils.*; +import static com.jogamp.common.nio.Buffers.*; + +/** + * Test testing the high level bindings. + * @author Michael Bien + */ +public class HighLevelBindingTest { + + @BeforeClass + public static void setUpClass() throws Exception { + out.println("OS: " + System.getProperty("os.name")); + out.println("ARCH: " + System.getProperty("os.arch")); + out.println("VM: " + System.getProperty("java.vm.name")); + out.println("lib path: " + System.getProperty("java.library.path")); + } + + @Test + public void enumsTest() { + + // enum tests + final EnumSet singleFPConfig = FPConfig.valuesOf(CL.CL_FP_DENORM | CL.CL_FP_ROUND_TO_INF); + assertEquals(0, FPConfig.valuesOf(0).size()); + assertTrue(singleFPConfig.contains(FPConfig.DENORM)); + assertTrue(singleFPConfig.contains(FPConfig.ROUND_TO_INF)); + + // CLDevice enums + for (FPConfig e : FPConfig.values()) { + EnumSet set = FPConfig.valuesOf(e.CONFIG); + assertTrue(set.contains(e)); + } + for (GlobalMemCacheType e : GlobalMemCacheType.values()) { + assertEquals(e, GlobalMemCacheType.valueOf(e.TYPE)); + } + for (LocalMemType e : LocalMemType.values()) { + assertEquals(e, LocalMemType.valueOf(e.TYPE)); + } + for (Type e : Type.values()) { + assertEquals(e, Type.valueOf(e.TYPE)); + } + for (Capabilities e : Capabilities.values()) { + assertEquals(e, Capabilities.valueOf(e.CAPS)); + } + + // CLMemory enums + for (Mem e : Mem.values()) { + assertEquals(e, Mem.valueOf(e.CONFIG)); + } + + for (GLObjectType e : GLObjectType.values()) { + assertEquals(e, GLObjectType.valueOf(e.TYPE)); + } + + // CLSampler 
enums + for (AddressingMode e : AddressingMode.values()) { + assertEquals(e, AddressingMode.valueOf(e.MODE)); + } + for (FilteringMode e : FilteringMode.values()) { + assertEquals(e, FilteringMode.valueOf(e.MODE)); + } + + // CLImage enums + for (ChannelOrder e : ChannelOrder.values()) { + assertEquals(e, ChannelOrder.valueOf(e.ORDER)); + } + for (ChannelType e : ChannelType.values()) { + assertEquals(e, ChannelType.valueOf(e.TYPE)); + } + + } + + + + @Test + public void contextlessTest() { + + out.println(" - - - highLevelTest; contextless - - - "); + + // platform/device info tests + CLPlatform[] clPlatforms = CLPlatform.listCLPlatforms(); + + for (CLPlatform platform : clPlatforms) { + + out.println("platform info:"); + out.println(" name: "+platform.getName()); + out.println(" id: "+platform.ID); + out.println(" profile: "+platform.getProfile()); + out.println(" version: "+platform.getVersion()); + out.println(" vendor: "+platform.getVendor()); + out.println(" max FLOPS device: "+platform.getMaxFlopsDevice()); + out.println(" extensions: "+platform.getExtensions()); + + CLDevice[] clDevices = platform.listCLDevices(); + for (CLDevice device : clDevices) { + out.println("device info:"); + out.println(" name: "+device.getName()); + out.println(" profile: "+device.getProfile()); + out.println(" vendor: "+device.getVendor()); + out.println(" vendor id: "+device.getVendorID()); + out.println(" version: "+device.getVersion()); + out.println(" driver version: "+device.getDriverVersion()); + out.println(" type: "+device.getType()); + out.println(" global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB"); + out.println(" max alloc mem: "+device.getMaxMemAllocSize()/(1024*1024)+" MB"); + out.println(" max param size: "+device.getMaxParameterSize()+" byte"); + out.println(" local mem: "+device.getLocalMemSize()/1024+" KB"); + out.println(" local mem type: "+device.getLocalMemType()); + out.println(" global mem cache size: "+device.getGlobalMemCacheSize()); + 
out.println(" global mem cacheline size: "+device.getGlobalMemCachelineSize()); + out.println(" global mem cache type: "+device.getGlobalMemCacheType()); + out.println(" constant buffer size: "+device.getMaxConstantBufferSize()); + out.println(" error correction support: "+device.isErrorCorrectionSupported()); + out.println(" queue properties: "+device.getQueueProperties()); + out.println(" clock: "+device.getMaxClockFrequency()+" MHz"); + out.println(" timer res: "+device.getProfilingTimerResolution()+" ns"); + out.println(" max work group size: "+device.getMaxWorkGroupSize()); + out.println(" max compute units: "+device.getMaxComputeUnits()); + out.println(" max work item dimensions: "+device.getMaxWorkItemDimensions()); + out.println(" max work item sizes: "+Arrays.toString(device.getMaxWorkItemSizes())); + out.println(" compiler available: "+device.isCompilerAvailable()); + out.println(" image support: "+device.isImageSupportAvailable()); + out.println(" max read image args: "+device.getMaxReadImageArgs()); + out.println(" max write image args: "+device.getMaxWriteImageArgs()); + out.println(" max image2d dimensions: "+Arrays.asList(device.getMaxImage2dWidth(), device.getMaxImage2dHeight())); + out.println(" max image3d dimensions: "+Arrays.asList(device.getMaxImage2dWidth(), device.getMaxImage2dHeight(), device.getMaxImage3dDepth())); + out.println(" number of address bits: "+device.getAddressBits()); + out.println(" half FP available: "+device.isHalfFPAvailable()); + out.println(" double FP available: "+device.isDoubleFPAvailable()); + out.println(" little endian: "+device.isLittleEndian()); + out.println(" half FP config: "+device.getHalfFPConfig()); + out.println(" single FP config: "+device.getSingleFPConfig()); + out.println(" double FP config: "+device.getDoubleFPConfig()); + out.println(" execution capabilities: "+device.getExecutionCapabilities()); + out.println(" gl memory sharing: "+device.isGLMemorySharingSupported()); + out.println(" extensions: 
"+device.getExtensions()); + } + } + + } + + @Test + public void createContextTest() { + + out.println(" - - - highLevelTest; create context - - - "); + + CLPlatform platform = CLPlatform.getDefault(); + int deviceCount = platform.listCLDevices().length; + CLDevice firstDevice = platform.listCLDevices()[0]; + + CLContext c = CLContext.create(); + assertNotNull(c); + assertEquals(deviceCount, c.getDevices().length); + c.release(); + + c = CLContext.create(platform); + assertNotNull(c); + assertEquals(deviceCount, c.getDevices().length); + c.release(); + + c = CLContext.create(firstDevice); + assertNotNull(c); + assertEquals(1, c.getDevices().length); + c.release(); + + c = CLContext.create(CLDevice.Type.ALL); + assertNotNull(c); + assertEquals(deviceCount, c.getDevices().length); + c.release(); + + c = CLContext.create(platform, firstDevice); + assertNotNull(c); + assertEquals(1, c.getDevices().length); + c.release(); + + c = CLContext.create(platform, CLDevice.Type.ALL); + assertNotNull(c); + assertEquals(deviceCount, c.getDevices().length); + c.release(); + + } + + @Test + public void vectorAddGMTest() throws IOException { + + out.println(" - - - highLevelTest; global memory kernel - - - "); + + CLPlatform[] clPlatforms = CLPlatform.listCLPlatforms(); + CLContext context = CLContext.create(clPlatforms[0]); + + CLDevice[] contextDevices = context.getDevices(); + + out.println("context devices:"); + for (CLDevice device : contextDevices) { + out.println(" "+device.toString()); + } + + out.println("max FLOPS device: " + context.getMaxFlopsDevice()); + + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); + + CLDevice[] programDevices = program.getCLDevices(); + + assertEquals(contextDevices.length, programDevices.length); + + out.println("build log:\n"+program.getBuildLog()); + out.println("build status:\n"+program.getBuildStatus()); + + String source = program.getSource(); + assertFalse(source.trim().isEmpty()); +// 
out.println("source:\n"+source); + + Map binaries = program.getBinaries(); + assertFalse(binaries.isEmpty()); + + int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) + int localWorkSize = 256; // set and log Global and Local work size dimensions + int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize + + out.println("allocateing buffers of size: "+globalWorkSize); + + ByteBuffer srcA = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + ByteBuffer srcB = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + ByteBuffer dest = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + + fillBuffer(srcA, 23456); + fillBuffer(srcB, 46987); + + CLBuffer clBufferA = context.createBuffer(srcA, Mem.READ_ONLY); + CLBuffer clBufferB = context.createBuffer(srcB, Mem.READ_ONLY); + CLBuffer clBufferC = context.createBuffer(dest, Mem.WRITE_ONLY); + + Map kernels = program.createCLKernels(); + for (CLKernel kernel : kernels.values()) { + out.println("kernel: "+kernel.toString()); + } + + assertNotNull(kernels.get("VectorAddGM")); + assertNotNull(kernels.get("Test")); + + CLKernel vectorAddKernel = kernels.get("VectorAddGM"); + + vectorAddKernel.setArg(0, clBufferA) + .setArg(1, clBufferB) + .setArg(2, clBufferC) + .setArg(3, elementCount); + + CLCommandQueue queue = programDevices[0].createCommandQueue(); + + // Asynchronous write of data to GPU device, blocking read later + queue.putWriteBuffer(clBufferA, false) + .putWriteBuffer(clBufferB, false) + .put1DRangeKernel(vectorAddKernel, 0, globalWorkSize, localWorkSize) + .putReadBuffer(clBufferC, true) + .finish().release(); + + out.println("a+b=c result snapshot: "); + for(int i = 0; i < 10; i++) + out.print(dest.getInt()+", "); + out.println("...; "+dest.remaining()/SIZEOF_INT + " more"); + + assertTrue(3 == context.getMemoryObjects().size()); + clBufferA.release(); + assertTrue(2 == context.getMemoryObjects().size()); + + assertTrue(2 == 
context.getMemoryObjects().size()); + clBufferB.release(); + assertTrue(1 == context.getMemoryObjects().size()); + + assertTrue(1 == context.getMemoryObjects().size()); + clBufferC.release(); + assertTrue(0 == context.getMemoryObjects().size()); + + + assertTrue(1 == context.getPrograms().size()); + program.release(); + assertTrue(0 == context.getPrograms().size()); + + context.release(); + } + + +} diff --git a/test/com/jogamp/opencl/LowLevelBindingTest.java b/test/com/jogamp/opencl/LowLevelBindingTest.java new file mode 100644 index 00000000..2162bca0 --- /dev/null +++ b/test/com/jogamp/opencl/LowLevelBindingTest.java @@ -0,0 +1,364 @@ +package com.jogamp.opencl; + +import com.jogamp.opencl.impl.CLImpl; + +import java.nio.ByteBuffer; + +import org.junit.BeforeClass; +import org.junit.Test; + +import static java.lang.System.*; +import static com.jogamp.common.nio.Buffers.*; + +/** + * Test testing the low level bindings. + * @author Michael Bien + */ +public class LowLevelBindingTest { + + private final static String programSource = + " // OpenCL Kernel Function for element by element vector addition \n" + + "kernel void VectorAdd(global const int* a, global const int* b, global int* c, int iNumElements) { \n" + + " // get index into global data array \n" + + " int iGID = get_global_id(0); \n" + + " // bound check (equivalent to the limit on a 'for' loop for standard/serial C code \n" + + " if (iGID >= iNumElements) { \n" + + " return; \n" + + " } \n" + + " // add the vector elements \n" + + " c[iGID] = a[iGID] + b[iGID]; \n" + + "} \n" + + "kernel void Test(global const int* a, global const int* b, global int* c, int iNumElements) { \n" + + " // get index into global data array \n" + + " int iGID = get_global_id(0); \n" + + " // bound check (equivalent to the limit on a 'for' loop for standard/serial C code \n" + + " if (iGID >= iNumElements) { \n" + + " return; \n" + + " } \n" + + " c[iGID] = iGID; \n" + + "} \n"; + + + @BeforeClass + public static void 
setUpClass() throws Exception { + out.println("OS: " + System.getProperty("os.name")); + out.println("VM: " + System.getProperty("java.vm.name")); + } + + @Test + public void contextlessTest() { + out.println("low level tests temporary disabled"); + out.println(" - - - lowLevelTest; contextless binding - - - "); + + + CL cl = CLPlatform.getLowLevelCLInterface(); + + System.out.println(((CLImpl)cl).clGetExtensionFunctionAddress("clCreateFromGLBuffer").getLong()); + System.out.println(((CLImpl)cl).clGetExtensionFunctionAddress("clEnqueueAcquireGLObjects").getLong()); +/* + int ret = CL.CL_SUCCESS; + + int[] intBuffer = new int[1]; + // find all available OpenCL platforms + ret = cl.clGetPlatformIDs(0, null, 0, intBuffer, 0); + checkForError(ret); + out.println("#platforms: "+intBuffer[0]); + + long[] platformId = new long[intBuffer[0]]; + ret = cl.clGetPlatformIDs(platformId.length, platformId, 0, null, 0); + checkForError(ret); + + // print platform info + long[] longBuffer = new long[1]; + ByteBuffer bb = ByteBuffer.allocate(128); + bb.order(ByteOrder.nativeOrder()); + + for (int i = 0; i < platformId.length; i++) { + + long platform = platformId[i]; + out.println("platform id: "+platform); + + ret = cl.clGetPlatformInfo(platform, CL.CL_PLATFORM_PROFILE, bb.capacity(), bb, longBuffer, 0); + checkForError(ret); + out.println(" profile: " + clString2JavaString(bb.array(), (int)longBuffer[0])); + + ret = cl.clGetPlatformInfo(platform, CL.CL_PLATFORM_VERSION, bb.capacity(), bb, longBuffer, 0); + checkForError(ret); + out.println(" version: " + clString2JavaString(bb.array(), (int)longBuffer[0])); + + ret = cl.clGetPlatformInfo(platform, CL.CL_PLATFORM_NAME, bb.capacity(), bb, longBuffer, 0); + checkForError(ret); + out.println(" name: " + clString2JavaString(bb.array(), (int)longBuffer[0])); + + ret = cl.clGetPlatformInfo(platform, CL.CL_PLATFORM_VENDOR, bb.capacity(), bb, longBuffer, 0); + checkForError(ret); + out.println(" vendor: " + clString2JavaString(bb.array(), 
(int)longBuffer[0])); + + //find all devices + ret = cl.clGetDeviceIDs(platform, CL.CL_DEVICE_TYPE_ALL, 0, null, 0, intBuffer, 0); + checkForError(ret); + out.println("#devices: "+intBuffer[0]); + + long[] devices = new long[intBuffer[0]]; + ret = cl.clGetDeviceIDs(platform, CL.CL_DEVICE_TYPE_ALL, devices.length, devices, 0, null, 0); + + //print device info + for (int j = 0; j < devices.length; j++) { + long device = devices[j]; + ret = cl.clGetDeviceInfo(device, CL.CL_DEVICE_NAME, bb.capacity(), bb, longBuffer, 0); + checkForError(ret); + out.println(" device: " + clString2JavaString(bb.array(), (int)longBuffer[0])); + + ret = cl.clGetDeviceInfo(device, CL.CL_DEVICE_TYPE, bb.capacity(), bb, longBuffer, 0); + checkForError(ret); + out.println(" type: " + CLDevice.Type.valueOf(bb.get())); + bb.rewind(); + + } + + } +*/ + } +/* + @Test + public void createContextTest() { + + out.println(" - - - createContextTest - - - "); + + CL cl = CLPlatform.getLowLevelBinding(); + + int[] intArray = new int[1]; + // find all available OpenCL platforms + int ret = cl.clGetPlatformIDs(0, null, 0, intArray, 0); + checkForError(ret); + out.println("#platforms: "+intArray[0]); + + long[] longArray = new long[intArray[0]]; + ret = cl.clGetPlatformIDs(longArray.length, longArray, 0, null, 0); + checkForError(ret); + + long platform = longArray[0]; + + //find all devices + ret = cl.clGetDeviceIDs(platform, CL.CL_DEVICE_TYPE_ALL, 0, null, 0, intArray, 0); + checkForError(ret); + out.println("#devices: "+intArray[0]); + + long[] devices = new long[intArray[0]]; + ret = cl.clGetDeviceIDs(platform, CL.CL_DEVICE_TYPE_ALL, devices.length, devices, 0, null, 0); + + IntBuffer intBuffer = IntBuffer.allocate(1); + long context = cl.clCreateContext(null, devices, null, null, intBuffer); + checkError("on clCreateContext", intBuffer.get()); + + //get number of devices + ret = cl.clGetContextInfo(context, CL.CL_CONTEXT_DEVICES, 0, null, longArray, 0); + checkError("on clGetContextInfo", ret); + + int 
sizeofLong = (CPU.is32Bit()?4:8); + out.println("context created with " + longArray[0]/sizeofLong + " devices"); + + //check if equal + assertEquals("context was not created on all devices specified", devices.length, longArray[0]/sizeofLong); + + ret = cl.clReleaseContext(context); + checkError("on clReleaseContext", ret); + } + + + @Test + public void lowLevelVectorAddTest() { + + out.println(" - - - lowLevelTest2; VectorAdd kernel - - - "); + +// CreateContextCallback cb = new CreateContextCallback() { +// @Override +// public void createContextCallback(String errinfo, ByteBuffer private_info, long cb, Object user_data) { +// throw new RuntimeException("not yet implemented..."); +// } +// }; + + long[] longArray = new long[1]; + ByteBuffer bb = ByteBuffer.allocate(4096).order(ByteOrder.nativeOrder()); + + CL cl = CLPlatform.getLowLevelBinding(); + + int ret = CL.CL_SUCCESS; + int[] intArray = new int[1]; + + //TODO properties not allowed to be null + long context = cl.clCreateContextFromType(null, CL.CL_DEVICE_TYPE_ALL, null, null, null); + out.println("context handle: "+context); + + ret = cl.clGetContextInfo(context, CL.CL_CONTEXT_DEVICES, 0, null, longArray, 0); + checkError("on clGetContextInfo", ret); + + int sizeofLong = (CPU.is32Bit()?4:8); + out.println("context created with " + longArray[0]/sizeofLong + " devices"); + + ret = cl.clGetContextInfo(context, CL.CL_CONTEXT_DEVICES, bb.capacity(), bb, null, 0); + checkError("on clGetContextInfo", ret); + + for (int i = 0; i < longArray[0]/sizeofLong; i++) { + out.println("device id: "+bb.getLong()); + } + + long firstDeviceID = bb.getLong(0); + + // Create a command-queue + long commandQueue = cl.clCreateCommandQueue(context, firstDeviceID, 0, intArray, 0); + checkError("on clCreateCommandQueue", intArray[0]); + + int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) + int localWorkSize = 256; // set and log Global and Local work size dimensions + int globalWorkSize = 
roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize + + out.println("allocateing buffers of size: "+globalWorkSize); + + ByteBuffer srcA = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + ByteBuffer srcB = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + ByteBuffer dest = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + + // Allocate the OpenCL buffer memory objects for source and result on the device GMEM + long devSrcA = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, srcA.capacity(), null, intArray, 0); + checkError("on clCreateBuffer", intArray[0]); + long devSrcB = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, srcB.capacity(), null, intArray, 0); + checkError("on clCreateBuffer", intArray[0]); + long devDst = cl.clCreateBuffer(context, CL.CL_MEM_WRITE_ONLY, dest.capacity(), null, intArray, 0); + checkError("on clCreateBuffer", intArray[0]); + + + // Create the program + long program = cl.clCreateProgramWithSource(context, 1, new String[] {programSource}, new long[]{programSource.length()}, 0, intArray, 0); + checkError("on clCreateProgramWithSource", intArray[0]); + + // Build the program + ret = cl.clBuildProgram(program, null, null, null, null); + checkError("on clBuildProgram", ret); + + // Read program infos + bb.rewind(); + ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_NUM_DEVICES, bb.capacity(), bb, null, 0); + checkError("on clGetProgramInfo1", ret); + out.println("program associated with "+bb.getInt(0)+" device(s)"); + + ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_SOURCE, 0, null, longArray, 0); + checkError("on clGetProgramInfo CL_PROGRAM_SOURCE", ret); + out.println("program source length (cl): "+longArray[0]); + out.println("program source length (java): "+programSource.length()); + + bb.rewind(); + ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_SOURCE, bb.capacity(), bb, null, 0); + checkError("on clGetProgramInfo CL_PROGRAM_SOURCE", ret); + out.println("program source:\n" + 
clString2JavaString(bb.array(), (int)longArray[0])); + + // Check program status + Arrays.fill(longArray, 42); + bb.rewind(); + ret = cl.clGetProgramBuildInfo(program, firstDeviceID, CL.CL_PROGRAM_BUILD_STATUS, bb.capacity(), bb, null, 0); + checkError("on clGetProgramBuildInfo1", ret); + + out.println("program build status: " + CLProgram.Status.valueOf(bb.getInt(0))); + assertEquals("build status", CL.CL_BUILD_SUCCESS, bb.getInt(0)); + + // Read build log + ret = cl.clGetProgramBuildInfo(program, firstDeviceID, CL.CL_PROGRAM_BUILD_LOG, 0, null, longArray, 0); + checkError("on clGetProgramBuildInfo2", ret); + out.println("program log length: " + longArray[0]); + + bb.rewind(); + ret = cl.clGetProgramBuildInfo(program, firstDeviceID, CL.CL_PROGRAM_BUILD_LOG, bb.capacity(), bb, null, 0); + checkError("on clGetProgramBuildInfo3", ret); + out.println("log:\n" + clString2JavaString(bb.array(), (int)longArray[0])); + + // Create the kernel + Arrays.fill(intArray, 42); + long kernel = cl.clCreateKernel(program, "VectorAdd", intArray, 0); + checkError("on clCreateKernel", intArray[0]); + +// srcA.limit(elementCount*SIZEOF_FLOAT); +// srcB.limit(elementCount*SIZEOF_FLOAT); + + fillBuffer(srcA, 23456); + fillBuffer(srcB, 46987); + + // Set the Argument values + ret = cl.clSetKernelArg(kernel, 0, CPU.is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devSrcA)); checkError("on clSetKernelArg0", ret); + ret = cl.clSetKernelArg(kernel, 1, CPU.is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devSrcB)); checkError("on clSetKernelArg1", ret); + ret = cl.clSetKernelArg(kernel, 2, CPU.is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devDst)); checkError("on clSetKernelArg2", ret); + ret = cl.clSetKernelArg(kernel, 3, SIZEOF_INT, wrap(elementCount)); checkError("on clSetKernelArg3", ret); + + out.println("used device memory: "+ (srcA.capacity()+srcB.capacity()+dest.capacity())/1000000 +"MB"); + + // Asynchronous write of data to GPU device + ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcA, CL.CL_FALSE, 0, 
srcA.capacity(), srcA, 0, null, null); + checkError("on clEnqueueWriteBuffer", ret); + ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcB, CL.CL_FALSE, 0, srcB.capacity(), srcB, 0, null, null); + checkError("on clEnqueueWriteBuffer", ret); + + // Launch kernel + PointerBuffer gWS = PointerBuffer.allocateDirect(1).put(globalWorkSize).rewind(); + PointerBuffer lWS = PointerBuffer.allocateDirect(1).put(localWorkSize).rewind(); + ret = cl.clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, gWS, lWS, 0, null, null); + checkError("on clEnqueueNDRangeKernel", ret); + + // Synchronous/blocking read of results + ret = cl.clEnqueueReadBuffer(commandQueue, devDst, CL.CL_TRUE, 0, dest.capacity(), dest, 0, null, null); + checkError("on clEnqueueReadBuffer", ret); + + out.println("a+b=c result snapshot: "); + for(int i = 0; i < 10; i++) + out.print(dest.getInt()+", "); + out.println("...; "+dest.remaining()/SIZEOF_INT + " more"); + + + // cleanup + ret = cl.clReleaseCommandQueue(commandQueue); + checkError("on clReleaseCommandQueue", ret); + + ret = cl.clReleaseMemObject(devSrcA); + checkError("on clReleaseMemObject", ret); + ret = cl.clReleaseMemObject(devSrcB); + checkError("on clReleaseMemObject", ret); + ret = cl.clReleaseMemObject(devDst); + checkError("on clReleaseMemObject", ret); + + ret = cl.clReleaseProgram(program); + checkError("on clReleaseProgram", ret); + + ret = cl.clReleaseKernel(kernel); + checkError("on clReleaseKernel", ret); + + ret = cl.clUnloadCompiler(); + checkError("on clUnloadCompiler", ret); + + ret = cl.clReleaseContext(context); + checkError("on clReleaseContext", ret); + + } + + @Test + public void loadTest() { + //for memory leak detection; e.g watch out for "out of host memory" errors + out.println(" - - - loadTest - - - "); + for(int i = 0; i < 100; i++) { + out.println("###iteration "+i); + lowLevelVectorAddTest(); + } + } +*/ + private ByteBuffer wrap(long value) { + return (ByteBuffer) newDirectByteBuffer(8).putLong(value).rewind(); + } + 
/**
 * Helpers shared by the OpenCL tests: deterministic buffer filling, work-size rounding and
 * element-wise buffer comparison.
 *
 * @author Michael Bien
 */
public class TestUtils {

    // NOTE(review): the original remark "decrease this value on systems with few memory" sat
    // directly above ONE_MB; it presumably applies to the sizes derived from these constants
    // (NUM_ELEMENTS in particular) - confirm.

    /** Number of bytes in one mebibyte. */
    final static int ONE_MB = 1048576;

    /** Default number of int elements used by the buffer tests. */
    final static int NUM_ELEMENTS = 10000000;

    /** Size of one {@code int} in bytes. */
    private static final int BYTES_PER_INT = Integer.SIZE / Byte.SIZE;

    /**
     * Fills {@code buffer} from its current position with pseudo random ints and rewinds it.
     *
     * The values depend only on {@code seed}, so two buffers filled with the same seed hold the
     * same data. Trailing bytes that cannot hold a whole int are left untouched instead of
     * raising a {@code BufferOverflowException}.
     *
     * @param buffer the buffer to fill; its position is 0 on return
     * @param seed   seed of the random sequence
     */
    public static final void fillBuffer(ByteBuffer buffer, int seed) {

        Random rnd = new Random(seed);

        while (buffer.remaining() >= BYTES_PER_INT) {
            buffer.putInt(rnd.nextInt());
        }

        buffer.rewind();
    }

    /**
     * Rounds {@code globalSize} up to the next multiple of {@code groupSize}.
     *
     * @param groupSize  the work-group size, must be positive
     * @param globalSize the size to round up
     * @return {@code globalSize} if it already is a multiple of {@code groupSize}, otherwise the
     *         next larger multiple
     * @throws IllegalArgumentException if {@code groupSize} is not positive
     * @throws ArithmeticException      if the rounded value does not fit into an {@code int}
     */
    public static final int roundUp(int groupSize, int globalSize) {
        if (groupSize <= 0) {
            throw new IllegalArgumentException("groupSize must be positive: " + groupSize);
        }
        int r = globalSize % groupSize;
        if (r == 0) {
            return globalSize;
        }
        // Computed in long so an overflow is detected instead of wrapping to a negative size.
        long rounded = (long) globalSize + groupSize - r;
        if (rounded > Integer.MAX_VALUE) {
            throw new ArithmeticException(
                    "rounding " + globalSize + " up to a multiple of " + groupSize + " overflows int");
        }
        return (int) rounded;
    }

    /**
     * Compares the next {@code elements} ints of both buffers and fails on the first mismatch.
     *
     * Both buffers are read from their current position and are rewound afterwards, also when
     * the comparison fails. A mismatch is reported with an {@link AssertionError}, the same
     * error type {@code org.junit.Assert.fail} raises.
     *
     * @param a        first buffer
     * @param b        second buffer
     * @param elements number of ints to compare
     * @throws AssertionError if an element differs
     */
    public static final void checkIfEqual(ByteBuffer a, ByteBuffer b, int elements) {
        try {
            for (int i = 0; i < elements; i++) {
                int aVal = a.getInt();
                int bVal = b.getInt();
                if (aVal != bVal) {
                    out.println("a: "+aVal);
                    out.println("b: "+bVal);
                    out.println("position: "+a.position());
                    throw new AssertionError("a!=b at element " + i + " (a=" + aVal + ", b=" + bVal + ")");
                }
            }
        } finally {
            a.rewind();
            b.rewind();
        }
    }
}
get_global_id(0); + // bound check (equivalent to the limit on a 'for' loop for standard/serial C code + if (iGID >= iNumElements) { + return; + } + // add the vector elements + c[iGID] = a[iGID] + b[iGID]; + } + + kernel void Test(global const int* a, global const int* b, global int* c, int iNumElements) { + // get index into global data array + int iGID = get_global_id(0); + // bound check (equivalent to the limit on a 'for' loop for standard/serial C code + if (iGID >= iNumElements) { + return; + } + c[iGID] = iGID; + } diff --git a/test/com/mbien/opencl/CLBufferTest.java b/test/com/mbien/opencl/CLBufferTest.java deleted file mode 100644 index a99db23c..00000000 --- a/test/com/mbien/opencl/CLBufferTest.java +++ /dev/null @@ -1,149 +0,0 @@ -package com.mbien.opencl; - -import com.mbien.opencl.CLMemory.Mem; -import com.mbien.opencl.CLMemory.Map; -import com.jogamp.common.nio.Buffers; -import java.nio.ByteBuffer; -import org.junit.Test; - -import static org.junit.Assert.*; -import static java.lang.System.*; -import static com.mbien.opencl.TestUtils.*; -import static com.jogamp.common.nio.Buffers.*; - -/** - * - * @author Michael Bien - */ -public class CLBufferTest { - - @Test - public void writeCopyReadBufferTest() { - - out.println(" - - - highLevelTest; copy buffer test - - - "); - - final int elements = NUM_ELEMENTS; - - CLContext context = CLContext.create(); - - // the CL.MEM_* flag is probably completely irrelevant in our case since we do not use a kernel in this test - CLBuffer clBufferA = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); - CLBuffer clBufferB = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); - - // fill only first read buffer -> we will copy the payload to the second later. - fillBuffer(clBufferA.buffer, 12345); - - CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); - - // asynchronous write of data to GPU device, blocking read later to get the computed results back. 
- queue.putWriteBuffer(clBufferA, false) // write A - .putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B - .putReadBuffer(clBufferB, true) // read B - .finish(); - - context.release(); - - out.println("validating computed results..."); - checkIfEqual(clBufferA.buffer, clBufferB.buffer, elements); - out.println("results are valid"); - - } - - @Test - public void bufferWithHostPointerTest() { - - out.println(" - - - highLevelTest; host pointer test - - - "); - - final int elements = NUM_ELEMENTS; - - CLContext context = CLContext.create(); - - ByteBuffer buffer = Buffers.newDirectByteBuffer(elements*SIZEOF_INT); - // fill only first read buffer -> we will copy the payload to the second later. - fillBuffer(buffer, 12345); - - CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); - - Mem[] bufferConfig = new Mem[] {Mem.COPY_BUFFER, Mem.USE_BUFFER}; - - for(int i = 0; i < bufferConfig.length; i++) { - - out.println("testing with "+bufferConfig[i] + " config"); - - CLBuffer clBufferA = context.createBuffer(buffer, Mem.READ_ONLY, bufferConfig[i]); - CLBuffer clBufferB = context.createByteBuffer(elements*SIZEOF_INT, Mem.READ_ONLY); - - // asynchronous write of data to GPU device, blocking read later to get the computed results back. - queue.putCopyBuffer(clBufferA, clBufferB, clBufferA.buffer.capacity()) // copy A -> B - .putReadBuffer(clBufferB, true) // read B - .finish(); - - assertEquals(2, context.getMemoryObjects().size()); - clBufferA.release(); - assertEquals(1, context.getMemoryObjects().size()); - clBufferB.release(); - assertEquals(0, context.getMemoryObjects().size()); - - // uploading worked when a==b. 
- out.println("validating computed results..."); - checkIfEqual(clBufferA.buffer, clBufferB.buffer, elements); - out.println("results are valid"); - } - - context.release(); - } - - @Test - public void mapBufferTest() { - - out.println(" - - - highLevelTest; map buffer test - - - "); - - final int elements = NUM_ELEMENTS; - final int sizeInBytes = elements*SIZEOF_INT; - - CLContext context; - CLBuffer clBufferA; - CLBuffer clBufferB; - - // We will have to allocate mappable NIO memory on non CPU contexts - // since we can't map e.g GPU memory. - if(CLPlatform.getDefault().listCLDevices(CLDevice.Type.CPU).length > 0) { - - context = CLContext.create(CLDevice.Type.CPU); - - clBufferA = context.createBuffer(sizeInBytes, Mem.READ_WRITE); - clBufferB = context.createBuffer(sizeInBytes, Mem.READ_WRITE); - }else{ - - context = CLContext.create(); - - clBufferA = context.createByteBuffer(sizeInBytes, Mem.READ_WRITE, Mem.USE_BUFFER); - clBufferB = context.createByteBuffer(sizeInBytes, Mem.READ_WRITE, Mem.USE_BUFFER); - } - - CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); - - // fill only first buffer -> we will copy the payload to the second later. - ByteBuffer mappedBufferA = queue.putMapBuffer(clBufferA, Map.READ_WRITE, true); - assertEquals(sizeInBytes, mappedBufferA.capacity()); - - fillBuffer(mappedBufferA, 12345); // write to A - - queue.putUnmapMemory(clBufferA) // unmap A - .putCopyBuffer(clBufferA, clBufferB); // copy A -> B - - // map B for read operations - ByteBuffer mappedBufferB = queue.putMapBuffer(clBufferB, Map.READ, true); - assertEquals(sizeInBytes, mappedBufferB.capacity()); - - out.println("validating computed results..."); - checkIfEqual(mappedBufferA, mappedBufferB, elements); // A == B ? 
- out.println("results are valid"); - - queue.putUnmapMemory(clBufferB); // unmap B - - context.release(); - - } - -} diff --git a/test/com/mbien/opencl/CLCommandQueueTest.java b/test/com/mbien/opencl/CLCommandQueueTest.java deleted file mode 100644 index cbfc2f3c..00000000 --- a/test/com/mbien/opencl/CLCommandQueueTest.java +++ /dev/null @@ -1,266 +0,0 @@ -package com.mbien.opencl; - -import com.mbien.opencl.util.MultiQueueBarrier; -import com.mbien.opencl.CLCommandQueue.Mode; -import com.mbien.opencl.CLMemory.Mem; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.EnumSet; -import org.junit.Test; - -import static org.junit.Assert.*; -import static java.lang.System.*; -import static com.mbien.opencl.TestUtils.*; -import static com.mbien.opencl.CLEvent.*; -import static com.jogamp.common.nio.Buffers.*; - -/** - * - * @author Michael Bien - */ -public class CLCommandQueueTest { - - private final int groupSize = 256; - - @Test - public void enumsTest() { - - //CLCommandQueueEnums - EnumSet queueMode = Mode.valuesOf(CL.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL.CL_QUEUE_PROFILING_ENABLE); - assertTrue(queueMode.contains(Mode.OUT_OF_ORDER_MODE)); - assertTrue(queueMode.contains(Mode.PROFILING_MODE)); - - assertNotNull(Mode.valuesOf(0)); - assertEquals(0, Mode.valuesOf(0).size()); - for (Mode mode : Mode.values()) { - assertEquals(mode, Mode.valueOf(mode.QUEUE_MODE)); - } - - // CLEvent enums - for (ProfilingCommand cmd : ProfilingCommand.values()) { - assertEquals(cmd, ProfilingCommand.valueOf(cmd.COMMAND)); - } - - for (CommandType type : CommandType.values()) { - assertEquals(type, CommandType.valueOf(type.TYPE)); - } - - for (ExecutionStatus status : ExecutionStatus.values()) { - assertEquals(status, ExecutionStatus.valueOf(status.STATUS)); - } - - } - - @Test - public void eventsTest() throws IOException { - - out.println(" - - - event synchronization test - - - "); - - final int elements = roundUp(groupSize, ONE_MB / SIZEOF_INT * 5); // 
5MB per buffer - - CLContext context = CLContext.create(); - - CLBuffer clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - CLBuffer clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - CLBuffer clBufferD = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - - fillBuffer(clBufferA.buffer, 12345); - fillBuffer(clBufferB.buffer, 67890); - - CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); - CLKernel vectorAddKernel = program.createCLKernel("VectorAddGM").setArg(3, elements); - CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); - - final CLEventList events = new CLEventList(2); - - assertEquals(0, events.size()); - - queue.putWriteBuffer(clBufferA, false, events) // write A - .putWriteBuffer(clBufferB, false, events);// write B - - assertEquals(2, events.size()); - queue.putWaitForEvents(events, true); - - events.release(); - assertEquals(0, events.size()); - - vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B - queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); - - vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferD); // D = A+B - queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); - - assertEquals(2, events.size()); - queue.putWaitForEvent(events, 0, false) - .putWaitForEvent(events, 1, true); - - queue.putReadBuffer(clBufferC, false) - .putReadBuffer(clBufferD, true); - - events.release(); - - checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements); - - - context.release(); - - - out.println("results are valid"); - - } - @Test - public void profilingEventsTest() throws IOException { - - out.println(" - - - event synchronization test - - - "); - - final int elements = roundUp(groupSize, ONE_MB / SIZEOF_INT * 5); // 5MB per buffer - - CLContext context = CLContext.create(); - - 
CLBuffer clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - CLBuffer clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - - fillBuffer(clBufferA.buffer, 12345); - fillBuffer(clBufferB.buffer, 67890); - - CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); - CLKernel vectorAddKernel = program.createCLKernel("VectorAddGM").setArg(3, elements); - CLCommandQueue queue = context.getDevices()[0].createCommandQueue(Mode.PROFILING_MODE); - - queue.putWriteBuffer(clBufferA, true) // write A - .putWriteBuffer(clBufferB, true);// write B - - final CLEventList events = new CLEventList(1); - - assertEquals(0, events.size()); - - vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B - queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); - - assertEquals(1, events.size()); - CLEvent probe = events.getEvent(0); - out.println(probe); - - queue.putWaitForEvents(events, true); - assertEquals(CLEvent.ExecutionStatus.COMPLETE, probe.getStatus()); - - out.println(probe); - long time = probe.getProfilingInfo(CLEvent.ProfilingCommand.END) - - probe.getProfilingInfo(CLEvent.ProfilingCommand.START); - out.println("time: "+time); - assertTrue(time > 0); - - events.release(); - context.release(); - - } - - @Test - public void concurrencyTest() throws IOException, InterruptedException { - - out.println(" - - - QueueBarrier test - - - "); - - final int elements = ONE_MB / SIZEOF_INT * 10; // 20MB per buffer - - CLContext context = CLContext.create(); - - CLDevice[] devices = context.getDevices(); - - if (devices.length < 2) { - out.println("aborting test... 
need at least 2 devices"); - context.release(); - return; - } - - final CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - final CLBuffer clBufferD = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - - final CLBuffer clBufferA1 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - final CLBuffer clBufferB1 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - final CLBuffer clBufferA2 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - final CLBuffer clBufferB2 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); - - CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); - - final CLKernel vectorAddKernel1 = program.createCLKernel("VectorAddGM").setArg(3, elements); - final CLKernel vectorAddKernel2 = program.createCLKernel("VectorAddGM").setArg(3, elements); - - int secondDevice = devices.length > 1 ? 1 : 0; - - final CLCommandQueue queue1 = devices[0 ].createCommandQueue(); - final CLCommandQueue queue2 = devices[secondDevice].createCommandQueue(); - - fillBuffer(clBufferC.buffer, 12345); - - if (secondDevice > 0) { - System.out.println("using two devices"); - } - - final MultiQueueBarrier barrier = new MultiQueueBarrier(2); - - Thread thread1 = new Thread("C") { - - @Override - public void run() { - - fillBuffer(clBufferA1.buffer, 12345); - fillBuffer(clBufferB1.buffer, 67890); - -// System.out.println("C buffer"); - queue1.putWriteBuffer(clBufferA1, false) // write A - .putWriteBuffer(clBufferB1, false); // write B - -// System.out.println("C args"); - vectorAddKernel1.setArgs(clBufferA1, clBufferB1, clBufferC); // C = A+B - -// System.out.println("C kernels"); - CLEventList events1 = new CLEventList(2); - queue1.put1DRangeKernel(vectorAddKernel1, 0, elements, groupSize, events1) - .putReadBuffer(clBufferC, false, events1); - - barrier.waitFor(queue1, events1); - - } - }; - - Thread thread2 = new Thread("D") 
{ - - @Override - public void run() { - - fillBuffer(clBufferA2.buffer, 12345); - fillBuffer(clBufferB2.buffer, 67890); - -// System.out.println("D buffer"); - queue2.putWriteBuffer(clBufferA2, false) // write A - .putWriteBuffer(clBufferB2, false); // write B - -// System.out.println("D args"); - vectorAddKernel2.setArgs(clBufferA2, clBufferB2, clBufferD); // D = A+B - -// System.out.println("D kernels"); - CLEventList events2 = new CLEventList(2); - queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, groupSize, events2) - .putReadBuffer(clBufferD, false, events2); - - barrier.waitFor(queue2, events2); - - } - }; - - out.println("starting threads"); - thread1.start(); - thread2.start(); - barrier.await(); - out.println("done"); - - checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements); - - context.release(); - - out.println("results are valid"); - - } -} diff --git a/test/com/mbien/opencl/CLProgramTest.java b/test/com/mbien/opencl/CLProgramTest.java deleted file mode 100644 index f46933c6..00000000 --- a/test/com/mbien/opencl/CLProgramTest.java +++ /dev/null @@ -1,224 +0,0 @@ -package com.mbien.opencl; - -import com.mbien.opencl.util.CLBuildConfiguration; -import com.mbien.opencl.util.CLProgramConfiguration; -import com.mbien.opencl.CLProgram.Status; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.util.Map; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; - -import static org.junit.Assert.*; -import static java.lang.System.*; -import static com.mbien.opencl.CLProgram.CompilerOptions.*; - -/** - * - * @author Michael Bien - */ -public class CLProgramTest { - - @Rule - public TemporaryFolder tmpFolder = new TemporaryFolder(); - - - @Test - public void enumsTest() { - - // CLProgram enums - for (Status e : Status.values()) { - assertEquals(e, Status.valueOf(e.STATUS)); 
- } - } - - @Test - public void rebuildProgramTest() throws IOException { - - out.println(" - - - CLProgramTest; rebuild program test - - - "); - - CLContext context = CLContext.create(); - CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")); - - try{ - program.createCLKernels(); - fail("expected exception but got none :("); - }catch(CLException ex) { - out.println("got expected exception: "+ex.getCLErrorString()); - assertEquals(ex.errorcode, CL.CL_INVALID_PROGRAM_EXECUTABLE); - } - - out.println(program.getBuildStatus()); - program.build(); - out.println(program.getBuildStatus()); - - assertTrue(program.isExecutable()); - - Map kernels = program.createCLKernels(); - assertNotNull(kernels); - assertTrue("kernel map is empty", kernels.size() > 0); - - // rebuild - // 1. release kernels (internally) - // 2. build program - program.build(); - assertTrue(program.isExecutable()); - out.println(program.getBuildStatus()); - - // try again with rebuilt program - kernels = program.createCLKernels(); - assertNotNull(kernels); - assertTrue("kernel map is empty", kernels.size() > 0); - assertTrue(kernels.size() > 0); - - context.release(); - } - - @Test - public void programBinariesTest() throws IOException { - - out.println(" - - - CLProgramTest; down-/upload binaries test - - - "); - - CLContext context = CLContext.create(); - CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")) - .build(ENABLE_MAD, WARNINGS_ARE_ERRORS); - - // optain binaries - Map binaries = program.getBinaries(); - assertFalse(binaries.isEmpty()); - - CLDevice[] devices = program.getCLDevices(); - for (CLDevice device : devices) { - assertTrue(binaries.containsKey(device)); - } - - // 1. release program - // 2. 
re-create program with old binaries - program.release(); - - assertFalse(program.isExecutable()); - - assertNotNull(program.getBinaries()); - assertEquals(program.getBinaries().size(), 0); - - assertNotNull(program.getBuildLog()); - assertEquals(program.getBuildLog().length(), 0); - - assertNotNull(program.getSource()); - assertEquals(program.getSource().length(), 0); - - assertNotNull(program.getCLDevices()); - assertEquals(program.getCLDevices().length, 0); - - { - Map kernels = program.createCLKernels(); - assertNotNull(kernels); - assertEquals(kernels.size(), 0); - } - assertNull(program.createCLKernel("foo")); - - program = context.createProgram(binaries); - - assertFalse(program.isExecutable()); - - assertNotNull(program.getCLDevices()); - assertTrue(program.getCLDevices().length != 0); - - assertNotNull(program.getBinaries()); - assertEquals(program.getBinaries().size(), 0); - - assertNotNull(program.getBuildLog()); - assertTrue(program.getBuildLog().length() != 0); - - assertNotNull(program.getSource()); - assertEquals(program.getSource().length(), 0); - - try{ - Map kernels = program.createCLKernels(); - fail("expected an exception from createCLKernels but got: "+kernels); - }catch(CLException ex) { - // expected, not build yet - } - - out.println(program.getBuildStatus()); - program.build(); - out.println(program.getBuildStatus()); - - assertNotNull(program.createCLKernel("Test")); - - assertTrue(program.isExecutable()); - - } - - @Test - public void builderTest() throws IOException, ClassNotFoundException { - out.println(" - - - CLProgramTest; program builder test - - - "); - - CLContext context = CLContext.create(); - CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")); - - // same as program.build() - program.prepare().build(); - - assertTrue(program.isExecutable()); - - - // complex build - program.prepare().withOption(ENABLE_MAD) - .forDevice(context.getMaxFlopsDevice()) - .withDefine("RADIUS", 5) - 
.withDefine("ENABLE_FOOBAR") - .build(); - - assertTrue(program.isExecutable()); - - // reusable builder - CLBuildConfiguration builder = CLProgramBuilder.createConfiguration() - .withOption(ENABLE_MAD) - .forDevices(context.getDevices()) - .withDefine("RADIUS", 5) - .withDefine("ENABLE_FOOBAR"); - - out.println(builder); - - builder.setProgram(program).build(); - assertTrue(program.isExecutable()); - - // serialization test - File file = tmpFolder.newFile("foobar.builder"); - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(file)); - builder.save(oos); - oos.close(); - - // build configuration - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); - CLBuildConfiguration buildConfig = CLProgramBuilder.loadConfiguration(ois); - ois.close(); - - assertEquals(builder, buildConfig); - - buildConfig.build(program); - assertTrue(program.isExecutable()); - - // program configuration - ois = new ObjectInputStream(new FileInputStream(file)); - CLProgramConfiguration programConfig = CLProgramBuilder.loadConfiguration(ois, context); - assertNotNull(programConfig.getProgram()); - ois.close(); - program = programConfig.build(); - assertTrue(program.isExecutable()); - - - // cloneing - assertEquals(builder, builder.clone()); - - } - - - -} diff --git a/test/com/mbien/opencl/HighLevelBindingTest.java b/test/com/mbien/opencl/HighLevelBindingTest.java deleted file mode 100644 index 07788f6c..00000000 --- a/test/com/mbien/opencl/HighLevelBindingTest.java +++ /dev/null @@ -1,305 +0,0 @@ -package com.mbien.opencl; - -import com.mbien.opencl.CLMemory.Mem; -import com.mbien.opencl.CLMemory.GLObjectType; -import com.mbien.opencl.CLSampler.AddressingMode; -import com.mbien.opencl.CLSampler.FilteringMode; -import com.mbien.opencl.CLImageFormat.ChannelOrder; -import com.mbien.opencl.CLImageFormat.ChannelType; -import com.mbien.opencl.CLDevice.FPConfig; -import com.mbien.opencl.CLDevice.GlobalMemCacheType; -import 
com.mbien.opencl.CLDevice.LocalMemType; -import com.mbien.opencl.CLDevice.Type; -import com.mbien.opencl.CLDevice.Capabilities; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.EnumSet; -import java.util.Map; -import org.junit.BeforeClass; -import org.junit.Test; - -import static org.junit.Assert.*; -import static java.lang.System.*; -import static com.mbien.opencl.TestUtils.*; -import static com.jogamp.common.nio.Buffers.*; - -/** - * Test testing the high level bindings. - * @author Michael Bien - */ -public class HighLevelBindingTest { - - @BeforeClass - public static void setUpClass() throws Exception { - out.println("OS: " + System.getProperty("os.name")); - out.println("ARCH: " + System.getProperty("os.arch")); - out.println("VM: " + System.getProperty("java.vm.name")); - out.println("lib path: " + System.getProperty("java.library.path")); - } - - @Test - public void enumsTest() { - - // enum tests - final EnumSet singleFPConfig = FPConfig.valuesOf(CL.CL_FP_DENORM | CL.CL_FP_ROUND_TO_INF); - assertEquals(0, FPConfig.valuesOf(0).size()); - assertTrue(singleFPConfig.contains(FPConfig.DENORM)); - assertTrue(singleFPConfig.contains(FPConfig.ROUND_TO_INF)); - - // CLDevice enums - for (FPConfig e : FPConfig.values()) { - EnumSet set = FPConfig.valuesOf(e.CONFIG); - assertTrue(set.contains(e)); - } - for (GlobalMemCacheType e : GlobalMemCacheType.values()) { - assertEquals(e, GlobalMemCacheType.valueOf(e.TYPE)); - } - for (LocalMemType e : LocalMemType.values()) { - assertEquals(e, LocalMemType.valueOf(e.TYPE)); - } - for (Type e : Type.values()) { - assertEquals(e, Type.valueOf(e.TYPE)); - } - for (Capabilities e : Capabilities.values()) { - assertEquals(e, Capabilities.valueOf(e.CAPS)); - } - - // CLMemory enums - for (Mem e : Mem.values()) { - assertEquals(e, Mem.valueOf(e.CONFIG)); - } - - for (GLObjectType e : GLObjectType.values()) { - assertEquals(e, GLObjectType.valueOf(e.TYPE)); - } - - // CLSampler 
enums - for (AddressingMode e : AddressingMode.values()) { - assertEquals(e, AddressingMode.valueOf(e.MODE)); - } - for (FilteringMode e : FilteringMode.values()) { - assertEquals(e, FilteringMode.valueOf(e.MODE)); - } - - // CLImage enums - for (ChannelOrder e : ChannelOrder.values()) { - assertEquals(e, ChannelOrder.valueOf(e.ORDER)); - } - for (ChannelType e : ChannelType.values()) { - assertEquals(e, ChannelType.valueOf(e.TYPE)); - } - - } - - - - @Test - public void contextlessTest() { - - out.println(" - - - highLevelTest; contextless - - - "); - - // platform/device info tests - CLPlatform[] clPlatforms = CLPlatform.listCLPlatforms(); - - for (CLPlatform platform : clPlatforms) { - - out.println("platform info:"); - out.println(" name: "+platform.getName()); - out.println(" id: "+platform.ID); - out.println(" profile: "+platform.getProfile()); - out.println(" version: "+platform.getVersion()); - out.println(" vendor: "+platform.getVendor()); - out.println(" max FLOPS device: "+platform.getMaxFlopsDevice()); - out.println(" extensions: "+platform.getExtensions()); - - CLDevice[] clDevices = platform.listCLDevices(); - for (CLDevice device : clDevices) { - out.println("device info:"); - out.println(" name: "+device.getName()); - out.println(" profile: "+device.getProfile()); - out.println(" vendor: "+device.getVendor()); - out.println(" vendor id: "+device.getVendorID()); - out.println(" version: "+device.getVersion()); - out.println(" driver version: "+device.getDriverVersion()); - out.println(" type: "+device.getType()); - out.println(" global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB"); - out.println(" max alloc mem: "+device.getMaxMemAllocSize()/(1024*1024)+" MB"); - out.println(" max param size: "+device.getMaxParameterSize()+" byte"); - out.println(" local mem: "+device.getLocalMemSize()/1024+" KB"); - out.println(" local mem type: "+device.getLocalMemType()); - out.println(" global mem cache size: "+device.getGlobalMemCacheSize()); - 
out.println(" global mem cacheline size: "+device.getGlobalMemCachelineSize()); - out.println(" global mem cache type: "+device.getGlobalMemCacheType()); - out.println(" constant buffer size: "+device.getMaxConstantBufferSize()); - out.println(" error correction support: "+device.isErrorCorrectionSupported()); - out.println(" queue properties: "+device.getQueueProperties()); - out.println(" clock: "+device.getMaxClockFrequency()+" MHz"); - out.println(" timer res: "+device.getProfilingTimerResolution()+" ns"); - out.println(" max work group size: "+device.getMaxWorkGroupSize()); - out.println(" max compute units: "+device.getMaxComputeUnits()); - out.println(" max work item dimensions: "+device.getMaxWorkItemDimensions()); - out.println(" max work item sizes: "+Arrays.toString(device.getMaxWorkItemSizes())); - out.println(" compiler available: "+device.isCompilerAvailable()); - out.println(" image support: "+device.isImageSupportAvailable()); - out.println(" max read image args: "+device.getMaxReadImageArgs()); - out.println(" max write image args: "+device.getMaxWriteImageArgs()); - out.println(" max image2d dimensions: "+Arrays.asList(device.getMaxImage2dWidth(), device.getMaxImage2dHeight())); - out.println(" max image3d dimensions: "+Arrays.asList(device.getMaxImage2dWidth(), device.getMaxImage2dHeight(), device.getMaxImage3dDepth())); - out.println(" number of address bits: "+device.getAddressBits()); - out.println(" half FP available: "+device.isHalfFPAvailable()); - out.println(" double FP available: "+device.isDoubleFPAvailable()); - out.println(" little endian: "+device.isLittleEndian()); - out.println(" half FP config: "+device.getHalfFPConfig()); - out.println(" single FP config: "+device.getSingleFPConfig()); - out.println(" double FP config: "+device.getDoubleFPConfig()); - out.println(" execution capabilities: "+device.getExecutionCapabilities()); - out.println(" gl memory sharing: "+device.isGLMemorySharingSupported()); - out.println(" extensions: 
"+device.getExtensions()); - } - } - - } - - @Test - public void createContextTest() { - - out.println(" - - - highLevelTest; create context - - - "); - - CLPlatform platform = CLPlatform.getDefault(); - int deviceCount = platform.listCLDevices().length; - CLDevice firstDevice = platform.listCLDevices()[0]; - - CLContext c = CLContext.create(); - assertNotNull(c); - assertEquals(deviceCount, c.getDevices().length); - c.release(); - - c = CLContext.create(platform); - assertNotNull(c); - assertEquals(deviceCount, c.getDevices().length); - c.release(); - - c = CLContext.create(firstDevice); - assertNotNull(c); - assertEquals(1, c.getDevices().length); - c.release(); - - c = CLContext.create(CLDevice.Type.ALL); - assertNotNull(c); - assertEquals(deviceCount, c.getDevices().length); - c.release(); - - c = CLContext.create(platform, firstDevice); - assertNotNull(c); - assertEquals(1, c.getDevices().length); - c.release(); - - c = CLContext.create(platform, CLDevice.Type.ALL); - assertNotNull(c); - assertEquals(deviceCount, c.getDevices().length); - c.release(); - - } - - @Test - public void vectorAddGMTest() throws IOException { - - out.println(" - - - highLevelTest; global memory kernel - - - "); - - CLPlatform[] clPlatforms = CLPlatform.listCLPlatforms(); - CLContext context = CLContext.create(clPlatforms[0]); - - CLDevice[] contextDevices = context.getDevices(); - - out.println("context devices:"); - for (CLDevice device : contextDevices) { - out.println(" "+device.toString()); - } - - out.println("max FLOPS device: " + context.getMaxFlopsDevice()); - - CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); - - CLDevice[] programDevices = program.getCLDevices(); - - assertEquals(contextDevices.length, programDevices.length); - - out.println("build log:\n"+program.getBuildLog()); - out.println("build status:\n"+program.getBuildStatus()); - - String source = program.getSource(); - assertFalse(source.trim().isEmpty()); -// 
out.println("source:\n"+source); - - Map binaries = program.getBinaries(); - assertFalse(binaries.isEmpty()); - - int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) - int localWorkSize = 256; // set and log Global and Local work size dimensions - int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize - - out.println("allocateing buffers of size: "+globalWorkSize); - - ByteBuffer srcA = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - ByteBuffer srcB = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - ByteBuffer dest = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - - fillBuffer(srcA, 23456); - fillBuffer(srcB, 46987); - - CLBuffer clBufferA = context.createBuffer(srcA, Mem.READ_ONLY); - CLBuffer clBufferB = context.createBuffer(srcB, Mem.READ_ONLY); - CLBuffer clBufferC = context.createBuffer(dest, Mem.WRITE_ONLY); - - Map kernels = program.createCLKernels(); - for (CLKernel kernel : kernels.values()) { - out.println("kernel: "+kernel.toString()); - } - - assertNotNull(kernels.get("VectorAddGM")); - assertNotNull(kernels.get("Test")); - - CLKernel vectorAddKernel = kernels.get("VectorAddGM"); - - vectorAddKernel.setArg(0, clBufferA) - .setArg(1, clBufferB) - .setArg(2, clBufferC) - .setArg(3, elementCount); - - CLCommandQueue queue = programDevices[0].createCommandQueue(); - - // Asynchronous write of data to GPU device, blocking read later - queue.putWriteBuffer(clBufferA, false) - .putWriteBuffer(clBufferB, false) - .put1DRangeKernel(vectorAddKernel, 0, globalWorkSize, localWorkSize) - .putReadBuffer(clBufferC, true) - .finish().release(); - - out.println("a+b=c result snapshot: "); - for(int i = 0; i < 10; i++) - out.print(dest.getInt()+", "); - out.println("...; "+dest.remaining()/SIZEOF_INT + " more"); - - assertTrue(3 == context.getMemoryObjects().size()); - clBufferA.release(); - assertTrue(2 == context.getMemoryObjects().size()); - - assertTrue(2 == 
context.getMemoryObjects().size()); - clBufferB.release(); - assertTrue(1 == context.getMemoryObjects().size()); - - assertTrue(1 == context.getMemoryObjects().size()); - clBufferC.release(); - assertTrue(0 == context.getMemoryObjects().size()); - - - assertTrue(1 == context.getPrograms().size()); - program.release(); - assertTrue(0 == context.getPrograms().size()); - - context.release(); - } - - -} diff --git a/test/com/mbien/opencl/LowLevelBindingTest.java b/test/com/mbien/opencl/LowLevelBindingTest.java deleted file mode 100644 index 1a57bec5..00000000 --- a/test/com/mbien/opencl/LowLevelBindingTest.java +++ /dev/null @@ -1,368 +0,0 @@ -package com.mbien.opencl; - -import com.mbien.opencl.impl.CLImpl; -import com.jogamp.common.nio.PointerBuffer; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.IntBuffer; -import java.util.Arrays; -import org.junit.BeforeClass; -import org.junit.Test; -import static org.junit.Assert.*; -import static java.lang.System.*; -import static com.mbien.opencl.TestUtils.*; -import static com.mbien.opencl.util.CLUtil.*; -import static com.jogamp.common.nio.Buffers.*; - -/** - * Test testing the low level bindings. 
- * @author Michael Bien - */ -public class LowLevelBindingTest { - - private final static String programSource = - " // OpenCL Kernel Function for element by element vector addition \n" - + "kernel void VectorAdd(global const int* a, global const int* b, global int* c, int iNumElements) { \n" - + " // get index into global data array \n" - + " int iGID = get_global_id(0); \n" - + " // bound check (equivalent to the limit on a 'for' loop for standard/serial C code \n" - + " if (iGID >= iNumElements) { \n" - + " return; \n" - + " } \n" - + " // add the vector elements \n" - + " c[iGID] = a[iGID] + b[iGID]; \n" - + "} \n" - + "kernel void Test(global const int* a, global const int* b, global int* c, int iNumElements) { \n" - + " // get index into global data array \n" - + " int iGID = get_global_id(0); \n" - + " // bound check (equivalent to the limit on a 'for' loop for standard/serial C code \n" - + " if (iGID >= iNumElements) { \n" - + " return; \n" - + " } \n" - + " c[iGID] = iGID; \n" - + "} \n"; - - - @BeforeClass - public static void setUpClass() throws Exception { - out.println("OS: " + System.getProperty("os.name")); - out.println("VM: " + System.getProperty("java.vm.name")); - } - - @Test - public void contextlessTest() { - out.println("low level tests temporary disabled"); - out.println(" - - - lowLevelTest; contextless binding - - - "); - - - CL cl = CLPlatform.getLowLevelCLInterface(); - - System.out.println(((CLImpl)cl).clGetExtensionFunctionAddress("clCreateFromGLBuffer").getLong()); - System.out.println(((CLImpl)cl).clGetExtensionFunctionAddress("clEnqueueAcquireGLObjects").getLong()); -/* - int ret = CL.CL_SUCCESS; - - int[] intBuffer = new int[1]; - // find all available OpenCL platforms - ret = cl.clGetPlatformIDs(0, null, 0, intBuffer, 0); - checkForError(ret); - out.println("#platforms: "+intBuffer[0]); - - long[] platformId = new long[intBuffer[0]]; - ret = cl.clGetPlatformIDs(platformId.length, platformId, 0, null, 0); - checkForError(ret); - - 
// print platform info - long[] longBuffer = new long[1]; - ByteBuffer bb = ByteBuffer.allocate(128); - bb.order(ByteOrder.nativeOrder()); - - for (int i = 0; i < platformId.length; i++) { - - long platform = platformId[i]; - out.println("platform id: "+platform); - - ret = cl.clGetPlatformInfo(platform, CL.CL_PLATFORM_PROFILE, bb.capacity(), bb, longBuffer, 0); - checkForError(ret); - out.println(" profile: " + clString2JavaString(bb.array(), (int)longBuffer[0])); - - ret = cl.clGetPlatformInfo(platform, CL.CL_PLATFORM_VERSION, bb.capacity(), bb, longBuffer, 0); - checkForError(ret); - out.println(" version: " + clString2JavaString(bb.array(), (int)longBuffer[0])); - - ret = cl.clGetPlatformInfo(platform, CL.CL_PLATFORM_NAME, bb.capacity(), bb, longBuffer, 0); - checkForError(ret); - out.println(" name: " + clString2JavaString(bb.array(), (int)longBuffer[0])); - - ret = cl.clGetPlatformInfo(platform, CL.CL_PLATFORM_VENDOR, bb.capacity(), bb, longBuffer, 0); - checkForError(ret); - out.println(" vendor: " + clString2JavaString(bb.array(), (int)longBuffer[0])); - - //find all devices - ret = cl.clGetDeviceIDs(platform, CL.CL_DEVICE_TYPE_ALL, 0, null, 0, intBuffer, 0); - checkForError(ret); - out.println("#devices: "+intBuffer[0]); - - long[] devices = new long[intBuffer[0]]; - ret = cl.clGetDeviceIDs(platform, CL.CL_DEVICE_TYPE_ALL, devices.length, devices, 0, null, 0); - - //print device info - for (int j = 0; j < devices.length; j++) { - long device = devices[j]; - ret = cl.clGetDeviceInfo(device, CL.CL_DEVICE_NAME, bb.capacity(), bb, longBuffer, 0); - checkForError(ret); - out.println(" device: " + clString2JavaString(bb.array(), (int)longBuffer[0])); - - ret = cl.clGetDeviceInfo(device, CL.CL_DEVICE_TYPE, bb.capacity(), bb, longBuffer, 0); - checkForError(ret); - out.println(" type: " + CLDevice.Type.valueOf(bb.get())); - bb.rewind(); - - } - - } -*/ - } -/* - @Test - public void createContextTest() { - - out.println(" - - - createContextTest - - - "); - - CL cl 
= CLPlatform.getLowLevelBinding(); - - int[] intArray = new int[1]; - // find all available OpenCL platforms - int ret = cl.clGetPlatformIDs(0, null, 0, intArray, 0); - checkForError(ret); - out.println("#platforms: "+intArray[0]); - - long[] longArray = new long[intArray[0]]; - ret = cl.clGetPlatformIDs(longArray.length, longArray, 0, null, 0); - checkForError(ret); - - long platform = longArray[0]; - - //find all devices - ret = cl.clGetDeviceIDs(platform, CL.CL_DEVICE_TYPE_ALL, 0, null, 0, intArray, 0); - checkForError(ret); - out.println("#devices: "+intArray[0]); - - long[] devices = new long[intArray[0]]; - ret = cl.clGetDeviceIDs(platform, CL.CL_DEVICE_TYPE_ALL, devices.length, devices, 0, null, 0); - - IntBuffer intBuffer = IntBuffer.allocate(1); - long context = cl.clCreateContext(null, devices, null, null, intBuffer); - checkError("on clCreateContext", intBuffer.get()); - - //get number of devices - ret = cl.clGetContextInfo(context, CL.CL_CONTEXT_DEVICES, 0, null, longArray, 0); - checkError("on clGetContextInfo", ret); - - int sizeofLong = (CPU.is32Bit()?4:8); - out.println("context created with " + longArray[0]/sizeofLong + " devices"); - - //check if equal - assertEquals("context was not created on all devices specified", devices.length, longArray[0]/sizeofLong); - - ret = cl.clReleaseContext(context); - checkError("on clReleaseContext", ret); - } - - - @Test - public void lowLevelVectorAddTest() { - - out.println(" - - - lowLevelTest2; VectorAdd kernel - - - "); - -// CreateContextCallback cb = new CreateContextCallback() { -// @Override -// public void createContextCallback(String errinfo, ByteBuffer private_info, long cb, Object user_data) { -// throw new RuntimeException("not yet implemented..."); -// } -// }; - - long[] longArray = new long[1]; - ByteBuffer bb = ByteBuffer.allocate(4096).order(ByteOrder.nativeOrder()); - - CL cl = CLPlatform.getLowLevelBinding(); - - int ret = CL.CL_SUCCESS; - int[] intArray = new int[1]; - - //TODO properties 
not allowed to be null - long context = cl.clCreateContextFromType(null, CL.CL_DEVICE_TYPE_ALL, null, null, null); - out.println("context handle: "+context); - - ret = cl.clGetContextInfo(context, CL.CL_CONTEXT_DEVICES, 0, null, longArray, 0); - checkError("on clGetContextInfo", ret); - - int sizeofLong = (CPU.is32Bit()?4:8); - out.println("context created with " + longArray[0]/sizeofLong + " devices"); - - ret = cl.clGetContextInfo(context, CL.CL_CONTEXT_DEVICES, bb.capacity(), bb, null, 0); - checkError("on clGetContextInfo", ret); - - for (int i = 0; i < longArray[0]/sizeofLong; i++) { - out.println("device id: "+bb.getLong()); - } - - long firstDeviceID = bb.getLong(0); - - // Create a command-queue - long commandQueue = cl.clCreateCommandQueue(context, firstDeviceID, 0, intArray, 0); - checkError("on clCreateCommandQueue", intArray[0]); - - int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) - int localWorkSize = 256; // set and log Global and Local work size dimensions - int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize - - out.println("allocateing buffers of size: "+globalWorkSize); - - ByteBuffer srcA = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - ByteBuffer srcB = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - ByteBuffer dest = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - - // Allocate the OpenCL buffer memory objects for source and result on the device GMEM - long devSrcA = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, srcA.capacity(), null, intArray, 0); - checkError("on clCreateBuffer", intArray[0]); - long devSrcB = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, srcB.capacity(), null, intArray, 0); - checkError("on clCreateBuffer", intArray[0]); - long devDst = cl.clCreateBuffer(context, CL.CL_MEM_WRITE_ONLY, dest.capacity(), null, intArray, 0); - checkError("on clCreateBuffer", intArray[0]); - - - // Create the program - long 
program = cl.clCreateProgramWithSource(context, 1, new String[] {programSource}, new long[]{programSource.length()}, 0, intArray, 0); - checkError("on clCreateProgramWithSource", intArray[0]); - - // Build the program - ret = cl.clBuildProgram(program, null, null, null, null); - checkError("on clBuildProgram", ret); - - // Read program infos - bb.rewind(); - ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_NUM_DEVICES, bb.capacity(), bb, null, 0); - checkError("on clGetProgramInfo1", ret); - out.println("program associated with "+bb.getInt(0)+" device(s)"); - - ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_SOURCE, 0, null, longArray, 0); - checkError("on clGetProgramInfo CL_PROGRAM_SOURCE", ret); - out.println("program source length (cl): "+longArray[0]); - out.println("program source length (java): "+programSource.length()); - - bb.rewind(); - ret = cl.clGetProgramInfo(program, CL.CL_PROGRAM_SOURCE, bb.capacity(), bb, null, 0); - checkError("on clGetProgramInfo CL_PROGRAM_SOURCE", ret); - out.println("program source:\n" + clString2JavaString(bb.array(), (int)longArray[0])); - - // Check program status - Arrays.fill(longArray, 42); - bb.rewind(); - ret = cl.clGetProgramBuildInfo(program, firstDeviceID, CL.CL_PROGRAM_BUILD_STATUS, bb.capacity(), bb, null, 0); - checkError("on clGetProgramBuildInfo1", ret); - - out.println("program build status: " + CLProgram.Status.valueOf(bb.getInt(0))); - assertEquals("build status", CL.CL_BUILD_SUCCESS, bb.getInt(0)); - - // Read build log - ret = cl.clGetProgramBuildInfo(program, firstDeviceID, CL.CL_PROGRAM_BUILD_LOG, 0, null, longArray, 0); - checkError("on clGetProgramBuildInfo2", ret); - out.println("program log length: " + longArray[0]); - - bb.rewind(); - ret = cl.clGetProgramBuildInfo(program, firstDeviceID, CL.CL_PROGRAM_BUILD_LOG, bb.capacity(), bb, null, 0); - checkError("on clGetProgramBuildInfo3", ret); - out.println("log:\n" + clString2JavaString(bb.array(), (int)longArray[0])); - - // Create the kernel - 
Arrays.fill(intArray, 42); - long kernel = cl.clCreateKernel(program, "VectorAdd", intArray, 0); - checkError("on clCreateKernel", intArray[0]); - -// srcA.limit(elementCount*SIZEOF_FLOAT); -// srcB.limit(elementCount*SIZEOF_FLOAT); - - fillBuffer(srcA, 23456); - fillBuffer(srcB, 46987); - - // Set the Argument values - ret = cl.clSetKernelArg(kernel, 0, CPU.is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devSrcA)); checkError("on clSetKernelArg0", ret); - ret = cl.clSetKernelArg(kernel, 1, CPU.is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devSrcB)); checkError("on clSetKernelArg1", ret); - ret = cl.clSetKernelArg(kernel, 2, CPU.is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devDst)); checkError("on clSetKernelArg2", ret); - ret = cl.clSetKernelArg(kernel, 3, SIZEOF_INT, wrap(elementCount)); checkError("on clSetKernelArg3", ret); - - out.println("used device memory: "+ (srcA.capacity()+srcB.capacity()+dest.capacity())/1000000 +"MB"); - - // Asynchronous write of data to GPU device - ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcA, CL.CL_FALSE, 0, srcA.capacity(), srcA, 0, null, null); - checkError("on clEnqueueWriteBuffer", ret); - ret = cl.clEnqueueWriteBuffer(commandQueue, devSrcB, CL.CL_FALSE, 0, srcB.capacity(), srcB, 0, null, null); - checkError("on clEnqueueWriteBuffer", ret); - - // Launch kernel - PointerBuffer gWS = PointerBuffer.allocateDirect(1).put(globalWorkSize).rewind(); - PointerBuffer lWS = PointerBuffer.allocateDirect(1).put(localWorkSize).rewind(); - ret = cl.clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, gWS, lWS, 0, null, null); - checkError("on clEnqueueNDRangeKernel", ret); - - // Synchronous/blocking read of results - ret = cl.clEnqueueReadBuffer(commandQueue, devDst, CL.CL_TRUE, 0, dest.capacity(), dest, 0, null, null); - checkError("on clEnqueueReadBuffer", ret); - - out.println("a+b=c result snapshot: "); - for(int i = 0; i < 10; i++) - out.print(dest.getInt()+", "); - out.println("...; "+dest.remaining()/SIZEOF_INT + " more"); - - - // cleanup - ret 
= cl.clReleaseCommandQueue(commandQueue); - checkError("on clReleaseCommandQueue", ret); - - ret = cl.clReleaseMemObject(devSrcA); - checkError("on clReleaseMemObject", ret); - ret = cl.clReleaseMemObject(devSrcB); - checkError("on clReleaseMemObject", ret); - ret = cl.clReleaseMemObject(devDst); - checkError("on clReleaseMemObject", ret); - - ret = cl.clReleaseProgram(program); - checkError("on clReleaseProgram", ret); - - ret = cl.clReleaseKernel(kernel); - checkError("on clReleaseKernel", ret); - - ret = cl.clUnloadCompiler(); - checkError("on clUnloadCompiler", ret); - - ret = cl.clReleaseContext(context); - checkError("on clReleaseContext", ret); - - } - - @Test - public void loadTest() { - //for memory leak detection; e.g watch out for "out of host memory" errors - out.println(" - - - loadTest - - - "); - for(int i = 0; i < 100; i++) { - out.println("###iteration "+i); - lowLevelVectorAddTest(); - } - } -*/ - private ByteBuffer wrap(long value) { - return (ByteBuffer) newDirectByteBuffer(8).putLong(value).rewind(); - } - - private final void checkForError(int ret) { - this.checkError("", ret); - } - - private final void checkError(String msg, int ret) { - if(ret != CL.CL_SUCCESS) - throw CLException.newException(ret, msg); - } - - -} \ No newline at end of file diff --git a/test/com/mbien/opencl/TestUtils.java b/test/com/mbien/opencl/TestUtils.java deleted file mode 100644 index 803474e5..00000000 --- a/test/com/mbien/opencl/TestUtils.java +++ /dev/null @@ -1,52 +0,0 @@ -package com.mbien.opencl; - -import java.nio.ByteBuffer; -import java.util.Random; - -import static java.lang.System.*; -import static org.junit.Assert.*; - -/** - * @author Michael Bien - */ -public class TestUtils { - - //decrease this value on systems with few memory. 
- final static int ONE_MB = 1048576; - - final static int NUM_ELEMENTS = 10000000; - - public static final void fillBuffer(ByteBuffer buffer, int seed) { - - Random rnd = new Random(seed); - - while(buffer.remaining() != 0) - buffer.putInt(rnd.nextInt()); - - buffer.rewind(); - } - - public static final int roundUp(int groupSize, int globalSize) { - int r = globalSize % groupSize; - if (r == 0) { - return globalSize; - } else { - return globalSize + groupSize - r; - } - } - - public static final void checkIfEqual(ByteBuffer a, ByteBuffer b, int elements) { - for(int i = 0; i < elements; i++) { - int aVal = a.getInt(); - int bVal = b.getInt(); - if(aVal != bVal) { - out.println("a: "+aVal); - out.println("b: "+bVal); - out.println("position: "+a.position()); - fail("a!=b"); - } - } - a.rewind(); - b.rewind(); - } -} diff --git a/test/com/mbien/opencl/testkernels.cl b/test/com/mbien/opencl/testkernels.cl deleted file mode 100644 index ec7e8bf6..00000000 --- a/test/com/mbien/opencl/testkernels.cl +++ /dev/null @@ -1,22 +0,0 @@ - - // OpenCL Kernel Function for element by element vector addition - kernel void VectorAddGM(global const int* a, global const int* b, global int* c, int iNumElements) { - // get index into global data array - int iGID = get_global_id(0); - // bound check (equivalent to the limit on a 'for' loop for standard/serial C code - if (iGID >= iNumElements) { - return; - } - // add the vector elements - c[iGID] = a[iGID] + b[iGID]; - } - - kernel void Test(global const int* a, global const int* b, global int* c, int iNumElements) { - // get index into global data array - int iGID = get_global_id(0); - // bound check (equivalent to the limit on a 'for' loop for standard/serial C code - if (iGID >= iNumElements) { - return; - } - c[iGID] = iGID; - } -- cgit v1.2.3