/* * Copyright 2010 JogAmp Community. All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this list of * conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, this list * of conditions and the following disclaimer in the documentation and/or other materials * provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY JogAmp Community ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JogAmp Community OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation are those of the * authors and should not be interpreted as representing official policies, either expressed * or implied, of JogAmp Community. */ package com.jogamp.opencl; import org.junit.Rule; import org.junit.rules.MethodRule; import org.junit.rules.Timeout; import java.util.concurrent.CountDownLatch; import com.jogamp.opencl.util.MultiQueueBarrier; import com.jogamp.opencl.CLCommandQueue.Mode; import com.jogamp.opencl.CLMemory.Mem; import com.jogamp.opencl.util.CLDeviceFilters; import com.jogamp.opencl.util.CLPlatformFilters; import com.jogamp.opencl.llb.CL; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.util.EnumSet; import java.util.concurrent.TimeUnit; import org.junit.Test; import static org.junit.Assert.*; import static java.lang.System.*; import static com.jogamp.opencl.TestUtils.*; import static com.jogamp.opencl.CLEvent.*; import static com.jogamp.opencl.CLVersion.*; import static com.jogamp.common.nio.Buffers.*; import static com.jogamp.opencl.util.CLUtil.*; import static com.jogamp.opencl.CLCommandQueue.Mode.*; /** * * @author Michael Bien */ public class CLCommandQueueTest { @Rule public MethodRule methodTimeout= new Timeout(20000); @Test public void enumsTest() { //CLCommandQueueEnums EnumSet queueMode = Mode.valuesOf(CL.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL.CL_QUEUE_PROFILING_ENABLE); assertTrue(queueMode.contains(OUT_OF_ORDER_MODE)); assertTrue(queueMode.contains(PROFILING_MODE)); assertNotNull(Mode.valuesOf(0)); assertEquals(0, Mode.valuesOf(0).size()); for (Mode mode : Mode.values()) { assertEquals(mode, Mode.valueOf(mode.QUEUE_MODE)); } // CLEvent enums for (ProfilingCommand cmd : ProfilingCommand.values()) { assertEquals(cmd, ProfilingCommand.valueOf(cmd.COMMAND)); } for (CommandType type : CommandType.values()) { assertEquals(type, CommandType.valueOf(type.TYPE)); } for (ExecutionStatus status : ExecutionStatus.values()) { assertEquals(status, ExecutionStatus.valueOf(status.STATUS)); } } @Test public void eventsTest() throws IOException { out.println(" - - - event synchronization test - - - "); CLContext context = CLContext.create(); try{ CLDevice device = context.getDevices()[0]; int groupSize = device.getMaxWorkItemSizes()[0]; final int elements = roundUp(ONE_MB / SIZEOF_INT * 5, groupSize); // 5MB per buffer CLBuffer clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); CLBuffer clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); CLBuffer clBufferD = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); fillBuffer(clBufferA.buffer, 12345); fillBuffer(clBufferB.buffer, 67890); CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); CLKernel vectorAddKernel = program.createCLKernel("VectorAddGM").setArg(3, elements); CLCommandQueue queue = device.createCommandQueue(); out.println(queue); final CLEventList events = new CLEventList(2); out.println(events); assertEquals(0, events.size()); queue.putWriteBuffer(clBufferA, false, events) // write A .putWriteBuffer(clBufferB, false, events);// write B out.println(events); assertEquals(2, events.size()); queue.putWaitForEvents(events, true); events.release(); assertEquals(0, events.size()); vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferD); // D = A+B queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); assertEquals(2, events.size()); queue.putWaitForEvent(events, 0, true) .putWaitForEvent(events, 1, true); events.release(); queue.putReadBuffer(clBufferC, false, events) .putReadBuffer(clBufferD, false, events); queue.putWaitForEvents(events, true); events.release(); checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements); out.println("results are valid"); }finally{ context.release(); } } @Test public void eventConditionsTest() throws IOException { out.println(" - - - event conditions test - - - "); CLPlatform platform = CLPlatform.getDefault(CLPlatformFilters.queueMode(OUT_OF_ORDER_MODE)); CLDevice device = null; // we can still test this with in-order queues if(platform == null) { device = CLPlatform.getDefault().getMaxFlopsDevice(); }else{ device = platform.getMaxFlopsDevice(CLDeviceFilters.queueMode(OUT_OF_ORDER_MODE)); } CLContext context = CLContext.create(device); try{ CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); CLBuffer buffer = context.createBuffer(newDirectIntBuffer(new int[]{ 1,1,1, 1,1,1, 1,1,1 })); int elements = buffer.getNIOCapacity(); CLCommandQueue queue; if(device.getQueueProperties().contains(OUT_OF_ORDER_MODE)) { queue = device.createCommandQueue(OUT_OF_ORDER_MODE); }else{ queue = device.createCommandQueue(); } // simulate in-order queue by accumulating events of prior commands CLEventList events = new CLEventList(3); // (1+1)*2 = 4; conditions enforce propper order CLKernel addKernel = program.createCLKernel("add").putArg(buffer).putArg(1).putArg(elements); CLKernel mulKernel = program.createCLKernel("mul").putArg(buffer).putArg(2).putArg(elements); queue.putWriteBuffer(buffer, false, events); queue.put1DRangeKernel(addKernel, 0, elements, 1, events, events); queue.put1DRangeKernel(mulKernel, 0, elements, 1, events, events); queue.putReadBuffer(buffer, false, events, null); queue.finish(); events.release(); for (int i = 0; i < elements; i++) { assertEquals(4, buffer.getBuffer().get(i)); } }finally{ context.release(); } } @Test public void profilingEventsTest() throws IOException { out.println(" - - - event synchronization test - - - "); CLContext context = CLContext.create(); try { CLDevice device = context.getDevices()[0]; int groupSize = device.getMaxWorkItemSizes()[0]; final int elements = roundUp(ONE_MB / SIZEOF_INT * 5, groupSize); // 5MB per buffer CLBuffer clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); CLBuffer clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); fillBuffer(clBufferA.buffer, 12345); fillBuffer(clBufferB.buffer, 67890); CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); CLKernel vectorAddKernel = program.createCLKernel("VectorAddGM").setArg(3, elements); CLCommandQueue queue = device.createCommandQueue(PROFILING_MODE); out.println(queue); queue.putWriteBuffer(clBufferA, true) // write A .putWriteBuffer(clBufferB, true);// write B final CLEventList events = new CLEventList(1); assertEquals(0, events.size()); vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); assertEquals(1, events.size()); CLEvent probe = events.getEvent(0); out.println(probe); queue.putWaitForEvents(events, true); assertEquals(CLEvent.ExecutionStatus.COMPLETE, probe.getStatus()); out.println(probe); long time = probe.getProfilingInfo(CLEvent.ProfilingCommand.END) - probe.getProfilingInfo(CLEvent.ProfilingCommand.START); out.println("time: "+time); assertTrue(time > 0); events.release(); }finally{ context.release(); } } @Test public void customEventsTest() throws IOException, InterruptedException { out.println(" - - - user events test - - - "); CLPlatform[] platforms = CLPlatform.listCLPlatforms(); CLPlatform theChosenOne = platforms[0]; for (CLPlatform platform : platforms) { if(platform.isAtLeast(CL_1_1)) { theChosenOne = platform; break; } } if(!theChosenOne.isAtLeast(CL_1_1)) { out.println("test disabled, required CLVersion: "+CL_1_1+" available: "+theChosenOne.getVersion()); return; } final CLContext context = CLContext.create(theChosenOne); try{ CLDevice device = context.getDevices()[0]; int groupSize = device.getMaxWorkItemSizes()[0]; final int elements = roundUp(ONE_MB / SIZEOF_INT * 5, groupSize); // 5MB per buffer CLBuffer clBufferA = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); CLBuffer clBufferB = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); fillBuffer(clBufferA.buffer, 12345); fillBuffer(clBufferB.buffer, 67890); CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); CLKernel vectorAddKernel = program.createCLKernel("VectorAddGM").setArg(3, elements); CLCommandQueue queue = device.createCommandQueue(); queue.putWriteBuffer(clBufferA, true) // write A .putWriteBuffer(clBufferB, true);// write B vectorAddKernel.setArgs(clBufferA, clBufferB, clBufferC); // C = A+B // the interesting part... CLUserEvent condition = CLUserEvent.create(context); assertEquals(CommandType.USER, condition.getType()); assertEquals(ExecutionStatus.SUBMITTED, condition.getStatus()); out.println(condition); final CLEventList conditions = new CLEventList(condition); final CLEventList events = new CLEventList(1); assertEquals(1, conditions.size()); assertEquals(1, conditions.capacity()); assertEquals(0, events.size()); assertEquals(1, events.capacity()); queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, conditions, events); assertEquals(1, events.size()); Thread.sleep(1000); final CLEvent status = events.getEvent(0); assertEquals(ExecutionStatus.QUEUED, status.getStatus()); condition.setComplete(); assertTrue(condition.isComplete()); queue.finish(); assertTrue(status.isComplete()); }finally{ context.release(); } } @Test public void eventCallbackTest() throws InterruptedException { out.println(" - - - event callback test - - - "); CLPlatform platform = CLPlatform.getDefault(); if(!platform.isAtLeast(CL_1_1)) { out.println("test disabled, required CLVersion: "+CL_1_1+" available: "+platform.getVersion()); return; } CLContext context = CLContext.create(); try{ final CLUserEvent customEvent = CLUserEvent.create(context); final CountDownLatch countdown = new CountDownLatch(1); customEvent.registerCallback(new CLEventListener() { @Override public void eventStateChanged(CLEvent event, int status) { out.println("event received: "+event); assertEquals(event, customEvent); countdown.countDown(); } }); customEvent.setStatus(ExecutionStatus.COMPLETE); countdown.await(2, TimeUnit.SECONDS); assertEquals(countdown.getCount(), 0); customEvent.release(); }finally{ context.release(); } } @Test public void concurrencyTest() throws IOException, InterruptedException { out.println(" - - - QueueBarrier test - - - "); final int elements = ONE_MB / SIZEOF_INT * 10; // 20MB per buffer CLContext context = CLContext.create(); try{ CLDevice[] devices = context.getDevices(); // ignore this test if we can't test in parallel if (devices.length < 2) { out.println("aborting test... need at least 2 devices"); return; } final CLBuffer clBufferC = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); final CLBuffer clBufferD = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); final CLBuffer clBufferA1 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); final CLBuffer clBufferB1 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); final CLBuffer clBufferA2 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); final CLBuffer clBufferB2 = context.createByteBuffer(elements * SIZEOF_INT, Mem.READ_ONLY); CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); //two independent kernel instances final CLKernel vectorAddKernel1 = program.createCLKernel("VectorAddGM").setArg(3, elements); final CLKernel vectorAddKernel2 = program.createCLKernel("VectorAddGM").setArg(3, elements); final CLCommandQueue queue1 = devices[0].createCommandQueue(); final CLCommandQueue queue2 = devices[1].createCommandQueue(); out.println(queue1); out.println(queue2); fillBuffer(clBufferC.buffer, 12345); final MultiQueueBarrier barrier = new MultiQueueBarrier(2); Thread thread1 = new Thread("C") { @Override public void run() { int groupSize = queue1.getDevice().getMaxWorkItemSizes()[0]; fillBuffer(clBufferA1.buffer, 12345); fillBuffer(clBufferB1.buffer, 67890); // System.out.println("C buffer"); queue1.putWriteBuffer(clBufferA1, false) // write A .putWriteBuffer(clBufferB1, false); // write B // System.out.println("C args"); vectorAddKernel1.setArgs(clBufferA1, clBufferB1, clBufferC); // C = A+B // System.out.println("C kernels"); CLEventList events1 = new CLEventList(2); queue1.put1DRangeKernel(vectorAddKernel1, 0, elements, groupSize, events1) .putReadBuffer(clBufferC, false, events1); barrier.waitFor(queue1, events1); } }; Thread thread2 = new Thread("D") { @Override public void run() { int groupSize = queue2.getDevice().getMaxWorkItemSizes()[0]; fillBuffer(clBufferA2.buffer, 12345); fillBuffer(clBufferB2.buffer, 67890); // System.out.println("D buffer"); queue2.putWriteBuffer(clBufferA2, false) // write A .putWriteBuffer(clBufferB2, false); // write B // System.out.println("D args"); vectorAddKernel2.setArgs(clBufferA2, clBufferB2, clBufferD); // D = A+B // System.out.println("D kernels"); CLEventList events2 = new CLEventList(2); queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, groupSize, events2); queue2.putReadBuffer(clBufferD, false, events2); barrier.waitFor(queue2, events2); } }; out.println("starting threads"); thread1.start(); thread2.start(); assertTrue(barrier.await(5, TimeUnit.SECONDS)); out.println("done"); checkIfEqual(clBufferC.buffer, clBufferD.buffer, elements); out.println("results are valid"); }finally{ context.release(); } } }