diff options
-rw-r--r-- | src/com/jogamp/opencl/CLCommandQueue.java | 24 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLKernel.java | 9 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLWork.java | 260 | ||||
-rw-r--r-- | test/com/jogamp/opencl/CLProgramTest.java | 36 |
4 files changed, 329 insertions, 0 deletions
diff --git a/src/com/jogamp/opencl/CLCommandQueue.java b/src/com/jogamp/opencl/CLCommandQueue.java index 4878b4fa..b5ea41b2 100644 --- a/src/com/jogamp/opencl/CLCommandQueue.java +++ b/src/com/jogamp/opencl/CLCommandQueue.java @@ -1628,6 +1628,30 @@ public class CLCommandQueue extends CLObjectResource { } /** + * Calls {@native clEnqueueNDRangeKernel} with the kernel, dimension, work offset, work size and group size of the supplied work. + */ + public CLCommandQueue putWork(CLWork work) { + this.putNDRangeKernel(work.getKernel(), work.getDimension(), work.getWorkOffset(), work.getWorkSize(), work.getGroupSize(), null, null); + return this; + } + + /** + * Calls {@native clEnqueueNDRangeKernel} with the parameters of the supplied work, forwarding the supplied event list. + */ + public CLCommandQueue putWork(CLWork work, CLEventList events) { + this.putNDRangeKernel(work.getKernel(), work.getDimension(), work.getWorkOffset(), work.getWorkSize(), work.getGroupSize(), null, events); + return this; + } + + /** + * Calls {@native clEnqueueNDRangeKernel} with the parameters of the supplied work, forwarding the supplied condition and event lists. + */ + public CLCommandQueue putWork(CLWork work, CLEventList condition, CLEventList events) { + this.putNDRangeKernel(work.getKernel(), work.getDimension(), work.getWorkOffset(), work.getWorkSize(), work.getGroupSize(), condition, events); + return this; + } + + /** * Calls {@native clEnqueueAcquireGLObjects}. */ public CLCommandQueue putAcquireGLObject(CLGLObject glObject) { diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java index 0f7df9b3..db1b0642 100644 --- a/src/com/jogamp/opencl/CLKernel.java +++ b/src/com/jogamp/opencl/CLKernel.java @@ -609,6 +609,15 @@ public class CLKernel extends CLObjectResource implements Cloneable { } /** + * Returns the preferred multiple of workgroup size for launch on the supplied device. + * This is a performance hint. 
+ * @since OpenCL 1.1 + */ + public long getPreferredWorkGroupSizeMultiple(CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE); + } + + /** * Returns the work-group size specified by the <code>__attribute__((reqd_work_group_size(X, Y, Z)))</code> qualifier in kernel sources. * If the work-group size is not specified using the above attribute qualifier <code>new long[]{(0, 0, 0)}</code> is returned. * The returned array has always three elements. diff --git a/src/com/jogamp/opencl/CLWork.java b/src/com/jogamp/opencl/CLWork.java new file mode 100644 index 00000000..786a2378 --- /dev/null +++ b/src/com/jogamp/opencl/CLWork.java @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2011, Michael Bien + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * Created on Monday, July 25 2011 20:51 + */ +package com.jogamp.opencl; + +import com.jogamp.common.nio.CachedBufferFactory; +import com.jogamp.common.nio.NativeSizeBuffer; + + +/** + * CLWork represents an N-dimensional range of work items using a specific {@link CLKernel}. + * + * @see #create1D(com.jogamp.opencl.CLKernel) + * @see #create2D(com.jogamp.opencl.CLKernel) + * @see #create3D(com.jogamp.opencl.CLKernel) + * + * @author Michael Bien + */ +public class CLWork { + + protected final int dimension; + protected final CLKernel kernel; + + protected final NativeSizeBuffer groupSize; + protected final NativeSizeBuffer workSize; + protected final NativeSizeBuffer workOffset; + + protected CLWork(CLKernel kernel, int dimension) { + + int size = dimension * NativeSizeBuffer.elementSize(); + + CachedBufferFactory factory = CachedBufferFactory.create(3*size, true); + this.workOffset = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size)); + this.workSize = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size)); + this.groupSize = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size)); + + this.dimension = dimension; + this.kernel = kernel; + } + + /** + * Creates work representing a 1D range of work items using the supplied kernel. + */ + public static CLWork1D create1D(CLKernel kernel) { + return new CLWork1D(kernel); + } + + /** + * Creates work representing a 2D range of work items using the supplied kernel. 
+ */ + public static CLWork2D create2D(CLKernel kernel) { + return new CLWork2D(kernel); + } + + /** + * Creates work representing a 3D range of work items using the supplied kernel. + */ + public static CLWork3D create3D(CLKernel kernel) { + return new CLWork3D(kernel); + } + + public static long roundUp(long value, long multiple) { + long remaining = value % multiple; + if (remaining == 0) { + return value; + } else { + return value + multiple - remaining; + } + } + + protected void checkSize(long worksize, long groupsize) { + if(groupsize != 0 && worksize%groupsize != 0) { + throw new IllegalArgumentException("worksize must be a multiple of groupsize"); + } + } + + /** + * Optimizes the work sizes by rounding up to the next device-specific preferred multiple. + * This optimization can break kernels if they were designed to be executed with a fixed + * group or worksize. This method will do nothing if no group size has been specified. A dimension whose rounded group size would exceed the device's maximum work item size for that dimension is left unchanged. + * @since OpenCL 1.1 + */ + public CLWork optimizeFor(CLDevice device) { + + long multiple = kernel.getPreferredWorkGroupSizeMultiple(device); + int[] maxWorkItemSizes = device.getMaxWorkItemSizes(); + + for (int i = 0; i < dimension; i++) { + long group = groupSize.get(i); + if(group > 0) { + group = roundUp(group, multiple); + if(group <= maxWorkItemSizes[i]) { + groupSize.put(i, group); + + long work = workSize.get(i); + workSize.put(i, roundUp(work, group)); + } + //else {can not optimize} + } + } + return this; + } + + public CLKernel getKernel() { + return kernel; + } + + public int getDimension() { + return dimension; + } + + public NativeSizeBuffer getGroupSize() { + return groupSize; + } + + public NativeSizeBuffer getWorkOffset() { + return workOffset; + } + + public NativeSizeBuffer getWorkSize() { + return workSize; + } + + /** + * 1 dimensional {@link CLWork}. 
+ * @author Michael Bien + */ + public static class CLWork1D extends CLWork { + + private CLWork1D(CLKernel kernel) { + super(kernel, 1); + } + + public CLWork1D setWorkOffset(long offset) { + workOffset.put(0, offset); + return this; + } + + public CLWork1D setWorkSize(long worksize) { + setWorkSize(worksize, 0); + return this; + } + + public CLWork1D setWorkSize(long worksize, long groupsize) { + checkSize(worksize, groupsize); + workSize.put(0, worksize); + groupSize.put(0, groupsize); + return this; + } + + @Override + public CLWork1D optimizeFor(CLDevice device) { + super.optimizeFor(device); + return this; + } + + } + + + /** + * 2 dimensional {@link CLWork}. + * @author Michael Bien + */ + public static class CLWork2D extends CLWork { + + private CLWork2D(CLKernel kernel) { + super(kernel, 2); + } + + public CLWork2D setWorkOffset(long offsetX, long offsetY) { + workOffset.put(0, offsetX).put(1, offsetY); + return this; + } + + public CLWork2D setWorkSize(long worksizeX, long worksizeY) { + setWorkSize(worksizeX, worksizeY, 0, 0); + return this; + } + + public CLWork2D setWorkSize(long worksizeX, long worksizeY, long groupsizeX, long groupsizeY) { + checkSize(worksizeX, groupsizeX); + checkSize(worksizeY, groupsizeY); + workSize.put(0, worksizeX).put(1, worksizeY); + groupSize.put(0, groupsizeX).put(1, groupsizeY); + return this; + } + + @Override + public CLWork2D optimizeFor(CLDevice device) { + super.optimizeFor(device); + return this; + } + + } + + /** + * 3 dimensional {@link CLWork}. 
+ * @author Michael Bien + */ + public static class CLWork3D extends CLWork { + + private CLWork3D(CLKernel kernel) { + super(kernel, 3); + } + + public CLWork3D setWorkOffset(long offsetX, long offsetY, long offsetZ) { + workOffset.put(0, offsetX).put(1, offsetY).put(2, offsetZ); + return this; + } + + public CLWork3D setWorkSize(long worksizeX, long worksizeY, long worksizeZ) { + setWorkSize(worksizeX, worksizeY, worksizeZ, 0, 0, 0); + return this; + } + + public CLWork3D setWorkSize(long worksizeX, long worksizeY, long worksizeZ, long groupsizeX, long groupsizeY, long groupsizeZ) { + checkSize(worksizeX, groupsizeX); + checkSize(worksizeY, groupsizeY); + checkSize(worksizeZ, groupsizeZ); + workSize.put(0, worksizeX).put(1, worksizeY).put(2, worksizeZ); + groupSize.put(0, groupsizeX).put(1, groupsizeY).put(2, groupsizeZ); + return this; + } + + @Override + public CLWork3D optimizeFor(CLDevice device) { + super.optimizeFor(device); + return this; + } + + } + + +} diff --git a/test/com/jogamp/opencl/CLProgramTest.java b/test/com/jogamp/opencl/CLProgramTest.java index 3c8ef8ba..47eb42e0 100644 --- a/test/com/jogamp/opencl/CLProgramTest.java +++ b/test/com/jogamp/opencl/CLProgramTest.java @@ -29,6 +29,7 @@ package com.jogamp.opencl; import com.jogamp.common.nio.Buffers; +import com.jogamp.opencl.CLWork.CLWork1D; import com.jogamp.opencl.util.CLBuildConfiguration; import com.jogamp.opencl.util.CLProgramConfiguration; import com.jogamp.opencl.CLProgram.Status; @@ -41,6 +42,7 @@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.nio.FloatBuffer; +import java.nio.IntBuffer; import java.util.Map; import java.util.Random; import java.util.concurrent.CountDownLatch; @@ -51,6 +53,8 @@ import org.junit.rules.TemporaryFolder; import static org.junit.Assert.*; import static java.lang.System.*; import static com.jogamp.opencl.CLProgram.CompilerOptions.*; +import static com.jogamp.opencl.util.CLPlatformFilters.*; +import 
static com.jogamp.opencl.CLVersion.*; /** * @@ -416,6 +420,38 @@ public class CLProgramTest { } + @Test + public void workTest() throws IOException { + + CLContext context = CLContext.create(CLPlatform.getDefault(version(CL_1_1))); + + try{ + CLProgram program = context.createProgram(CLProgramTest.class.getResourceAsStream("testkernels.cl")).build(); + + CLDevice device = context.getMaxFlopsDevice(); + out.println(device); + CLCommandQueue queue = device.createCommandQueue(); + + CLBuffer<IntBuffer> buffer = context.createIntBuffer(20); + + CLWork1D work = CLWork.create1D(program.createCLKernel("add")); + work.getKernel().setArgs(buffer, 5, buffer.getNIOCapacity()); + work.setWorkSize(20, 1).optimizeFor(device); + + queue.putWriteBuffer(buffer, false) + .putWork(work) + .putReadBuffer(buffer, true); + + while(buffer.getBuffer().hasRemaining()) { + assertEquals(5, buffer.getBuffer().get()); + } + + }finally{ + context.release(); + } + + } + // @Test public void loadTest() throws IOException, ClassNotFoundException, InterruptedException { for(int i = 0; i < 100; i++) { |