summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/com/jogamp/opencl/CLCommandQueue.java24
-rw-r--r--src/com/jogamp/opencl/CLKernel.java9
-rw-r--r--src/com/jogamp/opencl/CLWork.java260
-rw-r--r--test/com/jogamp/opencl/CLProgramTest.java36
4 files changed, 329 insertions, 0 deletions
diff --git a/src/com/jogamp/opencl/CLCommandQueue.java b/src/com/jogamp/opencl/CLCommandQueue.java
index 4878b4fa..b5ea41b2 100644
--- a/src/com/jogamp/opencl/CLCommandQueue.java
+++ b/src/com/jogamp/opencl/CLCommandQueue.java
@@ -1628,6 +1628,30 @@ public class CLCommandQueue extends CLObjectResource {
}
/**
+ * Calls {@native clEnqueueNDRangeKernel}.
+ */
+    public CLCommandQueue putWork(CLWork work) {
+        // plain launch: neither a wait condition nor an event list is supplied
+        final CLKernel kernel = work.getKernel();
+        final int dimension = work.getDimension();
+        this.putNDRangeKernel(kernel, dimension,
+                work.getWorkOffset(), work.getWorkSize(), work.getGroupSize(),
+                null, null);
+        return this;
+    }
+
+    /**
+     * Calls {@native clEnqueueNDRangeKernel}.
+     * The kernel, dimension, offset, work size and group size are all taken from the supplied work.
+     * @param work the kernel and range to enqueue.
+     * @param events handed on as the last argument of the underlying putNDRangeKernel call.
+     * @return this queue.
+     */
+    public CLCommandQueue putWork(CLWork work, CLEventList events) {
+        // no wait condition for this overload
+        final CLKernel kernel = work.getKernel();
+        final int dimension = work.getDimension();
+        this.putNDRangeKernel(kernel, dimension,
+                work.getWorkOffset(), work.getWorkSize(), work.getGroupSize(),
+                null, events);
+        return this;
+    }
+
+    /**
+     * Calls {@native clEnqueueNDRangeKernel}.
+     * The kernel, dimension, offset, work size and group size are all taken from the supplied work.
+     * @param work the kernel and range to enqueue.
+     * @param condition handed on as the condition argument of the underlying putNDRangeKernel call.
+     * @param events handed on as the last argument of the underlying putNDRangeKernel call.
+     * @return this queue.
+     */
+    public CLCommandQueue putWork(CLWork work, CLEventList condition, CLEventList events) {
+        final CLKernel kernel = work.getKernel();
+        final int dimension = work.getDimension();
+        this.putNDRangeKernel(kernel, dimension,
+                work.getWorkOffset(), work.getWorkSize(), work.getGroupSize(),
+                condition, events);
+        return this;
+    }
+
+ /**
* Calls {@native clEnqueueAcquireGLObjects}.
*/
public CLCommandQueue putAcquireGLObject(CLGLObject glObject) {
diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java
index 0f7df9b3..db1b0642 100644
--- a/src/com/jogamp/opencl/CLKernel.java
+++ b/src/com/jogamp/opencl/CLKernel.java
@@ -609,6 +609,15 @@ public class CLKernel extends CLObjectResource implements Cloneable {
}
/**
+ * Returns the preferred multiple of workgroup size for launch on the supplied device.
+ * This is a performance hint.
+ * @since OpenCL 1.1
+ */
+    public long getPreferredWorkGroupSizeMultiple(CLDevice device) {
+        // looked up per device through the generic work-group info query of this kernel
+        final long preferredMultiple = getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
+        return preferredMultiple;
+    }
+
+ /**
* Returns the work-group size specified by the <code>__attribute__((reqd_work_group_size(X, Y, Z)))</code> qualifier in kernel sources.
* If the work-group size is not specified using the above attribute qualifier <code>new long[]{(0, 0, 0)}</code> is returned.
* The returned array has always three elements.
diff --git a/src/com/jogamp/opencl/CLWork.java b/src/com/jogamp/opencl/CLWork.java
new file mode 100644
index 00000000..786a2378
--- /dev/null
+++ b/src/com/jogamp/opencl/CLWork.java
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2011, Michael Bien
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Created on Monday, July 25 2011 20:51
+ */
+package com.jogamp.opencl;
+
+import com.jogamp.common.nio.CachedBufferFactory;
+import com.jogamp.common.nio.NativeSizeBuffer;
+
+
+/**
+ * CLWork represents a N dimensional range of work items using a specific {@link CLKernel}.
+ *
+ * @see #create1D(com.jogamp.opencl.CLKernel)
+ * @see #create2D(com.jogamp.opencl.CLKernel)
+ * @see #create3D(com.jogamp.opencl.CLKernel)
+ *
+ * @author Michael Bien
+ */
+public class CLWork {
+
+ /** Number of dimensions of this range; the constructor sizes all three buffers with it. */
+ protected final int dimension;
+ /** The kernel this work refers to, see {@link #getKernel()}. */
+ protected final CLKernel kernel;
+
+ /** Group size per dimension; a value of zero is treated as "not specified" by {@link #optimizeFor(CLDevice)}. */
+ protected final NativeSizeBuffer groupSize;
+ /** Work size per dimension. */
+ protected final NativeSizeBuffer workSize;
+ /** Work offset per dimension. */
+ protected final NativeSizeBuffer workOffset;
+
+ protected CLWork(CLKernel kernel, int dimension) {
+
+ int size = dimension * NativeSizeBuffer.elementSize();
+
+ CachedBufferFactory factory = CachedBufferFactory.create(3*size, true);
+ this.workOffset = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size));
+ this.workSize = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size));
+ this.groupSize = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size));
+
+ this.dimension = dimension;
+ this.kernel = kernel;
+ }
+
+    /**
+     * Factory for a one dimensional range of work items bound to the supplied kernel.
+     * @param kernel the kernel the returned work refers to.
+     * @return a new {@link CLWork1D}.
+     */
+    public static CLWork1D create1D(CLKernel kernel) {
+        final CLWork1D work = new CLWork1D(kernel);
+        return work;
+    }
+
+    /**
+     * Factory for a two dimensional range of work items bound to the supplied kernel.
+     * @param kernel the kernel the returned work refers to.
+     * @return a new {@link CLWork2D}.
+     */
+    public static CLWork2D create2D(CLKernel kernel) {
+        final CLWork2D work = new CLWork2D(kernel);
+        return work;
+    }
+
+    /**
+     * Factory for a three dimensional range of work items bound to the supplied kernel.
+     * @param kernel the kernel the returned work refers to.
+     * @return a new {@link CLWork3D}.
+     */
+    public static CLWork3D create3D(CLKernel kernel) {
+        final CLWork3D work = new CLWork3D(kernel);
+        return work;
+    }
+
+    /**
+     * Rounds <code>value</code> up to the nearest multiple of <code>multiple</code>.
+     * Values which already are a multiple are returned unchanged.
+     * @param value the value to round.
+     * @param multiple the step to round to, must be positive.
+     * @return the smallest multiple of <code>multiple</code> which is greater than or equal to <code>value</code>.
+     * @throws IllegalArgumentException if <code>multiple</code> is zero or negative.
+     */
+    public static long roundUp(long value, long multiple) {
+        if (multiple <= 0) {
+            // a zero multiple would otherwise surface as a bare "/ by zero" ArithmeticException
+            throw new IllegalArgumentException("multiple must be positive but was " + multiple);
+        }
+        long remaining = value % multiple;
+        if (remaining == 0) {
+            return value;
+        }
+        if (remaining < 0) {
+            // % takes the sign of the dividend: for a negative value the next multiple
+            // upwards is reached by removing the (negative) remainder only
+            return value - remaining;
+        }
+        return value + multiple - remaining;
+    }
+
+    /**
+     * Verifies that a work size can be combined with a group size.
+     * A group size of zero is always accepted.
+     * @param worksize the work size of one dimension.
+     * @param groupsize the group size of the same dimension.
+     * @throws IllegalArgumentException if groupsize is not zero and worksize is not a multiple of it.
+     */
+    protected void checkSize(long worksize, long groupsize) {
+        if (groupsize == 0) {
+            return; // nothing to validate against
+        }
+        if (worksize % groupsize == 0) {
+            return;
+        }
+        throw new IllegalArgumentException("worksize must be a multiple of groupsize");
+    }
+
+    /**
+     * Adjusts group and work sizes to the preferred work group size multiple of the supplied device.
+     * For every dimension with a group size greater than zero the group size is rounded up to the
+     * next preferred multiple. If the rounded value does not exceed the maximum work item size the
+     * device reports for that dimension, it is stored and the work size is rounded up to a multiple
+     * of the new group size. All other dimensions are left untouched, so nothing happens
+     * if no group size has been specified.
+     * This optimization can break kernels if they were designed to be executed with a fixed
+     * group or worksize.
+     * @param device the device to optimize for.
+     * @return this work.
+     * @since OpenCL 1.1
+     */
+    public CLWork optimizeFor(CLDevice device) {
+
+        final long preferred = kernel.getPreferredWorkGroupSizeMultiple(device);
+        final int[] limits = device.getMaxWorkItemSizes();
+
+        for (int dim = 0; dim < dimension; dim++) {
+
+            final long currentGroup = groupSize.get(dim);
+            if (currentGroup <= 0) {
+                continue; // no group size specified for this dimension
+            }
+
+            final long alignedGroup = roundUp(currentGroup, preferred);
+            if (alignedGroup > limits[dim]) {
+                continue; // can not optimize
+            }
+
+            groupSize.put(dim, alignedGroup);
+            workSize.put(dim, roundUp(workSize.get(dim), alignedGroup));
+        }
+        return this;
+    }
+
+    /**
+     * Returns the kernel this work was created with.
+     */
+    public CLKernel getKernel() {
+        return this.kernel;
+    }
+
+    /**
+     * Returns the number of dimensions of this work.
+     */
+    public int getDimension() {
+        return this.dimension;
+    }
+
+    /**
+     * Returns the buffer holding the group size of every dimension.
+     * The internal buffer itself is returned, not a copy.
+     */
+    public NativeSizeBuffer getGroupSize() {
+        return this.groupSize;
+    }
+
+    /**
+     * Returns the buffer holding the work offset of every dimension.
+     * The internal buffer itself is returned, not a copy.
+     */
+    public NativeSizeBuffer getWorkOffset() {
+        return this.workOffset;
+    }
+
+    /**
+     * Returns the buffer holding the work size of every dimension.
+     * The internal buffer itself is returned, not a copy.
+     */
+    public NativeSizeBuffer getWorkSize() {
+        return this.workSize;
+    }
+
+    /**
+     * {@link CLWork} specialization for a single dimension.
+     * @author Michael Bien
+     */
+    public static class CLWork1D extends CLWork {
+
+        private CLWork1D(CLKernel kernel) {
+            super(kernel, 1);
+        }
+
+        /**
+         * Sets the offset of the range.
+         * @return this work.
+         */
+        public CLWork1D setWorkOffset(long offset) {
+            this.workOffset.put(0, offset);
+            return this;
+        }
+
+        /**
+         * Sets the work size and resets the group size to zero.
+         * @return this work.
+         */
+        public CLWork1D setWorkSize(long worksize) {
+            return setWorkSize(worksize, 0);
+        }
+
+        /**
+         * Sets work size and group size.
+         * @throws IllegalArgumentException if groupsize is not zero and worksize is not a multiple of it.
+         * @return this work.
+         */
+        public CLWork1D setWorkSize(long worksize, long groupsize) {
+            checkSize(worksize, groupsize);
+            this.workSize.put(0, worksize);
+            this.groupSize.put(0, groupsize);
+            return this;
+        }
+
+        /**
+         * Same as {@link CLWork#optimizeFor(CLDevice)}, narrowed to the 1D return type.
+         */
+        @Override
+        public CLWork1D optimizeFor(CLDevice device) {
+            return (CLWork1D) super.optimizeFor(device);
+        }
+
+    }
+
+
+    /**
+     * {@link CLWork} specialization for two dimensions.
+     * @author Michael Bien
+     */
+    public static class CLWork2D extends CLWork {
+
+        private CLWork2D(CLKernel kernel) {
+            super(kernel, 2);
+        }
+
+        /**
+         * Sets the offset of the range in both dimensions.
+         * @return this work.
+         */
+        public CLWork2D setWorkOffset(long offsetX, long offsetY) {
+            this.workOffset.put(0, offsetX).put(1, offsetY);
+            return this;
+        }
+
+        /**
+         * Sets the work sizes and resets both group sizes to zero.
+         * @return this work.
+         */
+        public CLWork2D setWorkSize(long worksizeX, long worksizeY) {
+            return setWorkSize(worksizeX, worksizeY, 0, 0);
+        }
+
+        /**
+         * Sets work sizes and group sizes; all sizes are checked before any of them is stored.
+         * @throws IllegalArgumentException if a group size is not zero and the work size
+         *         of the same dimension is not a multiple of it.
+         * @return this work.
+         */
+        public CLWork2D setWorkSize(long worksizeX, long worksizeY, long groupsizeX, long groupsizeY) {
+            checkSize(worksizeX, groupsizeX);
+            checkSize(worksizeY, groupsizeY);
+            this.workSize.put(0, worksizeX).put(1, worksizeY);
+            this.groupSize.put(0, groupsizeX).put(1, groupsizeY);
+            return this;
+        }
+
+        /**
+         * Same as {@link CLWork#optimizeFor(CLDevice)}, narrowed to the 2D return type.
+         */
+        @Override
+        public CLWork2D optimizeFor(CLDevice device) {
+            return (CLWork2D) super.optimizeFor(device);
+        }
+
+    }
+
+    /**
+     * {@link CLWork} specialization for three dimensions.
+     * @author Michael Bien
+     */
+    public static class CLWork3D extends CLWork {
+
+        private CLWork3D(CLKernel kernel) {
+            super(kernel, 3);
+        }
+
+        /**
+         * Sets the offset of the range in all three dimensions.
+         * @return this work.
+         */
+        public CLWork3D setWorkOffset(long offsetX, long offsetY, long offsetZ) {
+            this.workOffset.put(0, offsetX).put(1, offsetY).put(2, offsetZ);
+            return this;
+        }
+
+        /**
+         * Sets the work sizes and resets all three group sizes to zero.
+         * @return this work.
+         */
+        public CLWork3D setWorkSize(long worksizeX, long worksizeY, long worksizeZ) {
+            return setWorkSize(worksizeX, worksizeY, worksizeZ, 0, 0, 0);
+        }
+
+        /**
+         * Sets work sizes and group sizes; all sizes are checked before any of them is stored.
+         * @throws IllegalArgumentException if a group size is not zero and the work size
+         *         of the same dimension is not a multiple of it.
+         * @return this work.
+         */
+        public CLWork3D setWorkSize(long worksizeX, long worksizeY, long worksizeZ, long groupsizeX, long groupsizeY, long groupsizeZ) {
+            checkSize(worksizeX, groupsizeX);
+            checkSize(worksizeY, groupsizeY);
+            checkSize(worksizeZ, groupsizeZ);
+            this.workSize.put(0, worksizeX).put(1, worksizeY).put(2, worksizeZ);
+            this.groupSize.put(0, groupsizeX).put(1, groupsizeY).put(2, groupsizeZ);
+            return this;
+        }
+
+        /**
+         * Same as {@link CLWork#optimizeFor(CLDevice)}, narrowed to the 3D return type.
+         */
+        @Override
+        public CLWork3D optimizeFor(CLDevice device) {
+            return (CLWork3D) super.optimizeFor(device);
+        }
+
+    }
+
+
+}
diff --git a/test/com/jogamp/opencl/CLProgramTest.java b/test/com/jogamp/opencl/CLProgramTest.java
index 3c8ef8ba..47eb42e0 100644
--- a/test/com/jogamp/opencl/CLProgramTest.java
+++ b/test/com/jogamp/opencl/CLProgramTest.java
@@ -29,6 +29,7 @@
package com.jogamp.opencl;
import com.jogamp.common.nio.Buffers;
+import com.jogamp.opencl.CLWork.CLWork1D;
import com.jogamp.opencl.util.CLBuildConfiguration;
import com.jogamp.opencl.util.CLProgramConfiguration;
import com.jogamp.opencl.CLProgram.Status;
@@ -41,6 +42,7 @@ import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.FloatBuffer;
+import java.nio.IntBuffer;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
@@ -51,6 +53,8 @@ import org.junit.rules.TemporaryFolder;
import static org.junit.Assert.*;
import static java.lang.System.*;
import static com.jogamp.opencl.CLProgram.CompilerOptions.*;
+import static com.jogamp.opencl.util.CLPlatformFilters.*;
+import static com.jogamp.opencl.CLVersion.*;
/**
*
@@ -416,6 +420,38 @@ public class CLProgramTest {
}
+ /**
+ * Enqueues a 1D {@link CLWork} through {@link CLCommandQueue#putWork(CLWork)} and
+ * checks the content of the buffer which is read back afterwards.
+ */
+ @Test
+ public void workTest() throws IOException {
+
+ // the platform is picked with the version(CL_1_1) filter;
+ // presumably because optimizeFor is documented as OpenCL 1.1 functionality -- confirm
+ CLContext context = CLContext.create(CLPlatform.getDefault(version(CL_1_1)));
+
+ try{
+ CLProgram program = context.createProgram(CLProgramTest.class.getResourceAsStream("testkernels.cl")).build();
+
+ CLDevice device = context.getMaxFlopsDevice();
+ out.println(device);
+ CLCommandQueue queue = device.createCommandQueue();
+
+ CLBuffer<IntBuffer> buffer = context.createIntBuffer(20);
+
+ // NOTE(review): the "add" kernel is defined in testkernels.cl which is not visible here;
+ // the assertion at the end expects every element of the buffer to be 5 after the run
+ CLWork1D work = CLWork.create1D(program.createCLKernel("add"));
+ work.getKernel().setArgs(buffer, 5, buffer.getNIOCapacity());
+ // a group size is given explicitly since optimizeFor leaves dimensions without one untouched;
+ // after optimizeFor the work size may have been rounded up beyond 20
+ work.setWorkSize(20, 1).optimizeFor(device);
+
+ // write, launch, read back; the boolean flags presumably select blocking mode -- confirm
+ queue.putWriteBuffer(buffer, false)
+ .putWork(work)
+ .putReadBuffer(buffer, true);
+
+ while(buffer.getBuffer().hasRemaining()) {
+ assertEquals(5, buffer.getBuffer().get());
+ }
+
+ }finally{
+ // the context is released even if an assertion above fails
+ context.release();
+ }
+
+ }
+
// @Test
public void loadTest() throws IOException, ClassNotFoundException, InterruptedException {
for(int i = 0; i < 100; i++) {