/* * Copyright (c) 2011, Michael Bien * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this list of * conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, this list * of conditions and the following disclaimer in the documentation and/or other materials * provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* * Created on Monday, July 25 2011 20:51 */ package com.jogamp.opencl; import com.jogamp.common.nio.CachedBufferFactory; import com.jogamp.common.nio.NativeSizeBuffer; /** * CLWork represents a N dimensional range of work items using a specific {@link CLKernel}. * * @see #create1D(com.jogamp.opencl.CLKernel) * @see #create2D(com.jogamp.opencl.CLKernel) * @see #create2D(com.jogamp.opencl.CLKernel) * * @author Michael Bien */ public class CLWork { public final int dimension; public final CLKernel kernel; protected final NativeSizeBuffer groupSize; protected final NativeSizeBuffer workSize; protected final NativeSizeBuffer workOffset; protected CLWork(CLKernel kernel, int dimension) { int size = dimension * NativeSizeBuffer.elementSize(); CachedBufferFactory factory = CachedBufferFactory.create(3*size, true); this.workOffset = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size)); this.workSize = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size)); this.groupSize = NativeSizeBuffer.wrap(factory.newDirectByteBuffer(size)); this.dimension = dimension; this.kernel = kernel; } /** * Creates work representing a 1D range of work items using the supplied kernel. */ public static CLWork1D create1D(CLKernel kernel) { return new CLWork1D(kernel); } /** * Creates work representing a 2D range of work items using the supplied kernel. */ public static CLWork2D create2D(CLKernel kernel) { return new CLWork2D(kernel); } /** * Creates work representing a 3D range of work items using the supplied kernel. */ public static CLWork3D create3D(CLKernel kernel) { return new CLWork3D(kernel); } public static long roundUp(long value, long multiple) { long remaining = value % multiple; if (remaining == 0) { return value; } else { return value + multiple - remaining; } } protected void checkSize(long worksize, long groupsize) { if(groupsize != 0 && worksize%groupsize != 0) { throw new IllegalArgumentException("worksize must be a multiple of groupsize"); } } /** * Optimizes the work sizes by rounding to the next device-specific preferred multiple. * This optimization can break kernels if they where designed to be executed with a fixed * group or worksize. This method will do nothing if no group size has been specified. * @since OpenCL 1.1 */ public CLWork optimizeFor(CLDevice device) { long multiple = kernel.getPreferredWorkGroupSizeMultiple(device); int[] maxWorkItemSizes = device.getMaxWorkItemSizes(); for (int i = 0; i < dimension; i++) { long group = groupSize.get(i); if(group > 0) { group = roundUp(group, multiple); if(group <= maxWorkItemSizes[i]) { groupSize.put(i, group); long work = workSize.get(i); workSize.put(i, roundUp(work, group)); } //else {can not optimize} } } return this; } public CLKernel getKernel() { return kernel; } public int getDimension() { return dimension; } public NativeSizeBuffer getGroupSize() { return groupSize; } public NativeSizeBuffer getWorkOffset() { return workOffset; } public NativeSizeBuffer getWorkSize() { return workSize; } /** * 1 dimensional {@link CLWork}. * @author Michael Bien */ public static class CLWork1D extends CLWork { private CLWork1D(CLKernel kernel) { super(kernel, 1); } public CLWork1D setWorkOffset(long offset) { workOffset.put(0, offset); return this; } public CLWork1D setWorkSize(long worksize) { setWorkSize(worksize, 0); return this; } public CLWork1D setWorkSize(long worksize, long groupsize) { checkSize(worksize, groupsize); workSize.put(0, worksize); groupSize.put(0, groupsize); return this; } @Override public CLWork1D optimizeFor(CLDevice device) { super.optimizeFor(device); return this; } } /** * 2 dimensional {@link CLWork}. * @author Michael Bien */ public static class CLWork2D extends CLWork { private CLWork2D(CLKernel kernel) { super(kernel, 2); } public CLWork2D setWorkOffset(long offsetX, long offsetY) { workOffset.put(0, offsetX).put(1, offsetY); return this; } public CLWork2D setWorkSize(long worksizeX, long worksizeY) { setWorkSize(worksizeX, worksizeY, 0, 0); return this; } public CLWork2D setWorkSize(long worksizeX, long worksizeY, long groupsizeX, long groupsizeY) { checkSize(worksizeX, groupsizeX); checkSize(worksizeY, groupsizeY); workSize.put(0, worksizeX).put(1, worksizeY); groupSize.put(0, groupsizeX).put(1, groupsizeY); return this; } @Override public CLWork2D optimizeFor(CLDevice device) { super.optimizeFor(device); return this; } } /** * 3 dimensional {@link CLWork}. * @author Michael Bien */ public static class CLWork3D extends CLWork { private CLWork3D(CLKernel kernel) { super(kernel, 3); } public CLWork3D setWorkOffset(long offsetX, long offsetY, long offsetZ) { workOffset.put(0, offsetX).put(1, offsetY).put(2, offsetZ); return this; } public CLWork3D setWorkSize(long worksizeX, long worksizeY, long worksizeZ) { setWorkSize(worksizeX, worksizeY, worksizeZ, 0, 0, 0); return this; } public CLWork3D setWorkSize(long worksizeX, long worksizeY, long worksizeZ, long groupsizeX, long groupsizeY, long groupsizeZ) { checkSize(worksizeX, groupsizeX); checkSize(worksizeY, groupsizeY); checkSize(worksizeZ, groupsizeZ); workSize.put(0, worksizeX).put(1, worksizeY).put(2, worksizeZ); groupSize.put(0, groupsizeX).put(1, groupsizeY).put(2, groupsizeZ); return this; } @Override public CLWork3D optimizeFor(CLDevice device) { super.optimizeFor(device); return this; } } }