diff options
Diffstat (limited to 'src/com/jogamp/opencl/CLKernel.java')
-rw-r--r-- | src/com/jogamp/opencl/CLKernel.java | 306 |
1 files changed, 306 insertions, 0 deletions
diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java new file mode 100644 index 00000000..ca7f6786 --- /dev/null +++ b/src/com/jogamp/opencl/CLKernel.java @@ -0,0 +1,306 @@ +package com.jogamp.opencl; + +import com.jogamp.opencl.util.CLUtil; +import com.jogamp.common.nio.Buffers; +import com.jogamp.common.os.Platform; +import com.jogamp.common.nio.Int64Buffer; +import java.nio.Buffer; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +import static com.jogamp.opencl.CLException.*; +import static com.jogamp.opencl.CL.*; + +/** + * High level abstraction for an OpenCL Kernel. + * A kernel is a function declared in a program. A kernel is identified by the <code>kernel</code> qualifier + * applied to any function in a program. A kernel object encapsulates the specific <code>kernel</code> + * function declared in a program and the argument values to be used when executing this + * <code>kernel</code> function. + * CLKernel is not threadsafe. + * @see CLProgram#createCLKernel(java.lang.String) + * @see CLProgram#createCLKernels() + * @author Michael Bien + */ +public class CLKernel extends CLObject implements CLResource, Cloneable { + + public final String name; + public final int numArgs; + + private final CLProgram program; + + private final ByteBuffer buffer; + + private int argIndex; + private boolean force32BitArgs; + + CLKernel(CLProgram program, long id) { + super(program.getContext(), id); + this.program = program; + this.buffer = Buffers.newDirectByteBuffer(8); + + Int64Buffer size = Int64Buffer.allocateDirect(1); + + // get function name + int ret = cl.clGetKernelInfo(ID, CL_KERNEL_FUNCTION_NAME, 0, null, size); + checkForError(ret, "error while asking for kernel function name"); + + ByteBuffer bb = ByteBuffer.allocateDirect((int)size.get(0)).order(ByteOrder.nativeOrder()); + + ret = cl.clGetKernelInfo(ID, CL_KERNEL_FUNCTION_NAME, bb.capacity(), bb, null); + checkForError(ret, "error while asking for kernel function name"); + + this.name = CLUtil.clString2JavaString(bb, bb.capacity()); + + // get number of arguments + ret = cl.clGetKernelInfo(ID, CL_KERNEL_NUM_ARGS, bb.capacity(), bb, null); + checkForError(ret, "error while asking for number of function arguments."); + + numArgs = bb.getInt(0); + + } + +// public CLKernel putArg(Buffer value) { +// setArg(argIndex++, value); +// return this; +// } + + public CLKernel putArg(CLMemory<?> value) { + setArg(argIndex++, value); + return this; + } + + public CLKernel putArg(int value) { + setArg(argIndex++, value); + return this; + } + + public CLKernel putArg(long value) { + setArg(argIndex++, value); + return this; + } + + public CLKernel putArg(float value) { + setArg(argIndex++, value); + return this; + } + + public CLKernel putArg(double value) { + setArg(argIndex++, value); + return this; + } + + public CLKernel putNullArg(int size) { + setNullArg(argIndex++, size); + return this; + } + + public CLKernel putArgs(CLMemory<?>... values) { + setArgs(argIndex, values); + argIndex += values.length; + return this; + } + + public CLKernel rewind() { + argIndex = 0; + return this; + } + +// public CLKernel setArg(int argumentIndex, Buffer value) { +// setArgument(argumentIndex, CLMemory.sizeOfBufferElem(value)*value.capacity(), value); +// return this; +// } + + public CLKernel setArg(int argumentIndex, CLMemory<?> value) { + setArgument(argumentIndex, Platform.is32Bit()?4:8, wrap(value.ID)); + return this; + } + + public CLKernel setArg(int argumentIndex, int value) { + setArgument(argumentIndex, 4, wrap(value)); + return this; + } + + public CLKernel setArg(int argumentIndex, long value) { + if(force32BitArgs) { + setArgument(argumentIndex, 4, wrap((int)value)); + }else{ + setArgument(argumentIndex, 8, wrap(value)); + } + return this; + } + + public CLKernel setArg(int argumentIndex, float value) { + setArgument(argumentIndex, 4, wrap(value)); + return this; + } + + public CLKernel setArg(int argumentIndex, double value) { + if(force32BitArgs) { + setArgument(argumentIndex, 4, wrap((float)value)); + }else{ + setArgument(argumentIndex, 8, wrap(value)); + } + return this; + } + + public CLKernel setNullArg(int argumentIndex, int size) { + setArgument(argumentIndex, size, null); + return this; + } + + public CLKernel setArgs(CLMemory<?>... values) { + setArgs(0, values); + return this; + } + + private void setArgs(int startIndex, CLMemory<?>... values) { + for (int i = 0; i < values.length; i++) { + setArg(i+startIndex, values[i]); + } + } + + private void setArgument(int argumentIndex, int size, Buffer value) { + if(argumentIndex >= numArgs || argumentIndex < 0) { + throw new IndexOutOfBoundsException("kernel "+ toString() +" has "+numArgs+ + " arguments, can not set argument with index "+argumentIndex); + } + if(!program.isExecutable()) { + throw new IllegalStateException("can not set program" + + " arguments for a not executable program. "+program); + } + + int ret = cl.clSetKernelArg(ID, argumentIndex, size, value); + checkForError(ret, "error on clSetKernelArg"); + } + + /** + * Forces double and long arguments to be passed as float and int to the OpenCL kernel. + * This can be used in applications which want to mix kernels with different floating point precision. + */ + public CLKernel setForce32BitArgs(boolean force) { + this.force32BitArgs = force; + return this; + } + + public CLProgram getProgram() { + return program; + } + + /** + * @see #setForce32BitArgs(boolean) + */ + public boolean isForce32BitArgsEnabled() { + return force32BitArgs; + } + + private Buffer wrap(float value) { + return buffer.putFloat(value).rewind(); + } + + private Buffer wrap(double value) { + return buffer.putDouble(value).rewind(); + } + + private Buffer wrap(int value) { + return buffer.putInt(value).rewind(); + } + + private Buffer wrap(long value) { + return buffer.putLong(value).rewind(); + } + + /** + * Returns the amount of local memory in bytes being used by a kernel. + * This includes local memory that may be needed by an implementation to execute the kernel, + * variables declared inside the kernel with the <code>__local</code> address qualifier and local memory + * to be allocated for arguments to the kernel declared as pointers with the <code>__local</code> address + * qualifier and whose size is specified with clSetKernelArg. + * If the local memory size, for any pointer argument to the kernel declared with + * the <code>__local</code> address qualifier, is not specified, its size is assumed to be 0. + */ + public long getLocalMemorySize(CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE); + } + + /** + * Returns the work group size for this kernel on the given device. + * This provides a mechanism for the application to query the work-group size + * that can be used to execute a kernel on a specific device given by device. + * The OpenCL implementation uses the resource requirements of the kernel + * (register usage etc.) to determine what this work-group size should be. + */ + public long getWorkGroupSize(CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE); + } + + /** + * Returns the work-group size specified by the <code>__attribute__((reqd_work_gr oup_size(X, Y, Z)))</code> qualifier in kernel sources. + * If the work-group size is not specified using the above attribute qualifier <code>new long[]{(0, 0, 0)}</code> is returned. + */ + public long[] getCompileWorkGroupSize(CLDevice device) { + int ret = cl.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, 8*3, buffer, null); + checkForError(ret, "error while asking for clGetKernelWorkGroupInfo"); + return new long[] { buffer.getLong(0), buffer.getLong(1), buffer.getLong(2) }; + } + + private long getWorkGroupInfo(CLDevice device, int flag) { + int ret = cl.clGetKernelWorkGroupInfo(ID, device.ID, flag, 8, buffer, null); + checkForError(ret, "error while asking for clGetKernelWorkGroupInfo"); + return buffer.getLong(0); + } + + /** + * Releases all resources of this kernel from its context. + */ + public void release() { + int ret = cl.clReleaseKernel(ID); + program.onKernelReleased(this); + checkForError(ret, "can not release kernel"); + } + + public void close() { + release(); + } + + @Override + public String toString() { + return "CLKernel [id: " + ID + + " name: " + name+"]"; + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final CLKernel other = (CLKernel) obj; + if (this.ID != other.ID) { + return false; + } + if (!this.program.equals(other.program)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 7; + hash = 43 * hash + (int) (this.ID ^ (this.ID >>> 32)); + hash = 43 * hash + (this.program != null ? this.program.hashCode() : 0); + return hash; + } + + /** + * Returns a new instance of this kernel with uninitialized arguments. + */ + @Override + public CLKernel clone() { + return program.createCLKernel(name).setForce32BitArgs(force32BitArgs); + } + +} |