/*
* Copyright (c) 2009 JogAmp Community. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY JogAmp Community ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JogAmp Community OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those of the
* authors and should not be interpreted as representing official policies, either expressed
* or implied, of JogAmp Community.
*/
package com.jogamp.opencl;
import com.jogamp.opencl.util.CLUtil;
import com.jogamp.common.nio.Buffers;
import com.jogamp.common.nio.PointerBuffer;
import com.jogamp.opencl.llb.CL;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import static com.jogamp.opencl.CLException.*;
import static com.jogamp.opencl.llb.CL.*;
import static com.jogamp.common.os.Platform.*;
/**
* High level abstraction for an OpenCL Kernel.
* A kernel is a function declared in a program. A kernel is identified by the kernel
qualifier
* applied to any function in a program. A kernel object encapsulates the specific kernel
* function declared in a program and the argument values to be used when executing this
* kernel
function.
* CLKernel is not threadsafe.
* @see CLProgram#createCLKernel(java.lang.String)
* @see CLProgram#createCLKernels()
* @author Michael Bien, et al.
*/
public class CLKernel extends CLObjectResource implements Cloneable {
public final String name;
public final int numArgs;
private final CLProgram program;
private final CL binding;
private final ByteBuffer buffer;
private int argIndex;
private boolean force32BitArgs;
CLKernel(final CLProgram program, final long id) {
this(program, null, id);
}
CLKernel(final CLProgram program, final String name, final long id) {
super(program.getContext(), id);
this.program = program;
this.buffer = Buffers.newDirectByteBuffer((is32Bit()?4:8)*3);
binding = program.getPlatform().getCLBinding();
if(name == null) {
// get function name
final PointerBuffer size = PointerBuffer.wrap(buffer);
int ret = binding.clGetKernelInfo(ID, CL_KERNEL_FUNCTION_NAME, 0, null, size);
checkForError(ret, "error while asking for kernel function name");
final ByteBuffer bb = Buffers.newDirectByteBuffer((int)size.get(0));
ret = binding.clGetKernelInfo(ID, CL_KERNEL_FUNCTION_NAME, bb.capacity(), bb, null);
checkForError(ret, "error while asking for kernel function name");
this.name = CLUtil.clString2JavaString(bb, bb.capacity());
}else{
this.name = name;
}
// get number of arguments
final int ret = binding.clGetKernelInfo(ID, CL_KERNEL_NUM_ARGS, buffer.capacity(), buffer, null);
checkForError(ret, "error while asking for number of function arguments.");
numArgs = buffer.getInt(0);
}
// public CLKernel putArg(Buffer value) {
// setArg(argIndex++, value);
// return this;
// }
public CLKernel putArg(final CLMemory> value) {
setArg(argIndex, value);
argIndex++;
return this;
}
public CLKernel putArg(final short value) {
setArg(argIndex, value);
argIndex++;
return this;
}
public CLKernel putArg(final int value) {
setArg(argIndex, value);
argIndex++;
return this;
}
public CLKernel putArg(final long value) {
setArg(argIndex, value);
argIndex++;
return this;
}
public CLKernel putArg(final float value) {
setArg(argIndex, value);
argIndex++;
return this;
}
public CLKernel putArg(final double value) {
setArg(argIndex, value);
argIndex++;
return this;
}
public CLKernel putNullArg(final int size) {
setNullArg(argIndex, size);
argIndex++;
return this;
}
public CLKernel putArgs(final CLMemory>... values) {
setArgs(argIndex, values);
argIndex += values.length;
return this;
}
/**
* Resets the argument index to 0.
*/
public CLKernel rewind() {
argIndex = 0;
return this;
}
/**
* Returns the argument index used in the relative putArt(...) methods.
*/
public int position() {
return argIndex;
}
// public CLKernel setArg(int argumentIndex, Buffer value) {
// setArgument(argumentIndex, CLMemory.sizeOfBufferElem(value)*value.capacity(), value);
// return this;
// }
public CLKernel setArg(final int argumentIndex, final CLMemory> value) {
setArgument(argumentIndex, is32Bit()?4:8, wrap(value.ID));
return this;
}
public CLKernel setArg(final int argumentIndex, final short value) {
setArgument(argumentIndex, 2, wrap(value));
return this;
}
public CLKernel setArg(final int argumentIndex, final int value) {
setArgument(argumentIndex, 4, wrap(value));
return this;
}
public CLKernel setArg(final int argumentIndex, final long value) {
if(force32BitArgs) {
setArgument(argumentIndex, 4, wrap((int)value));
}else{
setArgument(argumentIndex, 8, wrap(value));
}
return this;
}
public CLKernel setArg(final int argumentIndex, final float value) {
setArgument(argumentIndex, 4, wrap(value));
return this;
}
public CLKernel setArg(final int argumentIndex, final double value) {
if(force32BitArgs) {
setArgument(argumentIndex, 4, wrap((float)value));
}else{
setArgument(argumentIndex, 8, wrap(value));
}
return this;
}
public CLKernel setNullArg(final int argumentIndex, final int size) {
setArgument(argumentIndex, size, null);
return this;
}
public CLKernel setArgs(final CLMemory>... values) {
setArgs(0, values);
return this;
}
public CLKernel setArgs(final Object... values) {
if(values == null || values.length == 0) {
throw new IllegalArgumentException("values array was empty or null.");
}
for (int i = 0; i < values.length; i++) {
final Object value = values[i];
if(value instanceof CLMemory>) {
setArg(i, (CLMemory>)value);
}else if(value instanceof Short) {
setArg(i, (Short)value);
}else if(value instanceof Integer) {
setArg(i, (Integer)value);
}else if(value instanceof Long) {
setArg(i, (Long)value);
}else if(value instanceof Float) {
setArg(i, (Float)value);
}else if(value instanceof Double) {
setArg(i, (Double)value);
}else{
throw new IllegalArgumentException(value + " is not a valid argument.");
}
}
return this;
}
private void setArgs(final int startIndex, final CLMemory>... values) {
for (int i = 0; i < values.length; i++) {
setArg(i+startIndex, values[i]);
}
}
private void setArgument(final int argumentIndex, final int size, final Buffer value) {
if(argumentIndex >= numArgs || argumentIndex < 0) {
throw new IndexOutOfBoundsException("kernel "+ this +" has "+numArgs+
" arguments, can not set argument with index "+argumentIndex);
}
if(!program.isExecutable()) {
throw new IllegalStateException("can not set program" +
" arguments for a not executable program. "+program);
}
final int ret = binding.clSetKernelArg(ID, argumentIndex, size, value);
if(ret != CL_SUCCESS) {
throw newException(ret, "error setting arg "+argumentIndex+" to value "+value+" of size "+size+" of "+this);
}
}
/**
* Forces double and long arguments to be passed as float and int to the OpenCL kernel.
* This can be used in applications which want to mix kernels with different floating point precision.
*/
public CLKernel setForce32BitArgs(final boolean force) {
this.force32BitArgs = force;
return this;
}
public CLProgram getProgram() {
return program;
}
/**
* @see #setForce32BitArgs(boolean)
*/
public boolean isForce32BitArgsEnabled() {
return force32BitArgs;
}
private Buffer wrap(final float value) {
return buffer.putFloat(0, value);
}
private Buffer wrap(final double value) {
return buffer.putDouble(0, value);
}
private Buffer wrap(final short value) {
return buffer.putShort(0, value);
}
private Buffer wrap(final int value) {
return buffer.putInt(0, value);
}
private Buffer wrap(final long value) {
return buffer.putLong(0, value);
}
/**
* Returns the amount of local memory in bytes being used by a kernel.
* This includes local memory that may be needed by an implementation to execute the kernel,
* variables declared inside the kernel with the __local
address qualifier and local memory
* to be allocated for arguments to the kernel declared as pointers with the __local
address
* qualifier and whose size is specified with clSetKernelArg.
* If the local memory size, for any pointer argument to the kernel declared with
* the __local
address qualifier, is not specified, its size is assumed to be 0.
* @version 1.0
*/
public long getLocalMemorySize(final CLDevice device) {
return getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE);
}
/**
* Returns the work group size for this kernel on the given device.
* This provides a mechanism for the application to query the work-group size
* that can be used to execute a kernel on a specific device given by device.
* The OpenCL implementation uses the resource requirements of the kernel
* (register usage etc.) to determine what this work-group size should be.
* @version 1.0
*/
public long getWorkGroupSize(final CLDevice device) {
return getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE);
}
/**
* Returns the work-group size specified by the __attribute__((reqd_work_group_size(X, Y, Z)))
qualifier in kernel sources.
* If the work-group size is not specified using the above attribute qualifier new long[]{(0, 0, 0)}
is returned.
* The returned array has always three elements.
* @version 1.0
*/
public long[] getCompileWorkGroupSize(final CLDevice device) {
final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, (is32Bit()?4:8)*3, buffer, null);
if(ret != CL_SUCCESS) {
throw newException(ret, "error while asking for CL_KERNEL_COMPILE_WORK_GROUP_SIZE of "+this+" on "+device);
}
if(is32Bit()) {
return new long[] { buffer.getInt(0), buffer.getInt(4), buffer.getInt(8) };
}else {
return new long[] { buffer.getLong(0), buffer.getLong(8), buffer.getLong(16) };
}
}
/**
* Returns the preferred multiple of workgroup size to use for kernel launch. This is only a performance hint; enqueueing
* with other sizes will still work, unless the size is more than the maximum allowed.
* @version 1.1
*/
public long getPreferredWorkGroupSizeMultiple(final CLDevice device) {
return getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
}
/**
* Returns the number of bytes of private memory used by each work item in the kernel.
* This includes private memory declared with the __private
qualifier, as
* well as other private memory used by the implementation.
* @version 1.1
*/
public long getPrivateMemSize(final CLDevice device) {
return getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE);
}
private long getWorkGroupInfo(final CLDevice device, final int flag) {
final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, flag, 8, buffer, null);
if(ret != CL_SUCCESS) {
throw newException(ret, "error while asking for clGetKernelWorkGroupInfo of "+this+" on "+device);
}
return buffer.getLong(0);
}
/**
* Releases all resources of this kernel from its context.
*/
@Override
public void release() {
super.release();
final int ret = binding.clReleaseKernel(ID);
program.onKernelReleased(this);
if(ret != CL_SUCCESS) {
throw newException(ret, "can not release "+this);
}
}
@Override
public String toString() {
return "CLKernel [id: " + ID
+ " name: " + name+"]";
}
@Override
public boolean equals(final Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final CLKernel other = (CLKernel) obj;
if (this.ID != other.ID) {
return false;
}
if (!this.program.equals(other.program)) {
return false;
}
return true;
}
@Override
public int hashCode() {
int hash = 7;
hash = 43 * hash + (int) (this.ID ^ (this.ID >>> 32));
hash = 43 * hash + (this.program != null ? this.program.hashCode() : 0);
return hash;
}
/**
* Returns a new instance of this kernel with uninitialized arguments.
*/
@Override
public CLKernel clone() {
return program.createCLKernel(name).setForce32BitArgs(force32BitArgs);
}
}