aboutsummaryrefslogtreecommitdiffstats
path: root/src/com/jogamp/opencl/demos/fft/CLFFTPlan.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/com/jogamp/opencl/demos/fft/CLFFTPlan.java')
-rw-r--r--src/com/jogamp/opencl/demos/fft/CLFFTPlan.java113
1 files changed, 60 insertions, 53 deletions
diff --git a/src/com/jogamp/opencl/demos/fft/CLFFTPlan.java b/src/com/jogamp/opencl/demos/fft/CLFFTPlan.java
index 1c8e039..91a9925 100644
--- a/src/com/jogamp/opencl/demos/fft/CLFFTPlan.java
+++ b/src/com/jogamp/opencl/demos/fft/CLFFTPlan.java
@@ -38,11 +38,11 @@ public class CLFFTPlan {
int y;
int z;
- CLFFTDim3(int x, int y, int z) {
+/* CLFFTDim3(int x, int y, int z) {
this.x = x;
this.y = y;
this.z = z;
- }
+ }*/
CLFFTDim3(int[] size) {
x = size[0];
@@ -154,11 +154,11 @@ public class CLFFTPlan {
// temp buffer if same batch size is used again and again.
int last_batch_size;
// temporary buffer for interleaved plan
- CLMemory tempmemobj;
+ CLMemory<FloatBuffer> tempmemobj;
// temporary buffer for planner plan. Only one of tempmemobj or
// (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending
// data format of plan (plannar or interleaved)
- CLMemory tempmemobj_real, tempmemobj_imag;
+ CLMemory<FloatBuffer> tempmemobj_real, tempmemobj_imag;
// Maximum size of signal for which local memory transposed based
// fft is sufficient i.e. no global mem transpose (communication)
// is needed
@@ -178,27 +178,28 @@ public class CLFFTPlan {
// e.g. on NVidia it is 16.
int num_local_mem_banks;
- public class InvalidContextException extends Exception {
+ @SuppressWarnings("serial")
+ public class InvalidContextException extends Exception {
}
/**
* Create a new FFT plan.
*
* Use the matching executeInterleaved() or executePlanar() depending on the dataFormat specified.
- * @param context
+ * @param context Context to create program in and get devices from.
* @param sizes Array of sizes for each dimension. The length of array defines how many dimensions there are.
* @param dataFormat Data format, InterleavedComplex (array of complex) or SplitComplex (separate planar arrays).
- * @throws zephyr.cl.CLFFTPlan.InvalidContextException
+ * @throws InvalidContextException if we can't find any GPUs.
*/
public CLFFTPlan(CLContext context, int[] sizes, CLFFTDataFormat dataFormat) throws InvalidContextException {
int i;
- int err;
+// int err;
boolean isPow2 = true;
- String kString;
- int num_devices;
+// String kString;
+// int num_devices;
boolean gpu_found = false;
CLDevice[] devices;
- int ret_size;
+// int ret_size;
if (sizes.length < 1 || sizes.length > 3) {
throw new IllegalArgumentException("Dimensions must be between 1 and 3");
@@ -297,7 +298,7 @@ public class CLFFTPlan {
/**
* Calculate FFT on interleaved complex data.
- * @param queue
+ * @param queue Command queue to put kernels into.
* @param batchSize How many instances to calculate. Use 1 for a single FFT.
* @param dir Direction of calculation, Forward or Inverse.
* @param data_in Input buffer.
@@ -309,7 +310,7 @@ public class CLFFTPlan {
CLBuffer<FloatBuffer> data_in, CLBuffer<FloatBuffer> data_out,
CLEventList condition, CLEventList event) {
int s;
- if (format != format.InterleavedComplexFormat) {
+ if (format != CLFFTDataFormat.InterleavedComplexFormat) {
throw new IllegalArgumentException();
}
@@ -320,7 +321,8 @@ public class CLFFTPlan {
allocateTemporaryBufferInterleaved(batchSize);
- CLMemory[] memObj = new CLMemory[3];
+ @SuppressWarnings("rawtypes")
+ CLMemory[] memObj = new CLMemory[3];
memObj[0] = data_in;
memObj[1] = data_out;
memObj[2] = tempmemobj;
@@ -404,26 +406,26 @@ public class CLFFTPlan {
/**
* Calculate FFT of planar data.
- * @param queue
- * @param batchSize
- * @param dir
- * @param data_in_real
- * @param data_in_imag
- * @param data_out_real
- * @param data_out_imag
- * @param contition
- * @param event
+ * @param queue Command queue to put kernels into.
+ * @param batchSize Undocumented by original author.
+ * @param dir Undocumented by original author.
+ * @param data_in_real Undocumented by original author.
+ * @param data_in_imag Undocumented by original author.
+ * @param data_out_real Undocumented by original author.
+ * @param data_out_imag Undocumented by original author.
+ * @param contition Undocumented by original author.
+ * @param event Undocumented by original author.
*/
public void executePlanar(CLCommandQueue queue, int batchSize, CLFFTDirection dir,
CLBuffer<FloatBuffer> data_in_real, CLBuffer<FloatBuffer> data_in_imag, CLBuffer<FloatBuffer> data_out_real, CLBuffer<FloatBuffer> data_out_imag,
CLEventList contition, CLEventList event) {
int s;
- if (format != format.SplitComplexFormat) {
+ if (format != CLFFTDataFormat.SplitComplexFormat) {
throw new IllegalArgumentException();
}
- int err;
+// int err;
WorkDimensions wd;
boolean inPlaceDone = false;
@@ -431,8 +433,10 @@ public class CLFFTPlan {
allocateTemporaryBufferPlanar(batchSize);
- CLMemory[] memObj_real = new CLMemory[3];
- CLMemory[] memObj_imag = new CLMemory[3];
+ @SuppressWarnings("rawtypes")
+ CLMemory[] memObj_real = new CLMemory[3];
+ @SuppressWarnings("rawtypes")
+ CLMemory[] memObj_imag = new CLMemory[3];
memObj_real[0] = data_in_real;
memObj_real[1] = data_out_real;
memObj_real[2] = tempmemobj_real;
@@ -514,6 +518,7 @@ public class CLFFTPlan {
out.printf("Run kernel %s with global dim = {%d*BatchSize}, local dim={%d}\n", kInfo.kernel_name, wd.gWorkItems, wd.lWorkItems);
}
out.printf("%s\n", kernel_string.toString());
+ out.close();
}
WorkDimensions getKernelWorkDimensions(CLFFTKernelInfo kernelInfo, int batchSize) {
@@ -581,12 +586,12 @@ public class CLFFTPlan {
}
private void createKernelList() {
- CLFFTKernelInfo kern;
+// CLFFTKernelInfo kern;
for (CLFFTKernelInfo kinfo : this.kernel_list) {
kinfo.kernel = program.createCLKernel(kinfo.kernel_name);
}
- if (format == format.SplitComplexFormat) {
+ if (format == CLFFTDataFormat.SplitComplexFormat) {
twist_kernel = program.createCLKernel("clFFT_1DTwistSplit");
} else {
twist_kernel = program.createCLKernel("clFFT_1DTwistInterleaved");
@@ -749,7 +754,7 @@ public class CLFFTPlan {
}
void formattedLoad(StringBuilder kernelString, int aIndex, int gIndex, CLFFTDataFormat dataFormat) {
- if (dataFormat == dataFormat.InterleavedComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.InterleavedComplexFormat) {
kernelString.append(" a[").append(aIndex).append("] = in[").append(gIndex).append("];\n");
} else {
kernelString.append(" a[").append(aIndex).append("].x = in_real[").append(gIndex).append("];\n");
@@ -758,7 +763,7 @@ public class CLFFTPlan {
}
void formattedStore(StringBuilder kernelString, int aIndex, int gIndex, CLFFTDataFormat dataFormat) {
- if (dataFormat == dataFormat.InterleavedComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.InterleavedComplexFormat) {
kernelString.append(" out[").append(gIndex).append("] = a[").append(aIndex).append("];\n");
} else {
kernelString.append(" out_real[").append(gIndex).append("] = a[").append(aIndex).append("].x;\n");
@@ -767,7 +772,7 @@ public class CLFFTPlan {
}
int insertGlobalLoadsAndTranspose(StringBuilder kernelString, int N, int numWorkItemsPerXForm, int numXFormsPerWG, int R0, int mem_coalesce_width, CLFFTDataFormat dataFormat) {
- int log2NumWorkItemsPerXForm = (int) log2(numWorkItemsPerXForm);
+ int log2NumWorkItemsPerXForm = log2(numWorkItemsPerXForm);
int groupSize = numWorkItemsPerXForm * numXFormsPerWG;
int i, j;
int lMemSize = 0;
@@ -782,7 +787,7 @@ public class CLFFTPlan {
kernelString.append(" jj = lId >> ").append(log2NumWorkItemsPerXForm).append(";\n");
kernelString.append(" if( !s || (groupId < get_num_groups(0)-1) || (jj < s) ) {\n");
kernelString.append(" offset = mad24( mad24(groupId, ").append(numXFormsPerWG).append(", jj), ").append(N).append(", ii );\n");
- if (dataFormat == dataFormat.InterleavedComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.InterleavedComplexFormat) {
kernelString.append(" in += offset;\n");
kernelString.append(" out += offset;\n");
} else {
@@ -799,7 +804,7 @@ public class CLFFTPlan {
kernelString.append(" ii = lId;\n");
kernelString.append(" jj = 0;\n");
kernelString.append(" offset = mad24(groupId, ").append(N).append(", ii);\n");
- if (dataFormat == dataFormat.InterleavedComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.InterleavedComplexFormat) {
kernelString.append(" in += offset;\n");
kernelString.append(" out += offset;\n");
} else {
@@ -817,11 +822,11 @@ public class CLFFTPlan {
int numOuterIter = numXFormsPerWG / (groupSize / mem_coalesce_width);
kernelString.append(" ii = lId & ").append(mem_coalesce_width - 1).append(";\n");
- kernelString.append(" jj = lId >> ").append((int) log2(mem_coalesce_width)).append(";\n");
+ kernelString.append(" jj = lId >> ").append(log2(mem_coalesce_width)).append(";\n");
kernelString.append(" lMemStore = sMem + mad24( jj, ").append(N + numWorkItemsPerXForm).append(", ii );\n");
kernelString.append(" offset = mad24( groupId, ").append(numXFormsPerWG).append(", jj);\n");
kernelString.append(" offset = mad24( offset, ").append(N).append(", ii );\n");
- if (dataFormat == dataFormat.InterleavedComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.InterleavedComplexFormat) {
kernelString.append(" in += offset;\n");
kernelString.append(" out += offset;\n");
} else {
@@ -882,7 +887,7 @@ public class CLFFTPlan {
lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG;
} else {
kernelString.append(" offset = mad24( groupId, ").append(N * numXFormsPerWG).append(", lId );\n");
- if (dataFormat == dataFormat.InterleavedComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.InterleavedComplexFormat) {
kernelString.append(" in += offset;\n");
kernelString.append(" out += offset;\n");
} else {
@@ -893,7 +898,7 @@ public class CLFFTPlan {
}
kernelString.append(" ii = lId & ").append(N - 1).append(";\n");
- kernelString.append(" jj = lId >> ").append((int) log2(N)).append(";\n");
+ kernelString.append(" jj = lId >> ").append(log2(N)).append(";\n");
kernelString.append(" lMemStore = sMem + mad24( jj, ").append(N + numWorkItemsPerXForm).append(", ii );\n");
kernelString.append("if((groupId == get_num_groups(0)-1) && s) {\n");
@@ -953,12 +958,12 @@ public class CLFFTPlan {
int i, j, k, ind;
int lMemSize = 0;
int numIter = maxRadix / Nr;
- String indent = "";
+// String indent = "";
if (numWorkItemsPerXForm >= mem_coalesce_width) {
if (numXFormsPerWG > 1) {
kernelString.append(" if( !s || (groupId < get_num_groups(0)-1) || (jj < s) ) {\n");
- indent = (" ");
+// indent = (" ");
}
for (i = 0; i < maxRadix; i++) {
j = i % numIter;
@@ -975,7 +980,7 @@ public class CLFFTPlan {
kernelString.append(" lMemLoad = sMem + mad24( jj, ").append(N + numWorkItemsPerXForm).append(", ii );\n");
kernelString.append(" ii = lId & ").append(mem_coalesce_width - 1).append(";\n");
- kernelString.append(" jj = lId >> ").append((int) log2(mem_coalesce_width)).append(";\n");
+ kernelString.append(" jj = lId >> ").append(log2(mem_coalesce_width)).append(";\n");
kernelString.append(" lMemStore = sMem + mad24( jj,").append(N + numWorkItemsPerXForm).append(", ii );\n");
for (i = 0; i < maxRadix; i++) {
@@ -1033,7 +1038,7 @@ public class CLFFTPlan {
kernelString.append(" lMemLoad = sMem + mad24( jj,").append(N + numWorkItemsPerXForm).append(", ii );\n");
kernelString.append(" ii = lId & ").append(N - 1).append(";\n");
- kernelString.append(" jj = lId >> ").append((int) log2(N)).append(";\n");
+ kernelString.append(" jj = lId >> ").append(log2(N)).append(";\n");
kernelString.append(" lMemStore = sMem + mad24( jj,").append(N + numWorkItemsPerXForm).append(", ii );\n");
for (i = 0; i < maxRadix; i++) {
@@ -1411,7 +1416,7 @@ public class CLFFTPlan {
int maxArrayLen = this.max_radix;
int batchSize = this.min_mem_coalesce_width;
CLFFTDataFormat dataFormat = this.format;
- boolean vertical = (dir == dir.X) ? false : true;
+ boolean vertical = (dir == CLFFTKernelDir.X) ? false : true;
numRadices = getGlobalRadixInfo(n, radixArr, R1Arr, R2Arr);
@@ -1432,7 +1437,7 @@ public class CLFFTPlan {
//}
int N = n;
- int m = (int) log2(n);
+ int m = log2(n);
int Rinit = vertical ? BS : 1;
batchSize = vertical ? Math.min(BS, batchSize) : batchSize;
int passNum;
@@ -1504,9 +1509,9 @@ public class CLFFTPlan {
insertVariables(localString, R1);
if (vertical) {
- localString.append("xNum = groupId >> ").append((int) log2(numBlocksPerXForm)).append(";\n");
+ localString.append("xNum = groupId >> ").append(log2(numBlocksPerXForm)).append(";\n");
localString.append("groupId = groupId & ").append(numBlocksPerXForm - 1).append(";\n");
- localString.append("indexIn = mad24(groupId, ").append(batchSize).append(", xNum << ").append((int) log2(n * BS)).append(");\n");
+ localString.append("indexIn = mad24(groupId, ").append(batchSize).append(", xNum << ").append(log2(n * BS)).append(");\n");
localString.append("tid = mul24(groupId, ").append(batchSize).append(");\n");
localString.append("i = tid >> ").append(lgStrideO).append(";\n");
localString.append("j = tid & ").append(strideO - 1).append(";\n");
@@ -1514,7 +1519,7 @@ public class CLFFTPlan {
for (i = 0; i < passNum; i++) {
stride *= radixArr[i];
}
- localString.append("indexOut = mad24(i, ").append(stride).append(", j + ").append("(xNum << ").append((int) log2(n * BS)).append("));\n");
+ localString.append("indexOut = mad24(i, ").append(stride).append(", j + ").append("(xNum << ").append(log2(n * BS)).append("));\n");
localString.append("bNum = groupId;\n");
} else {
int lgNumBlocksPerXForm = log2(numBlocksPerXForm);
@@ -1540,7 +1545,7 @@ public class CLFFTPlan {
localString.append("j = tid >> ").append(lgBatchSize).append(";\n");
localString.append("indexIn += mad24(j, ").append(strideI).append(", i);\n");
- if (dataFormat == dataFormat.SplitComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.SplitComplexFormat) {
localString.append("in_real += indexIn;\n");
localString.append("in_imag += indexIn;\n");
for (j = 0; j < R1; j++) {
@@ -1600,7 +1605,7 @@ public class CLFFTPlan {
// twiddle
if (passNum < (numPasses - 1)) {
localString.append("l = ((bNum << ").append(lgBatchSize).append(") + i) >> ").append(lgStrideO).append(";\n");
- localString.append("k = j << ").append((int) log2(R1 / R2)).append(";\n");
+ localString.append("k = j << ").append(log2(R1 / R2)).append(";\n");
localString.append("ang1 = dir*(2.0f*M_PI/").append(N).append(")*l;\n");
for (t = 0; t < R1; t++) {
localString.append("ang = ang1*(k + ").append((t % R2) * R1 + (t / R2)).append(");\n");
@@ -1612,8 +1617,8 @@ public class CLFFTPlan {
// Store Data
if (strideO == 1) {
- localString.append("lMemStore = sMem + mad24(i, ").append(radix + 1).append(", j << ").append((int) log2(R1 / R2)).append(");\n");
- localString.append("lMemLoad = sMem + mad24(tid >> ").append((int) log2(radix)).append(", ").append(radix + 1).append(", tid & ").append(radix - 1).append(");\n");
+ localString.append("lMemStore = sMem + mad24(i, ").append(radix + 1).append(", j << ").append(log2(R1 / R2)).append(");\n");
+ localString.append("lMemLoad = sMem + mad24(tid >> ").append(log2(radix)).append(", ").append(radix + 1).append(", tid & ").append(radix - 1).append(");\n");
for (i = 0; i < R1 / R2; i++) {
for (j = 0; j < R2; j++) {
@@ -1658,7 +1663,7 @@ public class CLFFTPlan {
localString.append("barrier(CLK_LOCAL_MEM_FENCE);\n");
localString.append("indexOut += tid;\n");
- if (dataFormat == dataFormat.SplitComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.SplitComplexFormat) {
localString.append("out_real += indexOut;\n");
localString.append("out_imag += indexOut;\n");
for (k = 0; k < R1; k++) {
@@ -1676,7 +1681,7 @@ public class CLFFTPlan {
} else {
localString.append("indexOut += mad24(j, ").append(numIter * strideO).append(", i);\n");
- if (dataFormat == dataFormat.SplitComplexFormat) {
+ if (dataFormat == CLFFTDataFormat.SplitComplexFormat) {
localString.append("out_real += indexOut;\n");
localString.append("out_imag += indexOut;\n");
for (k = 0; k < R1; k++) {
@@ -1739,6 +1744,8 @@ public class CLFFTPlan {
if (this.size.z > 1) {
createGlobalFFTKernelString(this.size.z, this.size.x * this.size.y, dir, 1);
}
+ break;
+
default:
return;
}