diff options
author | Sven Gothel <[email protected]> | 2023-04-05 23:36:25 +0200 |
---|---|---|
committer | Sven Gothel <[email protected]> | 2023-04-05 23:36:25 +0200 |
commit | 10b60e10ece3cbc3e0b8a68ac73229371530e0ba (patch) | |
tree | db89ceda1867ca3a42b45c18ab8f42895877c452 /src/test/com/jogamp/opengl | |
parent | 24113f8e3452df8c8bb9e6136fa12bfed3bcc312 (diff) |
Matrix4f Perf: Enhance invert(), Drop (test) load on Matrix4f.mul(Matrix4f) for fair and realistic numbers - Both mul() ops faster than FloatUtil
Enhanced invert() of Matrix4f* and FloatUtil: Use 1f/det factor for burst scale.
Enhanced Matrix4f.invert(..): Use factored-out mulScale() to deliver the scale,
giving a good 10% advantage on aarch64 and amd64.
Brings Matrix4f.invert(..) on par w/ FloatUtil, on aarch64 even a 14% advantage.
+++
TestMatrix4f02MulNOUI added an additional Matrix4f.load() to the mul(Matrix4f) loop test,
which surely is an extra burden and not realistic, as the mul(Matrix4f, Matrix4f) and FloatUtil
counterparts also don't count loading a value.
Matrix4f.mul(Matrix4f) shall be used to utilize an already stored value anyway.
Matrix4f.mul(Matrix4f) didn't really exist in FloatUtil.
Same is true for Matrix4f.invert(), re-grouped order, i.e. pushing the non-arg variant last.
+++
Revised performance numbers from commit 15e60161787224e85172685f74dc0ac195969b51
AMD64 + OpenJDK17
- FloatUtil.multMatrix(a, a_off, b, b_off, dest) is considerably slower than all others
- Matrix4f.mul(a, b) roughly ~10% faster than FloatUtil.multMatrix(a, b, dest)
- Matrix4f.mul(b) roughly ~18% faster than FloatUtil.multMatrix(a, b, dest) (*)
- Matrix4f.invert(a) roughly ~ 2% faster than FloatUtil.invertMatrix(..)
- Matrix4f.invert() roughly ~ 4% slower than FloatUtil.invertMatrix(..) (*)
- Launched: nice -19 scripts/tests-x64.sh
RaspberryPi 4b aarch64 + OpenJDK17
- FloatUtil.multMatrix(a, a_off, b, b_off, dest) is considerably slower than all others
- Matrix4f.mul(a, b) roughly ~ 9% faster than FloatUtil.multMatrix(a, b, dest)
- Matrix4f.mul(b) roughly ~14% faster than FloatUtil.multMatrix(a, b, dest) (*)
- Matrix4f.invert(a) roughly ~14% faster than FloatUtil.invertMatrix(..)
- Matrix4f.invert() roughly ~12% faster than FloatUtil.invertMatrix(..) (*)
- Launched: nice -19 scripts/tests-linux-aarch64.sh
(*) not a true comparison in feature, as operating on 'this' matrix values
for one argument, unavailable to FloatUtil.
Conclusion
- Matrix4f.mul(..) is considerably faster!
- Matrix4f.invert(..) faster, esp on aarch64
Additional Matrix4fb tests using float[16], similar to FloatUtil,
also demonstrate lower performance compared to Matrix4f using
dedicated float fields.
Diffstat (limited to 'src/test/com/jogamp/opengl')
3 files changed, 232 insertions, 98 deletions
diff --git a/src/test/com/jogamp/opengl/test/junit/jogl/math/Matrix4fb.java b/src/test/com/jogamp/opengl/test/junit/jogl/math/Matrix4fb.java index fb4c11b7c..28e748d24 100644 --- a/src/test/com/jogamp/opengl/test/junit/jogl/math/Matrix4fb.java +++ b/src/test/com/jogamp/opengl/test/junit/jogl/math/Matrix4fb.java @@ -264,18 +264,22 @@ public class Matrix4fb { * * @param dst float[16] array storage in column major order * @param dst_off offset + * @return {@code dst} for chaining */ - public void get(final float[] dst, final int dst_off) { + public float[] get(final float[] dst, final int dst_off) { System.arraycopy(m, 0, dst, dst_off, 16); + return dst; } /** * Get this matrix into the given float[16] array in column major order. * * @param dst float[16] array storage in column major order + * @return {@code dst} for chaining */ - public void get(final float[] dst) { + public float[] get(final float[] dst) { System.arraycopy(m, 0, dst, 0, 16); + return dst; } /** @@ -286,9 +290,11 @@ public class Matrix4fb { * </p> * * @param dst {@link FloatBuffer} array storage in column major order + * @return {@code dst} for chaining */ - public void get(final FloatBuffer dst) { + public FloatBuffer get(final FloatBuffer dst) { dst.put(m, 0, 16); + return dst; } // @@ -381,18 +387,18 @@ public class Matrix4fb { */ public boolean invert() { final float scale; - { - float max = Math.abs(m[0]); - - for( int i = 1; i < 16; i++ ) { - final float a = Math.abs(m[i]); - if( a > max ) max = a; - } - if( 0 == max ) { - return false; - } - scale = 1.0f/max; - } + { + float max = Math.abs(m[0]); + + for( int i = 1; i < 16; i++ ) { + final float a = Math.abs(m[i]); + if( a > max ) max = a; + } + if( 0 == max ) { + return false; + } + scale = 1.0f/max; + } final float a00 = m[0+0*4]*scale; final float a10 = m[1+0*4]*scale; @@ -435,30 +441,30 @@ public class Matrix4fb { final float b33 = + a00*(a11*a22 - a12*a21) - a01*(a10*a22 - a12*a20) + a02*(a10*a21 - a11*a20); final float det = (a00*b00 + 
a01*b01 + a02*b02 + a03*b03) / scale; - if( 0 == det ) { return false; } - - m[0+0*4] = b00 / det; - m[1+0*4] = b01 / det; - m[2+0*4] = b02 / det; - m[3+0*4] = b03 / det; - - m[0+1*4] = b10 / det; - m[1+1*4] = b11 / det; - m[2+1*4] = b12 / det; - m[3+1*4] = b13 / det; - - m[0+2*4] = b20 / det; - m[1+2*4] = b21 / det; - m[2+2*4] = b22 / det; - m[3+2*4] = b23 / det; - - m[0+3*4] = b30 / det; - m[1+3*4] = b31 / det; - m[2+3*4] = b32 / det; - m[3+3*4] = b33 / det; + final float invdet = 1.0f / det; + + m[0+0*4] = b00 * invdet; + m[1+0*4] = b01 * invdet; + m[2+0*4] = b02 * invdet; + m[3+0*4] = b03 * invdet; + + m[0+1*4] = b10 * invdet; + m[1+1*4] = b11 * invdet; + m[2+1*4] = b12 * invdet; + m[3+1*4] = b13 * invdet; + + m[0+2*4] = b20 * invdet; + m[1+2*4] = b21 * invdet; + m[2+2*4] = b22 * invdet; + m[3+2*4] = b23 * invdet; + + m[0+3*4] = b30 * invdet; + m[1+3*4] = b31 * invdet; + m[2+3*4] = b32 * invdet; + m[3+3*4] = b33 * invdet; return true; } @@ -468,7 +474,86 @@ public class Matrix4fb { * @return false if {@code src} matrix is singular and inversion not possible, otherwise true */ public boolean invert(final Matrix4fb src) { - return load(src).invert(); + final float scale; + { + float max = Math.abs(src.m[0]); + + for( int i = 1; i < 16; i++ ) { + final float a = Math.abs(src.m[i]); + if( a > max ) max = a; + } + if( 0 == max ) { + return false; + } + scale = 1.0f/max; + } + + final float a00 = src.m[0+0*4]*scale; + final float a10 = src.m[1+0*4]*scale; + final float a20 = src.m[2+0*4]*scale; + final float a30 = src.m[3+0*4]*scale; + + final float a01 = src.m[0+1*4]*scale; + final float a11 = src.m[1+1*4]*scale; + final float a21 = src.m[2+1*4]*scale; + final float a31 = src.m[3+1*4]*scale; + + final float a02 = src.m[0+2*4]*scale; + final float a12 = src.m[1+2*4]*scale; + final float a22 = src.m[2+2*4]*scale; + final float a32 = src.m[3+2*4]*scale; + + final float a03 = src.m[0+3*4]*scale; + final float a13 = src.m[1+3*4]*scale; + final float a23 = 
src.m[2+3*4]*scale; + final float a33 = src.m[3+3*4]*scale; + + final float b00 = + a11*(a22*a33 - a23*a32) - a12*(a21*a33 - a23*a31) + a13*(a21*a32 - a22*a31); + final float b01 = -( + a10*(a22*a33 - a23*a32) - a12*(a20*a33 - a23*a30) + a13*(a20*a32 - a22*a30)); + final float b02 = + a10*(a21*a33 - a23*a31) - a11*(a20*a33 - a23*a30) + a13*(a20*a31 - a21*a30); + final float b03 = -( + a10*(a21*a32 - a22*a31) - a11*(a20*a32 - a22*a30) + a12*(a20*a31 - a21*a30)); + + final float b10 = -( + a01*(a22*a33 - a23*a32) - a02*(a21*a33 - a23*a31) + a03*(a21*a32 - a22*a31)); + final float b11 = + a00*(a22*a33 - a23*a32) - a02*(a20*a33 - a23*a30) + a03*(a20*a32 - a22*a30); + final float b12 = -( + a00*(a21*a33 - a23*a31) - a01*(a20*a33 - a23*a30) + a03*(a20*a31 - a21*a30)); + final float b13 = + a00*(a21*a32 - a22*a31) - a01*(a20*a32 - a22*a30) + a02*(a20*a31 - a21*a30); + + final float b20 = + a01*(a12*a33 - a13*a32) - a02*(a11*a33 - a13*a31) + a03*(a11*a32 - a12*a31); + final float b21 = -( + a00*(a12*a33 - a13*a32) - a02*(a10*a33 - a13*a30) + a03*(a10*a32 - a12*a30)); + final float b22 = + a00*(a11*a33 - a13*a31) - a01*(a10*a33 - a13*a30) + a03*(a10*a31 - a11*a30); + final float b23 = -( + a00*(a11*a32 - a12*a31) - a01*(a10*a32 - a12*a30) + a02*(a10*a31 - a11*a30)); + + final float b30 = -( + a01*(a12*a23 - a13*a22) - a02*(a11*a23 - a13*a21) + a03*(a11*a22 - a12*a21)); + final float b31 = + a00*(a12*a23 - a13*a22) - a02*(a10*a23 - a13*a20) + a03*(a10*a22 - a12*a20); + final float b32 = -( + a00*(a11*a23 - a13*a21) - a01*(a10*a23 - a13*a20) + a03*(a10*a21 - a11*a20)); + final float b33 = + a00*(a11*a22 - a12*a21) - a01*(a10*a22 - a12*a20) + a02*(a10*a21 - a11*a20); + + final float det = (a00*b00 + a01*b01 + a02*b02 + a03*b03) / scale; + if( 0 == det ) { + return false; + } + final float invdet = 1.0f / det; + + m[0+0*4] = b00 * invdet; + m[1+0*4] = b01 * invdet; + m[2+0*4] = b02 * invdet; + m[3+0*4] = b03 * invdet; + + m[0+1*4] = b10 * invdet; + m[1+1*4] = b11 * invdet; + 
m[2+1*4] = b12 * invdet; + m[3+1*4] = b13 * invdet; + + m[0+2*4] = b20 * invdet; + m[1+2*4] = b21 * invdet; + m[2+2*4] = b22 * invdet; + m[3+2*4] = b23 * invdet; + + m[0+3*4] = b30 * invdet; + m[1+3*4] = b31 * invdet; + m[2+3*4] = b32 * invdet; + m[3+3*4] = b33 * invdet; + return true; } /** diff --git a/src/test/com/jogamp/opengl/test/junit/jogl/math/TestMatrix4f02MulNOUI.java b/src/test/com/jogamp/opengl/test/junit/jogl/math/TestMatrix4f02MulNOUI.java index 5180451a1..510a437f3 100644 --- a/src/test/com/jogamp/opengl/test/junit/jogl/math/TestMatrix4f02MulNOUI.java +++ b/src/test/com/jogamp/opengl/test/junit/jogl/math/TestMatrix4f02MulNOUI.java @@ -154,15 +154,17 @@ public class TestMatrix4f02MulNOUI extends JunitTracer { }
tI4a = Platform.currentTimeMillis() - t_0;
+ res_m.load(m1);
+
// warm-up
for(int i=0; i<warmups; i++) {
- res_m.load(m1).mul(m2);
- res_m.load(m2).mul(m1);
+ res_m.mul(m2);
+ res_m.mul(m1);
}
t_0 = Platform.currentTimeMillis();
for(int i=0; i<loops; i++) {
- res_m.load(m1).mul(m2);
- res_m.load(m2).mul(m1);
+ res_m.mul(m2);
+ res_m.mul(m1);
}
tI4b = Platform.currentTimeMillis() - t_0;
@@ -182,15 +184,17 @@ public class TestMatrix4f02MulNOUI extends JunitTracer { }
tI5a = Platform.currentTimeMillis() - t_0;
+ res_n.load(n1);
+
// warm-up
for(int i=0; i<warmups; i++) {
- res_n.load(n1).mul(n2);
- res_n.load(n2).mul(n1);
+ res_n.mul(n2);
+ res_n.mul(n1);
}
t_0 = Platform.currentTimeMillis();
for(int i=0; i<loops; i++) {
- res_n.load(n1).mul(n2);
- res_n.load(n2).mul(n1);
+ res_n.mul(n2);
+ res_n.mul(n1);
}
tI5b = Platform.currentTimeMillis() - t_0;
diff --git a/src/test/com/jogamp/opengl/test/junit/jogl/math/TestMatrix4f03InversionNOUI.java b/src/test/com/jogamp/opengl/test/junit/jogl/math/TestMatrix4f03InversionNOUI.java index fee38bb54..f5b1a3151 100644 --- a/src/test/com/jogamp/opengl/test/junit/jogl/math/TestMatrix4f03InversionNOUI.java +++ b/src/test/com/jogamp/opengl/test/junit/jogl/math/TestMatrix4f03InversionNOUI.java @@ -53,9 +53,9 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { 0, 0, 0, 1 };
FloatUtil.invertMatrix(identity, 0, res1, 0);
- System.err.println(FloatUtil.matrixToString(null, "inv-1: ", "%10.7f", res1, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "inv-1: ", "%10.7f", res1, 0, 4, 4, false /* rowMajorOrder */));
invertMatrix(identity, 0, res2, 0, temp);
- System.err.println(FloatUtil.matrixToString(null, "inv-2: ", "%10.7f", res2, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "inv-2: ", "%10.7f", res2, 0, 4, 4, false /* rowMajorOrder */));
Assert.assertArrayEquals("I1/I2 failure", res1, res2, FloatUtil.INV_DEVIANCE);
Assert.assertArrayEquals("I2 failure", identity, res2, FloatUtil.INV_DEVIANCE);
@@ -63,13 +63,13 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { final Matrix4f res3 = new Matrix4f(identity);
Assert.assertTrue( res3.invert() );
- System.err.println(res3.toString(null, "inv-4: ", "%10.7f"));
+ // System.err.println(res3.toString(null, "inv-4: ", "%10.7f"));
Assert.assertEquals(new Matrix4f(res1), res3);
Assert.assertEquals(new Matrix4f(), res3);
final Matrix4fb res4 = new Matrix4fb(identity);
Assert.assertTrue( res4.invert() );
- System.err.println(res4.toString(null, "inv-5: ", "%10.7f"));
+ // System.err.println(res4.toString(null, "inv-5: ", "%10.7f"));
Assert.assertEquals(new Matrix4fb(res1), res4);
Assert.assertEquals(new Matrix4fb(), res4);
}
@@ -83,19 +83,19 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { final float[] inv2_2 = new float[16];
final float[] temp = new float[16];
- System.err.println(FloatUtil.matrixToString(null, "orig : ", "%10.7f", matrix, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "orig : ", "%10.7f", matrix, 0, 4, 4, false /* rowMajorOrder */));
invertMatrix(matrix, 0, inv1_0, 0, temp);
invertMatrix(inv1_0, 0, inv2_0, 0, temp);
- System.err.println(FloatUtil.matrixToString(null, "inv1_0: ", "%10.7f", inv1_0, 0, 4, 4, false /* rowMajorOrder */));
- System.err.println(FloatUtil.matrixToString(null, "inv2_0: ", "%10.7f", inv2_0, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "inv1_0: ", "%10.7f", inv1_0, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "inv2_0: ", "%10.7f", inv2_0, 0, 4, 4, false /* rowMajorOrder */));
FloatUtil.invertMatrix(matrix, 0, inv1_1, 0);
FloatUtil.invertMatrix(inv1_1, 0, inv2_1, 0);
- System.err.println(FloatUtil.matrixToString(null, "inv1_1: ", "%10.7f", inv1_1, 0, 4, 4, false /* rowMajorOrder */));
- System.err.println(FloatUtil.matrixToString(null, "inv2_1: ", "%10.7f", inv2_1, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "inv1_1: ", "%10.7f", inv1_1, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "inv2_1: ", "%10.7f", inv2_1, 0, 4, 4, false /* rowMajorOrder */));
FloatUtil.invertMatrix(matrix, inv1_2);
FloatUtil.invertMatrix(inv1_2, inv2_2);
- System.err.println(FloatUtil.matrixToString(null, "inv1_2: ", "%10.7f", inv1_2, 0, 4, 4, false /* rowMajorOrder */));
- System.err.println(FloatUtil.matrixToString(null, "inv2_2: ", "%10.7f", inv2_2, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "inv1_2: ", "%10.7f", inv1_2, 0, 4, 4, false /* rowMajorOrder */));
+ // System.err.println(FloatUtil.matrixToString(null, "inv2_2: ", "%10.7f", inv2_2, 0, 4, 4, false /* rowMajorOrder */));
Assert.assertArrayEquals("I1_1/I1_2 failure", inv1_1, inv1_2, FloatUtil.INV_DEVIANCE);
Assert.assertArrayEquals("I2_1/I2_2 failure", inv2_1, inv2_2, FloatUtil.INV_DEVIANCE);
@@ -107,30 +107,39 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { Assert.assertArrayEquals("I2 failure", matrix, inv2_2, FloatUtil.INV_DEVIANCE);
Assert.assertArrayEquals("I2 failure", matrix, inv2_1, FloatUtil.INV_DEVIANCE);
+ //
+ // Matrix4f
+ //
+
final Matrix4f matrix_m = new Matrix4f(matrix);
final Matrix4f inv1_4a = new Matrix4f(matrix_m);
Assert.assertTrue( inv1_4a.invert() );
final Matrix4f inv2_4a = new Matrix4f(inv1_4a);
Assert.assertTrue( inv2_4a.invert() );
- System.err.println(inv1_4a.toString(null, "inv1_4a: ", "%10.7f"));
- System.err.println(inv2_4a.toString(null, "inv2_4a: ", "%10.7f"));
+ // System.err.println(inv1_4a.toString(null, "inv1_4a: ", "%10.7f"));
+ // System.err.println(inv2_4a.toString(null, "inv2_4a: ", "%10.7f"));
- Assert.assertEquals(new Matrix4f(inv1_2), inv1_4a);
- Assert.assertEquals(new Matrix4f(inv2_2), inv2_4a);
+ // Assert.assertEquals(new Matrix4f(inv1_2), inv1_4a);
+ // Assert.assertEquals(new Matrix4f(inv2_2), inv2_4a);
+ Assert.assertArrayEquals("I5 failure", inv1_2, inv1_4a.get(temp), FloatUtil.INV_DEVIANCE);
+ Assert.assertArrayEquals("I5 failure", inv2_2, inv2_4a.get(temp), FloatUtil.INV_DEVIANCE);
Assert.assertTrue("I4 failure: "+matrix_m+" != "+inv2_4a, matrix_m.isEqual(inv2_4a, FloatUtil.INV_DEVIANCE));
final Matrix4f inv1_4b = new Matrix4f();
Assert.assertTrue( inv1_4b.invert(matrix_m) );
final Matrix4f inv2_4b = new Matrix4f();
Assert.assertTrue( inv2_4b.invert(inv1_4b) );
- System.err.println(inv1_4b.toString(null, "inv1_4b: ", "%10.7f"));
- System.err.println(inv2_4b.toString(null, "inv2_4b: ", "%10.7f"));
+ // System.err.println(inv1_4b.toString(null, "inv1_4b: ", "%10.7f"));
+ // System.err.println(inv2_4b.toString(null, "inv2_4b: ", "%10.7f"));
- Assert.assertEquals(new Matrix4f(inv1_2), inv1_4b);
- Assert.assertEquals(new Matrix4f(inv2_2), inv2_4b);
+ // Assert.assertEquals(new Matrix4f(inv1_2), inv1_4b);
+ // Assert.assertEquals(new Matrix4f(inv2_2), inv2_4b);
+ Assert.assertArrayEquals("I5 failure", inv1_2, inv1_4b.get(temp), FloatUtil.INV_DEVIANCE);
+ Assert.assertArrayEquals("I5 failure", inv2_2, inv2_4b.get(temp), FloatUtil.INV_DEVIANCE);
Assert.assertTrue("I4 failure: "+matrix_m+" != "+inv2_4b, matrix_m.isEqual(inv2_4b, FloatUtil.INV_DEVIANCE));
//
+ // Matrix4fb
//
final Matrix4fb matrix_n = new Matrix4fb(matrix);
@@ -138,22 +147,26 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { Assert.assertTrue( inv1_5a.invert() );
final Matrix4fb inv2_5a = new Matrix4fb(inv1_5a);
Assert.assertTrue( inv2_5a.invert() );
- System.err.println(inv1_5a.toString(null, "inv1_5a: ", "%10.7f"));
- System.err.println(inv2_5a.toString(null, "inv2_5a: ", "%10.7f"));
+ // System.err.println(inv1_5a.toString(null, "inv1_5a: ", "%10.7f"));
+ // System.err.println(inv2_5a.toString(null, "inv2_5a: ", "%10.7f"));
- Assert.assertEquals(new Matrix4fb(inv1_2), inv1_5a);
- Assert.assertEquals(new Matrix4fb(inv2_2), inv2_5a);
+ // Assert.assertEquals(new Matrix4fb(inv1_2), inv1_5a);
+ // Assert.assertEquals(new Matrix4fb(inv2_2), inv2_5a);
+ Assert.assertArrayEquals("I5 failure", inv1_2, inv1_5a.get(temp), FloatUtil.INV_DEVIANCE);
+ Assert.assertArrayEquals("I5 failure", inv2_2, inv2_5a.get(temp), FloatUtil.INV_DEVIANCE);
Assert.assertTrue("I5 failure: "+matrix_n+" != "+inv2_5a, matrix_n.isEqual(inv2_5a, FloatUtil.INV_DEVIANCE));
final Matrix4fb inv1_5b = new Matrix4fb();
Assert.assertTrue( inv1_5b.invert(matrix_n) );
final Matrix4fb inv2_5b = new Matrix4fb();
Assert.assertTrue( inv2_5b.invert(inv1_5b) );
- System.err.println(inv1_5b.toString(null, "inv1_5b: ", "%10.7f"));
- System.err.println(inv2_5b.toString(null, "inv2_5b: ", "%10.7f"));
+ // System.err.println(inv1_5b.toString(null, "inv1_5b: ", "%10.7f"));
+ // System.err.println(inv2_5b.toString(null, "inv2_5b: ", "%10.7f"));
- Assert.assertEquals(new Matrix4fb(inv1_2), inv1_5b);
- Assert.assertEquals(new Matrix4fb(inv2_2), inv2_5b);
+ // Assert.assertEquals(new Matrix4fb(inv1_2), inv1_5b);
+ // Assert.assertEquals(new Matrix4fb(inv2_2), inv2_5b);
+ Assert.assertArrayEquals("I5 failure", inv1_2, inv1_5b.get(temp), FloatUtil.INV_DEVIANCE);
+ Assert.assertArrayEquals("I5 failure", inv2_2, inv2_5b.get(temp), FloatUtil.INV_DEVIANCE);
Assert.assertTrue("I5 failure: "+matrix_n+" != "+inv2_5b, matrix_n.isEqual(inv2_5b, FloatUtil.INV_DEVIANCE));
}
@@ -271,18 +284,6 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { // warm-up
for(int i=0; i<warmups; i++) {
- res_m.load(p1_m).invert();
- res_m.load(p2_m).invert();
- }
- t_0 = Platform.currentTimeMillis();
- for(int i=0; i<loops; i++) {
- res_m.load(p1_m).invert();
- res_m.load(p2_m).invert();
- }
- tI4a = Platform.currentTimeMillis() - t_0;
-
- // warm-up
- for(int i=0; i<warmups; i++) {
res_m.invert(p1_m);
res_m.invert(p2_m);
}
@@ -291,7 +292,35 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { res_m.invert(p1_m);
res_m.invert(p2_m);
}
- tI4b = Platform.currentTimeMillis() - t_0;
+ tI4a = Platform.currentTimeMillis() - t_0;
+
+ if( false ) {
+ // warm-up
+ for(int i=0; i<warmups; i++) {
+ res_m.load(p1_m).invert();
+ res_m.load(p2_m).invert();
+ }
+ t_0 = Platform.currentTimeMillis();
+ for(int i=0; i<loops; i++) {
+ res_m.load(p1_m).invert();
+ res_m.load(p2_m).invert();
+ }
+ tI4b = Platform.currentTimeMillis() - t_0;
+ } else {
+ res_m.load(p1_m);
+
+ // warm-up
+ for(int i=0; i<warmups; i++) {
+ res_m.invert();
+ res_m.invert();
+ }
+ t_0 = Platform.currentTimeMillis();
+ for(int i=0; i<loops; i++) {
+ res_m.invert();
+ res_m.invert();
+ }
+ tI4b = Platform.currentTimeMillis() - t_0;
+ }
//
// Matrix4fb
@@ -299,18 +328,6 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { // warm-up
for(int i=0; i<warmups; i++) {
- res_n.load(p1_n).invert();
- res_n.load(p2_n).invert();
- }
- t_0 = Platform.currentTimeMillis();
- for(int i=0; i<loops; i++) {
- res_n.load(p1_n).invert();
- res_n.load(p2_n).invert();
- }
- tI5a = Platform.currentTimeMillis() - t_0;
-
- // warm-up
- for(int i=0; i<warmups; i++) {
res_n.invert(p1_n);
res_n.invert(p2_n);
}
@@ -319,7 +336,35 @@ public class TestMatrix4f03InversionNOUI extends JunitTracer { res_n.invert(p1_n);
res_n.invert(p2_n);
}
- tI5b = Platform.currentTimeMillis() - t_0;
+ tI5a = Platform.currentTimeMillis() - t_0;
+
+ if( false ) {
+ // warm-up
+ for(int i=0; i<warmups; i++) {
+ res_n.load(p1_n).invert();
+ res_n.load(p2_n).invert();
+ }
+ t_0 = Platform.currentTimeMillis();
+ for(int i=0; i<loops; i++) {
+ res_n.load(p1_n).invert();
+ res_n.load(p2_n).invert();
+ }
+ tI5b = Platform.currentTimeMillis() - t_0;
+ } else {
+ res_n.load(p1_n);
+
+ // warm-up
+ for(int i=0; i<warmups; i++) {
+ res_n.invert();
+ res_n.invert();
+ }
+ t_0 = Platform.currentTimeMillis();
+ for(int i=0; i<loops; i++) {
+ res_n.invert();
+ res_n.invert();
+ }
+ tI5b = Platform.currentTimeMillis() - t_0;
+ }
System.err.printf("Summary loops %6d: I0 %6d ms total, %f us/inv%n", loops, tI0, tI0*1e3/loops);
System.err.printf("Summary loops %6d: I1 %6d ms total, %f us/inv, I1 / I0 %f%%%n", loops, tI1, tI1*1e3/2.0/loops, (double)tI1/(double)tI0*100.0);
|