From 607eb99b9cad227dd7be6d149c6b6cf57d060c35 Mon Sep 17 00:00:00 2001 From: Sven Gothel Date: Tue, 7 Mar 2023 00:15:02 +0100 Subject: Graph Perf Counter: Use GlueGen's Clock.currentNanos() to ease the performance hit of measuring performance. This was mostly noticeable on a Raspberry-Pi 4 arm64, where performance degraded around 3x when using the high-freq counter. Using our well determined Clock.currentNanos() removes this overhead, getting back to 'easy measuring' and having a well defined 'currentNanos()' since module start. TestTextRendererNEWT00 can enable the Region and Font perf-counter w/ '-perf', w/o it only uses its own counter and hence reduces the high-freq burden (64% perf win on raspi4). +++ The numbers below show that Region.addOutlineShape() perhaps needs a little performance work to allow long text to be processed in 'real time' on an embedded platform. However, usually we cache the Region for long text and can have at least a one-liner rendered fast enough for 60fps, i.e. Region produced in ~26ms for an 81 char line instead of ~130ms for 664 chars. +++ Raspberry Pi 4b, OpenJDK17, Debian 11: Using current medium sized text_1 w/ 664 chars, w/o '-perf' and after having passed 40 frames, we have the following durations: - process the OutlineShape -> Region: 129ms (text) - Render the Region: 53ms Startup Times: - loading GlueGen - loading test 0 [ms] - loading GlueGen - start test 1,910 [ms] - loading test - start test 1,910 [ms] - loading test - gl 2,631 [ms] - loading test - graph 2,636 [ms] - loading test - txt 2,844 [ms] - loading test - draw 3,062 [ms] Perf .. 
1 / 1: Perf Launch: Total: graph 5, txt 207, draw 218, txt+draw 425 [ms] 1 / 1: Perf Launch: PerLoop: graph 5,505,740, txt 207,530,736, draw 218,393,680, txt+draw 425,924,416 [ns] 20 / 20: Perf Frame20: Total: graph 16, txt 376, draw 281, txt+draw 657 [ms] 20 / 20: Perf Frame20: PerLoop: graph 807,055, txt 18,820,824, draw 14,075,146, txt+draw 32,895,970 [ns] 20 / 40: Perf Frame40: Total: graph 3, txt 129, draw 53, txt+draw 182 [ms] 20 / 40: Perf Frame40: PerLoop: graph 176,670, txt 6,451,330, draw 2,658,217, txt+draw 9,109,547 [ns] +++ On a modern desktop (~2y old), GNU/Linux Debian 11, AMD GPU on Mesa3D: Using current medium sized text_1 w/ 664 chars, w/o '-perf' and after having passed 40 frames, we have following durations: - process the OutlineShape -> Region: 42ms (text) - Render the Region: 5ms Startup Times: - loading GlueGen - loading test 0 [ms] - loading GlueGen - start test 310 [ms] - loading test - start test 309 [ms] - loading test - gl 459 [ms] - loading test - graph 460 [ms] - loading test - txt 490 [ms] - loading test - draw 506 [ms] Perf .. 
1 / 1: Perf Launch: Total: graph 1, txt 29, draw 15, txt+draw 45 [ms] 1 / 1: Perf Launch: PerLoop: graph 1,191,096, txt 29,868,436, draw 15,519,445, txt+draw 45,387,881 [ns] 20 / 20: Perf Frame20: Total: graph 240, txt 68, draw 21, txt+draw 89 [ms] 20 / 20: Perf Frame20: PerLoop: graph 12,045,651, txt 3,415,402, draw 1,069,348, txt+draw 4,484,750 [ns] 20 / 40: Perf Frame40: Total: graph 283, txt 42, draw 5, txt+draw 47 [ms] 20 / 40: Perf Frame40: PerLoop: graph 14,152,395, txt 2,116,114, draw 265,292, txt+draw 2,381,406 [ns] --- .../classes/com/jogamp/graph/curve/Region.java | 36 ++++++++++------------ .../jogamp/graph/font/typecast/TypecastFont.java | 22 ++++++------- 2 files changed, 26 insertions(+), 32 deletions(-) (limited to 'src/jogl/classes') diff --git a/src/jogl/classes/com/jogamp/graph/curve/Region.java b/src/jogl/classes/com/jogamp/graph/curve/Region.java index 068e0aabd..70f30a193 100644 --- a/src/jogl/classes/com/jogamp/graph/curve/Region.java +++ b/src/jogl/classes/com/jogamp/graph/curve/Region.java @@ -28,8 +28,6 @@ package com.jogamp.graph.curve; import java.io.PrintStream; -import java.time.Duration; -import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; @@ -338,7 +336,7 @@ public abstract class Region { protected static final int GL_INT32_MAX = 0x7fffffff; // 2,147,483,647 static class Perf { - Instant t0 = null, t1 = null, t2 = null; + long t0 = 0, t1 = 0, t2 = 0; // all td_ values are in [ns] long td_vertices = 0; long td_tri_push_idx = 0; @@ -362,7 +360,7 @@ public abstract class Region { } public void clear() { - t0 = null; t1 = null; t2 = null; + t0 = 0; t1 = 0; t2 = 0; td_vertices = 0; td_tri_push_idx = 0; td_tri_push_vertidx = 0; @@ -396,11 +394,11 @@ public abstract class Region { } @Override - public Duration getTotalDuration() { + public long getTotalDuration() { if( null != perf ) { - return Duration.ofNanos(perf.td_total); + return perf.td_total; } else { - return 
Duration.ZERO; + return 0; } } @@ -424,7 +422,7 @@ public abstract class Region { public final void addOutlineShape(final OutlineShape shape, final AffineTransform t, final float[] rgbaColor) { if( null != perf ) { ++perf.count; - perf.t0 = Clock.getMonotonicTime(); + perf.t0 = Clock.currentNanos(); } if( null != frustum ) { final AABBox shapeBox = shape.getBounds(); @@ -468,15 +466,15 @@ public abstract class Region { vertsVNewIdxCount++; } if( null != perf ) { - perf.t1 = Clock.getMonotonicTime(); - perf.td_vertices += Duration.between(perf.t0, perf.t1).toNanos(); + perf.t1 = Clock.currentNanos(); + perf.td_vertices += perf.t1 - perf.t0; } if(DEBUG_INSTANCE) { System.err.println("Region.addOutlineShape(): Processing Triangles"); } for(final Triangle triIn : trisIn) { if( null != perf ) { - perf.t2 = Clock.getMonotonicTime(); + perf.t2 = Clock.currentNanos(); } // if(Region.DEBUG_INSTANCE) { // System.err.println("T["+i+"]: "+triIn); @@ -487,7 +485,7 @@ public abstract class Region { final int tv0Idx = triInVertices[0].getId(); if( null != perf ) { - perf.td_tri_misc += Duration.between(perf.t2, Clock.getMonotonicTime()).toNanos(); + perf.td_tri_misc += Clock.currentNanos() - perf.t2; } if ( max_indices - idxOffset > tv0Idx ) { // valid 'known' idx - move by offset @@ -495,11 +493,11 @@ public abstract class Region { // System.err.println("T["+i+"]: Moved "+tv0Idx+" + "+idxOffset+" -> "+(tv0Idx+idxOffset)); // } if( null != perf ) { - final Instant tpi = Clock.getMonotonicTime(); + final long tpi = Clock.currentNanos(); pushIndices(tv0Idx+idxOffset, triInVertices[1].getId()+idxOffset, triInVertices[2].getId()+idxOffset); - perf.td_tri_push_idx += Duration.between(tpi, Clock.getMonotonicTime()).toNanos(); + perf.td_tri_push_idx += Clock.currentNanos() - tpi; } else { pushIndices(tv0Idx+idxOffset, triInVertices[1].getId()+idxOffset, @@ -512,9 +510,9 @@ public abstract class Region { // System.err.println("T["+i+"]: New Idx "+numVertices); // } if( null != perf ) { - 
final Instant tpvi = Clock.getMonotonicTime(); + final long tpvi = Clock.currentNanos(); pushNewVerticesIdxImpl(triInVertices[0], triInVertices[1], triInVertices[2], t, rgbaColor); - perf.td_tri_push_vertidx += Duration.between(tpvi, Clock.getMonotonicTime()).toNanos(); + perf.td_tri_push_vertidx += Clock.currentNanos() - tpvi; } else { pushNewVerticesIdxImpl(triInVertices[0], triInVertices[1], triInVertices[2], t, rgbaColor); } @@ -523,9 +521,9 @@ public abstract class Region { tris++; } if( null != perf ) { - final Instant ttriX = Clock.getMonotonicTime(); - perf.td_tri_total += Duration.between(perf.t1, ttriX).toNanos(); - perf.td_total += Duration.between(perf.t0, ttriX).toNanos(); + final long ttriX = Clock.currentNanos(); + perf.td_tri_total += ttriX - perf.t1; + perf.td_total += ttriX - perf.t0; } } if(DEBUG_INSTANCE) { diff --git a/src/jogl/classes/jogamp/graph/font/typecast/TypecastFont.java b/src/jogl/classes/jogamp/graph/font/typecast/TypecastFont.java index ebc3eeacc..aba7d6807 100644 --- a/src/jogl/classes/jogamp/graph/font/typecast/TypecastFont.java +++ b/src/jogl/classes/jogamp/graph/font/typecast/TypecastFont.java @@ -43,8 +43,6 @@ import jogamp.graph.font.typecast.ot.table.KerningPair; import jogamp.graph.font.typecast.ot.table.PostTable; import java.io.PrintStream; -import java.time.Duration; -import java.time.Instant; import java.util.concurrent.TimeUnit; import com.jogamp.common.os.Clock; @@ -368,7 +366,7 @@ class TypecastFont implements Font { } static class Perf { - Instant t0 = null; + long t0 = 0; // all td_ values are in [ns] long td_visitor = 0; long td_total = 0; @@ -382,7 +380,7 @@ class TypecastFont implements Font { } public void clear() { - t0 = null; + t0 = 0; td_visitor = 0; td_total = 0; count = 0; @@ -412,11 +410,11 @@ class TypecastFont implements Font { } @Override - public Duration getTotalDuration() { + public long getTotalDuration() { if( null != perf ) { - return Duration.ofNanos(perf.td_total); + return perf.td_total; } 
else { - return Duration.ZERO; + return 0; } } @@ -438,7 +436,7 @@ class TypecastFont implements Font { } if( null != perf ) { ++perf.count; - perf.t0 = Clock.getMonotonicTime(); + perf.t0 = Clock.currentNanos(); } final AABBox res = new AABBox(); final int charCount = string.length(); @@ -480,10 +478,9 @@ class TypecastFont implements Font { temp1.translate(advanceTotal, y, temp2); res.resize(temp1.transform(glyphShape.getBounds(), temp_box)); if( null != perf ) { - final Instant t1 = Clock.getMonotonicTime(); + final long t1 = Clock.currentNanos(); visitor.visit(glyphShape, temp1); - final Instant t2 = Clock.getMonotonicTime(); - perf.td_visitor += Duration.between(t1, t2).toNanos(); + perf.td_visitor += Clock.currentNanos() - t1; } else { visitor.visit(glyphShape, temp1); } @@ -492,8 +489,7 @@ class TypecastFont implements Font { } } if( null != perf ) { - final Instant tX = Clock.getMonotonicTime(); - perf.td_total += Duration.between(perf.t0, tX).toNanos(); + perf.td_total += Clock.currentNanos() - perf.t0; } return res; } -- cgit v1.2.3