r153 - trunk/code/renderer

DONOTREPLY at icculus.org DONOTREPLY at icculus.org
Sun Oct 9 20:37:54 EDT 2005


Author: tma
Date: 2005-10-09 20:37:54 -0400 (Sun, 09 Oct 2005)
New Revision: 153

Modified:
   trunk/code/renderer/tr_main.c
Log:
* Replaced drawSurfs qsort algorithm with a radix sort
  + Performance is comparable, although generally slightly
    worse, but...
  + Radix is a stable sort algorithm, so overlapping
    coplanar drawSurfs (i.e. with the same sort key) no
    longer flicker indeterminately
  + Also removes the dubious comment: "FIXME: this was 
    lifted and modified from the microsoft lib source..."


Modified: trunk/code/renderer/tr_main.c
===================================================================
--- trunk/code/renderer/tr_main.c	2005-10-09 13:07:54 UTC (rev 152)
+++ trunk/code/renderer/tr_main.c	2005-10-10 00:37:54 UTC (rev 153)
@@ -1000,205 +1000,50 @@
 */
 
 /*
-=================
-qsort replacement
-
-=================
+===============
+R_Radix
+===============
 */
-static __inline void SWAP_DRAW_SURF(drawSurf_t* a, drawSurf_t* b)
+static __inline void R_Radix( int byte, int size, drawSurf_t *source, drawSurf_t *dest )
 {
-	drawSurf_t t;
-	memcpy(&t, a, sizeof(t));
-	memcpy(a, b, sizeof(t));
-	memcpy(b, &t, sizeof(t));
-}
+  int           count[ 256 ] = { 0 };
+  int           index[ 256 ];
+  int           i;
+  unsigned char *sortKey = NULL;
+  unsigned char *end = NULL;
 
-/* this parameter defines the cutoff between using quick sort and
-   insertion sort for arrays; arrays with lengths shorter or equal to the
-   below value use insertion sort */
+  sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte;
+  end = sortKey + ( size * sizeof( drawSurf_t ) );
+  for( ; sortKey < end; sortKey += sizeof( drawSurf_t ) )
+    ++count[ *sortKey ];
 
-#define CUTOFF 8            /* testing shows that this is good value */
+  index[ 0 ] = 0;
 
-static void shortsort( drawSurf_t *lo, drawSurf_t *hi ) {
-    drawSurf_t	*p, *max;
+  for( i = 1; i < 256; ++i )
+    index[ i ] = index[ i - 1 ] + count[ i - 1 ];
 
-    while (hi > lo) {
-        max = lo;
-        for (p = lo + 1; p <= hi; p++ ) {
-            if ( p->sort > max->sort ) {
-                max = p;
-            }
-        }
-        SWAP_DRAW_SURF(max, hi);
-        hi--;
-    }
+  sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte;
+  for( i = 0; i < size; ++i, sortKey += sizeof( drawSurf_t ) )
+    dest[ index[ *sortKey ]++ ] = source[ i ];
 }
 
+/*
+===============
+R_RadixSort
 
-/* sort the array between lo and hi (inclusive)
-FIXME: this was lifted and modified from the microsoft lib source...
- */
-
-void qsortFast (
-    void *base,
-    unsigned num,
-    unsigned width
-    )
+Radix sort with 4 byte size buckets
+===============
+*/
+static void R_RadixSort( drawSurf_t *source, int size )
 {
-    char *lo, *hi;              /* ends of sub-array currently sorting */
-    char *mid;                  /* points to middle of subarray */
-    char *loguy, *higuy;        /* traveling pointers for partition step */
-    unsigned size;              /* size of the sub-array */
-    char *lostk[30], *histk[30];
-    int stkptr;                 /* stack for saving sub-array to be processed */
+  static drawSurf_t scratch[ MAX_DRAWSURFS ];
 
-#if 0
-	if ( sizeof(drawSurf_t) != 8 ) {
-		ri.Error( ERR_DROP, "change SWAP_DRAW_SURF macro" );
-	}
-#endif
-
-    /* Note: the number of stack entries required is no more than
-       1 + log2(size), so 30 is sufficient for any array */
-
-    if (num < 2 || width == 0)
-        return;                 /* nothing to do */
-
-    stkptr = 0;                 /* initialize stack */
-
-    lo = base;
-    hi = (char *)base + width * (num-1);        /* initialize limits */
-
-    /* this entry point is for pseudo-recursion calling: setting
-       lo and hi and jumping to here is like recursion, but stkptr is
-       prserved, locals aren't, so we preserve stuff on the stack */
-recurse:
-
-    size = (hi - lo) / width + 1;        /* number of el's to sort */
-
-    /* below a certain size, it is faster to use a O(n^2) sorting method */
-    if (size <= CUTOFF) {
-         shortsort((drawSurf_t *)lo, (drawSurf_t *)hi);
-    }
-    else {
-        /* First we pick a partititioning element.  The efficiency of the
-           algorithm demands that we find one that is approximately the
-           median of the values, but also that we select one fast.  Using
-           the first one produces bad performace if the array is already
-           sorted, so we use the middle one, which would require a very
-           wierdly arranged array for worst case performance.  Testing shows
-           that a median-of-three algorithm does not, in general, increase
-           performance. */
-
-        mid = lo + (size / 2) * width;      /* find middle element */
-        SWAP_DRAW_SURF((drawSurf_t *)mid, (drawSurf_t *)lo); /* swap it to beginning of array */
-
-       
-        /* We now wish to partition the array into three pieces, one
-           consisiting of elements <= partition element, one of elements
-           equal to the parition element, and one of element >= to it.  This
-           is done below; comments indicate conditions established at every
-           step. */
-
-        loguy = lo;
-        higuy = hi + width;
-
-        /* Note that higuy decreases and loguy increases on every iteration,
-           so loop must terminate. */
-        for (;;) {
-            /* lo <= loguy < hi, lo < higuy <= hi + 1,
-               A[i] <= A[lo] for lo <= i <= loguy,
-               A[i] >= A[lo] for higuy <= i <= hi */
-
-            do  {
-                loguy += width;
-            } while (loguy <= hi &&  
-				( ((drawSurf_t *)loguy)->sort <= ((drawSurf_t *)lo)->sort ) );
-
-            /* lo < loguy <= hi+1, A[i] <= A[lo] for lo <= i < loguy,
-               either loguy > hi or A[loguy] > A[lo] */
-
-            do  {
-                higuy -= width;
-            } while (higuy > lo && 
-				( ((drawSurf_t *)higuy)->sort >= ((drawSurf_t *)lo)->sort ) );
-
-            /* lo-1 <= higuy <= hi, A[i] >= A[lo] for higuy < i <= hi,
-               either higuy <= lo or A[higuy] < A[lo] */
-
-            if (higuy < loguy)
-                break;
-
-            /* if loguy > hi or higuy <= lo, then we would have exited, so
-               A[loguy] > A[lo], A[higuy] < A[lo],
-               loguy < hi, highy > lo */
-
-            SWAP_DRAW_SURF((drawSurf_t *)loguy, (drawSurf_t *)higuy);
-
-            /* A[loguy] < A[lo], A[higuy] > A[lo]; so condition at top
-               of loop is re-established */
-        }
-
-        /*     A[i] >= A[lo] for higuy < i <= hi,
-               A[i] <= A[lo] for lo <= i < loguy,
-               higuy < loguy, lo <= higuy <= hi
-           implying:
-               A[i] >= A[lo] for loguy <= i <= hi,
-               A[i] <= A[lo] for lo <= i <= higuy,
-               A[i] = A[lo] for higuy < i < loguy */
-
-        SWAP_DRAW_SURF((drawSurf_t *)lo, (drawSurf_t *)higuy); /* put partition element in place */
-
-        /* OK, now we have the following:
-              A[i] >= A[higuy] for loguy <= i <= hi,
-              A[i] <= A[higuy] for lo <= i < higuy
-              A[i] = A[lo] for higuy <= i < loguy    */
-
-        /* We've finished the partition, now we want to sort the subarrays
-           [lo, higuy-1] and [loguy, hi].
-           We do the smaller one first to minimize stack usage.
-           We only sort arrays of length 2 or more.*/
-
-        if ( higuy - 1 - lo >= hi - loguy ) {
-            if (lo + width < higuy) {
-                lostk[stkptr] = lo;
-                histk[stkptr] = higuy - width;
-                ++stkptr;
-            }                           /* save big recursion for later */
-
-            if (loguy < hi) {
-                lo = loguy;
-                goto recurse;           /* do small recursion */
-            }
-        }
-        else {
-            if (loguy < hi) {
-                lostk[stkptr] = loguy;
-                histk[stkptr] = hi;
-                ++stkptr;               /* save big recursion for later */
-            }
-
-            if (lo + width < higuy) {
-                hi = higuy - width;
-                goto recurse;           /* do small recursion */
-            }
-        }
-    }
-
-    /* We have sorted the array, except for any pending sorts on the stack.
-       Check if there are any, and do them. */
-
-    --stkptr;
-    if (stkptr >= 0) {
-        lo = lostk[stkptr];
-        hi = histk[stkptr];
-        goto recurse;           /* pop subarray from stack */
-    }
-    else
-        return;                 /* all subarrays done */
+  R_Radix( 0, size, source, scratch );
+  R_Radix( 1, size, scratch, source );
+  R_Radix( 2, size, source, scratch );
+  R_Radix( 3, size, scratch, source );
 }
 
-
 //==========================================================================================
 
 /*
@@ -1261,7 +1106,7 @@
 	}
 
 	// sort the drawsurfs by sort type, then orientation, then shader
-	qsortFast (drawSurfs, numDrawSurfs, sizeof(drawSurf_t) );
+	R_RadixSort( drawSurfs, numDrawSurfs );
 
 	// check for any pass through drawing, which
 	// may cause another view to be rendered first




More information about the quake3-commits mailing list