r153 - trunk/code/renderer
DONOTREPLY at icculus.org
DONOTREPLY at icculus.org
Sun Oct 9 20:37:54 EDT 2005
Author: tma
Date: 2005-10-09 20:37:54 -0400 (Sun, 09 Oct 2005)
New Revision: 153
Modified:
trunk/code/renderer/tr_main.c
Log:
* Replaced drawSurfs qsort algorithm with a radix sort
  + Performance is comparable, although generally slightly worse, but...
  + Radix is a stable sort algorithm, so overlapping coplanar drawSurfs
    (i.e. with the same sort key) no longer flicker indeterminately
  + Also removes the dubious comment: "FIXME: this was lifted and
    modified from the microsoft lib source..."
Modified: trunk/code/renderer/tr_main.c
===================================================================
--- trunk/code/renderer/tr_main.c 2005-10-09 13:07:54 UTC (rev 152)
+++ trunk/code/renderer/tr_main.c 2005-10-10 00:37:54 UTC (rev 153)
@@ -1000,205 +1000,50 @@
*/
/*
-=================
-qsort replacement
-
-=================
+===============
+R_Radix
+===============
*/
-static __inline void SWAP_DRAW_SURF(drawSurf_t* a, drawSurf_t* b)
+static __inline void R_Radix( int byte, int size, drawSurf_t *source, drawSurf_t *dest )
{
- drawSurf_t t;
- memcpy(&t, a, sizeof(t));
- memcpy(a, b, sizeof(t));
- memcpy(b, &t, sizeof(t));
-}
+ int count[ 256 ] = { 0 };
+ int index[ 256 ];
+ int i;
+ unsigned char *sortKey = NULL;
+ unsigned char *end = NULL;
-/* this parameter defines the cutoff between using quick sort and
- insertion sort for arrays; arrays with lengths shorter or equal to the
- below value use insertion sort */
+ sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte;
+ end = sortKey + ( size * sizeof( drawSurf_t ) );
+ for( ; sortKey < end; sortKey += sizeof( drawSurf_t ) )
+ ++count[ *sortKey ];
-#define CUTOFF 8 /* testing shows that this is good value */
+ index[ 0 ] = 0;
-static void shortsort( drawSurf_t *lo, drawSurf_t *hi ) {
- drawSurf_t *p, *max;
+ for( i = 1; i < 256; ++i )
+ index[ i ] = index[ i - 1 ] + count[ i - 1 ];
- while (hi > lo) {
- max = lo;
- for (p = lo + 1; p <= hi; p++ ) {
- if ( p->sort > max->sort ) {
- max = p;
- }
- }
- SWAP_DRAW_SURF(max, hi);
- hi--;
- }
+ sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte;
+ for( i = 0; i < size; ++i, sortKey += sizeof( drawSurf_t ) )
+ dest[ index[ *sortKey ]++ ] = source[ i ];
}
+/*
+===============
+R_RadixSort
-/* sort the array between lo and hi (inclusive)
-FIXME: this was lifted and modified from the microsoft lib source...
- */
-
-void qsortFast (
- void *base,
- unsigned num,
- unsigned width
- )
+Radix sort with 4 byte size buckets
+===============
+*/
+static void R_RadixSort( drawSurf_t *source, int size )
{
- char *lo, *hi; /* ends of sub-array currently sorting */
- char *mid; /* points to middle of subarray */
- char *loguy, *higuy; /* traveling pointers for partition step */
- unsigned size; /* size of the sub-array */
- char *lostk[30], *histk[30];
- int stkptr; /* stack for saving sub-array to be processed */
+ static drawSurf_t scratch[ MAX_DRAWSURFS ];
-#if 0
- if ( sizeof(drawSurf_t) != 8 ) {
- ri.Error( ERR_DROP, "change SWAP_DRAW_SURF macro" );
- }
-#endif
-
- /* Note: the number of stack entries required is no more than
- 1 + log2(size), so 30 is sufficient for any array */
-
- if (num < 2 || width == 0)
- return; /* nothing to do */
-
- stkptr = 0; /* initialize stack */
-
- lo = base;
- hi = (char *)base + width * (num-1); /* initialize limits */
-
- /* this entry point is for pseudo-recursion calling: setting
- lo and hi and jumping to here is like recursion, but stkptr is
- prserved, locals aren't, so we preserve stuff on the stack */
-recurse:
-
- size = (hi - lo) / width + 1; /* number of el's to sort */
-
- /* below a certain size, it is faster to use a O(n^2) sorting method */
- if (size <= CUTOFF) {
- shortsort((drawSurf_t *)lo, (drawSurf_t *)hi);
- }
- else {
- /* First we pick a partititioning element. The efficiency of the
- algorithm demands that we find one that is approximately the
- median of the values, but also that we select one fast. Using
- the first one produces bad performace if the array is already
- sorted, so we use the middle one, which would require a very
- wierdly arranged array for worst case performance. Testing shows
- that a median-of-three algorithm does not, in general, increase
- performance. */
-
- mid = lo + (size / 2) * width; /* find middle element */
- SWAP_DRAW_SURF((drawSurf_t *)mid, (drawSurf_t *)lo); /* swap it to beginning of array */
-
-
- /* We now wish to partition the array into three pieces, one
- consisiting of elements <= partition element, one of elements
- equal to the parition element, and one of element >= to it. This
- is done below; comments indicate conditions established at every
- step. */
-
- loguy = lo;
- higuy = hi + width;
-
- /* Note that higuy decreases and loguy increases on every iteration,
- so loop must terminate. */
- for (;;) {
- /* lo <= loguy < hi, lo < higuy <= hi + 1,
- A[i] <= A[lo] for lo <= i <= loguy,
- A[i] >= A[lo] for higuy <= i <= hi */
-
- do {
- loguy += width;
- } while (loguy <= hi &&
- ( ((drawSurf_t *)loguy)->sort <= ((drawSurf_t *)lo)->sort ) );
-
- /* lo < loguy <= hi+1, A[i] <= A[lo] for lo <= i < loguy,
- either loguy > hi or A[loguy] > A[lo] */
-
- do {
- higuy -= width;
- } while (higuy > lo &&
- ( ((drawSurf_t *)higuy)->sort >= ((drawSurf_t *)lo)->sort ) );
-
- /* lo-1 <= higuy <= hi, A[i] >= A[lo] for higuy < i <= hi,
- either higuy <= lo or A[higuy] < A[lo] */
-
- if (higuy < loguy)
- break;
-
- /* if loguy > hi or higuy <= lo, then we would have exited, so
- A[loguy] > A[lo], A[higuy] < A[lo],
- loguy < hi, highy > lo */
-
- SWAP_DRAW_SURF((drawSurf_t *)loguy, (drawSurf_t *)higuy);
-
- /* A[loguy] < A[lo], A[higuy] > A[lo]; so condition at top
- of loop is re-established */
- }
-
- /* A[i] >= A[lo] for higuy < i <= hi,
- A[i] <= A[lo] for lo <= i < loguy,
- higuy < loguy, lo <= higuy <= hi
- implying:
- A[i] >= A[lo] for loguy <= i <= hi,
- A[i] <= A[lo] for lo <= i <= higuy,
- A[i] = A[lo] for higuy < i < loguy */
-
- SWAP_DRAW_SURF((drawSurf_t *)lo, (drawSurf_t *)higuy); /* put partition element in place */
-
- /* OK, now we have the following:
- A[i] >= A[higuy] for loguy <= i <= hi,
- A[i] <= A[higuy] for lo <= i < higuy
- A[i] = A[lo] for higuy <= i < loguy */
-
- /* We've finished the partition, now we want to sort the subarrays
- [lo, higuy-1] and [loguy, hi].
- We do the smaller one first to minimize stack usage.
- We only sort arrays of length 2 or more.*/
-
- if ( higuy - 1 - lo >= hi - loguy ) {
- if (lo + width < higuy) {
- lostk[stkptr] = lo;
- histk[stkptr] = higuy - width;
- ++stkptr;
- } /* save big recursion for later */
-
- if (loguy < hi) {
- lo = loguy;
- goto recurse; /* do small recursion */
- }
- }
- else {
- if (loguy < hi) {
- lostk[stkptr] = loguy;
- histk[stkptr] = hi;
- ++stkptr; /* save big recursion for later */
- }
-
- if (lo + width < higuy) {
- hi = higuy - width;
- goto recurse; /* do small recursion */
- }
- }
- }
-
- /* We have sorted the array, except for any pending sorts on the stack.
- Check if there are any, and do them. */
-
- --stkptr;
- if (stkptr >= 0) {
- lo = lostk[stkptr];
- hi = histk[stkptr];
- goto recurse; /* pop subarray from stack */
- }
- else
- return; /* all subarrays done */
+ R_Radix( 0, size, source, scratch );
+ R_Radix( 1, size, scratch, source );
+ R_Radix( 2, size, source, scratch );
+ R_Radix( 3, size, scratch, source );
}
-
//==========================================================================================
/*
@@ -1261,7 +1106,7 @@
}
// sort the drawsurfs by sort type, then orientation, then shader
- qsortFast (drawSurfs, numDrawSurfs, sizeof(drawSurf_t) );
+ R_RadixSort( drawSurfs, numDrawSurfs );
// check for any pass through drawing, which
// may cause another view to be rendered first
More information about the quake3-commits
mailing list