[quake3-commits] r2052 - in trunk/code: asm qcommon

DONOTREPLY at icculus.org DONOTREPLY at icculus.org
Wed Jun 22 10:36:11 EDT 2011


Author: thilo
Date: 2011-06-22 10:36:11 -0400 (Wed, 22 Jun 2011)
New Revision: 2052

Modified:
   trunk/code/asm/ftola.c
   trunk/code/asm/snapvector.asm
   trunk/code/asm/snapvector.c
   trunk/code/qcommon/common.c
   trunk/code/qcommon/q_shared.h
Log:
- Align SSE control word storage space to a 16-byte boundary for snapvector
- Replace some whitespace with tabs in snapvector.c
- Give gcc a bit more freedom in its choice of registers


Modified: trunk/code/asm/ftola.c
===================================================================
--- trunk/code/asm/ftola.c	2011-06-21 19:33:02 UTC (rev 2051)
+++ trunk/code/asm/ftola.c	2011-06-22 14:36:11 UTC (rev 2052)
@@ -28,7 +28,7 @@
 
 long qftolsse(float f)
 {
-  register long retval;
+  long retval;
   
   __asm__ volatile
   (
@@ -40,21 +40,25 @@
   return retval;
 }
 
-void qvmftolsse(void)
+int qvmftolsse(void)
 {
+  int retval;
+  
   __asm__ volatile
   (
     "movss (" EDI ", " EBX ", 4), %%xmm0\n"
-    "cvttss2si %%xmm0, " EAX "\n"
+    "cvttss2si %%xmm0, %0\n"
+    : "=r" (retval)
     :
-    :
     : "%xmm0"
   );
+  
+  return retval;
 }
 
 long qftolx87(float f)
 {
-  register long retval;
+  long retval;
 
   __asm__ volatile
   (
@@ -68,13 +72,17 @@
   return retval;
 }
 
-void qvmftolx87(void)
+int qvmftolx87(void)
 {
+  int retval;
+
   __asm__ volatile
   (
     "flds (" EDI ", " EBX ", 4)\n"
     "fistpl (" EDI ", " EBX ", 4)\n"
-    "mov (" EDI ", " EBX ", 4), " EAX "\n"
-    :
+    "mov (" EDI ", " EBX ", 4), %0\n"
+    : "=r" (retval)
   );
+  
+  return retval;
 }

Modified: trunk/code/asm/snapvector.asm
===================================================================
--- trunk/code/asm/snapvector.asm	2011-06-21 19:33:02 UTC (rev 2051)
+++ trunk/code/asm/snapvector.asm	2011-06-22 14:36:11 UTC (rev 2052)
@@ -44,7 +44,7 @@
 ; qsnapvector using SSE
 
   qsnapvectorsse PROC
-    sub rsp, 4
+    sub rsp, 8
 	stmxcsr [rsp]				; save SSE control word
 	ldmxcsr ssecw				; set to round nearest
 
@@ -58,19 +58,19 @@
 	pop rdi
 
 	ldmxcsr [rsp]				; restore sse control word to old value
-	add rsp, 4
+	add rsp, 8
 	ret
   qsnapvectorsse ENDP
 
 ELSE
 
   qsnapvectorsse PROC
-	sub esp, 4
+	sub esp, 8
 	stmxcsr [esp]				; save SSE control word
 	ldmxcsr ssecw				; set to round nearest
 
     push edi
-	mov edi, dword ptr 12[esp]	; maskmovdqu uses edi as implicit memory operand
+	mov edi, dword ptr 16[esp]	; maskmovdqu uses edi as implicit memory operand
 	movaps xmm1, ssemask		; initialize the mask register for maskmovdqu
     movups xmm0, [edi]			; here is stored our vector. Read 4 values in one go
 	cvtps2dq xmm0, xmm0			; convert 4 single fp to int
@@ -79,7 +79,7 @@
 	pop edi
 
 	ldmxcsr [esp]				; restore sse control word to old value
-	add esp, 4
+	add esp, 8
 	ret
   qsnapvectorsse ENDP
 

Modified: trunk/code/asm/snapvector.c
===================================================================
--- trunk/code/asm/snapvector.c	2011-06-21 19:33:02 UTC (rev 2051)
+++ trunk/code/asm/snapvector.c	2011-06-22 14:36:11 UTC (rev 2052)
@@ -25,6 +25,7 @@
 
 /*
  * GNU inline asm version of qsnapvector
+ * See MASM snapvector.asm for commentary
  */
 
 static unsigned char ssemask[16] __attribute__((aligned(16))) =
@@ -32,29 +33,33 @@
 	"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00"
 };
 
-static unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80;
-static unsigned short fpucw = 0x037F;
+static const unsigned int ssecw __attribute__((aligned(16))) = 0x00001F80;
+static const unsigned short fpucw = 0x037F;
 
 void qsnapvectorsse(vec3_t vec)
 {
+	uint32_t oldcw __attribute__((aligned(16)));
+	
 	__asm__ volatile
 	(
-		"sub $4, " ESP "\n"
-		"stmxcsr (" ESP ")\n"
+		"stmxcsr %3\n"
 		"ldmxcsr %1\n"
 
 		"movaps (%0), %%xmm1\n"
-		"movups (" EDI "), %%xmm0\n"
+		"movups (%2), %%xmm0\n"
 		"cvtps2dq %%xmm0, %%xmm0\n"
 		"cvtdq2ps %%xmm0, %%xmm0\n"
+		// vec MUST reside in register rdi as maskmovdqu uses
+		// it as an implicit operand. The "D" constraint makes
+		// sure of that.
 		"maskmovdqu %%xmm1, %%xmm0\n"
 		
-		"ldmxcsr (" ESP ")\n"
-		"add $4, " ESP "\n"
+		"ldmxcsr %3\n"
 		:
-		: "r" (ssemask), "m" (ssecw), "D" (vec)
+		: "r" (ssemask), "m" (ssecw), "D" (vec), "m" (oldcw)
 		: "memory", "%xmm0", "%xmm1"
 	);
+	
 }
 
 #define QROUNDX87(src) \
@@ -67,16 +72,16 @@
 {
 	__asm__ volatile
 	(
-        	"sub $2, " ESP "\n"
-        	"fnstcw (" ESP ")\n"
-        	"fldcw %0\n"
-        	QROUNDX87("(%1)")
-        	QROUNDX87("4(%1)")
-        	QROUNDX87("8(%1)")
-        	"fldcw (" ESP ")\n"
-        	"add $2, " ESP "\n"
-        	:
-        	: "m" (fpucw), "r" (vec)
-        	: "memory"
+		"sub $2, " ESP "\n"
+		"fnstcw (" ESP ")\n"
+		"fldcw %0\n"
+		QROUNDX87("(%1)")
+		QROUNDX87("4(%1)")
+		QROUNDX87("8(%1)")
+		"fldcw (" ESP ")\n"
+		"add $2, " ESP "\n"
+		:
+		: "m" (fpucw), "r" (vec)
+		: "memory"
 	);
 }

Modified: trunk/code/qcommon/common.c
===================================================================
--- trunk/code/qcommon/common.c	2011-06-21 19:33:02 UTC (rev 2051)
+++ trunk/code/qcommon/common.c	2011-06-22 14:36:11 UTC (rev 2052)
@@ -91,10 +91,10 @@
 cvar_t	*com_busyWait;
 
 #if idx64
-  void (*Q_VMftol)(void);
+	int (*Q_VMftol)(void);
 #elif id386
 	long (QDECL *Q_ftol)(float f);
-	void (QDECL *Q_VMftol)(void);
+	int (QDECL *Q_VMftol)(void);
 	void (QDECL *Q_SnapVector)(vec3_t vec);
 #endif
 

Modified: trunk/code/qcommon/q_shared.h
===================================================================
--- trunk/code/qcommon/q_shared.h	2011-06-21 19:33:02 UTC (rev 2051)
+++ trunk/code/qcommon/q_shared.h	2011-06-22 14:36:11 UTC (rev 2052)
@@ -423,23 +423,23 @@
 
 #if idx64
   extern long qftolsse(float f);
-  extern void qvmftolsse(void);
+  extern int qvmftolsse(void);
   extern void qsnapvectorsse(vec3_t vec);
 
   #define Q_ftol qftolsse
   #define Q_SnapVector qsnapvectorsse
 
-  extern void (*Q_VMftol)(void);
+  extern int (*Q_VMftol)(void);
 #elif id386
   extern long QDECL qftolx87(float f);
   extern long QDECL qftolsse(float f);
-  extern void QDECL qvmftolx87(void);
-  extern void QDECL qvmftolsse(void);
+  extern int QDECL qvmftolx87(void);
+  extern int QDECL qvmftolsse(void);
   extern void QDECL qsnapvectorx87(vec3_t vec);
   extern void QDECL qsnapvectorsse(vec3_t vec);
 
   extern long (QDECL *Q_ftol)(float f);
-  extern void (QDECL *Q_VMftol)(void);
+  extern int (QDECL *Q_VMftol)(void);
   extern void (QDECL *Q_SnapVector)(vec3_t vec);
 #else
   #define Q_ftol(f) lrintf((f))



More information about the quake3-commits mailing list