[quake3-commits] r2020 - in trunk: . code/asm code/client code/qcommon code/renderer code/server code/sys

DONOTREPLY at icculus.org DONOTREPLY at icculus.org
Mon Jun 13 05:56:40 EDT 2011


Author: thilo
Date: 2011-06-13 05:56:39 -0400 (Mon, 13 Jun 2011)
New Revision: 2020

Added:
   trunk/code/asm/ftola.asm
   trunk/code/asm/snapvector.asm
   trunk/code/asm/vm_x86_64.asm
Removed:
   trunk/code/asm/ftola.s
   trunk/code/asm/snapvectora.s
Modified:
   trunk/Makefile
   trunk/code/client/cl_cgame.c
   trunk/code/client/snd_wavelet.c
   trunk/code/qcommon/common.c
   trunk/code/qcommon/q_platform.h
   trunk/code/qcommon/q_shared.h
   trunk/code/qcommon/vm_x86.c
   trunk/code/renderer/tr_light.c
   trunk/code/renderer/tr_local.h
   trunk/code/renderer/tr_mesh.c
   trunk/code/renderer/tr_shade.c
   trunk/code/renderer/tr_shade_calc.c
   trunk/code/renderer/tr_sky.c
   trunk/code/server/sv_game.c
   trunk/code/sys/sys_main.c
   trunk/code/sys/sys_unix.c
   trunk/code/sys/sys_win32.c
Log:
- Add MASM assembler files for MSVC x64 projects to support vm_x86 in x64 mode
- Clean up ftol()/snapvector() mess
- Make use of SSE instructions for ftol()/snapvector() if available
- Move ftol/snapvector from pure assembler files to inline assembler; this adds x86_64 support and improves support for different calling conventions
- Set FPU control word at program startup to get consistent behaviour on all platforms


Modified: trunk/Makefile
===================================================================
--- trunk/Makefile	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/Makefile	2011-06-13 09:56:39 UTC (rev 2020)
@@ -1504,14 +1504,31 @@
   Q3OBJ += \
     $(B)/client/snd_mixa.o \
     $(B)/client/matha.o \
-    $(B)/client/snapvectora.o
+    $(B)/client/snapvector.o \
+    $(B)/client/ftola.o
 endif
 ifeq ($(ARCH),x86)
   Q3OBJ += \
     $(B)/client/snd_mixa.o \
     $(B)/client/matha.o \
-    $(B)/client/snapvectora.o
+    $(B)/client/snapvector.o \
+    $(B)/client/ftola.o
 endif
+ifeq ($(ARCH),x86_64)
+  Q3OBJ += \
+    $(B)/client/snapvector.o \
+    $(B)/client/ftola.o
+endif
+ifeq ($(ARCH),amd64)
+  Q3OBJ += \
+    $(B)/client/snapvector.o \
+    $(B)/client/ftola.o
+endif
+ifeq ($(ARCH),x64)
+  Q3OBJ += \
+    $(B)/client/snapvector.o \
+    $(B)/client/ftola.o
+endif
 
 ifeq ($(USE_VOIP),1)
 ifeq ($(USE_INTERNAL_SPEEX),1)
@@ -1572,13 +1589,11 @@
 ifeq ($(HAVE_VM_COMPILED),true)
   ifeq ($(ARCH),i386)
     Q3OBJ += \
-      $(B)/client/vm_x86.o \
-      $(B)/client/ftola.o
+      $(B)/client/vm_x86.o
   endif
   ifeq ($(ARCH),x86)
     Q3OBJ += \
-      $(B)/client/vm_x86.o \
-      $(B)/client/ftola.o
+      $(B)/client/vm_x86.o
   endif
   ifeq ($(ARCH),x86_64)
     ifeq ($(USE_OLD_VM64),1)
@@ -1587,8 +1602,7 @@
         $(B)/client/vm_x86_64_assembler.o
     else
       Q3OBJ += \
-        $(B)/client/vm_x86.o \
-        $(B)/client/ftola.o
+        $(B)/client/vm_x86.o
     endif
   endif
   ifeq ($(ARCH),amd64)
@@ -1598,8 +1612,7 @@
         $(B)/client/vm_x86_64_assembler.o
     else
       Q3OBJ += \
-        $(B)/client/vm_x86.o \
-        $(B)/client/ftola.o
+        $(B)/client/vm_x86.o
     endif
   endif
   ifeq ($(ARCH),x64)
@@ -1609,8 +1622,7 @@
         $(B)/client/vm_x86_64_assembler.o
     else
       Q3OBJ += \
-        $(B)/client/vm_x86.o \
-        $(B)/client/ftola.o
+        $(B)/client/vm_x86.o
     endif
   endif
   ifeq ($(ARCH),ppc)
@@ -1747,14 +1759,31 @@
 
 ifeq ($(ARCH),i386)
   Q3DOBJ += \
-      $(B)/ded/snapvectora.o \
-      $(B)/ded/matha.o
+      $(B)/ded/matha.o \
+      $(B)/ded/snapvector.o \
+      $(B)/ded/ftola.o
 endif
 ifeq ($(ARCH),x86)
   Q3DOBJ += \
-      $(B)/ded/snapvectora.o \
-      $(B)/ded/matha.o
+      $(B)/ded/matha.o \
+      $(B)/ded/snapvector.o \
+      $(B)/ded/ftola.o 
 endif
+ifeq ($(ARCH),x86_64)
+  Q3DOBJ += \
+      $(B)/ded/snapvector.o \
+      $(B)/ded/ftola.o 
+endif
+ifeq ($(ARCH),amd64)
+  Q3DOBJ += \
+      $(B)/ded/snapvector.o \
+      $(B)/ded/ftola.o 
+endif
+ifeq ($(ARCH),x64)
+  Q3DOBJ += \
+      $(B)/ded/snapvector.o \
+      $(B)/ded/ftola.o 
+endif
 
 ifeq ($(USE_INTERNAL_ZLIB),1)
 Q3DOBJ += \
@@ -1769,13 +1798,11 @@
 ifeq ($(HAVE_VM_COMPILED),true)
   ifeq ($(ARCH),i386)
     Q3DOBJ += \
-      $(B)/ded/vm_x86.o \
-      $(B)/ded/ftola.o
+      $(B)/ded/vm_x86.o
   endif
   ifeq ($(ARCH),x86)
     Q3DOBJ += \
-      $(B)/ded/vm_x86.o \
-      $(B)/ded/ftola.o
+      $(B)/ded/vm_x86.o
   endif
   ifeq ($(ARCH),x86_64)
     ifeq ($(USE_OLD_VM64),1)
@@ -1784,8 +1811,7 @@
         $(B)/ded/vm_x86_64_assembler.o
     else
       Q3DOBJ += \
-        $(B)/ded/vm_x86.o \
-        $(B)/ded/ftola.o
+        $(B)/ded/vm_x86.o
     endif
   endif
   ifeq ($(ARCH),amd64)
@@ -1795,8 +1821,7 @@
         $(B)/ded/vm_x86_64_assembler.o
     else
       Q3DOBJ += \
-        $(B)/ded/vm_x86.o \
-        $(B)/ded/ftola.o
+        $(B)/ded/vm_x86.o
     endif
   endif
   ifeq ($(ARCH),x64)
@@ -1806,8 +1831,7 @@
         $(B)/ded/vm_x86_64_assembler.o
     else
       Q3DOBJ += \
-        $(B)/ded/vm_x86.o \
-        $(B)/ded/ftola.o
+        $(B)/ded/vm_x86.o
     endif
   endif
   ifeq ($(ARCH),ppc)
@@ -2133,6 +2157,10 @@
 $(B)/client/%.o: $(ASMDIR)/%.s
 	$(DO_AS)
 
+# k8 so inline assembler knows about SSE
+$(B)/client/%.o: $(ASMDIR)/%.c
+	$(DO_CC) -march=k8
+
 $(B)/client/%.o: $(CDIR)/%.c
 	$(DO_CC)
 
@@ -2176,6 +2204,10 @@
 $(B)/ded/%.o: $(ASMDIR)/%.s
 	$(DO_AS)
 
+# k8 so inline assembler knows about SSE
+$(B)/ded/%.o: $(ASMDIR)/%.c
+	$(DO_CC) -march=k8
+
 $(B)/ded/%.o: $(SDIR)/%.c
 	$(DO_DED_CC)
 

Added: trunk/code/asm/ftola.asm
===================================================================
--- trunk/code/asm/ftola.asm	                        (rev 0)
+++ trunk/code/asm/ftola.asm	2011-06-13 09:56:39 UTC (rev 2020)
@@ -0,0 +1,90 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo at tjps.eu>
+; 
+; This file is part of Quake III Arena source code.
+; 
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+; 
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details.
+; 
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+; ===========================================================================
+
+; MASM ftol conversion functions using SSE or FPU
+; assume __cdecl calling convention is being used for x86, __fastcall for x64
+
+IFNDEF idx64
+.model flat, c
+ENDIF
+
+; .data
+
+; ifndef idx64
+;   fpucw WORD 0F7Fh
+; endif
+
+.code
+
+IFDEF idx64
+; qftol using SSE
+
+  qftolsse PROC				; long qftolsse(float f): the float is taken from xmm0
+    cvttss2si eax, xmm0		; convert with truncation, the integer result is returned in eax
+	ret
+  qftolsse ENDP
+
+  qvmftolsse PROC				; reached from VM generated code through Q_VMftol, takes no C arguments
+    movss xmm0, dword ptr [rdi + rbx * 4]	; load the float from the opStack slot at rdi + rbx * 4
+	cvttss2si eax, xmm0			; convert with truncation, the integer result is returned in eax
+	ret
+  qvmftolsse ENDP
+
+ELSE
+; qftol using FPU
+
+  qftolx87m macro src
+;    converts the float at src to an integer in place and returns it in eax; the commented out control word handling is not necessary, fpucw is set with _controlfp at startup
+;    sub esp, 2
+;    fnstcw word ptr [esp]
+;    fldcw fpucw
+    fld dword ptr src			; push the float onto the FPU stack
+	fistp dword ptr src			; pop it back as a 32 bit integer, rounded according to the current FPU control word
+;	fldcw [esp]
+	mov eax, src				; the integer is also the return value
+;	add esp, 2
+	ret							; the macro ends the procedure itself, so its users contain no ret of their own
+  endm
+  
+  qftolx87 PROC				; long qftolx87(float f), cdecl: f is the first stack argument
+; use this offset instead when the macro stores the FPU control word on the stack (esp is then 2 lower)
+;    qftolx87m [esp + 6]
+    qftolx87m [esp + 4]			; f lies directly above the return address; the macro supplies the ret
+  qftolx87 ENDP
+
+  qvmftolx87 PROC				; reached from VM generated code through Q_VMftol, takes no C arguments
+    qftolx87m [edi + ebx * 4]	; convert the opStack slot at edi + ebx * 4 in place, result also in eax; the macro supplies the ret
+  qvmftolx87 ENDP
+
+; qftol using SSE
+  qftolsse PROC				; long qftolsse(float f), cdecl: f is the first stack argument
+    movss xmm0, dword ptr [esp + 4]	; load f from directly above the return address
+    cvttss2si eax, xmm0		; convert with truncation, the integer result is returned in eax
+	ret
+  qftolsse ENDP
+
+  qvmftolsse PROC				; reached from VM generated code through Q_VMftol, takes no C arguments
+    movss xmm0, dword ptr [edi + ebx * 4]	; load the float from the opStack slot at edi + ebx * 4
+	cvttss2si eax, xmm0			; convert with truncation, the integer result is returned in eax
+	ret
+  qvmftolsse ENDP
+ENDIF
+
+end

Deleted: trunk/code/asm/ftola.s
===================================================================
--- trunk/code/asm/ftola.s	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/asm/ftola.s	2011-06-13 09:56:39 UTC (rev 2020)
@@ -1,157 +0,0 @@
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-===========================================================================
-*/
-
-//
-// qftol -- fast floating point to long conversion.
-//
-
-// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
-// <tim at ngus.net>
-
-#include "qasm.h"
-
-.data
-
-temp:   .single   0.0
-fpucw:  .long     0
-
-// Precision Control Field , 2 bits / 0x0300
-// PC24 0x0000   Single precision (24 bits).
-// PC53 0x0200   Double precision (53 bits).
-// PC64 0x0300   Extended precision (64 bits).
-
-// Rounding Control Field, 2 bits / 0x0C00
-// RCN  0x0000   Rounding to nearest (even).
-// RCD  0x0400   Rounding down (directed, minus).
-// RCU  0x0800   Rounding up (directed plus).
-// RC0  0x0C00   Rounding towards zero (chop mode).
-
-
-// rounding towards nearest (even)
-cw027F: .long     0x027F
-cw037F: .long     0x037F
-
-// rounding towards zero (chop mode)
-cw0E7F: .long     0x0E7F
-cw0F7F: .long     0x0F7F
-
-
-.text
-
-//
-// int qftol( void ) - default control word
-//
-
-.globl C(qftol)
-
-C(qftol):
-        fistpl temp
-        movl temp,%eax
-        ret
-
-
-//
-// int qftol027F( void ) - DirectX FPU
-//
-
-.globl C(qftol027F)
-
-C(qftol027F):
-        fnstcw fpucw
-        fldcw  cw027F
-        fistpl temp
-        fldcw  fpucw
-        movl temp,%eax
-        ret
-
-//
-// int qftol037F( void ) - Linux FPU
-//
-
-.globl C(qftol037F)
-
-C(qftol037F):
-        fnstcw fpucw
-        fldcw  cw037F
-        fistpl temp
-        fldcw  fpucw
-        movl temp,%eax
-        ret
-
-
-//
-// int qftol0F7F( void ) - ANSI
-//
-
-.globl C(qftol0F7F)
-
-C(qftol0F7F):
-        fnstcw fpucw
-        fldcw  cw0F7F
-        fistpl temp
-        fldcw  fpucw
-        movl temp,%eax
-        ret
-
-//
-// int qftol0E7F( void )
-//
-
-.globl C(qftol0E7F)
-
-C(qftol0E7F):
-        fnstcw fpucw
-        fldcw  cw0E7F
-        fistpl temp
-        fldcw  fpucw
-        movl temp,%eax
-        ret
-
-
-
-//
-// long Q_ftol( float q )
-//
-
-.globl C(Q_ftol)
-
-C(Q_ftol):
-        flds 4(%esp)
-        fistpl temp
-        movl temp,%eax
-        ret
-
-
-//
-// long qftol0F7F( float q ) - Linux FPU
-//
-
-.globl C(Q_ftol0F7F)
-
-C(Q_ftol0F7F):
-        fnstcw fpucw
-        flds 4(%esp)
-        fldcw  cw0F7F
-        fistpl temp
-        fldcw  fpucw
-        movl temp,%eax
-        ret

Added: trunk/code/asm/snapvector.asm
===================================================================
--- trunk/code/asm/snapvector.asm	                        (rev 0)
+++ trunk/code/asm/snapvector.asm	2011-06-13 09:56:39 UTC (rev 2020)
@@ -0,0 +1,107 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo at tjps.eu>
+; 
+; This file is part of Quake III Arena source code.
+; 
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+; 
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details.
+; 
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+; ===========================================================================
+
+; MASM version of snapvector conversion function using SSE or FPU
+; assume __cdecl calling convention is being used for x86, __fastcall for x64
+;
+; function prototype:
+; void qsnapvector(vec3_t vec)
+
+IFNDEF idx64
+.model flat, c
+ENDIF
+
+.data
+
+  ALIGN 16
+  ssemask DWORD 0FFFFFFFFh, 0FFFFFFFFh, 0FFFFFFFFh, 00000000h
+  ssecw DWORD 00001F80h
+
+IFNDEF idx64
+  fpucw WORD 037Fh
+ENDIF
+
+.code
+
+IFDEF idx64
+; qsnapvector using SSE
+
+  qsnapvectorsse PROC			; void qsnapvectorsse(vec3_t vec): the pointer to the vector is taken from rcx
+    sub rsp, 4					; reserve 4 bytes of stack for the saved control word
+	stmxcsr [rsp]				; save SSE control word
+	ldmxcsr ssecw				; set to round nearest
+
+    push rdi					; preserve the caller's rdi, it is overwritten below
+	mov rdi, rcx				; maskmovdqu uses rdi as implicit memory operand
+	movaps xmm1, ssemask		; initialize the mask register for maskmovdqu
+    movups xmm0, [rdi]			; here is stored our vector. Read 4 values in one go, the 4th is never written back
+	cvtps2dq xmm0, xmm0			; convert 4 single fp to int
+	cvtdq2ps xmm0, xmm0			; convert 4 int to single fp
+	maskmovdqu xmm0, xmm1		; write 3 values back to memory, ssemask has the 4th dword cleared
+	pop rdi						; restore the caller's rdi
+
+	ldmxcsr [rsp]				; restore sse control word to old value
+	add rsp, 4					; release the control word slot
+	ret
+  qsnapvectorsse ENDP
+
+ELSE
+
+  qsnapvectorsse PROC			; void qsnapvectorsse(vec3_t vec), cdecl: the pointer to the vector is the first stack argument
+	sub esp, 4					; reserve 4 bytes of stack for the saved control word
+	stmxcsr [esp]				; save SSE control word
+	ldmxcsr ssecw				; set to round nearest
+
+    push edi					; preserve the caller's edi, it is overwritten below
+	mov edi, dword ptr 12[esp]	; maskmovdqu uses edi as implicit memory operand; argument is at 12 = 4 (return address) + 4 (control word) + 4 (saved edi)
+	movaps xmm1, ssemask		; initialize the mask register for maskmovdqu
+    movups xmm0, [edi]			; here is stored our vector. Read 4 values in one go, the 4th is never written back
+	cvtps2dq xmm0, xmm0			; convert 4 single fp to int
+	cvtdq2ps xmm0, xmm0			; convert 4 int to single fp
+	maskmovdqu xmm0, xmm1		; write 3 values back to memory, ssemask has the 4th dword cleared
+	pop edi						; restore the caller's edi
+
+	ldmxcsr [esp]				; restore sse control word to old value
+	add esp, 4					; release the control word slot
+	ret
+  qsnapvectorsse ENDP
+
+  qroundx87 macro src			; rounds the float at src to a whole number in place; contains no ret
+	fld dword ptr src			; push the float onto the FPU stack
+	fistp dword ptr src			; pop it back as a 32 bit integer, rounded according to the current FPU control word
+	fild dword ptr src			; push that integer onto the FPU stack
+	fstp dword ptr src			; pop it back as a float
+  endm    
+  qsnapvectorx87 PROC			; void qsnapvectorx87(vec3_t vec), cdecl: x87 variant, selected when SSE2 is not available
+	mov eax, dword ptr 4[esp]	; eax = pointer to the vector (first stack argument)
+	sub esp, 2					; reserve 2 bytes of stack for the saved control word
+	fnstcw word ptr [esp]		; save FPU control word
+	fldcw fpucw					; load 037Fh, i.e. round to nearest
+	qroundx87 [eax]				; round the three components in place
+	qroundx87 4[eax]
+	qroundx87 8[eax]
+	fldcw [esp]					; restore FPU control word to old value
+	add esp, 2					; release the control word slot
+	ret							; qroundx87 contains no ret; without this one execution runs past the end of the procedure
+  qsnapvectorx87 ENDP
+
+ENDIF
+
+end

Deleted: trunk/code/asm/snapvectora.s
===================================================================
--- trunk/code/asm/snapvectora.s	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/asm/snapvectora.s	2011-06-13 09:56:39 UTC (rev 2020)
@@ -1,103 +0,0 @@
-/*
-===========================================================================
-Copyright (C) 1999-2005 Id Software, Inc.
-
-This file is part of Quake III Arena source code.
-
-Quake III Arena source code is free software; you can redistribute it
-and/or modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the License,
-or (at your option) any later version.
-
-Quake III Arena source code is distributed in the hope that it will be
-useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with Quake III Arena source code; if not, write to the Free Software
-Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-===========================================================================
-*/
-
-//
-// Sys_SnapVector NASM code (Andrew Henderson)
-// See win32/win_shared.c for the Win32 equivalent
-// This code is provided to ensure that the
-//  rounding behavior (and, if necessary, the
-//  precision) of DLL and QVM code are identical
-//  e.g. for network-visible operations.
-// See ftol.nasm for operations on a single float,
-//  as used in compiled VM and DLL code that does
-//  not use this system trap.
-//
-
-// 23/09/05 Ported to gas by intel2gas, best supporting actor Tim Angus
-// <tim at ngus.net>
-
-#include "qasm.h"
-
-#if id386
-.data
-
-fpucw:  .long   0
-cw037F: .long   0x037F
-
-.text
-
-// void Sys_SnapVector( float *v )
-.globl C(Sys_SnapVector)
-C(Sys_SnapVector):
-        pushl   %eax
-        pushl   %ebp
-        movl    %esp,%ebp
-
-        fnstcw  fpucw
-        movl    12(%ebp),%eax
-        fldcw   cw037F
-        flds    (%eax)
-        fistpl  (%eax)
-        fildl   (%eax)
-        fstps   (%eax)
-        flds    4(%eax)
-        fistpl  4(%eax)
-        fildl   4(%eax)
-        fstps   4(%eax)
-        flds    8(%eax)
-        fistpl  8(%eax)
-        fildl   8(%eax)
-        fstps   8(%eax)
-        fldcw   fpucw
-
-        popl %ebp
-        popl %eax
-        ret
-
-// void Sys_SnapVectorCW( float *v, unsigned short int cw )
-.globl C(Sys_SnapVectorCW)
-C(Sys_SnapVectorCW):
-        pushl   %eax
-        pushl   %ebp
-        movl    %esp,%ebp
-
-        fnstcw  fpucw
-        movl    12(%ebp),%eax
-        fldcw   16(%ebp)
-        flds    (%eax)
-        fistpl  (%eax)
-        fildl   (%eax)
-        fstps   (%eax)
-        flds    4(%eax)
-        fistpl  4(%eax)
-        fildl   4(%eax)
-        fstps   4(%eax)
-        flds    8(%eax)
-        fistpl  8(%eax)
-        fildl   8(%eax)
-        fstps   8(%eax)
-        fldcw   fpucw
-
-        popl %ebp
-        popl %eax
-        ret
-#endif

Added: trunk/code/asm/vm_x86_64.asm
===================================================================
--- trunk/code/asm/vm_x86_64.asm	                        (rev 0)
+++ trunk/code/asm/vm_x86_64.asm	2011-06-13 09:56:39 UTC (rev 2020)
@@ -0,0 +1,76 @@
+; ===========================================================================
+; Copyright (C) 2011 Thilo Schulz <thilo at tjps.eu>
+; 
+; This file is part of Quake III Arena source code.
+; 
+; Quake III Arena source code is free software; you can redistribute it
+; and/or modify it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 2 of the License,
+; or (at your option) any later version.
+; 
+; Quake III Arena source code is distributed in the hope that it will be
+; useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+; GNU General Public License for more details.
+; 
+; You should have received a copy of the GNU General Public License
+; along with Quake III Arena source code; if not, write to the Free Software
+; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+; ===========================================================================
+
+; Call wrapper for vm_x86 when built with MSVC in 64 bit mode,
+; since MSVC does not support inline x64 assembler code anymore.
+;
+; assumes __fastcall calling convention
+
+DoSyscall PROTO
+
+.code
+
+; Call to static void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
+
+qsyscall64 PROC					; entered from VM generated code with eax = syscallNum, esi = programStack, rdi = opStackBase, bl = opStackOfs, rcx = arg
+  sub rsp, 28h						; after this rsp will be aligned to 16 byte boundary
+  mov qword ptr [rsp + 20h], rcx	; 5th parameter "arg" is passed on stack
+  mov r9b, bl						; opStackOfs
+  mov r8, rdi						; opStackBase
+  mov edx, esi						; programStack
+  mov ecx, eax						; syscallNum
+  mov rax, DoSyscall				; store call address of DoSyscall in rax
+  call rax							; call the C function DoSyscall
+  add rsp, 28h						; release the stack space reserved above
+  ret
+qsyscall64 ENDP
+
+
+; Call to compiled code after setting up the register environment for the VM
+; prototype:
+; uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
+
+qvmcall64 PROC						; arguments: rcx = programStack pointer, rdx = opStack, r8 = instructionPointers, r9 = dataBase
+  push rsi							; push non-volatile registers to stack
+  push rdi
+  push rbx
+  ; need to save pointer in rcx so we can write back the programStack value to caller
+  push rcx
+
+  ; registers r8 and r9 have correct value already thanx to __fastcall
+  xor rbx, rbx						; opStackOfs starts out being 0
+  mov rdi, rdx						; opStack
+  mov esi, dword ptr [rcx]			; programStack
+  
+  call qword ptr [r8]				; instructionPointers[0] is also the entry point
+
+  pop rcx							; get the programStack pointer back
+
+  mov dword ptr [rcx], esi			; write back the programStack value
+  mov al, bl						; return opStack offset
+
+  pop rbx							; restore the registers saved on entry, in reverse order
+  pop rdi
+  pop rsi
+  
+  ret
+qvmcall64 ENDP
+
+end

Modified: trunk/code/client/cl_cgame.c
===================================================================
--- trunk/code/client/cl_cgame.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/client/cl_cgame.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -660,7 +660,7 @@
 	case CG_REAL_TIME:
 		return Com_RealTime( VMA(1) );
 	case CG_SNAPVECTOR:
-		Sys_SnapVector( VMA(1) );
+		Q_SnapVector(VMA(1));
 		return 0;
 
 	case CG_CIN_PLAYCINEMATIC:

Modified: trunk/code/client/snd_wavelet.c
===================================================================
--- trunk/code/client/snd_wavelet.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/client/snd_wavelet.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -22,8 +22,6 @@
 
 #include "snd_local.h"
 
-long myftol( float f );
-
 #define C0 0.4829629131445341
 #define C1 0.8365163037378079
 #define C2 0.2241438680420134

Modified: trunk/code/qcommon/common.c
===================================================================
--- trunk/code/qcommon/common.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/qcommon/common.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -90,6 +90,14 @@
 cvar_t  *com_homepath;
 cvar_t	*com_busyWait;
 
+#if defined(idx64) 
+  void (*Q_VMftol)(void);	// float to int routine for VM generated code; assigned in Com_DetectSSE()
+#elif defined(id386)
+	long (QDECL *Q_ftol)(float f);	// float to int conversion; assigned in Com_DetectSSE()
+	void (QDECL *Q_VMftol)(void);	// float to int routine for VM generated code; assigned in Com_DetectSSE()
+	void (QDECL *Q_SnapVector)(vec3_t vec);	// rounds the components of vec; assigned in Com_DetectSSE()
+#endif
+
 // com_speeds times
 int		time_game;
 int		time_frontend;		// renderer frontend time
@@ -2567,6 +2575,53 @@
 
 /*
 =================
+Com_DetectSSE
+Find out whether we have SSE support for Q_ftol function
+=================
+*/
+
+#if defined(id386) || defined(idx64)
+
+static void Com_DetectSSE(void)	// points Q_ftol, Q_VMftol and Q_SnapVector at their SSE or x87 implementations
+{
+#ifndef idx64	// runtime detection is compiled for 32 bit builds only; idx64 builds keep just the two SSE statements below
+	cpuFeatures_t feat;
+	
+	feat = Sys_GetProcessorFeatures();
+
+	if(feat & CF_SSE)
+	{
+		if(feat & CF_SSE2)	// the SSE snapvector routine is only chosen when SSE2 is reported as well
+			Q_SnapVector = qsnapvectorsse;
+		else
+			Q_SnapVector = qsnapvectorx87;
+
+		Q_ftol = qftolsse;
+#endif
+		Q_VMftol = qvmftolsse;	// on idx64 this is the only pointer to set, Q_ftol and Q_SnapVector are macros there
+
+		Com_Printf("Have SSE support\n");
+#ifndef idx64	// closes the if block above and adds the x87 fallback, 32 bit builds only
+	}
+	else
+	{
+		Q_ftol = qftolx87;
+		Q_VMftol = qvmftolx87;
+		Q_SnapVector = qsnapvectorx87;
+
+		Com_Printf("No SSE support on this machine\n");
+	}
+#endif
+}
+
+#else
+
+#define Com_DetectSSE()
+
+#endif
+
+/*
+=================
 Com_InitRand
 Seed the random number generator, if possible with an OS supplied random seed.
 =================
@@ -2615,6 +2670,8 @@
 //	Swap_Init ();
 	Cbuf_Init ();
 
+	Com_DetectSSE();
+
 	// override anything from the config files with command line args
 	Com_StartupVariable( NULL );
 

Modified: trunk/code/qcommon/q_platform.h
===================================================================
--- trunk/code/qcommon/q_platform.h	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/qcommon/q_platform.h	2011-06-13 09:56:39 UTC (rev 2020)
@@ -87,7 +87,7 @@
 #define OS_STRING "win_mingw64"
 #endif
 
-#define ID_INLINE inline
+#define ID_INLINE __inline
 #define PATH_SEP '\\'
 
 #if defined( __WIN64__ ) 

Modified: trunk/code/qcommon/q_shared.h
===================================================================
--- trunk/code/qcommon/q_shared.h	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/qcommon/q_shared.h	2011-06-13 09:56:39 UTC (rev 2020)
@@ -419,6 +419,58 @@
 
 #define	IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask)
 
+int Q_isnan(float x);
+
+#ifdef idx64	/* idx64 builds: the SSE routines are used directly */
+  extern long qftolsse(float f);
+  extern void qvmftolsse(void);
+  extern void qsnapvectorsse(vec3_t vec);
+
+  #define Q_ftol qftolsse	/* resolved at compile time, no function pointer */
+  #define Q_SnapVector qsnapvectorsse	/* resolved at compile time, no function pointer */
+
+  extern void (*Q_VMftol)(void);	/* defined in common.c, set in Com_DetectSSE() */
+#elif defined(id386)	/* id386 builds: SSE or x87 routines, picked at startup */
+  extern long QDECL qftolx87(float f);
+  extern long QDECL qftolsse(float f);
+  extern void QDECL qvmftolx87(void);
+  extern void QDECL qvmftolsse(void);
+  extern void QDECL qsnapvectorx87(vec3_t vec);
+  extern void QDECL qsnapvectorsse(vec3_t vec);
+
+  extern long (QDECL *Q_ftol)(float f);	/* the three pointers are defined in common.c and set in Com_DetectSSE() */
+  extern void (QDECL *Q_VMftol)(void);
+  extern void (QDECL *Q_SnapVector)(vec3_t vec);
+#else	/* every other platform: C library fallback, Q_VMftol is not provided here */
+  #define Q_ftol(f) lrintf((f))
+  #define Q_SnapVector(vec)\
+	do\
+	{\
+		vec3_t *temp = (vec);\
+		\
+		(*temp)[0] = round((*temp)[0]);\
+		(*temp)[1] = round((*temp)[1]);\
+		(*temp)[2] = round((*temp)[2]);\
+	} while(0)
+#endif
+/*
+// if your system does not have lrintf() and round() you can try this block. Please also open a bug report at bugzilla.icculus.org
+// or write a mail to the ioq3 mailing list.
+#else
+  #define Q_ftol(f) ((long) (f))
+  #define Q_round(f) do { if((f) < 0) (f) -= 0.5f; else (f) += 0.5f; (f) = Q_ftol((f)); } while(0)
+  #define Q_SnapVector(vec) \
+	do\
+	{\
+		vec3_t *temp = (vec);\
+		\
+		Q_round((*temp)[0]);\
+		Q_round((*temp)[1]);\
+		Q_round((*temp)[2]);\
+	} while(0)
+#endif
+*/
+
 #if idppc
 
 static ID_INLINE float Q_rsqrt( float number ) {
@@ -637,9 +689,7 @@
 void MatrixMultiply(float in1[3][3], float in2[3][3], float out[3][3]);
 void AngleVectors( const vec3_t angles, vec3_t forward, vec3_t right, vec3_t up);
 void PerpendicularVector( vec3_t dst, const vec3_t src );
-int Q_isnan( float x );
 
-
 #ifndef MAX
 #define MAX(x,y) ((x)>(y)?(x):(y))
 #endif

Modified: trunk/code/qcommon/vm_x86.c
===================================================================
--- trunk/code/qcommon/vm_x86.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/qcommon/vm_x86.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -67,29 +67,6 @@
 
 #define FTOL_PTR
 
-#ifdef _MSC_VER
-
-#if defined( FTOL_PTR )
-int _ftol( float );
-static	void	*ftolPtr = _ftol;
-#endif
-
-#else // _MSC_VER
-
-#if defined( FTOL_PTR )
-
-int qftol( void );
-int qftol027F( void );
-int qftol037F( void );
-int qftol0E7F( void );
-int qftol0F7F( void );
-
-
-static	void	*ftolPtr = qftol0F7F;
-#endif // FTOL_PTR
-
-#endif
-
 static	int	instruction, pass;
 static	int	lastConst = 0;
 static	int	oc0, oc1, pop0, pop1;
@@ -111,15 +88,17 @@
 
 static	ELastCommand	LastCommand;
 
-static inline int iss8(int32_t v)
+static int iss8(int32_t v)
 {
 	return (SCHAR_MIN <= v && v <= SCHAR_MAX);
 }
 
-static inline int isu8(uint32_t v)
+#if 0
+static int isu8(uint32_t v)
 {
 	return (v <= UCHAR_MAX);
 }
+#endif
 
 static int NextConstant4(void)
 {
@@ -436,30 +415,37 @@
 =================
 */
 
+#if defined(_MSC_VER) && defined(idx64)
+
+extern void qsyscall64(void);
+extern uint8_t qvmcall64(int *programStack, int *opStack, intptr_t *instructionPointers, byte *dataBase);
+
+// Microsoft does not support inline assembler on x64 platforms. Meh.
+void DoSyscall(int syscallNum, int programStack, int *opStackBase, uint8_t opStackOfs, intptr_t arg)
+{
+#else
 static void DoSyscall(void)
 {
-	vm_t *savedVM;
-
 	int syscallNum;
 	int programStack;
 	int *opStackBase;
-	int opStackOfs;
+	uint8_t opStackOfs;
 	intptr_t arg;
+#endif
 
+	vm_t *savedVM;
+
 #ifdef _MSC_VER
+  #ifndef idx64
 	__asm
 	{
 		mov	dword ptr syscallNum, eax
 		mov	dword ptr programStack, esi
-		mov	dword ptr opStackOfs, ebx
-#ifdef idx64
-		mov	qword ptr opStackBase, rdi
-		mov	qword ptr arg, rcx
-#else
+		mov	byte ptr opStackOfs, bl
 		mov	dword ptr opStackBase, edi
 		mov	dword ptr arg, ecx
-#endif
 	}
+  #endif
 #else
 	__asm__ volatile(
 		""
@@ -539,8 +525,13 @@
 int EmitCallDoSyscall(vm_t *vm)
 {
 	// use edx register to store DoSyscall address
+#if defined(_MSC_VER) && defined(idx64)
+	EmitRexString(0x48, "BA");		// mov edx, qsyscall64
+	EmitPtr(qsyscall64);
+#else
 	EmitRexString(0x48, "BA");		// mov edx, DoSyscall
 	EmitPtr(DoSyscall);
+#endif
 
 	// Push important registers to stack as we can't really make
 	// any assumptions about calling conventions.
@@ -1629,9 +1620,8 @@
 			EmitString("DB 1C 9F");				// fistp dword ptr [edi + ebx * 4]
 #else // FTOL_PTR
 			// call the library conversion function
-			EmitString("D9 04 9F");				// fld dword ptr [edi + ebx * 4]
-			EmitRexString(0x48, "BA");			// mov edx, ftolPtr
-			EmitPtr(ftolPtr);
+			EmitRexString(0x48, "BA");			// mov edx, Q_VMftol
+			EmitPtr(Q_VMftol);
 			EmitRexString(0x48, "FF D2");			// call edx
 			EmitCommand(LAST_COMMAND_MOV_STACK_EAX);	// mov dword ptr [edi + ebx * 4], eax
 #endif
@@ -1746,12 +1736,12 @@
 
 int VM_CallCompiled(vm_t *vm, int *args)
 {
-	int		stack[OPSTACK_SIZE + 7];
+	byte	stack[OPSTACK_SIZE * 4 + 15];
 	void	*entryPoint;
 	int		programCounter;
 	int		programStack, stackOnEntry;
 	byte	*image;
-	int	*opStack, *opStackOnEntry;
+	int	*opStack;
 	int		opStackOfs;
 
 	currentVM = vm;
@@ -1784,35 +1774,16 @@
 
 	// off we go into generated code...
 	entryPoint = vm->codeBase + vm->entryOfs;
-	opStack = opStackOnEntry = PADP(stack, 8);
+	opStack = PADP(stack, 16);
 	*opStack = 0xDEADBEEF;
 	opStackOfs = 0;
 
 #ifdef _MSC_VER
+  #ifdef idx64
+	opStackOfs = qvmcall64(&programStack, opStack, vm->instructionPointers, vm->dataBase);
+  #else
 	__asm
 	{
-#ifdef idx64
-		// non-volatile registers according to x64 calling convention
-		push	rsi
-		push	rdi
-		push	rbx
-		
-		mov	esi, dword ptr programStack
-		mov	rdi, qword ptr opStack
-		mov	ebx, dword ptr opStackOfs
-		mov	r8, qword ptr vm->instructionPointers
-		mov	r9, qword ptr vm->dataBase
-
-		call	entryPoint
-
-		mov	dword ptr opStackOfs, ebx
-		mov	qword ptr opStack, rdi
-		mov	dword ptr programStack, esi
-		
-		pop	rbx
-		pop	rdi
-		pop	rsi
-#else
 		pushad
 
 		mov	esi, dword ptr programStack
@@ -1826,8 +1797,8 @@
 		mov	dword ptr programStack, esi
 		
 		popad
-#endif		
 	}
+  #endif		
 #elif defined(idx64)
 	__asm__ volatile(
 		"movq %5, %%rax\r\n"
@@ -1855,7 +1826,7 @@
 	);
 #endif
 
-	if(opStack != opStackOnEntry || opStackOfs != 1 || *opStack != 0xDEADBEEF)
+	if(opStackOfs != 1 || *opStack != 0xDEADBEEF)
 	{
 		Com_Error(ERR_DROP, "opStack corrupted in compiled code");
 	}

Modified: trunk/code/renderer/tr_light.c
===================================================================
--- trunk/code/renderer/tr_light.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/renderer/tr_light.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -359,9 +359,9 @@
 	}
 
 	// save out the byte packet version
-	((byte *)&ent->ambientLightInt)[0] = myftol( ent->ambientLight[0] );
-	((byte *)&ent->ambientLightInt)[1] = myftol( ent->ambientLight[1] );
-	((byte *)&ent->ambientLightInt)[2] = myftol( ent->ambientLight[2] );
+	((byte *)&ent->ambientLightInt)[0] = Q_ftol(ent->ambientLight[0]);
+	((byte *)&ent->ambientLightInt)[1] = Q_ftol(ent->ambientLight[1]);
+	((byte *)&ent->ambientLightInt)[2] = Q_ftol(ent->ambientLight[2]);
 	((byte *)&ent->ambientLightInt)[3] = 0xff;
 	
 	// transform the direction to local space

Modified: trunk/code/renderer/tr_local.h
===================================================================
--- trunk/code/renderer/tr_local.h	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/renderer/tr_local.h	2011-06-13 09:56:39 UTC (rev 2020)
@@ -34,14 +34,6 @@
 #define GL_INDEX_TYPE		GL_UNSIGNED_INT
 typedef unsigned int glIndex_t;
 
-// fast float to int conversion
-#if id386 && !defined(__GNUC__)
-long myftol( float f );
-#else
-#define	myftol(x) ((int)(x))
-#endif
-
-
 // everything that is needed by the backend needs
 // to be double buffered to allow it to run in
 // parallel on a dual cpu machine

Modified: trunk/code/renderer/tr_mesh.c
===================================================================
--- trunk/code/renderer/tr_mesh.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/renderer/tr_mesh.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -218,7 +218,7 @@
 		}
 
 		flod *= tr.currentModel->numLods;
-		lod = myftol( flod );
+		lod = Q_ftol(flod);
 
 		if ( lod < 0 )
 		{

Modified: trunk/code/renderer/tr_shade.c
===================================================================
--- trunk/code/renderer/tr_shade.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/renderer/tr_shade.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -233,7 +233,7 @@
 
 	// it is necessary to do this messy calc to make sure animations line up
 	// exactly with waveforms of the same frequency
-	index = myftol( tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE );
+	index = Q_ftol(tess.shaderTime * bundle->imageAnimationSpeed * FUNCTABLE_SIZE);
 	index >>= FUNCTABLE_SIZE2;
 
 	if ( index < 0 ) {
@@ -689,9 +689,9 @@
 				}
 			}
 			clipBits[i] = clip;
-			colors[0] = myftol(floatColor[0] * modulate);
-			colors[1] = myftol(floatColor[1] * modulate);
-			colors[2] = myftol(floatColor[2] * modulate);
+			colors[0] = Q_ftol(floatColor[0] * modulate);
+			colors[1] = Q_ftol(floatColor[1] * modulate);
+			colors[2] = Q_ftol(floatColor[2] * modulate);
 			colors[3] = 255;
 		}
 

Modified: trunk/code/renderer/tr_shade_calc.c
===================================================================
--- trunk/code/renderer/tr_shade_calc.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/renderer/tr_shade_calc.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -27,7 +27,7 @@
 #endif
 
 
-#define	WAVEVALUE( table, base, amplitude, phase, freq )  ((base) + table[ myftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude))
+#define	WAVEVALUE( table, base, amplitude, phase, freq )  ((base) + table[ Q_ftol( ( ( (phase) + tess.shaderTime * (freq) ) * FUNCTABLE_SIZE ) ) & FUNCTABLE_MASK ] * (amplitude))
 
 static float *TableForFunc( genFunc_t func ) 
 {
@@ -699,7 +699,7 @@
 		glow = 1;
 	}
 
-	v = myftol( 255 * glow );
+	v = Q_ftol(255 * glow);
 	color[0] = color[1] = color[2] = v;
 	color[3] = 255;
 	v = *(int *)color;
@@ -1018,21 +1018,6 @@
 }
 
 
-
-
-
-
-#if id386 && !defined(__GNUC__)
-
-long myftol( float f ) {
-	static int tmp;
-	__asm fld f
-	__asm fistp tmp
-	__asm mov eax, tmp
-}
-
-#endif
-
 /*
 ** RB_CalcSpecularAlpha
 **
@@ -1195,19 +1180,19 @@
 			*(int *)&colors[i*4] = ambientLightInt;
 			continue;
 		} 
-		j = myftol( ambientLight[0] + incoming * directedLight[0] );
+		j = Q_ftol(ambientLight[0] + incoming * directedLight[0]);
 		if ( j > 255 ) {
 			j = 255;
 		}
 		colors[i*4+0] = j;
 
-		j = myftol( ambientLight[1] + incoming * directedLight[1] );
+		j = Q_ftol(ambientLight[1] + incoming * directedLight[1]);
 		if ( j > 255 ) {
 			j = 255;
 		}
 		colors[i*4+1] = j;
 
-		j = myftol( ambientLight[2] + incoming * directedLight[2] );
+		j = Q_ftol(ambientLight[2] + incoming * directedLight[2]);
 		if ( j > 255 ) {
 			j = 255;
 		}

Modified: trunk/code/renderer/tr_sky.c
===================================================================
--- trunk/code/renderer/tr_sky.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/renderer/tr_sky.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -553,10 +553,10 @@
 			continue;
 		}
 
-		sky_mins_subd[0] = myftol( sky_mins[0][i] * HALF_SKY_SUBDIVISIONS );
-		sky_mins_subd[1] = myftol( sky_mins[1][i] * HALF_SKY_SUBDIVISIONS );
-		sky_maxs_subd[0] = myftol( sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS );
-		sky_maxs_subd[1] = myftol( sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS );
+		sky_mins_subd[0] = Q_ftol(sky_mins[0][i] * HALF_SKY_SUBDIVISIONS);
+		sky_mins_subd[1] = Q_ftol(sky_mins[1][i] * HALF_SKY_SUBDIVISIONS);
+		sky_maxs_subd[0] = Q_ftol(sky_maxs[0][i] * HALF_SKY_SUBDIVISIONS);
+		sky_maxs_subd[1] = Q_ftol(sky_maxs[1][i] * HALF_SKY_SUBDIVISIONS);
 
 		if ( sky_mins_subd[0] < -HALF_SKY_SUBDIVISIONS ) 
 			sky_mins_subd[0] = -HALF_SKY_SUBDIVISIONS;

Modified: trunk/code/server/sv_game.c
===================================================================
--- trunk/code/server/sv_game.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/server/sv_game.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -426,7 +426,7 @@
 	case G_REAL_TIME:
 		return Com_RealTime( VMA(1) );
 	case G_SNAPVECTOR:
-		Sys_SnapVector( VMA(1) );
+		Q_SnapVector(VMA(1));
 		return 0;
 
 		//====================================

Modified: trunk/code/sys/sys_main.c
===================================================================
--- trunk/code/sys/sys_main.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/sys/sys_main.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -416,8 +416,8 @@
 #2 look in fs_basepath
 =================
 */
-void *Sys_LoadDll( const char *name,
-	intptr_t (**entryPoint)(int, ...),
+void * QDECL Sys_LoadDll( const char *name,
+	intptr_t (QDECL **entryPoint)(int, ...),
 	intptr_t (*systemcalls)(intptr_t, ...) )
 {
 	void  *libHandle;

Modified: trunk/code/sys/sys_unix.c
===================================================================
--- trunk/code/sys/sys_unix.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/sys/sys_unix.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -36,6 +36,7 @@
 #include <pwd.h>
 #include <libgen.h>
 #include <fcntl.h>
+#include <fenv.h>
 
 qboolean stdinIsATTY;
 
@@ -125,33 +126,8 @@
 	return curtime;
 }
 
-#if !id386
 /*
 ==================
-fastftol
-==================
-*/
-long fastftol( float f )
-{
-	return (long)f;
-}
-
-/*
-==================
-Sys_SnapVector
-==================
-*/
-void Sys_SnapVector( float *v )
-{
-	v[0] = rint(v[0]);
-	v[1] = rint(v[1]);
-	v[2] = rint(v[2]);
-}
-#endif
-
-
-/*
-==================
 Sys_RandomBytes
 ==================
 */
@@ -756,6 +732,12 @@
 	// NOP
 }
 
+void Sys_SetFloatEnv(void)
+{
+	// rounding towards 0
+	fesetround(FE_TOWARDZERO);
+}
+
 /*
 ==============
 Sys_PlatformInit

Modified: trunk/code/sys/sys_win32.c
===================================================================
--- trunk/code/sys/sys_win32.c	2011-06-12 15:28:44 UTC (rev 2019)
+++ trunk/code/sys/sys_win32.c	2011-06-13 09:56:39 UTC (rev 2020)
@@ -37,6 +37,7 @@
 #include <wincrypt.h>
 #include <shlobj.h>
 #include <psapi.h>
+#include <float.h>
 
 // Used to determine where to store user-specific files
 static char homePath[ MAX_OSPATH ] = { 0 };
@@ -45,14 +46,38 @@
 static UINT timerResolution = 0;
 #endif
 
-#ifdef __WIN64__
-void Sys_SnapVector( float *v )
+/*
+================
+Sys_SetFPUCW
+Set FPU control word to default value
+================
+*/
+
+#ifndef _RC_CHOP
+// mingw doesn't seem to have these defined :(
+
+  #define _MCW_EM	0x0008001fU
+  #define _MCW_RC	0x00000300U
+  #define _MCW_PC	0x00030000U
+  #define _RC_CHOP	0x00000300U
+  #define _PC_53	0x00010000U
+  
+  unsigned int _controlfp(unsigned int new, unsigned int mask);
+#endif
+
+#define FPUCWMASK1 (_MCW_RC | _MCW_EM)
+#define FPUCW (_RC_CHOP | _MCW_EM | _PC_53)
+
+#ifdef idx64
+#define FPUCWMASK	(FPUCWMASK1)
+#else
+#define FPUCWMASK	(FPUCWMASK1 | _MCW_PC)
+#endif
+
+void Sys_SetFloatEnv(void)
 {
-        v[0] = rint(v[0]);
-        v[1] = rint(v[1]);
-        v[2] = rint(v[2]);
+	_controlfp(FPUCW, FPUCWMASK);
 }
-#endif
 
 /*
 ================
@@ -140,36 +165,8 @@
 	return sys_curtime;
 }
 
-#ifndef __GNUC__ //see snapvectora.s
 /*
 ================
-Sys_SnapVector
-================
-*/
-void Sys_SnapVector( float *v )
-{
-	int i;
-	float f;
-
-	f = *v;
-	__asm	fld		f;
-	__asm	fistp	i;
-	*v = i;
-	v++;
-	f = *v;
-	__asm	fld		f;
-	__asm	fistp	i;
-	*v = i;
-	v++;
-	f = *v;
-	__asm	fld		f;
-	__asm	fistp	i;
-	*v = i;
-}
-#endif
-
-/*
-================
 Sys_RandomBytes
 ================
 */
@@ -719,9 +716,12 @@
 {
 #ifndef DEDICATED
 	TIMECAPS ptc;
-	
 	const char *SDL_VIDEODRIVER = getenv( "SDL_VIDEODRIVER" );
+#endif
 
+	Sys_SetFloatEnv();
+
+#ifndef DEDICATED
 	if( SDL_VIDEODRIVER )
 	{
 		Com_Printf( "SDL_VIDEODRIVER is externally set to \"%s\", "



More information about the quake3-commits mailing list