r1122 - in trunk: . code/qcommon

DONOTREPLY at icculus.org DONOTREPLY at icculus.org
Thu Aug 16 07:02:22 EDT 2007


Author: ludwig
Date: 2007-08-16 07:02:21 -0400 (Thu, 16 Aug 2007)
New Revision: 1122

Added:
   trunk/code/qcommon/vm_x86_64_assembler.c
Modified:
   trunk/Makefile
   trunk/code/qcommon/vm_x86_64.c
Log:
new x86_64 vm that doesn't use gas


Modified: trunk/Makefile
===================================================================
--- trunk/Makefile	2007-07-27 21:52:31 UTC (rev 1121)
+++ trunk/Makefile	2007-08-16 11:02:21 UTC (rev 1122)
@@ -1059,7 +1059,7 @@
     Q3OBJ += $(B)/client/vm_x86.o
   endif
   ifeq ($(ARCH),x86_64)
-    Q3OBJ += $(B)/client/vm_x86_64.o
+    Q3OBJ += $(B)/client/vm_x86_64.o $(B)/client/vm_x86_64_assembler.o
   endif
   ifeq ($(ARCH),ppc)
     Q3OBJ += $(B)/client/vm_ppc.o
@@ -1223,7 +1223,7 @@
     Q3DOBJ += $(B)/ded/vm_x86.o
   endif
   ifeq ($(ARCH),x86_64)
-    Q3DOBJ += $(B)/ded/vm_x86_64.o
+    Q3DOBJ += $(B)/ded/vm_x86_64.o $(B)/client/vm_x86_64_assembler.o
   endif
   ifeq ($(ARCH),ppc)
     Q3DOBJ += $(B)/ded/vm_ppc.o

Modified: trunk/code/qcommon/vm_x86_64.c
===================================================================
--- trunk/code/qcommon/vm_x86_64.c	2007-07-27 21:52:31 UTC (rev 1121)
+++ trunk/code/qcommon/vm_x86_64.c	2007-08-16 11:02:21 UTC (rev 1122)
@@ -28,10 +28,16 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <sys/time.h>
+#include <time.h>
 #include <fcntl.h>
 #include <errno.h>
 #include <unistd.h>
+#include <stdarg.h>
 
+//#define USE_GAS
+//#define DEBUG_VM
+
 #ifdef DEBUG_VM
 #define Dfprintf(fd, args...) fprintf(fd, ##args)
 static FILE* qdasmout;
@@ -39,6 +45,19 @@
 #define Dfprintf(args...)
 #endif
 
+#define VM_X86_64_MMAP
+
+#ifndef USE_GAS
+void assembler_set_output(char* buf);
+size_t assembler_get_code_size(void);
+void assembler_init(int pass);
+void assemble_line(const char* input, size_t len);
+#ifdef Dfprintf
+#undef Dfprintf
+#define Dfprintf(args...)
+#endif
+#endif // USE_GAS
+
 static void VM_Destroy_Compiled(vm_t* self);
 
 /*
@@ -206,8 +225,29 @@
 	[OP_BLOCK_COPY] = 4,
 };
 
+#ifdef USE_GAS
 #define emit(x...) \
 	do { fprintf(fh_s, ##x); fputc('\n', fh_s); } while(0)
+#else
+void emit(const char* fmt, ...)
+{
+	va_list ap;
+	char line[4096];
+	va_start(ap, fmt);
+	vsnprintf(line, sizeof(line), fmt, ap);
+	va_end(ap);
+	assemble_line(line, strlen(line));
+}
+#endif // USE_GAS
+
+#ifdef USE_GAS
+#define JMPIARG \
+	emit("jmp i_%08x", iarg);
+#else
+#define JMPIARG \
+	emit("movq $%lu, %%rax", vm->codeBase+vm->instructionPointers[iarg]); \
+	emit("jmpq *%rax");
+#endif
  
 // integer compare and jump
 #define IJ(op) \
@@ -215,7 +255,8 @@
 	emit("movl 4(%%rsi), %%eax"); \
 	emit("cmpl 8(%%rsi), %%eax"); \
 	emit(op " i_%08x", instruction+1); \
-	emit("jmp i_%08x", iarg);
+	JMPIARG \
+	neednilabel = 1;
 
 #ifdef USE_X87
 #define FJ(bits, op) \
@@ -225,7 +266,8 @@
 	emit("fnstsw %%ax");\
 	emit("testb $" #bits ", %%ah");\
 	emit(op " i_%08x", instruction+1);\
-	emit("jmp i_%08x", iarg);
+	JMPIARG \
+	neednilabel = 1;
 #define XJ(x)
 #else
 #define FJ(x, y)
@@ -235,7 +277,8 @@
 	emit("ucomiss 8(%%rsi), %%xmm0");\
 	emit("jp i_%08x", instruction+1);\
 	emit(op " i_%08x", instruction+1);\
-	emit("jmp i_%08x", iarg);
+	JMPIARG \
+	neednilabel = 1;
 #endif
 
 #define SIMPLE(op) \
@@ -292,9 +335,14 @@
 
 static void* getentrypoint(vm_t* vm)
 {
+#ifdef USE_GAS
        return vm->codeBase+64; // skip ELF header
+#else
+       return vm->codeBase;
+#endif // USE_GAS
 }
 
+#ifdef USE_GAS
 char* mmapfile(const char* fn, size_t* size)
 {
 	int fd = -1;
@@ -382,6 +430,7 @@
 
 	return size;
 }
+#endif // USE_GAS
 
 static void block_copy_vm(unsigned dest, unsigned src, unsigned count)
 {
@@ -410,8 +459,13 @@
 	char* code;
 	unsigned iarg = 0;
 	unsigned char barg = 0;
+	int neednilabel = 0;
+	struct timeval tvstart =  {0, 0};
+
+#ifdef USE_GAS
+	byte* compiledcode;
+	int   compiledsize;
 	void* entryPoint;
-
 	char fn_s[2*MAX_QPATH]; // output file for assembler code
 	char fn_o[2*MAX_QPATH]; // file written by as
 #ifdef DEBUG_VM
@@ -419,16 +473,16 @@
 #endif
 	FILE* fh_s;
 	int fd_s, fd_o;
-	byte* compiledcode;
-	int   compiledsize;
 
+	gettimeofday(&tvstart, NULL);
+
 	Com_Printf("compiling %s\n", vm->name);
 
 #ifdef DEBUG_VM
 	snprintf(fn_s, sizeof(fn_s), "%.63s.s", vm->name);
 	snprintf(fn_o, sizeof(fn_o), "%.63s.o", vm->name);
-	fd_s = open(fn_s, O_CREAT|O_WRONLY, 0644);
-	fd_o = open(fn_o, O_CREAT|O_WRONLY, 0644);
+	fd_s = open(fn_s, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+	fd_o = open(fn_o, O_CREAT|O_WRONLY|O_TRUNC, 0644);
 #else
 	snprintf(fn_s, sizeof(fn_s), "/tmp/%.63s.s_XXXXXX", vm->name);
 	snprintf(fn_o, sizeof(fn_o), "/tmp/%.63s.o_XXXXXX", vm->name);
@@ -462,25 +516,50 @@
 		return;
 	}
 
-	// translate all instructions
-	pc = 0;
-	code = (char *)header + header->codeOffset;
-
 	emit("start:");
 	emit("or %%r8, %%r8"); // check whether to set up instruction pointers
 	emit("jnz main");
 	emit("jmp setupinstructionpointers");
 
 	emit("main:");
+#else  // USE_GAS
+	int pass;
+	size_t compiledOfs = 0;
 
+	gettimeofday(&tvstart, NULL);
+
+	for (pass = 0; pass < 2; ++pass) {
+
+	if(pass)
+	{
+		compiledOfs = assembler_get_code_size();
+		vm->codeLength = compiledOfs;
+		vm->codeBase = mmap(NULL, compiledOfs, PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+		if(vm->codeBase == (void*)-1)
+			Com_Error(ERR_DROP, "VM_CompileX86: can't mmap memory");
+
+		assembler_set_output((char*)vm->codeBase);
+	}
+
+	assembler_init(pass);
+
+#endif // USE_GAS
+
+	// translate all instructions
+	pc = 0;
+	code = (char *)header + header->codeOffset;
+
 	for ( instruction = 0; instruction < header->instructionCount; ++instruction )
 	{
 		op = code[ pc ];
 		++pc;
 
-		vm->instructionPointers[instruction] = pc;
+#ifndef USE_GAS
+		vm->instructionPointers[instruction] = assembler_get_code_size();
+#endif
 
-#if 0
+		/* store current instruction number in r15 for debugging */
+#if 1
 		emit("nop");
 		emit("movq $%d, %%r15", instruction);
 		emit("nop");
@@ -501,7 +580,17 @@
 		{
 			Dfprintf(qdasmout, "%s\n", opnames[op]);
 		}
+
+#ifdef USE_GAS
 		emit("i_%08x:", instruction);
+#else
+		if(neednilabel)
+		{
+			emit("i_%08x:", instruction);
+			neednilabel = 0;
+		}
+#endif
+
 		switch ( op )
 		{
 			case OP_UNDEF:
@@ -560,6 +649,7 @@
 //				emit("frstor 4(%%rsi)");
 				emit("addq $4, %%rsi");
 				emit("movl %%eax, (%%rsi)"); // store return value
+				neednilabel = 1;
 				break;
 			case OP_PUSH:
 				emit("addq $4, %%rsi");
@@ -628,7 +718,8 @@
 				emit("jp dojump_i_%08x", instruction);
 				emit("jz i_%08x", instruction+1);
 				emit("dojump_i_%08x:", instruction);
-				emit("jmp i_%08x", iarg);
+				JMPIARG
+				neednilabel = 1;
 #endif
 				break;
 			case OP_LTF:
@@ -855,7 +946,7 @@
 		}
 	}
 
-
+#ifdef USE_GAS
 	emit("setupinstructionpointers:");
 	emit("movq $%lu, %%rax", (unsigned long)vm->instructionPointers);
 	for ( instruction = 0; instruction < header->instructionCount; ++instruction )
@@ -888,8 +979,17 @@
 	vm->codeBase   = compiledcode; // remember to skip ELF header!
 	vm->codeLength = compiledsize;
 
+#else  // USE_GAS
+	}
+	assembler_init(0);
+
+	if(mprotect(vm->codeBase, compiledOfs, PROT_READ|PROT_EXEC))
+		Com_Error(ERR_DROP, "VM_CompileX86: mprotect failed");
+#endif // USE_GAS
+
 	vm->destroy = VM_Destroy_Compiled;
 	
+#ifdef USE_GAS
 	entryPoint = getentrypoint(vm);
 
 //	__asm__ __volatile__ ("int3");
@@ -910,8 +1010,6 @@
 	fclose(qdasmout);
 #endif
 
-	Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );
-
 out:
 	close(fd_o);
 
@@ -922,12 +1020,30 @@
 		unlink(fn_s);
 	}
 #endif
+#endif // USE_GAS
+
+	if(vm->compiled)
+	{
+		struct timeval tvdone =  {0, 0};
+		struct timeval dur =  {0, 0};
+		Com_Printf( "VM file %s compiled to %i bytes of code (%p - %p)\n", vm->name, vm->codeLength, vm->codeBase, vm->codeBase+vm->codeLength );
+
+		gettimeofday(&tvdone, NULL);
+		timersub(&tvdone, &tvstart, &dur);
+		Com_Printf( "compilation took %lu.%06lu seconds\n", dur.tv_sec, dur.tv_usec );
+	}
 }
 
 
 void VM_Destroy_Compiled(vm_t* self)
 {
+#ifdef USE_GAS
 	munmap(self->codeBase, self->codeLength);
+#elif _WIN32
+	VirtualFree(self->codeBase, self->codeLength, MEM_RELEASE);
+#else
+	munmap(self->codeBase, self->codeLength);
+#endif
 }
 
 /*

Added: trunk/code/qcommon/vm_x86_64_assembler.c
===================================================================
--- trunk/code/qcommon/vm_x86_64_assembler.c	                        (rev 0)
+++ trunk/code/qcommon/vm_x86_64_assembler.c	2007-08-16 11:02:21 UTC (rev 1122)
@@ -0,0 +1,1419 @@
+/*
+===========================================================================
+vm_x86_64_assembler.c -- assembler for x86-64
+
+Copyright (C) 2007 Ludwig Nussel <ludwig.nussel at suse.de>, Novell inc.
+
+Quake III Arena source code is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the License,
+or (at your option) any later version.
+
+Quake III Arena source code is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with Quake III Arena source code; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+===========================================================================
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long u64;
+
+static char* out;
+static unsigned compiledOfs;
+static unsigned assembler_pass;
+
+static const char* cur_line;
+
+static FILE* fout;
+
+#define MIN(a,b)  ((a) < (b) ? (a) : (b))
+#define MAX(a,b)  ((a) > (b) ? (a) : (b))
+
+#define crap(fmt, args...) do { \
+	_crap(__FUNCTION__, fmt, ##args); \
+} while(0)
+
+#define CRAP_INVALID_ARGS crap("invalid arguments %s, %s", argtype2str(arg1.type),argtype2str(arg2.type));
+
+#ifdef DEBUG
+#define debug(fmt, args...) printf(fmt, ##args)
+#else
+#define debug(fmt, args...)
+#endif
+
+static void _crap(const char* func, const char* fmt, ...)
+{
+	va_list ap;
+	fprintf(stderr, "%s() - ", func);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fputc('\n', stderr);
+	if(cur_line && cur_line[0])
+		fprintf(stderr, "-> %s\n", cur_line);
+	exit(1);
+}
+
+static void emit1(unsigned char v)
+{
+	if(assembler_pass)
+	{
+		out[compiledOfs++] = v;
+		if(fout) fwrite(&v, 1, 1, fout);
+		debug("%02hhx ", v);
+	}
+	else
+	{
+		++compiledOfs;
+	}
+}
+
+static inline void emit2(u16 v)
+{
+	emit1(v&0xFF);
+	emit1((v>>8)&0xFF);
+}
+
+static inline void emit4(u32 v)
+{
+	emit1(v&0xFF);
+	emit1((v>>8)&0xFF);
+	emit1((v>>16)&0xFF);
+	emit1((v>>24)&0xFF);
+}
+
+static inline void emit8(u64 v)
+{
+	emit4(v&0xFFFFFFFF);
+	emit4((v>>32)&0xFFFFFFFF);
+}
+
+enum {
+	REX_W = 0x08,
+	REX_R = 0x04,
+	REX_X = 0x02,
+	REX_B = 0x01,
+};
+
+enum {
+	MODRM_MOD_00 = 0x00,
+	MODRM_MOD_01 = 0x01 << 6,
+	MODRM_MOD_10 = 0x02 << 6,
+	MODRM_MOD_11 = 0x03 << 6,
+	MODRM_RM_SIB = 0x04,
+};
+
+typedef enum
+{
+	T_NONE      = 0x00,
+	T_REGISTER  = 0x01,
+	T_IMMEDIATE = 0x02,
+	T_MEMORY    = 0x04,
+	T_LABEL     = 0x08,
+	T_ABSOLUTE  = 0x80
+} argtype_t;
+
+typedef enum {
+	R_8   = 0x100, 
+	R_16  = 0x200, 
+	R_64  = 0x800, 
+	R_MSZ = 0xF00,  // size mask
+	R_XMM = 0x2000, // xmm register. year, sucks
+	R_EAX =  0x00,
+	R_EBX =  0x03,
+	R_ECX =  0x01,
+	R_EDX =  0x02,
+	R_ESI =  0x06,
+	R_EDI =  0x07,
+	R_ESP =  0x04,
+	R_RAX =  R_EAX | R_64,
+	R_RBX =  R_EBX | R_64,
+	R_RCX =  R_ECX | R_64,
+	R_RDX =  R_EDX | R_64,
+	R_RSI =  R_ESI | R_64,
+	R_RDI =  R_EDI | R_64,
+	R_RSP =  R_ESP | R_64,
+	R_R8  =  0x08  | R_64,
+	R_R9  =  0x09  | R_64,
+	R_R10 =  0x0A  | R_64,
+	R_R15 =  0x0F  | R_64,
+	R_AL  =  R_EAX | R_8,
+	R_AX  =  R_EAX | R_16,
+	R_CL  =  R_ECX | R_8,
+	R_XMM0 = 0x00  | R_XMM,
+	R_MGP =  0x0F, // mask for general purpose registers
+} reg_t;
+
+typedef enum {
+	MODRM_SIB = 0,
+	MODRM_NOSIB = 0x3,
+} modrm_sib_t;
+
+typedef struct {
+	unsigned disp;
+	argtype_t basetype;
+	union {
+		u64 imm;
+		reg_t reg;
+	} base;
+	argtype_t indextype;
+	union {
+		u64 imm;
+		reg_t reg;
+	} index;
+	unsigned scale;
+} memref_t;
+
+#define LABELLEN 32
+
+typedef struct {
+	argtype_t type;
+	union {
+		u64 imm;
+		reg_t reg;
+		memref_t mem;
+		char label[LABELLEN];
+	} v;
+	int absolute:1;
+} arg_t;
+
+typedef void (*emitfunc)(const char* op, arg_t arg1, arg_t arg2, void* data);
+
+typedef struct {
+	char* mnemonic;
+	emitfunc func;
+	void* data;
+} op_t;
+
+typedef struct {
+	u8 xmmprefix;
+	u8 subcode; // in modrm
+	u8 rmcode;  // opcode for reg/mem, reg
+	u8 mrcode;  // opcode for reg, reg/mem
+	u8 rcode8;  // opcode for reg8/mem8
+	u8 rcode;  // opcode for reg/mem
+} opparam_t;
+
+/* ************************* */
+
+static unsigned hashkey(const char *string, unsigned len) {
+	unsigned register hash, i;
+
+	hash = 0;
+	for (i = 0; i < len && string[i] != '\0'; ++i) {
+		hash += string[i] * (119 + i);
+	}
+	hash = (hash ^ (hash >> 10) ^ (hash >> 20));
+	return hash;
+}
+
+struct hashentry {
+	char* label;
+	unsigned address;
+	struct hashentry* next;
+};
+static struct hashentry* labelhash[1021];
+
+// no dup check!
+static void hash_add_label(const char* label, unsigned address)
+{
+	struct hashentry* h;
+	unsigned i = hashkey(label, -1U);
+	i %= sizeof(labelhash)/sizeof(labelhash[0]);
+	h = malloc(sizeof(struct hashentry));
+	h->label = strdup(label);
+	h->address = address;
+	h->next = labelhash[i];
+	labelhash[i] = h;
+}
+
+static unsigned lookup_label(const char* label)
+{
+	struct hashentry* h;
+	unsigned i = hashkey(label, -1U);
+	i %= sizeof(labelhash)/sizeof(labelhash[0]);
+	for(h = labelhash[i]; h; h = h->next )
+	{
+		if(!strcmp(h->label, label))
+			return h->address;
+	}
+	if(assembler_pass)
+		crap("label %s undefined", label);
+	return 0;
+}
+
+static void labelhash_free(void)
+{
+	struct hashentry* h;
+	unsigned i;
+	unsigned z = 0, min = -1U, max = 0, t = 0;
+	for ( i = 0; i < sizeof(labelhash)/sizeof(labelhash[0]); ++i)
+	{
+		unsigned n = 0;
+		h = labelhash[i];
+		while(h)
+		{
+			struct hashentry* next = h->next;
+			free(h->label);
+			free(h);
+			h = next;
+			++n;
+		}
+		t+=n;
+		if(!n) ++z;
+		//else printf("%u\n", n);
+		min = MIN(min, n);
+		max = MAX(max, n);
+	}
+	printf("total %u, hsize %lu, zero %u, min %u, max %u\n", t, sizeof(labelhash)/sizeof(labelhash[0]), z, min, max);
+	memset(labelhash, 0, sizeof(labelhash));
+}
+
+/* ************************* */
+
+
+static const char* argtype2str(argtype_t t)
+{
+	switch(t)
+	{
+		case T_NONE: return "none";
+		case T_REGISTER: return "register";
+		case T_IMMEDIATE: return "immediate";
+		case T_MEMORY: return "memory";
+		case T_LABEL: return "label";
+		default: crap("invalid type");
+	}
+	/* not reached */
+	return T_NONE;
+}
+
+/* ************************* */
+
+static inline int iss8(u64 v)
+{
+	return (labs(v) <= 0x80);
+}
+
+static inline int isu8(u64 v)
+{
+	return (v <= 0xff);
+}
+
+static inline int iss16(u64 v)
+{
+	return (labs(v) <= 0x8000);
+}
+
+static inline int isu16(u64 v)
+{
+	return (v <= 0xffff);
+}
+
+static inline int iss32(u64 v)
+{
+	return (labs(v) <= 0x80000000);
+}
+
+static inline int isu32(u64 v)
+{
+	return (v <= 0xffffffff);
+}
+
+static void emit_opsingle(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 op = (u8)((unsigned long) data);
+
+	if(arg1.type != T_NONE || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+
+	emit1(op);
+}
+
+static void emit_opsingle16(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	emit1(0x66);
+	emit_opsingle(mnemonic, arg1, arg2, data);
+}
+
+static void compute_rexmodrmsib(u8* rex_r, u8* modrm_r, u8* sib_r, arg_t* arg1, arg_t* arg2)
+{
+	u8 rex = 0;
+	u8 modrm = 0;
+	u8 sib = 0;
+
+	if((arg1->type == T_REGISTER && arg2->type == T_REGISTER)
+	&& ((arg1->v.reg & R_MSZ) != (arg2->v.reg & R_MSZ))
+	&& !((arg1->v.reg & R_XMM) || (arg2->v.reg & R_XMM)))
+		crap("both registers must be of same width");
+
+	if((arg1->type == T_REGISTER && arg1->v.reg & R_64)
+	|| (arg2->type == T_REGISTER && arg2->v.reg & R_64))
+	{
+		rex |= REX_W;
+	}
+
+	if(arg1->type == T_REGISTER)
+	{
+		if((arg1->v.reg & R_MGP) > 0x07)
+			rex |= REX_R;
+
+		modrm |= (arg1->v.reg & 0x07) << 3;
+	}
+
+	if(arg2->type == T_REGISTER)
+	{
+		if((arg2->v.reg & R_MGP) > 0x07)
+			rex |= REX_B;
+
+		modrm |= (arg2->v.reg & 0x07);
+	}
+
+	if(arg2->type == T_MEMORY)
+	{
+		if((arg2->v.mem.basetype == T_REGISTER && !(arg2->v.mem.base.reg & R_64))
+		|| (arg2->v.mem.indextype == T_REGISTER && !(arg2->v.mem.index.reg & R_64)))
+		{
+			crap("only 64bit base/index registers are %x %x", arg2->v.mem.base.reg, arg2->v.mem.index.reg);
+		}
+
+		if(arg2->v.mem.indextype == T_REGISTER)
+		{
+			modrm |= MODRM_RM_SIB;
+			if(!arg2->v.mem.disp)
+			{
+				modrm |= MODRM_MOD_00;
+			}
+			else if(iss8(arg2->v.mem.disp))
+			{
+				modrm |= MODRM_MOD_01;
+			}
+			else if(isu32(arg2->v.mem.disp))
+			{
+				modrm |= MODRM_MOD_10;
+			}
+			else
+			{
+				crap("invalid displacement");
+			}
+
+			if((arg2->v.mem.index.reg & R_MGP) > 0x07)
+				rex |= REX_X;
+
+			if((arg2->v.mem.base.reg & R_MGP) > 0x07)
+				rex |= REX_B;
+
+			if(arg2->v.mem.basetype != T_REGISTER)
+				crap("base must be register");
+			switch(arg2->v.mem.scale)
+			{
+				case 1: break;
+				case 2: sib |= 1 << 6; break;
+				case 4: sib |= 2 << 6; break;
+				case 8: sib |= 3 << 6; break;
+			}
+			sib |= (arg2->v.mem.index.reg & 0x07) << 3;
+			sib |= (arg2->v.mem.base.reg & 0x07);
+		}
+		else if(arg2->v.mem.indextype == T_NONE)
+		{
+			if(!arg2->v.mem.disp)
+			{
+				modrm |= MODRM_MOD_00;
+			}
+			else if(iss8(arg2->v.mem.disp))
+			{
+				modrm |= MODRM_MOD_01;
+			}
+			else if(isu32(arg2->v.mem.disp))
+			{
+				modrm |= MODRM_MOD_10;
+			}
+			else
+			{
+				crap("invalid displacement");
+			}
+
+			if(arg2->v.mem.basetype != T_REGISTER)
+				crap("todo: base != register");
+
+			if((arg2->v.mem.base.reg & R_MGP) > 0x07)
+				rex |= REX_B;
+
+			modrm |= arg2->v.mem.base.reg & 0x07;
+		}
+		else
+		{
+			crap("invalid indextype");
+		}
+	}
+	else
+	{
+		modrm |= MODRM_MOD_11;
+	}
+
+	if(rex)
+		rex |= 0x40; // XXX
+
+	*rex_r = rex;
+	*modrm_r = modrm;
+	*sib_r = sib;
+}
+
+static void maybe_emit_displacement(arg_t* arg)
+{
+	if(arg->type != T_MEMORY)
+		return;
+
+	if(arg->v.mem.disp)
+	{
+		if(iss8(arg->v.mem.disp))
+		{
+			emit1((u8)arg->v.mem.disp);
+		}
+		else if(isu32(arg->v.mem.disp))
+		{
+			emit4(arg->v.mem.disp);
+		}
+		else
+		{
+			crap("invalid displacement");
+		}
+	}
+}
+
+/* one byte operator with register added to operator */
+static void emit_opreg(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 op = (u8)((unsigned long) data);
+
+	if(arg1.type != T_REGISTER || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+
+	if((arg1.v.reg & R_MGP) > 0x07)
+		emit1(0x40 | REX_B);
+
+	op |= (arg1.v.reg & 0x07);
+
+	emit1(op);
+}
+
+/* operator which operates on reg/mem */
+static void emit_op_rm(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+	opparam_t* params = data;
+
+	if((arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+
+	compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+	modrm |= params->subcode << 3;
+
+	if(arg1.v.reg & R_16)
+		emit1(0x66);
+
+	if(rex) emit1(rex);
+	if(arg1.v.reg & R_8)
+		emit1(params->rcode8); // op reg8/mem8,
+	else
+		emit1(params->rcode); // op reg/mem,
+	emit1(modrm);
+	if((modrm & 0x07) == MODRM_RM_SIB)
+		emit1(sib);
+
+	maybe_emit_displacement(&arg1);
+}
+
+/* operator which operates on reg/mem with cl */
+static void emit_op_rm_cl(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+	opparam_t* params = data;
+
+	if(arg2.type != T_REGISTER || arg1.type != T_REGISTER)
+		CRAP_INVALID_ARGS;
+
+	if((arg1.v.reg & R_MGP) != R_ECX && !(arg1.v.reg & R_8))
+		crap("only cl register is valid");
+
+	arg1.type = T_NONE; // don't complain, we know it's cl anyways
+
+	compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+	modrm |= params->subcode << 3;
+
+	if(arg2.v.reg & R_16)
+		emit1(0x66);
+
+	if(rex) emit1(rex);
+	if(arg2.v.reg & R_8)
+		emit1(params->rcode8); // op reg8/mem8,
+	else
+		emit1(params->rcode); // op reg/mem,
+	emit1(modrm);
+	if((modrm & 0x07) == MODRM_RM_SIB)
+		emit1(sib);
+
+	maybe_emit_displacement(&arg2);
+}
+
+static void emit_mov(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex = 0;
+	u8 modrm = 0;
+	u8 sib = 0;
+
+	if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER)
+	{
+		u8 op = 0xb8;
+		
+		if(arg2.v.reg & R_8)
+		{
+			if(!isu8(arg1.v.imm))
+				crap("value too large for 8bit register");
+
+			op = 0xb0;
+		}
+		else if(arg2.v.reg & R_16)
+		{
+			if(!isu16(arg1.v.imm))
+				crap("value too large for 16bit register");
+			emit1(0x66);
+		}
+		else if(!arg2.v.reg & R_64)
+		{
+			if(!isu32(arg1.v.imm))
+				crap("value too large for 32bit register");
+		}
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		if(rex) emit1(rex);
+
+		op |= (arg2.v.reg & 0x07);
+
+		emit1(op);
+
+		if(arg2.v.reg & R_8) emit1(arg1.v.imm);
+		else if(arg2.v.reg & R_16) emit2(arg1.v.imm);
+		else if(arg2.v.reg & R_64) emit8(arg1.v.imm);
+		else emit4(arg1.v.imm);
+	}
+	else if(arg1.type == T_IMMEDIATE && arg2.type == T_MEMORY)
+	{
+		if(!iss32(arg1.v.imm))
+		{
+			crap("only 32bit immediates supported");
+		}
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+		if(rex) emit1(rex);
+		emit1(0xc7); // mov reg/mem, imm
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		emit4(arg1.v.imm);
+	}
+	else if(arg1.type == T_REGISTER && arg2.type == T_REGISTER) // XXX: same as next
+	{
+		if(arg1.type != T_REGISTER || arg2.type != T_REGISTER)
+			crap("both args must be registers");
+
+		if((arg1.v.reg & R_MSZ) != (arg2.v.reg & R_MSZ))
+			crap("both registers must be same width");
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		if(rex) emit1(rex);
+		emit1(0x89); // mov reg reg/mem,
+		emit1(modrm);
+	}
+	else if(arg1.type == T_REGISTER && arg2.type == T_MEMORY)
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		if(arg1.v.reg & R_16)
+			emit1(0x66);
+
+		if(rex) emit1(rex);
+		if(arg1.v.reg & R_8)
+			emit1(0x88); // mov reg reg/mem,
+		else
+			emit1(0x89); // mov reg reg/mem,
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg2);
+	}
+	else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER)
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+		if(arg2.v.reg & R_16)
+			emit1(0x66);
+
+		if(rex) emit1(rex);
+		if(arg2.v.reg & R_8)
+			emit1(0x8a); // mov reg/mem, reg
+		else
+			emit1(0x8b); // mov reg/mem, reg
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg1);
+	}
+	else
+		CRAP_INVALID_ARGS;
+}
+
+static void emit_subaddand(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex = 0;
+	u8 modrm = 0;
+	u8 sib = 0;
+
+	opparam_t* params = data;
+
+	if(arg1.type == T_IMMEDIATE && arg2.type == T_REGISTER)
+	{
+		if(!iss32(arg1.v.imm))
+		{
+			crap("only 8 and 32 bit immediates supported");
+		}
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		modrm |= params->subcode << 3;
+
+		if(rex) emit1(rex);
+#if 0
+		if(isu8(arg1.v.imm))
+		{
+			emit1(0x83); // sub reg/mem, imm8
+			emit1(modrm);
+			emit1(arg1.v.imm&0xFF);
+		}
+		else
+#endif
+		{
+			emit1(0x81); // sub reg/mem, imm32
+			emit1(modrm);
+			emit4(arg1.v.imm);
+		}
+	}
+	else if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER))
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		if(rex) emit1(rex);
+		emit1(params->rmcode); // sub reg/mem, reg
+		emit1(modrm);
+		if(arg2.type == T_MEMORY && (modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg2);
+	}
+	else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode)
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+		if(rex) emit1(rex);
+		emit1(params->mrcode); // sub reg, reg/mem
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg1);
+	}
+	else
+		CRAP_INVALID_ARGS;
+}
+
+static void emit_condjump(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	unsigned off;
+	int disp;
+	unsigned char opcode = (unsigned char)(((unsigned long)data)&0xFF);
+
+	if(arg1.type != T_LABEL || arg2.type != T_NONE)
+		crap("%s: argument must be label", mnemonic);
+
+	emit1(opcode);
+
+	off = lookup_label(arg1.v.label);
+	disp = off-(compiledOfs+1);
+	if(assembler_pass && abs(disp) > 127)
+		crap("cannot jump that far (%x -> %x = %x)", compiledOfs, off, disp);
+
+	emit1(disp);
+}
+
+static void emit_jmp(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	if((arg1.type != T_LABEL && arg1.type != T_REGISTER && arg1.type != T_MEMORY) || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+
+	if(arg1.type == T_LABEL)
+	{
+		unsigned off;
+		int disp;
+
+		off = lookup_label(arg1.v.label);
+		disp = off-(compiledOfs+5);
+		emit1(0xe9);
+		emit4(disp);
+	}
+	else
+	{
+		u8 rex, modrm, sib;
+
+		if(arg1.type == T_REGISTER)
+		{
+			if(!arg1.absolute)
+				crap("jmp must be absolute");
+
+			if((arg1.v.reg & R_64) != R_64)
+				crap("register must be 64bit");
+
+			arg1.v.reg ^= R_64; // no rex required for call
+		}
+
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+		modrm |= 0x4 << 3;
+
+		if(rex) emit1(rex);
+		emit1(0xff);
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+		maybe_emit_displacement(&arg1);
+	}
+}
+
+static void emit_call(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+
+	if(arg1.type != T_REGISTER || arg2.type != T_NONE)
+		CRAP_INVALID_ARGS;
+
+	if(!arg1.absolute)
+		crap("call must be absolute");
+
+	if((arg1.v.reg & R_64) != R_64)
+		crap("register must be 64bit");
+
+	arg1.v.reg ^= R_64; // no rex required for call
+
+	compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+	modrm |= 0x2 << 3;
+
+	if(rex) emit1(rex);
+	emit1(0xff);
+	emit1(modrm);
+}
+
+
+static void emit_twobyte(const char* mnemonic, arg_t arg1, arg_t arg2, void* data)
+{
+	u8 rex, modrm, sib;
+
+	opparam_t* params = data;
+
+	if(arg1.type == T_REGISTER && (arg2.type == T_MEMORY || arg2.type == T_REGISTER))
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg1, &arg2);
+
+		if(params->xmmprefix) emit1(params->xmmprefix);
+		if(rex) emit1(rex);
+		emit1(0x0f);
+		emit1(params->rmcode); // sub reg/mem, reg
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg2);
+	}
+	else if(arg1.type == T_MEMORY && arg2.type == T_REGISTER && params->mrcode)
+	{
+		compute_rexmodrmsib(&rex, &modrm, &sib, &arg2, &arg1);
+
+		if(params->xmmprefix) emit1(params->xmmprefix);
+		if(rex) emit1(rex);
+		emit1(0x0f);
+		emit1(params->mrcode); // sub reg, reg/mem
+		emit1(modrm);
+		if((modrm & 0x07) == MODRM_RM_SIB)
+			emit1(sib);
+
+		maybe_emit_displacement(&arg1);
+	}
+	else
+		CRAP_INVALID_ARGS;
+}
+
+static opparam_t params_add = { subcode: 0, rmcode: 0x01, };
+static opparam_t params_or = { subcode: 1, rmcode: 0x09, };
+static opparam_t params_and = { subcode: 4, rmcode: 0x21, };
+static opparam_t params_sub = { subcode: 5, rmcode: 0x29, };
+static opparam_t params_xor = { subcode: 6, rmcode: 0x31, };
+static opparam_t params_cmp = { subcode: 6, rmcode: 0x39, mrcode: 0x3b, };
+static opparam_t params_dec = { subcode: 1, rcode: 0xff, rcode8: 0xfe, };
+static opparam_t params_sar = { subcode: 7, rcode: 0xd3, rcode8: 0xd2, };
+static opparam_t params_shl = { subcode: 4, rcode: 0xd3, rcode8: 0xd2, };
+static opparam_t params_shr = { subcode: 5, rcode: 0xd3, rcode8: 0xd2, };
+static opparam_t params_idiv = { subcode: 7, rcode: 0xf7, rcode8: 0xf6, };
+static opparam_t params_div = { subcode: 6, rcode: 0xf7, rcode8: 0xf6, };
+static opparam_t params_imul = { subcode: 5, rcode: 0xf7, rcode8: 0xf6, };
+static opparam_t params_mul = { subcode: 4, rcode: 0xf7, rcode8: 0xf6, };
+static opparam_t params_neg = { subcode: 3, rcode: 0xf7, rcode8: 0xf6, };
+static opparam_t params_not = { subcode: 2, rcode: 0xf7, rcode8: 0xf6, };
+
+static opparam_t params_cvtsi2ss = { xmmprefix: 0xf3, rmcode: 0x2a };
+static opparam_t params_cvttss2si = { xmmprefix: 0xf3, rmcode: 0x2c };
+static opparam_t params_addss = { xmmprefix: 0xf3, mrcode: 0x58 };
+static opparam_t params_divss = { xmmprefix: 0xf3, mrcode: 0x5e };
+static opparam_t params_movss = { xmmprefix: 0xf3, mrcode: 0x10, rmcode: 0x11 };
+static opparam_t params_mulss = { xmmprefix: 0xf3, mrcode: 0x59 };
+static opparam_t params_subss = { xmmprefix: 0xf3, mrcode: 0x5c };
+static opparam_t params_ucomiss = { mrcode: 0x2e };
+
+static int ops_sorted = 0;
+static op_t ops[] = {
+	{ "addl", emit_subaddand, &params_add },
+	{ "addq", emit_subaddand, &params_add },
+	{ "addss", emit_twobyte, &params_addss },
+	{ "andl", emit_subaddand, &params_and },
+	{ "andq", emit_subaddand, &params_and },
+	{ "callq", emit_call, NULL },
+	{ "cbw", emit_opsingle16, (void*)0x98 },
+	{ "cdq", emit_opsingle, (void*)0x99 },
+	{ "cmpl", emit_subaddand, &params_cmp },
+	{ "cmpq", emit_subaddand, &params_cmp },
+	{ "cvtsi2ss", emit_twobyte, &params_cvtsi2ss },
+	{ "cvttss2si", emit_twobyte, &params_cvttss2si },
+	{ "cwde", emit_opsingle, (void*)0x98 },
+	{ "decl", emit_op_rm, &params_dec },
+	{ "decq", emit_op_rm, &params_dec },
+	{ "divl", emit_op_rm, &params_div },
+	{ "divq", emit_op_rm, &params_div },
+	{ "divss", emit_twobyte, &params_divss },
+	{ "idivl", emit_op_rm, &params_idiv },
+	{ "imull", emit_op_rm, &params_imul },
+	{ "int3", emit_opsingle, (void*)0xcc },
+	{ "ja", emit_condjump, (void*)0x77 },
+	{ "jbe", emit_condjump, (void*)0x76 },
+	{ "jb", emit_condjump, (void*)0x72 },
+	{ "je", emit_condjump, (void*)0x74 },
+	{ "jl", emit_condjump, (void*)0x7c },
+	{ "jmp", emit_jmp, NULL },
+	{ "jmpq", emit_jmp, NULL },
+	{ "jnae", emit_condjump, (void*)0x72 },
+	{ "jna", emit_condjump, (void*)0x76 },
+	{ "jnbe", emit_condjump, (void*)0x77 },
+	{ "jnb", emit_condjump, (void*)0x73 },
+	{ "jnc", emit_condjump, (void*)0x73 },
+	{ "jne", emit_condjump, (void*)0x75 },
+	{ "jnge", emit_condjump, (void*)0x7c },
+	{ "jng", emit_condjump, (void*)0x7e },
+	{ "jnle", emit_condjump, (void*)0x7f },
+	{ "jnl", emit_condjump, (void*)0x7d },
+	{ "jnz", emit_condjump, (void*)0x75 },
+	{ "jp", emit_condjump, (void*)0x7a },
+	{ "jz", emit_condjump, (void*)0x74 },
+	{ "movb", emit_mov, NULL },
+	{ "movl", emit_mov, NULL },
+	{ "movq", emit_mov, NULL },
+	{ "movss", emit_twobyte, &params_movss },
+	{ "movw", emit_mov, NULL },
+	{ "mull", emit_op_rm, &params_mul },
+	{ "mulss", emit_twobyte, &params_mulss },
+	{ "negl", emit_op_rm, &params_neg },
+	{ "negq", emit_op_rm, &params_neg },
+	{ "nop", emit_opsingle, (void*)0x90 },
+	{ "notl", emit_op_rm, &params_not },
+	{ "notq", emit_op_rm, &params_not },
+	{ "or",   emit_subaddand, &params_or },
+	{ "orl",  emit_subaddand, &params_or },
+	{ "pop", emit_opreg, (void*)0x58 },
+	{ "push", emit_opreg, (void*)0x50 },
+	{ "ret", emit_opsingle, (void*)0xc3 },
+	{ "sarl", emit_op_rm_cl, &params_sar },
+	{ "shl", emit_op_rm_cl, &params_shl },
+	{ "shrl", emit_op_rm_cl, &params_shr },
+	{ "subl", emit_subaddand, &params_sub },
+	{ "subq", emit_subaddand, &params_sub },
+	{ "subss", emit_twobyte, &params_subss },
+	{ "ucomiss", emit_twobyte, &params_ucomiss },
+	{ "xorl",  emit_subaddand, &params_xor },
+	{ "xorq",  emit_subaddand, &params_xor },
+	{ NULL, NULL, NULL }
+};
+
+static int opsort(const void* A, const void* B)
+{
+	const op_t* a = A;
+	const op_t* b = B;
+	return strcmp(a->mnemonic, b->mnemonic);
+}
+
+static op_t* getop(const char* n)
+{
+#if 0
+	op_t* o = ops;
+	while(o->mnemonic)
+	{
+		if(!strcmp(o->mnemonic, n))
+			return o;
+		++o;
+	}
+
+#else
+	unsigned m, t, b;
+	int r;
+	t = sizeof(ops)/sizeof(ops[0])-1;
+	b = 0;
+
+	while(b <= t)
+	{
+		m = ((t-b)>>1) + b;
+		if((r = strcmp(ops[m].mnemonic, n)) == 0)
+		{
+			return &ops[m];
+		}
+		else if(r < 0)
+		{
+			b = m + 1;
+		}
+		else
+		{
+			t = m - 1;
+		}
+	}
+#endif
+
+	return NULL;
+}
+
+static reg_t parsereg(const char* str)
+{
+	const char* s = str;
+	if(*s == 'a' && s[1] == 'l' && !s[2])
+	{
+		return R_AL;
+	}
+	else if(*s == 'a' && s[1] == 'x' && !s[2])
+	{
+		return R_AX;
+	}
+	if(*s == 'c' && s[1] == 'l' && !s[2])
+	{
+		return R_CL;
+	}
+	if(*s == 'x')
+	{
+		if(!strcmp(s, "xmm0"))
+			return R_XMM0;
+	}
+	else if(*s == 'r' && s[1])
+	{
+		++s;
+		if(s[1] == 'x')
+		{
+			switch(*s++)
+			{
+				case 'a': return R_RAX;
+				case 'b': return R_RBX;
+				case 'c': return R_RCX;
+				case 'd': return R_RDX;
+			}
+		}
+		else if(s[1] == 'i')
+		{
+			switch(*s++)
+			{
+				case 's': return R_RSI;
+				case 'd': return R_RDI;
+			}
+		}
+		else if(s[0] == 's' && s[1] == 'p' && !s[2])
+		{
+			return R_RSP;
+		}
+		else if(*s == '8' && !s[1])
+			return R_R8;
+		else if(*s == '9' && !s[1])
+			return R_R9;
+		else if(*s == '1' && s[1] == '0')
+			return R_R10;
+		else if(*s == '1' && s[1] == '5')
+			return R_R15;
+	}
+	else if(*s == 'e' && s[1])
+	{
+		++s;
+		if(s[1] == 'x')
+		{
+			switch(*s++)
+			{
+				case 'a': return R_EAX;
+				case 'b': return R_EBX;
+				case 'c': return R_ECX;
+				case 'd': return R_EDX;
+			}
+		}
+		else if(s[1] == 'i')
+		{
+			switch(*s++)
+			{
+				case 's': return R_ESI;
+				case 'd': return R_EDI;
+			}
+		}
+	}
+
+	crap("invalid register %s", str);
+
+	return 0;
+}
+
+typedef enum {
+	TOK_LABEL = 0x80,
+	TOK_INT = 0x81,
+	TOK_END = 0x82,
+	TOK_INVALID = 0x83,
+} token_t;
+
+static unsigned char nexttok(const char** str, char* label, u64* val)
+{
+	const char* s = *str;
+
+	if(label) *label = 0;
+	if(val) *val = 0;
+
+	while(*s && *s == ' ') ++s;
+
+	if(!*s)
+	{
+		return TOK_END;
+	}
+	else if(*s == '$' || *s == '*' || *s == '%' || *s == '-' || *s == ')' || *s == '(' || *s == ',')
+	{
+		*str = s+1;
+		return *s;
+	}
+	else if(*s >= 'a' && *s <= 'z')
+	{
+		size_t a = strspn(s+1, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_");
+		if(a+1 >= LABELLEN)
+			crap("label %s too long", s);
+		if(label)
+		{
+			strncpy(label, s, a+1);
+			label[a+1] = 0;
+		}
+		*str = s+a+1;
+		return TOK_LABEL;
+	}
+	else if(*s >= '0' && *s <= '9')
+	{
+		char* endptr = NULL;
+		u64 v = strtol(s, &endptr, 0);
+		if(endptr && (endptr-s == 0))
+			crap("invalid integer %s", s);
+		if(val) *val = v;
+		*str = endptr;
+		return TOK_INT;
+	}
+	crap("can't parse '%s'", *str);
+	return TOK_INVALID;
+}
+
+static arg_t parsearg(const char** str)
+{
+	arg_t arg;
+	const char* s = *str;
+	char label[20];
+	u64 val;
+	int negative = 1;
+	unsigned ttype;
+
+	arg.type = T_NONE;
+	arg.absolute = 0;
+
+	while(*s && *s == ' ') ++s;
+
+	switch(nexttok(&s, label, &val))
+	{
+		case '$' :
+			ttype = nexttok(&s, NULL, &val);
+			if(ttype == '-')
+			{
+				negative = -1;
+				ttype = nexttok(&s, NULL, &val);
+			}
+			if(ttype != TOK_INT)
+				crap("expected integer");
+			arg.type = T_IMMEDIATE;
+			arg.v.imm = negative * val;
+			break;
+		case '*' :
+			if((ttype = nexttok(&s, NULL, NULL)) != '%')
+			{
+				if(ttype == '(')
+					goto tok_memory;
+				crap("expected '%%'");
+			}
+			arg.absolute = 1;
+			/* fall through */
+		case '%' :
+			if(nexttok(&s, label, &val) != TOK_LABEL)
+				crap("expected label");
+			arg.type = T_REGISTER;
+			arg.v.reg = parsereg(label);
+			break;
+		case TOK_LABEL:
+			arg.type = T_LABEL;
+			strncpy(arg.v.label, label, LABELLEN);
+			break;
+		case '-':
+			negative = -1;
+			if(nexttok(&s, NULL, &val) != TOK_INT)
+				crap("expected integer");
+			/* fall through */
+		case TOK_INT:
+			if(nexttok(&s, label, NULL) != '(')
+				crap("expected '('"); // mov to/from fixed address not supported
+			/* fall through */
+		case '(':
+tok_memory:
+			arg.type = T_MEMORY;
+			arg.v.mem.indextype = T_NONE;
+			arg.v.mem.disp = negative * val;
+			ttype = nexttok(&s, label, &val);
+			if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL)
+			{
+				crap("expected register");
+			}
+			if (ttype == '%')
+			{
+				arg.v.mem.basetype = T_REGISTER;
+				arg.v.mem.base.reg = parsereg(label);
+			}
+			else if (ttype == TOK_INT)
+			{
+				arg.v.mem.basetype = T_IMMEDIATE;
+				arg.v.mem.base.imm = val;
+			}
+			if((ttype = nexttok(&s, NULL, NULL)) == ',')
+			{
+				ttype = nexttok(&s, label, &val);
+				if(ttype == '%' && nexttok(&s, label, &val) != TOK_LABEL)
+				{
+					crap("expected register");
+				}
+				if (ttype == '%')
+				{
+					arg.v.mem.indextype = T_REGISTER;
+					arg.v.mem.index.reg = parsereg(label);
+				}
+				else if (ttype == TOK_INT)
+				{
+					crap("index must be register");
+					arg.v.mem.indextype = T_IMMEDIATE;
+					arg.v.mem.index.imm = val;
+				}
+				if(nexttok(&s, NULL, NULL) != ',')
+					crap("expected ','");
+				if(nexttok(&s, NULL, &val) != TOK_INT)
+					crap("expected integer");
+				if(val != 1 && val != 2 && val != 4 && val != 8)
+					crap("scale must 1, 2, 4 or 8");
+				arg.v.mem.scale = val;
+
+				ttype = nexttok(&s, NULL, NULL);
+			}
+			if(ttype != ')')
+			{
+				crap("expected ')' or ','");
+			}
+			break;
+		default:
+			crap("invalid token %hhu in %s", *(unsigned char*)s, *str);
+			break;
+	}
+
+	*str = s;
+
+	return arg;
+}
+
+/* ************************* */
+
+void assembler_init(int pass)
+{
+	compiledOfs = 0;
+	assembler_pass = pass;
+	if(!pass)
+	{
+		labelhash_free();
+		cur_line = NULL;
+	}
+	if(!ops_sorted)
+	{
+		ops_sorted = 1;
+		qsort(ops, sizeof(ops)/sizeof(ops[0])-1, sizeof(ops[0]), opsort);
+	}
+}
+
+size_t assembler_get_code_size(void)
+{
+	return compiledOfs;
+}
+
+void assembler_set_output(char* buf)
+{
+	out = buf;
+}
+
+void assemble_line(const char* input, size_t len)
+{
+	char line[4096];
+	char* s;
+	op_t* o;
+	char* opn;
+	arg_t arg1, arg2;
+
+	arg1.type = T_NONE;
+	arg2.type = T_NONE;
+	opn = NULL;
+	o = NULL;
+
+	if(len < 1)
+		return;
+
+	if(len >= sizeof(line))
+		crap("line too long");
+
+	memcpy(line, input, sizeof(line));
+	cur_line = input;
+
+	if(line[len-1] == '\n') line[--len] = 0;
+	if(line[len-1] == ':')
+	{
+		line[--len] = 0;
+		if(assembler_pass)
+			debug("%s: 0x%x\n", line, compiledOfs);
+		else
+			hash_add_label(line, compiledOfs);
+	}
+	else
+	{
+		opn = line;
+		s = strchr(line, ' ');
+		if(s)
+		{
+			*s++ = 0;
+			arg1 = parsearg((const char**)&s);
+			if(*s)
+			{
+				if(*s != ',')
+					crap("expected ',', got '%c'", *s);
+				++s;
+				arg2 = parsearg((const char**)&s);
+			}
+		}
+
+		if(!opn)
+		{
+			crap("no operator in %s", line);
+		}
+
+		o = getop(opn);
+		if(!o)
+		{
+			crap("cannot handle op %s", opn);
+		}
+		o->func(opn, arg1, arg2, o->data);
+		if(assembler_pass)
+			debug("   - %s%s", cur_line, cur_line[strlen(cur_line)-1]=='\n'?"":"\n");
+	}
+}
+
+#ifdef SA_STANDALONE
+int main(int argc, char* argv[])
+{
+	char line[4096];
+	size_t len;
+	int pass;
+	FILE* file = NULL;
+
+	if(argc < 2)
+	{
+		crap("specify file");
+	}
+
+	file = fopen(argv[1], "r");
+	if(!file)
+	{
+		crap("can't open file");
+	}
+
+	if(argc > 2)
+	{
+		fout = fopen(argv[2], "w");
+		if(!fout)
+		{
+			crap("can't open %s for writing", argv[2]);
+		}
+	}
+
+	for(pass = 0; pass < 2; ++pass)
+	{
+		if(fseek(file, 0, SEEK_SET))
+			crap("can't rewind file");
+
+		if(pass)
+		{
+			char* b = malloc(assembler_get_code_size());
+			if(!b)
+				crap("cannot allocate memory");
+			assembler_set_output(b);
+		}
+
+		assembler_init(pass);
+
+		while(fgets(line, sizeof(line), file))
+		{
+			len = strlen(line);
+			if(!len) continue;
+
+			assemble_line(line, len);
+		}
+	}
+
+	assembler_init(0);
+
+	fclose(file);
+
+	return 0;
+}
+#endif




More information about the quake3-commits mailing list