#include "CodeGenerator.hpp"

#include "Error.hpp"

#include <stdio.h>

namespace SoftWire
{
	// Static switch for SSE emulation. While false (the initial value), the SSE
	// wrappers in this file forward straight to the Assembler implementations.
	bool CodeGenerator::emulateSSE = false;

	// Starts with an empty allocation table: no tracked register is bound to a
	// reference and every usage priority is zero.
	CodeGenerator::CodeGenerator()
	{
		// Reference currently bound to each tracked general purpose register (0 = none)
		physicalEAX = 0;
		physicalECX = 0;
		physicalEDX = 0;
		physicalEBX = 0;
		physicalESI = 0;
		physicalEDI = 0;

		// Reference currently bound to each MMX register (0 = none)
		physicalMM0 = 0;
		physicalMM1 = 0;
		physicalMM2 = 0;
		physicalMM3 = 0;
		physicalMM4 = 0;
		physicalMM5 = 0;
		physicalMM6 = 0;
		physicalMM7 = 0;

		// Reference currently bound to each SSE register (0 = none)
		physicalXMM0 = 0;
		physicalXMM1 = 0;
		physicalXMM2 = 0;
		physicalXMM3 = 0;
		physicalXMM4 = 0;
		physicalXMM5 = 0;
		physicalXMM6 = 0;
		physicalXMM7 = 0;

		// Usage priorities; access() raises the priority of the register it is
		// given and lowers the others, and a priority of 0 marks a register as unused
		priorityEAX = 0;
		priorityECX = 0;
		priorityEDX = 0;
		priorityEBX = 0;
		priorityESI = 0;
		priorityEDI = 0;

		priorityMM0 = 0;
		priorityMM1 = 0;
		priorityMM2 = 0;
		priorityMM3 = 0;
		priorityMM4 = 0;
		priorityMM5 = 0;
		priorityMM6 = 0;
		priorityMM7 = 0;

		priorityXMM0 = 0;
		priorityXMM1 = 0;
		priorityXMM2 = 0;
		priorityXMM3 = 0;
		priorityXMM4 = 0;
		priorityXMM5 = 0;
		priorityXMM6 = 0;
		priorityXMM7 = 0;
	}

	// 8-bit flavour of r32(): obtains the 32-bit register for 'ref' and hands
	// the same operand back reinterpreted as an 8-bit register operand.
	// NOTE(review): r32() can return esi or edi; presumably those have no
	// matching low-byte register in 32-bit code - confirm callers account for it.
	const OperandREG8 &CodeGenerator::r8(const OperandREF &ref, bool copy)
	{
		const OperandREG32 &reg32 = r32(ref, copy);

		return (OperandREG8&)reg32;
	}

	// 8-bit flavour of x32(): the operand returned by x32() reinterpreted as an
	// 8-bit register operand.
	const OperandREG8 &CodeGenerator::x8(const OperandREF &ref, bool copy)
	{
		const OperandREG32 &reg32 = x32(ref, copy);

		return (OperandREG8&)reg32;
	}

	// 8-bit flavour of t32(): the operand returned by t32(i) reinterpreted as an
	// 8-bit register operand.
	const OperandREG8 &CodeGenerator::t8(int i)
	{
		const OperandREG32 &reg32 = t32(i);

		return (OperandREG8&)reg32;
	}

	// 8-bit flavour of m32(): the register bound to 'ref' if there is one,
	// otherwise a memory operand, reinterpreted as an 8-bit register-or-memory
	// operand. Throws Error (from m32()) when 'ref' is 0.
	const OperandR_M8 CodeGenerator::m8(const OperandREF &ref)
	{
		// m32() returns by value. Bind the result to a named object before the
		// reference cast: casting the temporary itself to a non-const reference
		// is only accepted as a compiler extension.
		const OperandR_M32 operand = m32(ref);

		return (OperandR_M8&)operand;
	}

	// 16-bit flavour of r32(): obtains the 32-bit register for 'ref' and hands
	// the same operand back reinterpreted as a 16-bit register operand.
	const OperandREG16 &CodeGenerator::r16(const OperandREF &ref, bool copy)
	{
		const OperandREG32 &reg32 = r32(ref, copy);

		return (OperandREG16&)reg32;
	}

	// 16-bit flavour of x32(): the operand returned by x32() reinterpreted as a
	// 16-bit register operand.
	const OperandREG16 &CodeGenerator::x16(const OperandREF &ref, bool copy)
	{
		const OperandREG32 &reg32 = x32(ref, copy);

		return (OperandREG16&)reg32;
	}

	// 16-bit flavour of t32(): the operand returned by t32(i) reinterpreted as a
	// 16-bit register operand.
	const OperandREG16 &CodeGenerator::t16(int i)
	{
		const OperandREG32 &reg32 = t32(i);

		return (OperandREG16&)reg32;
	}

	// 16-bit flavour of m32(): the register bound to 'ref' if there is one,
	// otherwise a memory operand, reinterpreted as a 16-bit register-or-memory
	// operand. Throws Error (from m32()) when 'ref' is 0.
	const OperandR_M16 CodeGenerator::m16(const OperandREF &ref)
	{
		// m32() returns by value. Bind the result to a named object before the
		// reference cast: casting the temporary itself to a non-const reference
		// is only accepted as a compiler extension.
		const OperandR_M32 operand = m32(ref);

		return (OperandR_M16&)operand;
	}

	// Returns the 32-bit general purpose register bound to 'ref', binding one
	// first if necessary.
	//
	// Lookup order: a register already holding 'ref', then the first unused
	// register, and finally the lowest-priority register holding a real memory
	// reference, which is spilled and reused. 'copy' is forwarded to assign().
	//
	// Throws Error when 'ref' is 0 and a copy was requested, or when no register
	// qualifies for spilling.
	const OperandREG32 &CodeGenerator::r32(const OperandREF &ref, bool copy)
	{
		if(ref == 0 && copy)
		{
			throw Error("Cannot dereference 0");
		}

		// Already bound to a register?
		if(physicalEAX == ref) return access(eax);
		if(physicalECX == ref) return access(ecx);
		if(physicalEDX == ref) return access(edx);
		if(physicalEBX == ref) return access(ebx);
		if(physicalESI == ref) return access(esi);
		if(physicalEDI == ref) return access(edi);

		// Otherwise take the first register that is neither bound nor in use
		if(physicalEAX == 0 && priorityEAX == 0) return assign(eax, ref, copy);
		if(physicalECX == 0 && priorityECX == 0) return assign(ecx, ref, copy);
		if(physicalEDX == 0 && priorityEDX == 0) return assign(edx, ref, copy);
		if(physicalEBX == 0 && priorityEBX == 0) return assign(ebx, ref, copy);
		if(physicalESI == 0 && priorityESI == 0) return assign(esi, ref, copy);
		if(physicalEDI == 0 && priorityEDI == 0) return assign(edi, ref, copy);

		// Everything is taken: look for the lowest priority among registers that
		// hold a real reference. Registers whose priority is not below the
		// starting threshold (the most recently accessed ones) are never chosen.
		Encoding::Reg victim = Encoding::REG_UNKNOWN;
		unsigned int lowest = 0xFFFFFFFF - 2;

		if(priorityEAX < lowest && real(physicalEAX)) {lowest = priorityEAX; victim = Encoding::EAX;}
		if(priorityECX < lowest && real(physicalECX)) {lowest = priorityECX; victim = Encoding::ECX;}
		if(priorityEDX < lowest && real(physicalEDX)) {lowest = priorityEDX; victim = Encoding::EDX;}
		if(priorityEBX < lowest && real(physicalEBX)) {lowest = priorityEBX; victim = Encoding::EBX;}
		if(priorityESI < lowest && real(physicalESI)) {lowest = priorityESI; victim = Encoding::ESI;}
		if(priorityEDI < lowest && real(physicalEDI)) {lowest = priorityEDI; victim = Encoding::EDI;}

		if(victim == Encoding::REG_UNKNOWN)
		{
			throw Error("Out of physical general purpose registers. Use free().");
		}

		// Write the victim back to memory and rebind it to 'ref'
		if(victim == Encoding::EAX) {spill(eax); return assign(eax, ref, copy);}
		if(victim == Encoding::ECX) {spill(ecx); return assign(ecx, ref, copy);}
		if(victim == Encoding::EDX) {spill(edx); return assign(edx, ref, copy);}
		if(victim == Encoding::EBX) {spill(ebx); return assign(ebx, ref, copy);}
		if(victim == Encoding::ESI) {spill(esi); return assign(esi, ref, copy);}
		if(victim == Encoding::EDI) {spill(edi); return assign(edi, ref, copy);}

		throw INTERNAL_ERROR;
	}

	// Releases whatever register free(ref) finds bound to 'ref', then obtains a
	// 32-bit register for it through r32().
	const OperandREG32 &CodeGenerator::x32(const OperandREF &ref, bool copy)
	{
		free(ref);

		const OperandREG32 &fresh = r32(ref, copy);

		return fresh;
	}

	// Returns a 32-bit register for temporary number 'i' by passing the index to
	// x32() as the reference. Valid indices are 0 through 5; anything else
	// throws Error.
	const OperandREG32 &CodeGenerator::t32(int i)
	{
		const bool inRange = (0 <= i && i < 6);

		if(!inRange)
		{
			throw Error("Register allocator t32 index out of range");
		}

		return x32(i);
	}

	// Returns the general purpose register bound to 'ref' when there is one
	// (marking it as accessed), otherwise a dword memory operand for 'ref'.
	// Never allocates a register. Throws Error when 'ref' is 0.
	const OperandR_M32 CodeGenerator::m32(const OperandREF &ref)
	{
		if(ref == 0)
		{
			throw Error("Cannot dereference 0");
		}

		// Prefer the register copy if one exists
		if(physicalEAX == ref) return access(eax);
		if(physicalECX == ref) return access(ecx);
		if(physicalEDX == ref) return access(edx);
		if(physicalEBX == ref) return access(ebx);
		if(physicalESI == ref) return access(esi);
		if(physicalEDI == ref) return access(edi);

		// Not in a register: address memory directly
		return dword_ptr [ref];
	}

	// Binds the specific general purpose register 'reg' to 'ref'. Thin wrapper
	// over assign(), which throws Error if 'reg' is already bound.
	const OperandREG32 &CodeGenerator::allocate(const OperandREG32 &reg, const OperandREF &ref, bool copy)
	{
		const OperandREG32 &bound = assign(reg, ref, copy);

		return bound;
	}

	// Binds general purpose register 'reg' to 'ref' and marks it as accessed.
	// When 'copy' is set and 'ref' is a real memory reference, a mov that loads
	// the register from that memory is emitted.
	//
	// Throws Error if the register is already bound, and INTERNAL_ERROR for a
	// register this allocator does not track.
	const OperandREG32 &CodeGenerator::assign(const OperandREG32 &reg, const OperandREF &ref, bool copy)
	{
		// Only real references have memory worth loading
		const bool load = copy && real(ref);

		if(reg.reg == Encoding::EAX)
		{
			if(physicalEAX != 0) throw Error("eax not available for register allocation");

			physicalEAX = ref;
			if(load) mov(eax, dword_ptr [ref]);

			return access(eax);
		}

		if(reg.reg == Encoding::ECX)
		{
			if(physicalECX != 0) throw Error("ecx not available for register allocation");

			physicalECX = ref;
			if(load) mov(ecx, dword_ptr [ref]);

			return access(ecx);
		}

		if(reg.reg == Encoding::EDX)
		{
			if(physicalEDX != 0) throw Error("edx not available for register allocation");

			physicalEDX = ref;
			if(load) mov(edx, dword_ptr [ref]);

			return access(edx);
		}

		if(reg.reg == Encoding::EBX)
		{
			if(physicalEBX != 0) throw Error("ebx not available for register allocation");

			physicalEBX = ref;
			if(load) mov(ebx, dword_ptr [ref]);

			return access(ebx);
		}

		if(reg.reg == Encoding::ESI)
		{
			if(physicalESI != 0) throw Error("esi not available for register allocation");

			physicalESI = ref;
			if(load) mov(esi, dword_ptr [ref]);

			return access(esi);
		}

		if(reg.reg == Encoding::EDI)
		{
			if(physicalEDI != 0) throw Error("edi not available for register allocation");

			physicalEDI = ref;
			if(load) mov(edi, dword_ptr [ref]);

			return access(edi);
		}

		throw INTERNAL_ERROR;
	}

	// Records a use of general purpose register 'reg': its priority becomes the
	// maximum value, while every other non-zero priority drops by one.
	// Returns 'reg' unchanged.
	const OperandREG32 &CodeGenerator::access(const OperandREG32 &reg)
	{
		if(reg.reg == Encoding::EAX) priorityEAX = 0xFFFFFFFF;
		else if(priorityEAX)         priorityEAX--;

		if(reg.reg == Encoding::ECX) priorityECX = 0xFFFFFFFF;
		else if(priorityECX)         priorityECX--;

		if(reg.reg == Encoding::EDX) priorityEDX = 0xFFFFFFFF;
		else if(priorityEDX)         priorityEDX--;

		if(reg.reg == Encoding::EBX) priorityEBX = 0xFFFFFFFF;
		else if(priorityEBX)         priorityEBX--;

		if(reg.reg == Encoding::ESI) priorityESI = 0xFFFFFFFF;
		else if(priorityESI)         priorityESI--;

		if(reg.reg == Encoding::EDI) priorityEDI = 0xFFFFFFFF;
		else if(priorityEDI)         priorityEDI--;

		return reg;
	}

	// Releases general purpose register 'reg' without writing it back: both its
	// bound reference and its priority are reset to 0.
	// Throws INTERNAL_ERROR for a register this allocator does not track.
	void CodeGenerator::free(const OperandREG32 &reg)
	{
		if(reg.reg == Encoding::EAX)
		{
			physicalEAX = 0;
			priorityEAX = 0;
		}
		else if(reg.reg == Encoding::ECX)
		{
			physicalECX = 0;
			priorityECX = 0;
		}
		else if(reg.reg == Encoding::EDX)
		{
			physicalEDX = 0;
			priorityEDX = 0;
		}
		else if(reg.reg == Encoding::EBX)
		{
			physicalEBX = 0;
			priorityEBX = 0;
		}
		else if(reg.reg == Encoding::ESI)
		{
			physicalESI = 0;
			priorityESI = 0;
		}
		else if(reg.reg == Encoding::EDI)
		{
			physicalEDI = 0;
			priorityEDI = 0;
		}
		else
		{
			throw INTERNAL_ERROR;
		}
	}

	void CodeGenerator::spill(const OperandREG32 &reg)
	{
		switch(reg.reg)
		{
		case Encoding::EAX: if(real(physicalEAX)) mov(dword_ptr [physicalEAX], eax); break;
		case Encoding::ECX: if(real(physicalECX)) mov(dword_ptr [physicalECX], ecx); break;
		case Encoding::EDX: if(real(physicalEDX)) mov(dword_ptr [physicalEDX], edx); break;
		case Encoding::EBX: if(real(physicalEBX)) mov(dword_ptr [physicalEBX], ebx); break;
		case Encoding::ESI: if(real(physicalESI)) mov(dword_ptr [physicalESI], esi); break;
		case Encoding::EDI: if(real(physicalEDI)) mov(dword_ptr [physicalEDI], edi); break;
		default: throw INTERNAL_ERROR;
		}

		free(reg);
	}

	// Returns the MMX register bound to 'ref', binding one first if necessary.
	//
	// Lookup order: a register already holding 'ref', then the first unused
	// register, and finally the lowest-priority register holding a real memory
	// reference, which is spilled and reused. 'copy' is forwarded to assign().
	//
	// Throws Error when 'ref' is 0 and a copy was requested, or when no register
	// qualifies for spilling.
	const OperandMMREG &CodeGenerator::r64(const OperandREF &ref, bool copy)
	{
		if(ref == 0 && copy)
		{
			throw Error("Cannot dereference 0");
		}

		// Already bound to a register?
		if(physicalMM0 == ref) return access(mm0);
		if(physicalMM1 == ref) return access(mm1);
		if(physicalMM2 == ref) return access(mm2);
		if(physicalMM3 == ref) return access(mm3);
		if(physicalMM4 == ref) return access(mm4);
		if(physicalMM5 == ref) return access(mm5);
		if(physicalMM6 == ref) return access(mm6);
		if(physicalMM7 == ref) return access(mm7);

		// Otherwise take the first register that is neither bound nor in use
		if(physicalMM0 == 0 && priorityMM0 == 0) return assign(mm0, ref, copy);
		if(physicalMM1 == 0 && priorityMM1 == 0) return assign(mm1, ref, copy);
		if(physicalMM2 == 0 && priorityMM2 == 0) return assign(mm2, ref, copy);
		if(physicalMM3 == 0 && priorityMM3 == 0) return assign(mm3, ref, copy);
		if(physicalMM4 == 0 && priorityMM4 == 0) return assign(mm4, ref, copy);
		if(physicalMM5 == 0 && priorityMM5 == 0) return assign(mm5, ref, copy);
		if(physicalMM6 == 0 && priorityMM6 == 0) return assign(mm6, ref, copy);
		if(physicalMM7 == 0 && priorityMM7 == 0) return assign(mm7, ref, copy);

		// Everything is taken: look for the lowest priority among registers that
		// hold a real reference. Registers whose priority is not below the
		// starting threshold (the most recently accessed ones) are never chosen.
		Encoding::Reg victim = Encoding::REG_UNKNOWN;
		unsigned int lowest = 0xFFFFFFFF - 2;

		if(priorityMM0 < lowest && real(physicalMM0)) {lowest = priorityMM0; victim = Encoding::MM0;}
		if(priorityMM1 < lowest && real(physicalMM1)) {lowest = priorityMM1; victim = Encoding::MM1;}
		if(priorityMM2 < lowest && real(physicalMM2)) {lowest = priorityMM2; victim = Encoding::MM2;}
		if(priorityMM3 < lowest && real(physicalMM3)) {lowest = priorityMM3; victim = Encoding::MM3;}
		if(priorityMM4 < lowest && real(physicalMM4)) {lowest = priorityMM4; victim = Encoding::MM4;}
		if(priorityMM5 < lowest && real(physicalMM5)) {lowest = priorityMM5; victim = Encoding::MM5;}
		if(priorityMM6 < lowest && real(physicalMM6)) {lowest = priorityMM6; victim = Encoding::MM6;}
		if(priorityMM7 < lowest && real(physicalMM7)) {lowest = priorityMM7; victim = Encoding::MM7;}

		if(victim == Encoding::REG_UNKNOWN)
		{
			throw Error("Out of physical MMX registers. Use free().");
		}

		// Write the victim back to memory and rebind it to 'ref'
		if(victim == Encoding::MM0) {spill(mm0); return assign(mm0, ref, copy);}
		if(victim == Encoding::MM1) {spill(mm1); return assign(mm1, ref, copy);}
		if(victim == Encoding::MM2) {spill(mm2); return assign(mm2, ref, copy);}
		if(victim == Encoding::MM3) {spill(mm3); return assign(mm3, ref, copy);}
		if(victim == Encoding::MM4) {spill(mm4); return assign(mm4, ref, copy);}
		if(victim == Encoding::MM5) {spill(mm5); return assign(mm5, ref, copy);}
		if(victim == Encoding::MM6) {spill(mm6); return assign(mm6, ref, copy);}
		if(victim == Encoding::MM7) {spill(mm7); return assign(mm7, ref, copy);}

		throw INTERNAL_ERROR;
	}

	// Releases whatever register free(ref) finds bound to 'ref', then obtains an
	// MMX register for it through r64().
	const OperandMMREG &CodeGenerator::x64(const OperandREF &ref, bool copy)
	{
		free(ref);

		const OperandMMREG &fresh = r64(ref, copy);

		return fresh;
	}

	// Returns an MMX register for temporary number 'i' by passing the index to
	// x64() as the reference. Valid indices are 0 through 7; anything else
	// throws Error.
	const OperandMMREG &CodeGenerator::t64(int i)
	{
		const bool inRange = (0 <= i && i < 8);

		if(!inRange)
		{
			throw Error("Register allocator t64 index out of range");
		}

		return x64(i);
	}

	// Returns the MMX register bound to 'ref' when there is one (marking it as
	// accessed), otherwise a qword memory operand for 'ref'.
	// Never allocates a register. Throws Error when 'ref' is 0.
	const OperandR_M64 CodeGenerator::m64(const OperandREF &ref)
	{
		if(ref == 0)
		{
			throw Error("Cannot dereference 0");
		}

		// Prefer the register copy if one exists
		if(physicalMM0 == ref) return access(mm0);
		if(physicalMM1 == ref) return access(mm1);
		if(physicalMM2 == ref) return access(mm2);
		if(physicalMM3 == ref) return access(mm3);
		if(physicalMM4 == ref) return access(mm4);
		if(physicalMM5 == ref) return access(mm5);
		if(physicalMM6 == ref) return access(mm6);
		if(physicalMM7 == ref) return access(mm7);

		// Not in a register: address memory directly
		return qword_ptr [ref];
	}

	// Binds the specific MMX register 'reg' to 'ref'. Thin wrapper over
	// assign(), which throws Error if 'reg' is already bound.
	const OperandMMREG &CodeGenerator::allocate(const OperandMMREG &reg, const OperandREF &ref, bool copy)
	{
		const OperandMMREG &bound = assign(reg, ref, copy);

		return bound;
	}

	// Binds MMX register 'reg' to 'ref' and marks it as accessed. When 'copy' is
	// set and 'ref' is a real memory reference, a movq that loads the register
	// from that memory is emitted.
	//
	// Throws Error if the register is already bound, and INTERNAL_ERROR for a
	// register this allocator does not track.
	const OperandMMREG &CodeGenerator::assign(const OperandMMREG &reg, const OperandREF &ref, bool copy)
	{
		// Only real references have memory worth loading
		const bool load = copy && real(ref);

		if(reg.reg == Encoding::MM0)
		{
			if(physicalMM0 != 0) throw Error("mm0 not available for register allocation");

			physicalMM0 = ref;
			if(load) movq(mm0, qword_ptr [ref]);

			return access(mm0);
		}

		if(reg.reg == Encoding::MM1)
		{
			if(physicalMM1 != 0) throw Error("mm1 not available for register allocation");

			physicalMM1 = ref;
			if(load) movq(mm1, qword_ptr [ref]);

			return access(mm1);
		}

		if(reg.reg == Encoding::MM2)
		{
			if(physicalMM2 != 0) throw Error("mm2 not available for register allocation");

			physicalMM2 = ref;
			if(load) movq(mm2, qword_ptr [ref]);

			return access(mm2);
		}

		if(reg.reg == Encoding::MM3)
		{
			if(physicalMM3 != 0) throw Error("mm3 not available for register allocation");

			physicalMM3 = ref;
			if(load) movq(mm3, qword_ptr [ref]);

			return access(mm3);
		}

		if(reg.reg == Encoding::MM4)
		{
			if(physicalMM4 != 0) throw Error("mm4 not available for register allocation");

			physicalMM4 = ref;
			if(load) movq(mm4, qword_ptr [ref]);

			return access(mm4);
		}

		if(reg.reg == Encoding::MM5)
		{
			if(physicalMM5 != 0) throw Error("mm5 not available for register allocation");

			physicalMM5 = ref;
			if(load) movq(mm5, qword_ptr [ref]);

			return access(mm5);
		}

		if(reg.reg == Encoding::MM6)
		{
			if(physicalMM6 != 0) throw Error("mm6 not available for register allocation");

			physicalMM6 = ref;
			if(load) movq(mm6, qword_ptr [ref]);

			return access(mm6);
		}

		if(reg.reg == Encoding::MM7)
		{
			if(physicalMM7 != 0) throw Error("mm7 not available for register allocation");

			physicalMM7 = ref;
			if(load) movq(mm7, qword_ptr [ref]);

			return access(mm7);
		}

		throw INTERNAL_ERROR;
	}

	// Records a use of MMX register 'reg': its priority becomes the maximum
	// value, while every other non-zero priority drops by one.
	// Returns 'reg' unchanged.
	const OperandMMREG &CodeGenerator::access(const OperandMMREG &reg)
	{
		if(reg.reg == Encoding::MM0) priorityMM0 = 0xFFFFFFFF;
		else if(priorityMM0)         priorityMM0--;

		if(reg.reg == Encoding::MM1) priorityMM1 = 0xFFFFFFFF;
		else if(priorityMM1)         priorityMM1--;

		if(reg.reg == Encoding::MM2) priorityMM2 = 0xFFFFFFFF;
		else if(priorityMM2)         priorityMM2--;

		if(reg.reg == Encoding::MM3) priorityMM3 = 0xFFFFFFFF;
		else if(priorityMM3)         priorityMM3--;

		if(reg.reg == Encoding::MM4) priorityMM4 = 0xFFFFFFFF;
		else if(priorityMM4)         priorityMM4--;

		if(reg.reg == Encoding::MM5) priorityMM5 = 0xFFFFFFFF;
		else if(priorityMM5)         priorityMM5--;

		if(reg.reg == Encoding::MM6) priorityMM6 = 0xFFFFFFFF;
		else if(priorityMM6)         priorityMM6--;

		if(reg.reg == Encoding::MM7) priorityMM7 = 0xFFFFFFFF;
		else if(priorityMM7)         priorityMM7--;

		return reg;
	}

	// Releases MMX register 'reg' without writing it back: both its bound
	// reference and its priority are reset to 0.
	//
	// With SSE emulation enabled, releasing a register that was in use emits
	// emms once no MMX register has a non-zero priority any more.
	// Throws INTERNAL_ERROR for a register this allocator does not track.
	void CodeGenerator::free(const OperandMMREG &reg)
	{
		// Whether the register had a non-zero priority before this release
		bool wasInUse = false;

		switch(reg.reg)
		{
		case Encoding::MM0:
			wasInUse = (priorityMM0 != 0);
			physicalMM0 = 0;
			priorityMM0 = 0;
			break;
		case Encoding::MM1:
			wasInUse = (priorityMM1 != 0);
			physicalMM1 = 0;
			priorityMM1 = 0;
			break;
		case Encoding::MM2:
			wasInUse = (priorityMM2 != 0);
			physicalMM2 = 0;
			priorityMM2 = 0;
			break;
		case Encoding::MM3:
			wasInUse = (priorityMM3 != 0);
			physicalMM3 = 0;
			priorityMM3 = 0;
			break;
		case Encoding::MM4:
			wasInUse = (priorityMM4 != 0);
			physicalMM4 = 0;
			priorityMM4 = 0;
			break;
		case Encoding::MM5:
			wasInUse = (priorityMM5 != 0);
			physicalMM5 = 0;
			priorityMM5 = 0;
			break;
		case Encoding::MM6:
			wasInUse = (priorityMM6 != 0);
			physicalMM6 = 0;
			priorityMM6 = 0;
			break;
		case Encoding::MM7:
			wasInUse = (priorityMM7 != 0);
			physicalMM7 = 0;
			priorityMM7 = 0;
			break;
		default:
			throw INTERNAL_ERROR;
		}

		if(!emulateSSE || !wasInUse)
		{
			return;
		}

		const bool stillInUse = priorityMM0 || priorityMM1 || priorityMM2 || priorityMM3 ||
		                        priorityMM4 || priorityMM5 || priorityMM6 || priorityMM7;

		if(!stillInUse)
		{
			emms();
		}
	}

	void CodeGenerator::spill(const OperandMMREG &reg)
	{
		switch(reg.reg)
		{
		case Encoding::MM0: if(real(physicalMM0)) movq(qword_ptr [physicalMM0], mm0); break;
		case Encoding::MM1: if(real(physicalMM1)) movq(qword_ptr [physicalMM1], mm1); break;
		case Encoding::MM2: if(real(physicalMM2)) movq(qword_ptr [physicalMM2], mm2); break;
		case Encoding::MM3: if(real(physicalMM3)) movq(qword_ptr [physicalMM3], mm3); break;
		case Encoding::MM4: if(real(physicalMM4)) movq(qword_ptr [physicalMM4], mm4); break;
		case Encoding::MM5: if(real(physicalMM5)) movq(qword_ptr [physicalMM5], mm5); break;
		case Encoding::MM6: if(real(physicalMM6)) movq(qword_ptr [physicalMM6], mm6); break;
		case Encoding::MM7: if(real(physicalMM7)) movq(qword_ptr [physicalMM7], mm7); break;
		default: throw INTERNAL_ERROR;
		}

		free(reg);
	}

	// Returns the SSE register bound to 'ref', binding one first if necessary.
	//
	// Lookup order: a register already holding 'ref', then the first unused
	// register, and finally the lowest-priority register holding a real memory
	// reference, which is spilled and reused. 'copy' is forwarded to assign().
	//
	// Throws Error when 'ref' is 0 and a copy was requested, or when no register
	// qualifies for spilling.
	const OperandXMMREG &CodeGenerator::r128(const OperandREF &ref, bool copy)
	{
		if(ref == 0 && copy)
		{
			throw Error("Cannot dereference 0");
		}

		// Already bound to a register?
		if(physicalXMM0 == ref) return access(xmm0);
		if(physicalXMM1 == ref) return access(xmm1);
		if(physicalXMM2 == ref) return access(xmm2);
		if(physicalXMM3 == ref) return access(xmm3);
		if(physicalXMM4 == ref) return access(xmm4);
		if(physicalXMM5 == ref) return access(xmm5);
		if(physicalXMM6 == ref) return access(xmm6);
		if(physicalXMM7 == ref) return access(xmm7);

		// Otherwise take the first register that is neither bound nor in use
		if(physicalXMM0 == 0 && priorityXMM0 == 0) return assign(xmm0, ref, copy);
		if(physicalXMM1 == 0 && priorityXMM1 == 0) return assign(xmm1, ref, copy);
		if(physicalXMM2 == 0 && priorityXMM2 == 0) return assign(xmm2, ref, copy);
		if(physicalXMM3 == 0 && priorityXMM3 == 0) return assign(xmm3, ref, copy);
		if(physicalXMM4 == 0 && priorityXMM4 == 0) return assign(xmm4, ref, copy);
		if(physicalXMM5 == 0 && priorityXMM5 == 0) return assign(xmm5, ref, copy);
		if(physicalXMM6 == 0 && priorityXMM6 == 0) return assign(xmm6, ref, copy);
		if(physicalXMM7 == 0 && priorityXMM7 == 0) return assign(xmm7, ref, copy);

		// Everything is taken: look for the lowest priority among registers that
		// hold a real reference. Registers whose priority is not below the
		// starting threshold (the most recently accessed ones) are never chosen.
		Encoding::Reg victim = Encoding::REG_UNKNOWN;
		unsigned int lowest = 0xFFFFFFFF - 2;

		if(priorityXMM0 < lowest && real(physicalXMM0)) {lowest = priorityXMM0; victim = Encoding::XMM0;}
		if(priorityXMM1 < lowest && real(physicalXMM1)) {lowest = priorityXMM1; victim = Encoding::XMM1;}
		if(priorityXMM2 < lowest && real(physicalXMM2)) {lowest = priorityXMM2; victim = Encoding::XMM2;}
		if(priorityXMM3 < lowest && real(physicalXMM3)) {lowest = priorityXMM3; victim = Encoding::XMM3;}
		if(priorityXMM4 < lowest && real(physicalXMM4)) {lowest = priorityXMM4; victim = Encoding::XMM4;}
		if(priorityXMM5 < lowest && real(physicalXMM5)) {lowest = priorityXMM5; victim = Encoding::XMM5;}
		if(priorityXMM6 < lowest && real(physicalXMM6)) {lowest = priorityXMM6; victim = Encoding::XMM6;}
		if(priorityXMM7 < lowest && real(physicalXMM7)) {lowest = priorityXMM7; victim = Encoding::XMM7;}

		if(victim == Encoding::REG_UNKNOWN)
		{
			throw Error("Out of physical SSE registers. Use free().");
		}

		// Write the victim back to memory and rebind it to 'ref'
		if(victim == Encoding::XMM0) {spill(xmm0); return assign(xmm0, ref, copy);}
		if(victim == Encoding::XMM1) {spill(xmm1); return assign(xmm1, ref, copy);}
		if(victim == Encoding::XMM2) {spill(xmm2); return assign(xmm2, ref, copy);}
		if(victim == Encoding::XMM3) {spill(xmm3); return assign(xmm3, ref, copy);}
		if(victim == Encoding::XMM4) {spill(xmm4); return assign(xmm4, ref, copy);}
		if(victim == Encoding::XMM5) {spill(xmm5); return assign(xmm5, ref, copy);}
		if(victim == Encoding::XMM6) {spill(xmm6); return assign(xmm6, ref, copy);}
		if(victim == Encoding::XMM7) {spill(xmm7); return assign(xmm7, ref, copy);}

		throw INTERNAL_ERROR;
	}

	// Releases whatever register free(ref) finds bound to 'ref', then obtains an
	// SSE register for it through r128().
	const OperandXMMREG &CodeGenerator::x128(const OperandREF &ref, bool copy)
	{
		free(ref);

		const OperandXMMREG &fresh = r128(ref, copy);

		return fresh;
	}

	// Returns an SSE register for temporary number 'i' by passing the index to
	// x128() as the reference. Valid indices are 0 through 7; anything else
	// throws Error.
	const OperandXMMREG &CodeGenerator::t128(int i)
	{
		const bool inRange = (0 <= i && i < 8);

		if(!inRange)
		{
			throw Error("Register allocator t128 index out of range");
		}

		return x128(i);
	}

	// Returns the SSE register bound to 'ref' when there is one (marking it as
	// accessed), otherwise an xword memory operand for 'ref'.
	// Never allocates a register. Throws Error when 'ref' is 0.
	const OperandR_M128 CodeGenerator::m128(const OperandREF &ref)
	{
		if(ref == 0)
		{
			throw Error("Cannot dereference 0");
		}

		// Prefer the register copy if one exists
		if(physicalXMM0 == ref) return access(xmm0);
		if(physicalXMM1 == ref) return access(xmm1);
		if(physicalXMM2 == ref) return access(xmm2);
		if(physicalXMM3 == ref) return access(xmm3);
		if(physicalXMM4 == ref) return access(xmm4);
		if(physicalXMM5 == ref) return access(xmm5);
		if(physicalXMM6 == ref) return access(xmm6);
		if(physicalXMM7 == ref) return access(xmm7);

		// Not in a register: address memory directly
		return xword_ptr [ref];
	}

	// Binds the specific SSE register 'reg' to 'ref'. Thin wrapper over
	// assign(), which throws Error if 'reg' is already bound.
	const OperandXMMREG &CodeGenerator::allocate(const OperandXMMREG &reg, const OperandREF &ref, bool copy)
	{
		const OperandXMMREG &bound = assign(reg, ref, copy);

		return bound;
	}

	// Binds SSE register 'reg' to 'ref' and marks it as accessed. When 'copy' is
	// set and 'ref' is a real memory reference, a movaps that loads the register
	// from that memory is emitted.
	//
	// Throws Error if the register is already bound, and INTERNAL_ERROR for a
	// register this allocator does not track.
	const OperandXMMREG &CodeGenerator::assign(const OperandXMMREG &reg, const OperandREF &ref, bool copy)
	{
		// Only real references have memory worth loading
		const bool load = copy && real(ref);

		if(reg.reg == Encoding::XMM0)
		{
			if(physicalXMM0 != 0) throw Error("xmm0 not available for register allocation");

			physicalXMM0 = ref;
			if(load) movaps(xmm0, xword_ptr [ref]);

			return access(xmm0);
		}

		if(reg.reg == Encoding::XMM1)
		{
			if(physicalXMM1 != 0) throw Error("xmm1 not available for register allocation");

			physicalXMM1 = ref;
			if(load) movaps(xmm1, xword_ptr [ref]);

			return access(xmm1);
		}

		if(reg.reg == Encoding::XMM2)
		{
			if(physicalXMM2 != 0) throw Error("xmm2 not available for register allocation");

			physicalXMM2 = ref;
			if(load) movaps(xmm2, xword_ptr [ref]);

			return access(xmm2);
		}

		if(reg.reg == Encoding::XMM3)
		{
			if(physicalXMM3 != 0) throw Error("xmm3 not available for register allocation");

			physicalXMM3 = ref;
			if(load) movaps(xmm3, xword_ptr [ref]);

			return access(xmm3);
		}

		if(reg.reg == Encoding::XMM4)
		{
			if(physicalXMM4 != 0) throw Error("xmm4 not available for register allocation");

			physicalXMM4 = ref;
			if(load) movaps(xmm4, xword_ptr [ref]);

			return access(xmm4);
		}

		if(reg.reg == Encoding::XMM5)
		{
			if(physicalXMM5 != 0) throw Error("xmm5 not available for register allocation");

			physicalXMM5 = ref;
			if(load) movaps(xmm5, xword_ptr [ref]);

			return access(xmm5);
		}

		if(reg.reg == Encoding::XMM6)
		{
			if(physicalXMM6 != 0) throw Error("xmm6 not available for register allocation");

			physicalXMM6 = ref;
			if(load) movaps(xmm6, xword_ptr [ref]);

			return access(xmm6);
		}

		if(reg.reg == Encoding::XMM7)
		{
			if(physicalXMM7 != 0) throw Error("xmm7 not available for register allocation");

			physicalXMM7 = ref;
			if(load) movaps(xmm7, xword_ptr [ref]);

			return access(xmm7);
		}

		throw INTERNAL_ERROR;
	}

	// Records a use of SSE register 'reg': its priority becomes the maximum
	// value, while every other non-zero priority drops by one.
	// Returns 'reg' unchanged.
	const OperandXMMREG &CodeGenerator::access(const OperandXMMREG &reg)
	{
		if(reg.reg == Encoding::XMM0) priorityXMM0 = 0xFFFFFFFF;
		else if(priorityXMM0)         priorityXMM0--;

		if(reg.reg == Encoding::XMM1) priorityXMM1 = 0xFFFFFFFF;
		else if(priorityXMM1)         priorityXMM1--;

		if(reg.reg == Encoding::XMM2) priorityXMM2 = 0xFFFFFFFF;
		else if(priorityXMM2)         priorityXMM2--;

		if(reg.reg == Encoding::XMM3) priorityXMM3 = 0xFFFFFFFF;
		else if(priorityXMM3)         priorityXMM3--;

		if(reg.reg == Encoding::XMM4) priorityXMM4 = 0xFFFFFFFF;
		else if(priorityXMM4)         priorityXMM4--;

		if(reg.reg == Encoding::XMM5) priorityXMM5 = 0xFFFFFFFF;
		else if(priorityXMM5)         priorityXMM5--;

		if(reg.reg == Encoding::XMM6) priorityXMM6 = 0xFFFFFFFF;
		else if(priorityXMM6)         priorityXMM6--;

		if(reg.reg == Encoding::XMM7) priorityXMM7 = 0xFFFFFFFF;
		else if(priorityXMM7)         priorityXMM7--;

		return reg;
	}

	// Releases SSE register 'reg' without writing it back: both its bound
	// reference and its priority are reset to 0.
	// Throws INTERNAL_ERROR for a register this allocator does not track.
	void CodeGenerator::free(const OperandXMMREG &reg)
	{
		if(reg.reg == Encoding::XMM0)
		{
			physicalXMM0 = 0;
			priorityXMM0 = 0;
		}
		else if(reg.reg == Encoding::XMM1)
		{
			physicalXMM1 = 0;
			priorityXMM1 = 0;
		}
		else if(reg.reg == Encoding::XMM2)
		{
			physicalXMM2 = 0;
			priorityXMM2 = 0;
		}
		else if(reg.reg == Encoding::XMM3)
		{
			physicalXMM3 = 0;
			priorityXMM3 = 0;
		}
		else if(reg.reg == Encoding::XMM4)
		{
			physicalXMM4 = 0;
			priorityXMM4 = 0;
		}
		else if(reg.reg == Encoding::XMM5)
		{
			physicalXMM5 = 0;
			priorityXMM5 = 0;
		}
		else if(reg.reg == Encoding::XMM6)
		{
			physicalXMM6 = 0;
			priorityXMM6 = 0;
		}
		else if(reg.reg == Encoding::XMM7)
		{
			physicalXMM7 = 0;
			priorityXMM7 = 0;
		}
		else
		{
			throw INTERNAL_ERROR;
		}
	}

	void CodeGenerator::spill(const OperandXMMREG &reg)
	{
		switch(reg.reg)
		{
		case Encoding::XMM0: if(real(physicalXMM0)) movaps(xword_ptr [physicalXMM0], xmm0); break;
		case Encoding::XMM1: if(real(physicalXMM1)) movaps(xword_ptr [physicalXMM1], xmm1); break;
		case Encoding::XMM2: if(real(physicalXMM2)) movaps(xword_ptr [physicalXMM2], xmm2); break;
		case Encoding::XMM3: if(real(physicalXMM3)) movaps(xword_ptr [physicalXMM3], xmm3); break;
		case Encoding::XMM4: if(real(physicalXMM4)) movaps(xword_ptr [physicalXMM4], xmm4); break;
		case Encoding::XMM5: if(real(physicalXMM5)) movaps(xword_ptr [physicalXMM5], xmm5); break;
		case Encoding::XMM6: if(real(physicalXMM6)) movaps(xword_ptr [physicalXMM6], xmm6); break;
		case Encoding::XMM7: if(real(physicalXMM7)) movaps(xword_ptr [physicalXMM7], xmm7); break;
		default: throw INTERNAL_ERROR;
		}

		free(reg);
	}

	// Tells whether 'ref' is treated as a real memory reference: it has a base
	// or index register, a non-zero scale, or a displacement of at least 8.
	// A bare displacement below 8 is not considered real.
	bool CodeGenerator::real(const OperandREF &ref)
	{
		if(ref.baseReg != Encoding::REG_UNKNOWN) return true;
		if(ref.indexReg != Encoding::REG_UNKNOWN) return true;
		if(ref.scale != 0) return true;

		return ref.displacement >= 8;
	}

	void CodeGenerator::free(const OperandREF &ref)
	{
		     if(physicalEAX == ref) free(eax);
		else if(physicalECX == ref) free(ecx);
		else if(physicalEDX == ref) free(edx);
		else if(physicalEBX == ref) free(ebx);
		else if(physicalESI == ref) free(esi);
		else if(physicalEDI == ref) free(edi);

		else if(physicalMM0 == ref) free(mm0);
		else if(physicalMM1 == ref) free(mm1);
		else if(physicalMM2 == ref) free(mm2);
		else if(physicalMM3 == ref) free(mm3);
		else if(physicalMM4 == ref) free(mm4);
		else if(physicalMM5 == ref) free(mm5);
		else if(physicalMM6 == ref) free(mm6);
		else if(physicalMM7 == ref) free(mm7);
	}

	void CodeGenerator::spill(const OperandREF &ref)
	{
		     if(physicalEAX == ref) mov(dword_ptr [physicalEAX], eax);
		else if(physicalECX == ref) mov(dword_ptr [physicalECX], ecx);
		else if(physicalEDX == ref) mov(dword_ptr [physicalEDX], edx);
		else if(physicalEBX == ref) mov(dword_ptr [physicalEBX], ebx);
		else if(physicalESI == ref) mov(dword_ptr [physicalESI], esi);
		else if(physicalEDI == ref) mov(dword_ptr [physicalEDI], edi);

		else if(physicalMM0 == ref) movq(qword_ptr [physicalMM0], mm0);
		else if(physicalMM1 == ref) movq(qword_ptr [physicalMM1], mm1);
		else if(physicalMM2 == ref) movq(qword_ptr [physicalMM2], mm2);
		else if(physicalMM3 == ref) movq(qword_ptr [physicalMM3], mm3);
		else if(physicalMM4 == ref) movq(qword_ptr [physicalMM4], mm4);
		else if(physicalMM5 == ref) movq(qword_ptr [physicalMM5], mm5);
		else if(physicalMM6 == ref) movq(qword_ptr [physicalMM6], mm6);
		else if(physicalMM7 == ref) movq(qword_ptr [physicalMM7], mm7);

		free(ref);
	}

	// Releases every tracked register (general purpose, MMX and SSE) without
	// writing any of them back to memory. The MMX overload of free() may emit
	// emms when SSE emulation is enabled.
	void CodeGenerator::freeAll()
	{
		// General purpose registers
		free(eax);
		free(ecx);
		free(edx);
		free(ebx);
		free(esi);
		free(edi);

		// MMX registers
		free(mm0);
		free(mm1);
		free(mm2);
		free(mm3);
		free(mm4);
		free(mm5);
		free(mm6);
		free(mm7);

		// SSE registers
		free(xmm0);
		free(xmm1);
		free(xmm2);
		free(xmm3);
		free(xmm4);
		free(xmm5);
		free(xmm6);
		free(xmm7);
	}

	// Spills every tracked register (general purpose, MMX and SSE): registers
	// bound to a real reference are written back to memory, and all of them
	// are released.
	void CodeGenerator::spillAll()
	{
		// General purpose registers
		spill(eax);
		spill(ecx);
		spill(edx);
		spill(ebx);
		spill(esi);
		spill(edi);

		// MMX registers
		spill(mm0);
		spill(mm1);
		spill(mm2);
		spill(mm3);
		spill(mm4);
		spill(mm5);
		spill(mm6);
		spill(mm7);

		// SSE registers
		spill(xmm0);
		spill(xmm1);
		spill(xmm2);
		spill(xmm3);
		spill(xmm4);
		spill(xmm5);
		spill(xmm6);
		spill(xmm7);
	}

	// Spills all eight MMX registers: those bound to a real reference are
	// written back to memory, and all of them are released. Called by the
	// emulated SSE arithmetic before it emits x87 instructions.
	void CodeGenerator::spillMMX()
	{
		spill(mm0);
		spill(mm1);
		spill(mm2);
		spill(mm3);
		spill(mm4);
		spill(mm5);
		spill(mm6);
		spill(mm7);
	}

	void CodeGenerator::spillMMXcept(const OperandMMREG &reg)
	{
		if(reg.reg != 0) spill(mm0);
		if(reg.reg != 1) spill(mm1);
		if(reg.reg != 2) spill(mm2);
		if(reg.reg != 3) spill(mm3);
		if(reg.reg != 4) spill(mm4);
		if(reg.reg != 5) spill(mm5);
		if(reg.reg != 6) spill(mm6);
		if(reg.reg != 7) spill(mm7);

		emms();
	}

	// Turns SSE emulation on: the SSE wrappers in this file then emit their
	// emulated instruction sequences instead of forwarding to Assembler.
	void CodeGenerator::enableEmulateSSE()
	{
		CodeGenerator::emulateSSE = true;
	}

	// Turns SSE emulation off: the SSE wrappers in this file forward to the
	// Assembler implementations again.
	void CodeGenerator::disableEmulateSSE()
	{
		CodeGenerator::emulateSSE = false;
	}

	// addps xmmi, xmmj.
	//
	// Without emulation this forwards to Assembler::addps. With emulation it
	// spills the MMX registers and emits, for each of the four lanes, an x87
	// load / add / store on the sse[][] storage of the two registers, then
	// returns -1.
	int CodeGenerator::addps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::addps(xmmi, xmmj);
		}

		// MMX registers are written back before the x87 sequence is emitted
		spillMMX();

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[dst][lane]]);
			fadd(dword_ptr [&sse[src][lane]]);
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// addps xmm, mem128.
	//
	// Without emulation this forwards to Assembler::addps. With emulation it
	// spills the MMX registers and emits, for each of the four lanes, an x87
	// load from sse[][], an add of the matching dword of 'mem128' (offsets 0,
	// 4, 8 and 12) and a store back, then returns -1.
	int CodeGenerator::addps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::addps(xmm, mem128);
		}

		// MMX registers are written back before the x87 sequence is emitted
		spillMMX();

		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[dst][lane]]);
			fadd((OperandMEM32&)(mem128 + 4 * lane));
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// addps xmm, xmm/mem128.
	//
	// Without emulation this forwards to Assembler::addps. With emulation it
	// dispatches on the operand type to the register or memory overload.
	int CodeGenerator::addps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::addps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return addps(xmm, (OperandXMMREG&)r_m128);
		}

		return addps(xmm, (OperandMEM128&)r_m128);
	}

	// addss xmmi, xmmj.
	//
	// Without emulation this forwards to Assembler::addss. With emulation it
	// spills the MMX registers and emits an x87 load / add / store on lane 0 of
	// the sse[][] storage of the two registers, then returns -1.
	int CodeGenerator::addss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::addss(xmmi, xmmj);
		}

		// MMX registers are written back before the x87 sequence is emitted
		spillMMX();

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&sse[dst][0]]);
		fadd(dword_ptr [&sse[src][0]]);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// addss xmm, mem32.
	//
	// Without emulation this forwards to Assembler::addss. With emulation it
	// spills the MMX registers and emits an x87 load of lane 0 from sse[][], an
	// add of 'mem32' and a store back, then returns -1.
	int CodeGenerator::addss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::addss(xmm, mem32);
		}

		// MMX registers are written back before the x87 sequence is emitted
		spillMMX();

		const int dst = xmm.reg;

		fld(dword_ptr [&sse[dst][0]]);
		fadd((OperandMEM32&)mem32);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// addss xmm, xmm/mem32.
	//
	// Without emulation this forwards to Assembler::addss. With emulation it
	// dispatches on the operand type to the register or memory overload.
	int CodeGenerator::addss(OperandXMMREG xmm, OperandXMM32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::addss(xmm, r_m32);
		}

		if(r_m32.type == Operand::XMMREG)
		{
			return addss(xmm, (OperandXMMREG&)r_m32);
		}

		return addss(xmm, (OperandMEM32&)r_m32);
	}

	// andnps xmmi, xmmj.
	//
	// Without emulation this forwards to Assembler::andnps. With emulation it
	// emits, for each of the four lanes, a load of the source dword from
	// sse[][] into a scratch register, a 'not' of the destination dword and an
	// 'and' of the destination dword with the scratch register. The scratch
	// register is then released and -1 is returned.
	int CodeGenerator::andnps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::andnps(xmmi, xmmj);
		}

		// The address of this static serves as the reference for the scratch register
		static int scratch;

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&scratch), dword_ptr [&sse[src][lane]]);
			not(dword_ptr [&sse[dst][lane]]);
			and(dword_ptr [&sse[dst][lane]], r32(&scratch));
		}

		free(&scratch);

		return -1;
	}
	
	// ANDNPS, memory source: xmm = (~xmm) & [mem128].
	// Each 32-bit lane of the memory operand (byte offsets 0, 4, 8, 12) is
	// staged in the register obtained for t0, then combined with the inverted
	// destination lane. Returns -1 when emulating.
	int CodeGenerator::andnps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::andnps(xmm, mem128);
		}

		static int t0;   // its address is the handle passed to x32()/r32()/free()
		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), (OperandMEM32&)(mem128+4*lane));
			not(dword_ptr [&sse[dst][lane]]);
			and(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);

		return -1;
	}
	
	// ANDNPS with a register-or-memory source; dispatches on the operand type
	// when emulating, else defers to Assembler::andnps.
	int CodeGenerator::andnps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::andnps(xmm, r_m128);
		}

		const bool fromRegister = (r_m128.type == Operand::XMMREG);

		return fromRegister ? andnps(xmm, (OperandXMMREG&)r_m128)
		                    : andnps(xmm, (OperandMEM128&)r_m128);
	}

	// ANDPS, register source: xmmi &= xmmj.
	// The emulation ANDs the four 32-bit lanes of sse[reg][lane] one by one
	// through the register obtained for t0, and returns -1.
	int CodeGenerator::andps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::andps(xmmi, xmmj);
		}

		static int t0;   // its address is the handle passed to x32()/r32()/free()
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			and(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);

		return -1;
	}
	
	// ANDPS, memory source: xmm &= [mem128].
	// Lanes are read from byte offsets 0, 4, 8 and 12 of the memory operand.
	// Returns -1 when emulating.
	int CodeGenerator::andps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::andps(xmm, mem128);
		}

		static int t0;   // its address is the handle passed to x32()/r32()/free()
		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), (OperandMEM32&)(mem128+4*lane));
			and(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);

		return -1;
	}
	
	// ANDPS with a register-or-memory source; dispatches on the operand type
	// when emulating, else defers to Assembler::andps.
	int CodeGenerator::andps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::andps(xmm, r_m128);
		}

		const bool fromRegister = (r_m128.type == Operand::XMMREG);

		return fromRegister ? andps(xmm, (OperandXMMREG&)r_m128)
		                    : andps(xmm, (OperandMEM128&)r_m128);
	}

	// CMPPS, register source: compares xmmi with xmmj lane by lane using
	// predicate c (0..7, see the case labels below).
	//
	// With emulateSSE set, spillMMX() is called, then each of the four lanes is
	// compared on the x87 stack (fcomip) and the lane of xmmi is overwritten
	// with 1.0f when the selected flag condition holds and 0.0f otherwise; the
	// function returns -1. Any other value of c throws INTERNAL_ERROR. Without
	// emulation the call is forwarded to Assembler::cmpps.
	int CodeGenerator::cmpps(OperandXMMREG xmmi, OperandXMMREG xmmj, char c)
	{
		if(emulateSSE)
		{
			spillMMX();
			const int i = xmmi.reg;
			const int j = xmmj.reg;
			static float zero = 0;
			static float one = 1;
			fld(dword_ptr [&zero]);		// st2 after the compare has popped
			fld(dword_ptr [&one]);		// st1 after the compare has popped

			// All four lanes use the same instruction sequence.
			for(int lane = 0; lane < 4; lane++)
			{
				fld(dword_ptr [&sse[j][lane]]);
				fld(dword_ptr [&sse[i][lane]]);
				fcomip(st0, st1);   // compare xmmi lane (st0) with xmmj lane (st1), pop st0
				switch(c)
				{
				case 0:   // CMPEQPS
					fcmove(st1);
					fcmovne(st2);
					break;
				case 1:   // CMPLTPS
					fcmovb(st1);
					fcmovnb(st2);
					break;
				case 2:   // CMPLEPS
					fcmovbe(st1);
					fcmovnbe(st2);
					break;
				case 3:   // CMPUNORDPS
					// Unordered must select 1.0f (st1) and ordered 0.0f (st2),
					// i.e. the exact opposite of CMPORDPS below.
					fcmovu(st1);
					fcmovnu(st2);
					break;
				case 4:   // CMPNEQPS
					fcmovne(st1);
					fcmove(st2);
					break;
				case 5:   // CMPNLTPS
					fcmovnb(st1);
					fcmovb(st2);
					break;
				case 6:   // CMPNLEPS
					fcmovnbe(st1);
					fcmovbe(st2);
					break;
				case 7:   // CMPORDPS
					fcmovnu(st1);
					fcmovu(st2);
					break;
				default:
					throw INTERNAL_ERROR;
				}
				fstp(dword_ptr [&sse[i][lane]]);
			}

			// Release the two constants still on the x87 stack.
			ffree(st0);
			ffree(st1);

			return -1;
		}
		else
		{
			return Assembler::cmpps(xmmi, xmmj, c);
		}
	}

	// CMPPS, memory source: compares xmm with the four floats at mem128 (byte
	// offsets 0, 4, 8, 12) using predicate c (0..7, see the case labels below).
	//
	// With emulateSSE set, spillMMX() is called, then each lane is compared on
	// the x87 stack (fcomip) and the lane of xmm is overwritten with 1.0f when
	// the selected flag condition holds and 0.0f otherwise; the function
	// returns -1. Any other value of c throws INTERNAL_ERROR. Without emulation
	// the call is forwarded to Assembler::cmpps.
	int CodeGenerator::cmpps(OperandXMMREG xmm, OperandMEM128 mem128, char c)
	{
		if(emulateSSE)
		{
			spillMMX();
			const int i = xmm.reg;
			static float zero = 0;
			static float one = 1;
			fld(dword_ptr [&zero]);		// st2 after the compare has popped
			fld(dword_ptr [&one]);		// st1 after the compare has popped

			// All four lanes use the same instruction sequence.
			for(int lane = 0; lane < 4; lane++)
			{
				fld((OperandMEM32&)(mem128+4*lane));
				fld(dword_ptr [&sse[i][lane]]);
				fcomip(st0, st1);   // compare xmm lane (st0) with memory lane (st1), pop st0
				switch(c)
				{
				case 0:   // CMPEQPS
					fcmove(st1);
					fcmovne(st2);
					break;
				case 1:   // CMPLTPS
					fcmovb(st1);
					fcmovnb(st2);
					break;
				case 2:   // CMPLEPS
					fcmovbe(st1);
					fcmovnbe(st2);
					break;
				case 3:   // CMPUNORDPS
					// Unordered must select 1.0f (st1) and ordered 0.0f (st2),
					// i.e. the exact opposite of CMPORDPS below.
					fcmovu(st1);
					fcmovnu(st2);
					break;
				case 4:   // CMPNEQPS
					fcmovne(st1);
					fcmove(st2);
					break;
				case 5:   // CMPNLTPS
					fcmovnb(st1);
					fcmovb(st2);
					break;
				case 6:   // CMPNLEPS
					fcmovnbe(st1);
					fcmovbe(st2);
					break;
				case 7:   // CMPORDPS
					fcmovnu(st1);
					fcmovu(st2);
					break;
				default:
					throw INTERNAL_ERROR;
				}
				fstp(dword_ptr [&sse[i][lane]]);
			}

			// Release the two constants still on the x87 stack.
			ffree(st0);
			ffree(st1);

			return -1;
		}
		else
		{
			return Assembler::cmpps(xmm, mem128, c);
		}
	}

	// CMPPS with a register-or-memory source and predicate c; dispatches on the
	// operand type when emulating, else defers to Assembler::cmpps.
	int CodeGenerator::cmpps(OperandXMMREG xmm, OperandR_M128 r_m128, char c)
	{
		if(!emulateSSE)
		{
			return Assembler::cmpps(xmm, r_m128, c);
		}

		const bool fromRegister = (r_m128.type == Operand::XMMREG);

		return fromRegister ? cmpps(xmm, (OperandXMMREG&)r_m128, c)
		                    : cmpps(xmm, (OperandMEM128&)r_m128, c);
	}

	// CMPEQPS (register source): cmpps with predicate 0.
	int CodeGenerator::cmpeqps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 0;

		return cmpps(xmmi, xmmj, predicate);
	}

	// CMPEQPS (memory source): cmpps with predicate 0.
	int CodeGenerator::cmpeqps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		const char predicate = 0;

		return cmpps(xmm, mem128, predicate);
	}

	// CMPEQPS (register-or-memory source): cmpps with predicate 0.
	int CodeGenerator::cmpeqps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		const char predicate = 0;

		return cmpps(xmm, r_m128, predicate);
	}

	// CMPLEPS (register source): cmpps with predicate 2.
	int CodeGenerator::cmpleps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 2;

		return cmpps(xmmi, xmmj, predicate);
	}

	// CMPLEPS (memory source): cmpps with predicate 2.
	int CodeGenerator::cmpleps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		const char predicate = 2;

		return cmpps(xmm, mem128, predicate);
	}

	// CMPLEPS (register-or-memory source): cmpps with predicate 2.
	int CodeGenerator::cmpleps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		const char predicate = 2;

		return cmpps(xmm, r_m128, predicate);
	}

	// CMPLTPS (register source): cmpps with predicate 1.
	int CodeGenerator::cmpltps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 1;

		return cmpps(xmmi, xmmj, predicate);
	}

	// CMPLTPS (memory source): cmpps with predicate 1.
	int CodeGenerator::cmpltps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		const char predicate = 1;

		return cmpps(xmm, mem128, predicate);
	}

	// CMPLTPS (register-or-memory source): cmpps with predicate 1.
	int CodeGenerator::cmpltps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		const char predicate = 1;

		return cmpps(xmm, r_m128, predicate);
	}

	// CMPNEQPS (register source): cmpps with predicate 4.
	int CodeGenerator::cmpneqps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 4;

		return cmpps(xmmi, xmmj, predicate);
	}

	// CMPNEQPS (memory source): cmpps with predicate 4.
	int CodeGenerator::cmpneqps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		const char predicate = 4;

		return cmpps(xmm, mem128, predicate);
	}

	// CMPNEQPS (register-or-memory source): cmpps with predicate 4.
	int CodeGenerator::cmpneqps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		const char predicate = 4;

		return cmpps(xmm, r_m128, predicate);
	}

	// CMPNLEPS (register source): cmpps with predicate 6.
	int CodeGenerator::cmpnleps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 6;

		return cmpps(xmmi, xmmj, predicate);
	}

	// CMPNLEPS (memory source): cmpps with predicate 6.
	int CodeGenerator::cmpnleps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		const char predicate = 6;

		return cmpps(xmm, mem128, predicate);
	}

	// CMPNLEPS (register-or-memory source): cmpps with predicate 6.
	int CodeGenerator::cmpnleps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		const char predicate = 6;

		return cmpps(xmm, r_m128, predicate);
	}

	// CMPNLTPS (register source): cmpps with predicate 5.
	int CodeGenerator::cmpnltps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 5;

		return cmpps(xmmi, xmmj, predicate);
	}

	// CMPNLTPS (memory source): cmpps with predicate 5.
	int CodeGenerator::cmpnltps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		const char predicate = 5;

		return cmpps(xmm, mem128, predicate);
	}

	// CMPNLTPS (register-or-memory source): cmpps with predicate 5.
	int CodeGenerator::cmpnltps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		const char predicate = 5;

		return cmpps(xmm, r_m128, predicate);
	}

	// CMPORDPS (register source): cmpps with predicate 7.
	int CodeGenerator::cmpordps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 7;

		return cmpps(xmmi, xmmj, predicate);
	}

	// CMPORDPS (memory source): cmpps with predicate 7.
	int CodeGenerator::cmpordps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		const char predicate = 7;

		return cmpps(xmm, mem128, predicate);
	}

	// CMPORDPS (register-or-memory source): cmpps with predicate 7.
	int CodeGenerator::cmpordps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		const char predicate = 7;

		return cmpps(xmm, r_m128, predicate);
	}

	// CMPUNORDPS (register source): cmpps with predicate 3.
	int CodeGenerator::cmpunordps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 3;

		return cmpps(xmmi, xmmj, predicate);
	}

	// CMPUNORDPS (memory source): cmpps with predicate 3.
	int CodeGenerator::cmpunordps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		const char predicate = 3;

		return cmpps(xmm, mem128, predicate);
	}

	// CMPUNORDPS (register-or-memory source): cmpps with predicate 3.
	int CodeGenerator::cmpunordps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		const char predicate = 3;

		return cmpps(xmm, r_m128, predicate);
	}

	// CMPSS, register source: compares lane 0 of xmmi with lane 0 of xmmj using
	// predicate c (0..7, see the case labels below).
	//
	// With emulateSSE set, spillMMX() is called, the two lanes are compared on
	// the x87 stack (fcomip) and lane 0 of xmmi is overwritten with 1.0f when
	// the selected flag condition holds and 0.0f otherwise; the function
	// returns -1. Any other value of c throws INTERNAL_ERROR. Without emulation
	// the call is forwarded to Assembler::cmpss.
	int CodeGenerator::cmpss(OperandXMMREG xmmi, OperandXMMREG xmmj, char c)
	{
		if(emulateSSE)
		{
			spillMMX();
			const int i = xmmi.reg;
			const int j = xmmj.reg;
			static float zero = 0;
			static float one = 1;
			fld(dword_ptr [&zero]);		// st2 after the compare has popped
			fld(dword_ptr [&one]);		// st1 after the compare has popped

			fld(dword_ptr [&sse[j][0]]);
			fld(dword_ptr [&sse[i][0]]);
			fcomip(st0, st1);   // compare xmmi lane (st0) with xmmj lane (st1), pop st0
			switch(c)
			{
			case 0:   // CMPEQSS
				fcmove(st1);
				fcmovne(st2);
				break;
			case 1:   // CMPLTSS
				fcmovb(st1);
				fcmovnb(st2);
				break;
			case 2:   // CMPLESS
				fcmovbe(st1);
				fcmovnbe(st2);
				break;
			case 3:   // CMPUNORDSS
				// Unordered must select 1.0f (st1) and ordered 0.0f (st2),
				// i.e. the exact opposite of CMPORDSS below.
				fcmovu(st1);
				fcmovnu(st2);
				break;
			case 4:   // CMPNEQSS
				fcmovne(st1);
				fcmove(st2);
				break;
			case 5:   // CMPNLTSS
				fcmovnb(st1);
				fcmovb(st2);
				break;
			case 6:   // CMPNLESS
				fcmovnbe(st1);
				fcmovbe(st2);
				break;
			case 7:   // CMPORDSS
				fcmovnu(st1);
				fcmovu(st2);
				break;
			default:
				throw INTERNAL_ERROR;
			}
			fstp(dword_ptr [&sse[i][0]]);

			// Release the two constants still on the x87 stack.
			ffree(st0);
			ffree(st1);

			return -1;
		}
		else
		{
			return Assembler::cmpss(xmmi, xmmj, c);
		}
	}

	// CMPSS, memory source: compares lane 0 of xmm with the float at mem32
	// using predicate c (0..7, see the case labels below).
	//
	// With emulateSSE set, spillMMX() is called, the operands are compared on
	// the x87 stack (fcomip) and lane 0 of xmm is overwritten with 1.0f when
	// the selected flag condition holds and 0.0f otherwise; the function
	// returns -1. Any other value of c throws INTERNAL_ERROR. Without emulation
	// the call is forwarded to Assembler::cmpss.
	int CodeGenerator::cmpss(OperandXMMREG xmm, OperandMEM32 mem32, char c)
	{
		if(emulateSSE)
		{
			spillMMX();
			const int i = xmm.reg;
			static float zero = 0;
			static float one = 1;
			fld(dword_ptr [&zero]);		// st2 after the compare has popped
			fld(dword_ptr [&one]);		// st1 after the compare has popped

			fld((OperandMEM32&)(mem32+0));
			fld(dword_ptr [&sse[i][0]]);
			fcomip(st0, st1);   // compare xmm lane (st0) with memory value (st1), pop st0
			switch(c)
			{
			case 0:   // CMPEQSS
				fcmove(st1);
				fcmovne(st2);
				break;
			case 1:   // CMPLTSS
				fcmovb(st1);
				fcmovnb(st2);
				break;
			case 2:   // CMPLESS
				fcmovbe(st1);
				fcmovnbe(st2);
				break;
			case 3:   // CMPUNORDSS
				// Unordered must select 1.0f (st1) and ordered 0.0f (st2),
				// i.e. the exact opposite of CMPORDSS below.
				fcmovu(st1);
				fcmovnu(st2);
				break;
			case 4:   // CMPNEQSS
				fcmovne(st1);
				fcmove(st2);
				break;
			case 5:   // CMPNLTSS
				fcmovnb(st1);
				fcmovb(st2);
				break;
			case 6:   // CMPNLESS
				fcmovnbe(st1);
				fcmovbe(st2);
				break;
			case 7:   // CMPORDSS
				fcmovnu(st1);
				fcmovu(st2);
				break;
			default:
				throw INTERNAL_ERROR;
			}
			fstp(dword_ptr [&sse[i][0]]);

			// Release the two constants still on the x87 stack.
			ffree(st0);
			ffree(st1);

			return -1;
		}
		else
		{
			return Assembler::cmpss(xmm, mem32, c);
		}
	}

	// CMPSS with a register-or-memory source and predicate c; dispatches on the
	// operand type when emulating, else defers to Assembler::cmpss.
	int CodeGenerator::cmpss(OperandXMMREG xmm, OperandXMM32 xmm32, char c)
	{
		if(!emulateSSE)
		{
			return Assembler::cmpss(xmm, xmm32, c);
		}

		const bool fromRegister = (xmm32.type == Operand::XMMREG);

		return fromRegister ? cmpss(xmm, (OperandXMMREG&)xmm32, c)
		                    : cmpss(xmm, (OperandMEM32&)xmm32, c);
	}

	// CMPEQSS (register source): cmpss with predicate 0.
	int CodeGenerator::cmpeqss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 0;

		return cmpss(xmmi, xmmj, predicate);
	}

	// CMPEQSS (memory source): cmpss with predicate 0.
	int CodeGenerator::cmpeqss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		const char predicate = 0;

		return cmpss(xmm, mem32, predicate);
	}

	// CMPEQSS (register-or-memory source): cmpss with predicate 0.
	int CodeGenerator::cmpeqss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		const char predicate = 0;

		return cmpss(xmm, xmm32, predicate);
	}

	// CMPLESS (register source): cmpss with predicate 2.
	int CodeGenerator::cmpless(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 2;

		return cmpss(xmmi, xmmj, predicate);
	}

	// CMPLESS (memory source): cmpss with predicate 2.
	int CodeGenerator::cmpless(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		const char predicate = 2;

		return cmpss(xmm, mem32, predicate);
	}

	// CMPLESS (register-or-memory source): cmpss with predicate 2.
	int CodeGenerator::cmpless(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		const char predicate = 2;

		return cmpss(xmm, xmm32, predicate);
	}

	// CMPLTSS (register source): cmpss with predicate 1.
	int CodeGenerator::cmpltss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 1;

		return cmpss(xmmi, xmmj, predicate);
	}

	// CMPLTSS (memory source): cmpss with predicate 1.
	int CodeGenerator::cmpltss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		const char predicate = 1;

		return cmpss(xmm, mem32, predicate);
	}

	// CMPLTSS (register-or-memory source): cmpss with predicate 1.
	int CodeGenerator::cmpltss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		const char predicate = 1;

		return cmpss(xmm, xmm32, predicate);
	}

	// CMPNEQSS (register source): cmpss with predicate 4.
	int CodeGenerator::cmpneqss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 4;

		return cmpss(xmmi, xmmj, predicate);
	}

	// CMPNEQSS (memory source): cmpss with predicate 4.
	int CodeGenerator::cmpneqss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		const char predicate = 4;

		return cmpss(xmm, mem32, predicate);
	}

	// CMPNEQSS (register-or-memory source): cmpss with predicate 4.
	int CodeGenerator::cmpneqss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		const char predicate = 4;

		return cmpss(xmm, xmm32, predicate);
	}

	// CMPNLESS (register source): cmpss with predicate 6.
	int CodeGenerator::cmpnless(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 6;

		return cmpss(xmmi, xmmj, predicate);
	}

	// CMPNLESS (memory source): cmpss with predicate 6.
	int CodeGenerator::cmpnless(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		const char predicate = 6;

		return cmpss(xmm, mem32, predicate);
	}

	// CMPNLESS (register-or-memory source): cmpss with predicate 6.
	int CodeGenerator::cmpnless(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		const char predicate = 6;

		return cmpss(xmm, xmm32, predicate);
	}

	// CMPNLTSS (register source): cmpss with predicate 5.
	int CodeGenerator::cmpnltss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 5;

		return cmpss(xmmi, xmmj, predicate);
	}

	// CMPNLTSS (memory source): cmpss with predicate 5.
	int CodeGenerator::cmpnltss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		const char predicate = 5;

		return cmpss(xmm, mem32, predicate);
	}

	// CMPNLTSS (register-or-memory source): cmpss with predicate 5.
	int CodeGenerator::cmpnltss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		const char predicate = 5;

		return cmpss(xmm, xmm32, predicate);
	}

	// CMPORDSS (register source): cmpss with predicate 7.
	int CodeGenerator::cmpordss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 7;

		return cmpss(xmmi, xmmj, predicate);
	}

	// CMPORDSS (memory source): cmpss with predicate 7.
	int CodeGenerator::cmpordss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		const char predicate = 7;

		return cmpss(xmm, mem32, predicate);
	}

	// CMPORDSS (register-or-memory source): cmpss with predicate 7.
	int CodeGenerator::cmpordss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		const char predicate = 7;

		return cmpss(xmm, xmm32, predicate);
	}

	// CMPUNORDSS (register source): cmpss with predicate 3.
	int CodeGenerator::cmpunordss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		const char predicate = 3;

		return cmpss(xmmi, xmmj, predicate);
	}

	// CMPUNORDSS (memory source): cmpss with predicate 3.
	int CodeGenerator::cmpunordss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		const char predicate = 3;

		return cmpss(xmm, mem32, predicate);
	}

	// CMPUNORDSS (register-or-memory source): cmpss with predicate 3.
	int CodeGenerator::cmpunordss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		const char predicate = 3;

		return cmpss(xmm, xmm32, predicate);
	}

	// COMISS, register source: compares lane 0 of xmmi with lane 0 of xmmj.
	// The emulation loads both values on the x87 stack, compares them with
	// fcomip and then frees the remaining stack entry; it returns -1.
	int CodeGenerator::comiss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::comiss(xmmi, xmmj);
		}

		spillMMX();

		const int lhs = xmmi.reg;
		const int rhs = xmmj.reg;

		fld(dword_ptr [&sse[rhs][0]]);   // ends up in st1
		fld(dword_ptr [&sse[lhs][0]]);   // st0
		fcomip(st0, st1);                // compare, pop the xmmi value
		ffree(st0);                      // drop the xmmj value

		return -1;
	}

	// COMISS, memory source: compares lane 0 of xmm with the float at mem32.
	// The emulation loads both values on the x87 stack, compares them with
	// fcomip and then frees the remaining stack entry; it returns -1.
	int CodeGenerator::comiss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::comiss(xmm, mem32);
		}

		spillMMX();

		const int lhs = xmm.reg;

		fld(mem32);                      // ends up in st1
		fld(dword_ptr [&sse[lhs][0]]);   // st0
		fcomip(st0, st1);                // compare, pop the xmm value
		ffree(st0);                      // drop the memory value

		return -1;
	}

	// COMISS with a register-or-memory source; dispatches on the operand type
	// when emulating, else defers to Assembler::comiss.
	int CodeGenerator::comiss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		if(!emulateSSE)
		{
			return Assembler::comiss(xmm, xmm32);
		}

		const bool fromRegister = (xmm32.type == Operand::XMMREG);

		return fromRegister ? comiss(xmm, (OperandXMMREG&)xmm32)
		                    : comiss(xmm, (OperandMEM32&)xmm32);
	}

	// CVTPI2PS, MMX register source: converts the two 32-bit integers held in
	// mm to floats in lanes 0 and 1 of xmm.
	// The emulation first stores mm to a static buffer, then calls spillMMX()
	// and converts with fild/fstp; it returns -1.
	int CodeGenerator::cvtpi2ps(OperandXMMREG xmm, OperandMMREG mm)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtpi2ps(xmm, mm);
		}

		static int dword[2];   // staging buffer for the two source integers
		const int dst = xmm.reg;

		// The store of mm is emitted before spillMMX(), as in the x87 code below
		// the value is read back from memory.
		movq(qword_ptr [dword], mm);
		spillMMX();

		for(int lane = 0; lane < 2; lane++)
		{
			fild(dword_ptr [&dword[lane]]);
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}

	// CVTPI2PS, memory source: converts the two 32-bit integers at mem64 (byte
	// offsets 0 and 4) to floats in lanes 0 and 1 of xmm.
	// The emulation uses fild/fstp after spillMMX() and returns -1.
	int CodeGenerator::cvtpi2ps(OperandXMMREG xmm, OperandMEM64 mem64)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtpi2ps(xmm, mem64);
		}

		spillMMX();

		const int dst = xmm.reg;

		for(int lane = 0; lane < 2; lane++)
		{
			fild((OperandMEM32&)(mem64+4*lane));
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}

	// CVTPI2PS with an MMX-register-or-memory source; dispatches on the operand
	// type when emulating, else defers to Assembler::cvtpi2ps.
	int CodeGenerator::cvtpi2ps(OperandXMMREG xmm, OperandR_M64 r_m64)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtpi2ps(xmm, r_m64);
		}

		const bool fromRegister = (r_m64.type == Operand::MMREG);

		return fromRegister ? cvtpi2ps(xmm, (OperandMMREG&)r_m64)
		                    : cvtpi2ps(xmm, (OperandMEM64&)r_m64);
	}

	// CVTPS2PI, XMM register source: converts lanes 0 and 1 of xmm to 32-bit
	// integers and loads them into mm.
	// The emulation calls spillMMXcept(mm), converts with fld/fistp into a
	// static buffer and reloads mm from it; it returns -1.
	int CodeGenerator::cvtps2pi(OperandMMREG mm, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtps2pi(mm, xmm);
		}

		static int dword[2];   // receives the two converted integers

		spillMMXcept(mm);

		const int src = xmm.reg;

		// NOTE(review): the FPU control word is left untouched here (code that
		// was meant to mask it with 0xF3FF is disabled), so fistp rounds
		// according to whatever rounding mode is active at run time.
		for(int lane = 0; lane < 2; lane++)
		{
			fld(dword_ptr [&sse[src][lane]]);
			fistp(dword_ptr [&dword[lane]]);
		}

		movq(mm, qword_ptr [dword]);

		return -1;
	}

	// CVTPS2PI, memory source: converts the two floats at mem64 (byte offsets 0
	// and 4) to 32-bit integers and loads them into mm.
	// The emulation calls spillMMXcept(mm), converts with fld/fistp into a
	// static buffer and reloads mm from it; it returns -1.
	int CodeGenerator::cvtps2pi(OperandMMREG mm, OperandMEM64 mem64)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtps2pi(mm, mem64);
		}

		static int dword[2];   // receives the two converted integers

		spillMMXcept(mm);

		// NOTE(review): the FPU control word is left untouched here (code that
		// was meant to mask it with 0xF3FF is disabled), so fistp rounds
		// according to whatever rounding mode is active at run time.
		for(int lane = 0; lane < 2; lane++)
		{
			fld((OperandMEM32&)(mem64+4*lane));
			fistp(dword_ptr [&dword[lane]]);
		}

		movq(mm, qword_ptr [dword]);

		return -1;
	}

	// CVTPS2PI with an XMM-register-or-memory source; dispatches on the operand
	// type when emulating, else defers to Assembler::cvtps2pi.
	int CodeGenerator::cvtps2pi(OperandMMREG mm, OperandXMM64 xmm64)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtps2pi(mm, xmm64);
		}

		const bool fromRegister = (xmm64.type == Operand::XMMREG);

		return fromRegister ? cvtps2pi(mm, (OperandXMMREG&)xmm64)
		                    : cvtps2pi(mm, (OperandMEM64&)xmm64);
	}

	// CVTTPS2PI, XMM register source: converts lanes 0 and 1 of xmm to 32-bit
	// integers and loads them into mm.
	// The emulation calls spillMMXcept(mm), converts with fld/fistp into a
	// static buffer and reloads mm from it; it returns -1.
	int CodeGenerator::cvttps2pi(OperandMMREG mm, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::cvttps2pi(mm, xmm);
		}

		static int dword[2];   // receives the two converted integers

		spillMMXcept(mm);

		const int src = xmm.reg;

		// NOTE(review): truncation is not enforced. The code that was meant to
		// set the rounding-control bits (0x0C00) in the FPU control word is
		// disabled, so this emits the same sequence as cvtps2pi and fistp uses
		// whatever rounding mode is active at run time.
		for(int lane = 0; lane < 2; lane++)
		{
			fld(dword_ptr [&sse[src][lane]]);
			fistp(dword_ptr [&dword[lane]]);
		}

		movq(mm, qword_ptr [dword]);

		return -1;
	}

	// CVTTPS2PI, memory source: converts the two floats at mem64 (byte offsets
	// 0 and 4) to 32-bit integers and loads them into mm.
	// The emulation calls spillMMXcept(mm), converts with fld/fistp into a
	// static buffer and reloads mm from it; it returns -1.
	int CodeGenerator::cvttps2pi(OperandMMREG mm, OperandMEM64 mem64)
	{
		if(!emulateSSE)
		{
			return Assembler::cvttps2pi(mm, mem64);
		}

		static int dword[2];   // receives the two converted integers

		spillMMXcept(mm);

		// NOTE(review): truncation is not enforced. The code that was meant to
		// set the rounding-control bits (0x0C00) in the FPU control word is
		// disabled, so fistp uses whatever rounding mode is active at run time.
		for(int lane = 0; lane < 2; lane++)
		{
			fld((OperandMEM32&)(mem64+4*lane));
			fistp(dword_ptr [&dword[lane]]);
		}

		movq(mm, qword_ptr [dword]);

		return -1;
	}

	// CVTTPS2PI with an XMM-register-or-memory source; dispatches on the
	// operand type when emulating, else defers to Assembler::cvttps2pi.
	int CodeGenerator::cvttps2pi(OperandMMREG mm, OperandXMM64 xmm64)
	{
		if(!emulateSSE)
		{
			return Assembler::cvttps2pi(mm, xmm64);
		}

		const bool fromRegister = (xmm64.type == Operand::XMMREG);

		return fromRegister ? cvttps2pi(mm, (OperandXMMREG&)xmm64)
		                    : cvttps2pi(mm, (OperandMEM64&)xmm64);
	}

	// CVTSI2SS, general-register source: converts the 32-bit integer in reg32
	// to a float in lane 0 of xmm.
	// The emulation stores the register to a static dword, then converts it
	// with fild/fstp; it returns -1.
	int CodeGenerator::cvtsi2ss(OperandXMMREG xmm, OperandREG32 reg32)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtsi2ss(xmm, reg32);
		}

		static int dword;   // fild needs a memory operand, so stage the register here

		spillMMX();

		const int dst = xmm.reg;

		mov(dword_ptr [&dword], reg32);
		fild(dword_ptr [&dword]);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}

	// CVTSI2SS, memory source: converts the 32-bit integer at mem32 to a float
	// in lane 0 of xmm. The emulation uses fild/fstp and returns -1.
	int CodeGenerator::cvtsi2ss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtsi2ss(xmm, mem32);
		}

		spillMMX();

		const int dst = xmm.reg;

		fild(mem32);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}

	// CVTSI2SS with a general-register-or-memory source; dispatches on the
	// operand type when emulating, else defers to Assembler::cvtsi2ss.
	int CodeGenerator::cvtsi2ss(OperandXMMREG xmm, OperandR_M32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtsi2ss(xmm, r_m32);
		}

		const bool fromRegister = (r_m32.type == Operand::REG32);

		return fromRegister ? cvtsi2ss(xmm, (OperandREG32&)r_m32)
		                    : cvtsi2ss(xmm, (OperandMEM32&)r_m32);
	}

	// CVTSS2SI, XMM register source: converts lane 0 of xmm to a 32-bit integer
	// in reg32.
	// The emulation converts with fld/fistp into a static dword and then loads
	// reg32 from it; it returns -1.
	int CodeGenerator::cvtss2si(OperandREG32 reg32, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtss2si(reg32, xmm);
		}

		static int dword;   // fistp target, read back into reg32

		spillMMX();

		const int src = xmm.reg;

		// NOTE(review): the FPU control word is left untouched here (code that
		// was meant to mask it with 0xF3FF is disabled), so fistp rounds
		// according to whatever rounding mode is active at run time.
		fld(dword_ptr [&sse[src][0]]);
		fistp(dword_ptr [&dword]);
		mov(reg32, dword_ptr [&dword]);

		return -1;
	}

	// CVTSS2SI, memory source: converts the float at mem32 to a 32-bit integer
	// in reg32.
	// The emulation converts with fld/fistp into a static dword and then loads
	// reg32 from it; it returns -1.
	int CodeGenerator::cvtss2si(OperandREG32 reg32, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtss2si(reg32, mem32);
		}

		static int dword;   // fistp target, read back into reg32

		spillMMX();

		// NOTE(review): the FPU control word is left untouched here (code that
		// was meant to mask it with 0xF3FF is disabled), so fistp rounds
		// according to whatever rounding mode is active at run time.
		fld(mem32);
		fistp(dword_ptr [&dword]);
		mov(reg32, dword_ptr [&dword]);

		return -1;
	}

	// CVTSS2SI with an XMM-register-or-memory source; dispatches on the operand
	// type when emulating, else defers to Assembler::cvtss2si.
	int CodeGenerator::cvtss2si(OperandREG32 reg32, OperandXMM32 xmm32)
	{
		if(!emulateSSE)
		{
			return Assembler::cvtss2si(reg32, xmm32);
		}

		const bool fromRegister = (xmm32.type == Operand::XMMREG);

		return fromRegister ? cvtss2si(reg32, (OperandXMMREG&)xmm32)
		                    : cvtss2si(reg32, (OperandMEM32&)xmm32);
	}

	// CVTTSS2SI, XMM register source: converts lane 0 of xmm to a 32-bit
	// integer in reg32.
	// The emulation converts with fld/fistp into a static dword and then loads
	// reg32 from it; it returns -1.
	int CodeGenerator::cvttss2si(OperandREG32 reg32, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::cvttss2si(reg32, xmm);
		}

		static int dword;   // fistp target, read back into reg32

		spillMMX();

		const int src = xmm.reg;

		// NOTE(review): truncation is not enforced. The code that was meant to
		// set the rounding-control bits (0x0C00) in the FPU control word is
		// disabled, so this emits the same sequence as cvtss2si and fistp uses
		// whatever rounding mode is active at run time.
		fld(dword_ptr [&sse[src][0]]);
		fistp(dword_ptr [&dword]);
		mov(reg32, dword_ptr [&dword]);

		return -1;
	}

	// CVTTSS2SI, memory source: converts the float at mem32 to a 32-bit integer
	// in reg32.
	// The emulation converts with fld/fistp into a static dword and then loads
	// reg32 from it; it returns -1.
	int CodeGenerator::cvttss2si(OperandREG32 reg32, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::cvttss2si(reg32, mem32);
		}

		static int dword;   // fistp target, read back into reg32

		spillMMX();

		// NOTE(review): truncation is not enforced. The code that was meant to
		// set the rounding-control bits (0x0C00) in the FPU control word is
		// disabled, so fistp uses whatever rounding mode is active at run time.
		fld(mem32);
		fistp(dword_ptr [&dword]);
		mov(reg32, dword_ptr [&dword]);

		return -1;
	}

	// CVTTSS2SI with an XMM-register-or-memory source; dispatches on the
	// operand type when emulating, else defers to Assembler::cvttss2si.
	int CodeGenerator::cvttss2si(OperandREG32 reg32, OperandXMM32 xmm32)
	{
		if(!emulateSSE)
		{
			return Assembler::cvttss2si(reg32, xmm32);
		}

		const bool fromRegister = (xmm32.type == Operand::XMMREG);

		return fromRegister ? cvttss2si(reg32, (OperandXMMREG&)xmm32)
		                    : cvttss2si(reg32, (OperandMEM32&)xmm32);
	}

	// DIVPS, register source: xmmi /= xmmj, lane by lane.
	// The emulation divides each of the four lanes with x87 fld/fdiv/fstp on
	// the sse[reg][lane] storage and returns -1.
	int CodeGenerator::divps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::divps(xmmi, xmmj);
		}

		spillMMX();

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[dst][lane]]);
			fdiv(dword_ptr [&sse[src][lane]]);
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// DIVPS, memory source: xmm /= [mem128], lane by lane.
	// Divisors are read from byte offsets 0, 4, 8 and 12 of the memory operand.
	// Returns -1 when emulating.
	int CodeGenerator::divps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::divps(xmm, mem128);
		}

		spillMMX();

		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[dst][lane]]);
			fdiv((OperandMEM32&)(mem128+4*lane));
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// DIVPS with a register-or-memory source; dispatches on the operand type
	// when emulating, else defers to Assembler::divps.
	int CodeGenerator::divps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::divps(xmm, r_m128);
		}

		const bool fromRegister = (r_m128.type == Operand::XMMREG);

		return fromRegister ? divps(xmm, (OperandXMMREG&)r_m128)
		                    : divps(xmm, (OperandMEM128&)r_m128);
	}

	// DIVSS, register source: lane 0 of xmmi /= lane 0 of xmmj.
	// The emulation uses x87 fld/fdiv/fstp and returns -1.
	int CodeGenerator::divss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::divss(xmmi, xmmj);
		}

		spillMMX();

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&sse[dst][0]]);
		fdiv(dword_ptr [&sse[src][0]]);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// DIVSS, memory source: lane 0 of xmm /= the float at mem32.
	// The emulation uses x87 fld/fdiv/fstp and returns -1.
	int CodeGenerator::divss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::divss(xmm, mem32);
		}

		spillMMX();

		const int dst = xmm.reg;

		fld(dword_ptr [&sse[dst][0]]);
		fdiv((OperandMEM32&)mem32);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// DIVSS with a register-or-memory source; dispatches on the operand type
	// when emulating, else defers to Assembler::divss.
	int CodeGenerator::divss(OperandXMMREG xmm, OperandXMM32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::divss(xmm, r_m32);
		}

		const bool fromRegister = (r_m32.type == Operand::XMMREG);

		return fromRegister ? divss(xmm, (OperandXMMREG&)r_m32)
		                    : divss(xmm, (OperandMEM32&)r_m32);
	}

	// LDMXCSR: when emulating SSE nothing is emitted and -1 is returned;
	// otherwise the call is forwarded to Assembler::ldmxcsr.
	int CodeGenerator::ldmxcsr(OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::ldmxcsr(mem32);
		}

		return -1;
	}

	// MASKMOVQ: byte-wise conditional store of mmi to [edi], selected by the
	// most significant bit of each byte of mmj.
	//
	// With emulateSSE set, both MMX registers are stored to static buffers and
	// for each of the 8 bytes the mask bit 0x80 is tested; the destination byte
	// at [edi+k] is loaded, conditionally replaced (cmovnz) by the data byte,
	// and written back. Returns -1. Otherwise forwards to Assembler::maskmovq.
	int CodeGenerator::maskmovq(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(emulateSSE)
		{
			// qword1 holds the data. It has a second, unused element because
			// cmovnz below reads a full dword at byte offsets up to 7; the
			// padding keeps those reads inside the buffer.
			static __int64 qword1[2];
			static __int64 qword2;

			static int t0;

			// Byte-granular views of the buffers. Offsets must be applied to
			// these and not to the __int64 pointers, where "+k" would advance
			// by 8*k bytes.
			char *data = (char*)qword1;
			char *mask = (char*)&qword2;

			movq(qword_ptr [qword1], mmi);
			movq(qword_ptr [&qword2], mmj);

			for(int k = 0; k < 8; k++)
			{
				test(byte_ptr [mask+k], (unsigned char)0x80);   // ZF clear when the byte is selected
				mov(x8(&t0), byte_ptr [edi+k]);                 // current destination byte
				cmovnz(r32(&t0), dword_ptr [data+k]);           // low byte becomes the data byte if selected
				mov(byte_ptr [edi+k], r8(&t0));
			}

			free(&t0);
			return -1;
		}
		else
		{
			return Assembler::maskmovq(mmi, mmj);
		}
	}

	// MAXPS, register source: each lane of xmmi becomes the larger of the
	// corresponding lanes of xmmi and xmmj.
	// The emulation compares on the x87 stack (fcomi) and uses fcmovb to take
	// the xmmj lane when the xmmi lane compares below it; it returns -1.
	int CodeGenerator::maxps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::maxps(xmmi, xmmj);
		}

		spillMMX();

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[src][lane]]);    // st1
			fld(dword_ptr [&sse[dst][lane]]);    // st0
			fcomi(st0, st1);
			fcmovb(st1);                         // st0 = st1 when st0 is below st1
			fstp(dword_ptr [&sse[dst][lane]]);
			ffree(st0);                          // drop the source lane
		}

		return -1;
	}

	// MAXPS, memory source: each lane of xmm becomes the larger of that lane
	// and the corresponding float at mem128 (byte offsets 0, 4, 8, 12).
	//
	// With emulateSSE set, spillMMX() is called and each lane is compared on
	// the x87 stack (fcomi); fcmovb takes the memory value when the xmm lane
	// compares below it. Returns -1. Otherwise forwards to Assembler::maxps.
	int CodeGenerator::maxps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(emulateSSE)
		{
			spillMMX();
			const int i = xmm.reg;

			// Every lane reads its own dword of the memory operand; the offset
			// is derived from the lane index so lane 3 uses mem128+12.
			for(int lane = 0; lane < 4; lane++)
			{
				fld((OperandMEM32&)(mem128+4*lane));   // st1
				fld(dword_ptr [&sse[i][lane]]);        // st0
				fcomi(st0, st1);
				fcmovb(st1);                           // st0 = st1 when st0 is below st1
				fstp(dword_ptr [&sse[i][lane]]);
				ffree(st0);                            // drop the memory value
			}

			return -1;
		}
		else
		{
			return Assembler::maxps(xmm, mem128);
		}
	}

	// maxps with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::maxps.
	int CodeGenerator::maxps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::maxps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return maxps(xmm, (OperandXMMREG&)r_m128);
		}

		return maxps(xmm, (OperandMEM128&)r_m128);
	}

	// Scalar single-precision maximum: only lane 0 of xmmi is updated.
	// With emulateSSE set the comparison runs on the x87 stack over
	// sse[reg][0] and -1 is returned; otherwise forwards to Assembler::maxss.
	int CodeGenerator::maxss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::maxss(xmmi, xmmj);
		}

		spillMMX();
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&sse[src][0]]);
		fld(dword_ptr [&sse[dst][0]]);
		fcomi(st0, st1);
		fcmovb(st1);   // st0 is below st1: replace it with st1
		fstp(dword_ptr [&sse[dst][0]]);
		ffree(st0);    // discard the source value left on the stack

		return -1;
	}

	// Scalar single-precision maximum of lane 0 of xmm and a 32-bit memory
	// operand. With emulateSSE set the comparison runs on the x87 stack and -1
	// is returned; otherwise forwards to Assembler::maxss.
	int CodeGenerator::maxss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::maxss(xmm, mem32);
		}

		spillMMX();
		const int dst = xmm.reg;

		fld(mem32);
		fld(dword_ptr [&sse[dst][0]]);
		fcomi(st0, st1);
		fcmovb(st1);   // st0 is below st1: replace it with st1
		fstp(dword_ptr [&sse[dst][0]]);
		ffree(st0);    // discard the memory value left on the stack

		return -1;
	}

	// maxss with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::maxss.
	int CodeGenerator::maxss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		if(!emulateSSE)
		{
			return Assembler::maxss(xmm, xmm32);
		}

		if(xmm32.type == Operand::XMMREG)
		{
			return maxss(xmm, (OperandXMMREG&)xmm32);
		}

		return maxss(xmm, (OperandMEM32&)xmm32);
	}

	// Packed single-precision minimum of xmmi and xmmj, stored in xmmi.
	// With emulateSSE set, each of the four lanes held in sse[reg][lane] is
	// compared on the x87 stack and -1 is returned; otherwise the call is
	// forwarded to Assembler::minps.
	int CodeGenerator::minps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::minps(xmmi, xmmj);
		}

		spillMMX();
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[src][lane]]);
			fld(dword_ptr [&sse[dst][lane]]);
			fcomi(st0, st1);
			fcmovnb(st1);   // st0 is not below st1: replace it with st1
			fstp(dword_ptr [&sse[dst][lane]]);
			ffree(st0);     // discard the source value left on the stack
		}

		return -1;
	}

	// Packed single-precision minimum of xmm and a 128-bit memory operand,
	// stored in xmm. With emulateSSE set, each lane of sse[xmm.reg] is compared
	// on the x87 stack against the dword at mem128 + 4*lane and -1 is returned;
	// otherwise the call is forwarded to Assembler::minps.
	int CodeGenerator::minps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::minps(xmm, mem128);
		}

		spillMMX();
		const int i = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			// Every lane reads its own dword of the memory operand; the last
			// lane must use offset 12, not offset 0.
			fld((OperandMEM32&)(mem128+4*lane));
			fld(dword_ptr [&sse[i][lane]]);
			fcomi(st0, st1);
			fcmovnb(st1);   // st0 is not below st1: replace it with st1
			fstp(dword_ptr [&sse[i][lane]]);
			ffree(st0);     // discard the memory value left on the stack
		}

		return -1;
	}

	// minps with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::minps.
	int CodeGenerator::minps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::minps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return minps(xmm, (OperandXMMREG&)r_m128);
		}

		return minps(xmm, (OperandMEM128&)r_m128);
	}

	// Scalar single-precision minimum: only lane 0 of xmmi is updated.
	// With emulateSSE set the comparison runs on the x87 stack over
	// sse[reg][0] and -1 is returned; otherwise forwards to Assembler::minss.
	int CodeGenerator::minss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::minss(xmmi, xmmj);
		}

		spillMMX();
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&sse[src][0]]);
		fld(dword_ptr [&sse[dst][0]]);
		fcomi(st0, st1);
		fcmovnb(st1);   // st0 is not below st1: replace it with st1
		fstp(dword_ptr [&sse[dst][0]]);
		ffree(st0);     // discard the source value left on the stack

		return -1;
	}

	// Scalar single-precision minimum of lane 0 of xmm and a 32-bit memory
	// operand. With emulateSSE set the comparison runs on the x87 stack and -1
	// is returned; otherwise forwards to Assembler::minss.
	int CodeGenerator::minss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::minss(xmm, mem32);
		}

		spillMMX();
		const int i = xmm.reg;

		fld(mem32);
		fld(dword_ptr [&sse[i][0]]);
		// Use the same ordered compare (fcomi) as the other min/max
		// emulations in this file, rather than the unordered fucomi.
		fcomi(st0, st1);
		fcmovnb(st1);   // st0 is not below st1: replace it with st1
		fstp(dword_ptr [&sse[i][0]]);
		ffree(st0);     // discard the memory value left on the stack

		return -1;
	}

	// minss with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::minss.
	int CodeGenerator::minss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		if(!emulateSSE)
		{
			return Assembler::minss(xmm, xmm32);
		}

		if(xmm32.type == Operand::XMMREG)
		{
			return minss(xmm, (OperandXMMREG&)xmm32);
		}

		return minss(xmm, (OperandMEM32&)xmm32);
	}

	// Register-to-register 128-bit move. A move onto itself emits nothing and
	// returns -1. With emulateSSE set, the four dwords of sse[xmmj.reg] are
	// copied to sse[xmmi.reg] through a scratch register and -1 is returned;
	// otherwise forwards to Assembler::movaps.
	int CodeGenerator::movaps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(xmmi == xmmj) return -1;

		if(!emulateSSE)
		{
			return Assembler::movaps(xmmi, xmmj);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			mov(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// 128-bit load from memory into xmm. With emulateSSE set, the four dwords
	// at m128 + 0/4/8/12 are copied into sse[xmm.reg] through a scratch
	// register and -1 is returned; otherwise forwards to Assembler::movaps.
	int CodeGenerator::movaps(OperandXMMREG xmm, OperandMEM128 m128)
	{
		if(!emulateSSE)
		{
			return Assembler::movaps(xmm, m128);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), (OperandMEM32&)(m128+4*lane));
			mov(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// movaps load with a register-or-memory source. Returns -1 without emitting
	// anything when source and destination compare equal. When emulating,
	// dispatches on the operand type; otherwise forwards to Assembler::movaps.
	int CodeGenerator::movaps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(xmm == r_m128) return -1;

		if(!emulateSSE)
		{
			return Assembler::movaps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return movaps(xmm, (OperandXMMREG&)r_m128);
		}

		return movaps(xmm, (OperandMEM128&)r_m128);
	}

	// 128-bit store of xmm to memory. With emulateSSE set, the four dwords of
	// sse[xmm.reg] are copied to m128 + 0/4/8/12 through a scratch register and
	// -1 is returned; otherwise forwards to Assembler::movaps.
	int CodeGenerator::movaps(OperandMEM128 m128, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movaps(m128, xmm);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int src = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			mov((OperandMEM32&)(m128+4*lane), r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// movaps store with a register-or-memory destination. Returns -1 without
	// emitting anything when both operands compare equal. When emulating,
	// dispatches on the operand type; otherwise forwards to Assembler::movaps.
	int CodeGenerator::movaps(OperandR_M128 r_m128, OperandXMMREG xmm)
	{
		if(r_m128 == xmm) return -1;

		if(!emulateSSE)
		{
			return Assembler::movaps(r_m128, xmm);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return movaps((OperandXMMREG&)r_m128, xmm);
		}

		return movaps((OperandMEM128&)r_m128, xmm);
	}

	// Copies the high two dwords of xmmj into the low two dwords of xmmi.
	// With emulateSSE set this is done on sse[][] through a scratch register
	// and -1 is returned; otherwise forwards to Assembler::movhlps.
	int CodeGenerator::movhlps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::movhlps(xmmi, xmmj);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 2; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane + 2]]);
			mov(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Loads 64 bits from memory into the high two dwords of xmm.
	// With emulateSSE set, m64 + 0/4 are copied into sse[xmm.reg][2..3] through
	// a scratch register and -1 is returned; otherwise forwards to
	// Assembler::movhps.
	int CodeGenerator::movhps(OperandXMMREG xmm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::movhps(xmm, m64);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmm.reg;

		for(int half = 0; half < 2; half++)
		{
			mov(x32(&t0), (OperandMEM32&)(m64+4*half));
			mov(dword_ptr [&sse[dst][half + 2]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Stores the high two dwords of xmm to 64 bits of memory.
	// With emulateSSE set, sse[xmm.reg][2..3] are copied to m64 + 0/4 through a
	// scratch register and -1 is returned; otherwise forwards to
	// Assembler::movhps.
	int CodeGenerator::movhps(OperandMEM64 m64, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movhps(m64, xmm);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int src = xmm.reg;

		for(int half = 0; half < 2; half++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][half + 2]]);
			mov((OperandMEM32&)(m64+4*half), r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Copies the high two dwords of xmmj into the high two dwords of xmmi.
	// A move onto itself returns -1 immediately. With emulateSSE set the copy is
	// done on sse[][] through a scratch register and -1 is returned; otherwise
	// forwards to Assembler::movhps.
	int CodeGenerator::movhps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(xmmi == xmmj) return -1;

		if(!emulateSSE)
		{
			return Assembler::movhps(xmmi, xmmj);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 2; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			mov(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Copies the low two dwords of xmmj into the high two dwords of xmmi.
	// With emulateSSE set this is done on sse[][] through a scratch register
	// and -1 is returned; otherwise forwards to Assembler::movlhps.
	int CodeGenerator::movlhps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::movlhps(xmmi, xmmj);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 2; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			mov(dword_ptr [&sse[dst][lane + 2]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Loads 64 bits from memory into the low two dwords of xmm.
	// With emulateSSE set, m64 + 0/4 are copied into sse[xmm.reg][0..1] through
	// a scratch register and -1 is returned; otherwise forwards to
	// Assembler::movlps.
	int CodeGenerator::movlps(OperandXMMREG xmm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::movlps(xmm, m64);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmm.reg;

		for(int lane = 0; lane < 2; lane++)
		{
			mov(x32(&t0), (OperandMEM32&)(m64+4*lane));
			mov(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Stores the low two dwords of xmm to 64 bits of memory.
	// With emulateSSE set, sse[xmm.reg][0..1] are copied to m64 + 0/4 through a
	// scratch register and -1 is returned; otherwise forwards to
	// Assembler::movlps.
	int CodeGenerator::movlps(OperandMEM64 m64, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movlps(m64, xmm);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int src = xmm.reg;

		for(int lane = 0; lane < 2; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			mov((OperandMEM32&)(m64+4*lane), r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Copies the low two dwords of xmmj into the low two dwords of xmmi.
	// A move onto itself returns -1 immediately. With emulateSSE set the copy is
	// done on sse[][] through a scratch register and -1 is returned; otherwise
	// forwards to Assembler::movlps.
	int CodeGenerator::movlps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(xmmi == xmmj) return -1;

		if(!emulateSSE)
		{
			return Assembler::movlps(xmmi, xmmj);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 2; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			mov(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Gathers the top (sign) bit of each of the four lanes of xmm into bits
	// 0..3 of reg32. With emulateSSE set, each dword of sse[xmm.reg] is shifted
	// right by 31, moved to its bit position and merged into reg32, and -1 is
	// returned; otherwise forwards to Assembler::movmskps.
	int CodeGenerator::movmskps(OperandREG32 reg32, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movmskps(reg32, xmm);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int src = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			shr(r32(&t0), 31);

			if(lane == 0)
			{
				// The first lane initialises the result register.
				mov(reg32, r32(&t0));
			}
			else
			{
				shl(r32(&t0), lane);
				or(reg32, r32(&t0));
			}
		}

		free(&t0);
		return -1;
	}

	// Non-temporal 128-bit store of xmm. With emulateSSE set, the four dwords
	// of sse[xmm.reg] are written to m128 + 0/4/8/12 with plain mov
	// instructions and -1 is returned; otherwise forwards to
	// Assembler::movntps.
	int CodeGenerator::movntps(OperandMEM128 m128, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movntps(m128, xmm);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int src = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			mov((OperandMEM32&)(m128+4*lane), r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// Non-temporal 64-bit store of an MMX register (the parameter is named xmm
	// but has type OperandMMREG).
	// The other MMX emulations in this file (pavgb, pshufw, ...) read the
	// register with a real movq, so the emulated path performs an ordinary
	// movq store instead of reading the sse[][] array, which backs the
	// emulated XMM registers. Otherwise forwards to Assembler::movntq.
	int CodeGenerator::movntq(OperandMEM64 m64, OperandMMREG xmm)
	{
		if(emulateSSE)
		{
			return movq(m64, xmm);
		}
		else
		{
			return Assembler::movntq(m64, xmm);
		}
	}

	// MMX register-to-register move; a move onto itself emits nothing and
	// returns -1.
	int CodeGenerator::movq(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(mmi == mmj)
		{
			return -1;
		}

		const int result = Assembler::movq(mmi, mmj);
		return result;
	}

	// 64-bit load from memory into an MMX register; forwards directly to
	// Assembler::movq.
	int CodeGenerator::movq(OperandMMREG mm, OperandMEM64 mem64)
	{
		const int result = Assembler::movq(mm, mem64);
		return result;
	}

	// movq load with a register-or-memory source; returns -1 without emitting
	// anything when both operands compare equal.
	int CodeGenerator::movq(OperandMMREG mm, OperandR_M64 r_m64)
	{
		if(mm == r_m64)
		{
			return -1;
		}

		const int result = Assembler::movq(mm, r_m64);
		return result;
	}

	// 64-bit store of an MMX register to memory; forwards directly to
	// Assembler::movq.
	int CodeGenerator::movq(OperandMEM64 mem64, OperandMMREG mm)
	{
		const int result = Assembler::movq(mem64, mm);
		return result;
	}

	// movq store with a register-or-memory destination; returns -1 without
	// emitting anything when both operands compare equal.
	int CodeGenerator::movq(OperandR_M64 r_m64, OperandMMREG mm)
	{
		if(r_m64 == mm)
		{
			return -1;
		}

		const int result = Assembler::movq(r_m64, mm);
		return result;
	}

	// Scalar register-to-register move of lane 0; the other lanes of xmmi are
	// left untouched. A move onto itself returns -1 immediately. With
	// emulateSSE set the dword is copied within sse[][] through a scratch
	// register and -1 is returned; otherwise forwards to Assembler::movss.
	int CodeGenerator::movss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(xmmi == xmmj) return -1;

		if(!emulateSSE)
		{
			return Assembler::movss(xmmi, xmmj);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		mov(x32(&t0), dword_ptr [&sse[src][0]]);
		mov(dword_ptr [&sse[dst][0]], r32(&t0));

		free(&t0);
		return -1;
	}

	// Scalar load from memory: lane 0 of xmm receives m32 and lanes 1..3 are
	// cleared to zero. With emulateSSE set this is done on sse[xmm.reg] and -1
	// is returned; otherwise forwards to Assembler::movss.
	int CodeGenerator::movss(OperandXMMREG xmm, OperandMEM32 m32)
	{
		if(!emulateSSE)
		{
			return Assembler::movss(xmm, m32);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmm.reg;

		mov(x32(&t0), m32);
		mov(dword_ptr [&sse[dst][0]], r32(&t0));

		// Zero the upper three lanes.
		for(int lane = 1; lane < 4; lane++)
		{
			mov(dword_ptr [&sse[dst][lane]], 0);
		}

		free(&t0);
		return -1;
	}

	// movss load with a register-or-memory source. When emulating, dispatches
	// on the operand type to the register or memory overload; otherwise
	// forwards to Assembler::movss.
	int CodeGenerator::movss(OperandXMMREG xmm, OperandXMM32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::movss(xmm, r_m32);
		}

		if(r_m32.type == Operand::XMMREG)
		{
			return movss(xmm, (OperandXMMREG&)r_m32);
		}

		return movss(xmm, (OperandMEM32&)r_m32);
	}

	// Scalar store of lane 0 of xmm to memory. With emulateSSE set,
	// sse[xmm.reg][0] is copied to m32 through a scratch register and -1 is
	// returned; otherwise forwards to Assembler::movss.
	int CodeGenerator::movss(OperandMEM32 m32, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movss(m32, xmm);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int src = xmm.reg;

		mov(x32(&t0), dword_ptr [&sse[src][0]]);
		mov(m32, r32(&t0));

		free(&t0);
		return -1;
	}

	// movss store with a register-or-memory destination. When emulating,
	// dispatches on the operand type to the register or memory overload;
	// otherwise forwards to Assembler::movss.
	int CodeGenerator::movss(OperandXMM32 r_m32, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movss(r_m32, xmm);
		}

		if(r_m32.type == Operand::XMMREG)
		{
			return movss((OperandXMMREG&)r_m32, xmm);
		}

		return movss((OperandMEM32&)r_m32, xmm);
	}

	// Unaligned register-to-register 128-bit move. When emulating, reuses the
	// movaps emulation; otherwise forwards to Assembler::movups.
	int CodeGenerator::movups(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::movups(xmmi, xmmj);
		}

		return movaps(xmmi, xmmj);
	}

	// Unaligned 128-bit load from memory. When emulating, reuses the movaps
	// emulation; otherwise forwards to Assembler::movups.
	int CodeGenerator::movups(OperandXMMREG xmm, OperandMEM128 m128)
	{
		if(!emulateSSE)
		{
			return Assembler::movups(xmm, m128);
		}

		return movaps(xmm, m128);
	}

	// Unaligned 128-bit load with a register-or-memory source. When emulating,
	// reuses the movaps emulation; otherwise forwards to Assembler::movups.
	int CodeGenerator::movups(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::movups(xmm, r_m128);
		}

		return movaps(xmm, r_m128);
	}

	// Unaligned 128-bit store to memory. When emulating, reuses the movaps
	// emulation; otherwise forwards to Assembler::movups.
	int CodeGenerator::movups(OperandMEM128 m128, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movups(m128, xmm);
		}

		return movaps(m128, xmm);
	}

	// Unaligned 128-bit store with a register-or-memory destination. When
	// emulating, reuses the movaps emulation; otherwise forwards to
	// Assembler::movups.
	int CodeGenerator::movups(OperandR_M128 r_m128, OperandXMMREG xmm)
	{
		if(!emulateSSE)
		{
			return Assembler::movups(r_m128, xmm);
		}

		return movaps(r_m128, xmm);
	}

	// Packed single-precision multiply: xmmi *= xmmj, lane by lane.
	// With emulateSSE set, each lane of sse[][] is multiplied on the x87 stack
	// and -1 is returned; otherwise forwards to Assembler::mulps.
	int CodeGenerator::mulps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::mulps(xmmi, xmmj);
		}

		spillMMX();
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[dst][lane]]);
			fmul(dword_ptr [&sse[src][lane]]);
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// Packed single-precision multiply of xmm by a 128-bit memory operand.
	// With emulateSSE set, each lane of sse[xmm.reg] is multiplied on the x87
	// stack by the dword at mem128 + 4*lane and -1 is returned; otherwise
	// forwards to Assembler::mulps.
	int CodeGenerator::mulps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::mulps(xmm, mem128);
		}

		spillMMX();
		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[dst][lane]]);
			fmul((OperandMEM32&)(mem128+4*lane));
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// mulps with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::mulps.
	int CodeGenerator::mulps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::mulps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return mulps(xmm, (OperandXMMREG&)r_m128);
		}

		return mulps(xmm, (OperandMEM128&)r_m128);
	}

	// Scalar single-precision multiply of lane 0: xmmi[0] *= xmmj[0].
	// With emulateSSE set the product is computed on the x87 stack over
	// sse[][0] and -1 is returned; otherwise forwards to Assembler::mulss.
	int CodeGenerator::mulss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::mulss(xmmi, xmmj);
		}

		spillMMX();
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&sse[dst][0]]);
		fmul(dword_ptr [&sse[src][0]]);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// Scalar single-precision multiply of lane 0 of xmm by a 32-bit memory
	// operand. With emulateSSE set the product is computed on the x87 stack
	// and -1 is returned; otherwise forwards to Assembler::mulss.
	int CodeGenerator::mulss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::mulss(xmm, mem32);
		}

		spillMMX();
		const int dst = xmm.reg;

		fld(dword_ptr [&sse[dst][0]]);
		fmul((OperandMEM32&)mem32);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// mulss with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::mulss.
	int CodeGenerator::mulss(OperandXMMREG xmm, OperandXMM32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::mulss(xmm, r_m32);
		}

		if(r_m32.type == Operand::XMMREG)
		{
			return mulss(xmm, (OperandXMMREG&)r_m32);
		}

		return mulss(xmm, (OperandMEM32&)r_m32);
	}

	// Bitwise OR of two 128-bit registers: xmmi |= xmmj.
	// With emulateSSE set, each dword of sse[xmmj.reg] is OR-ed into
	// sse[xmmi.reg] through a scratch register and -1 is returned; otherwise
	// forwards to Assembler::orps.
	int CodeGenerator::orps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::orps(xmmi, xmmj);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&sse[src][lane]]);
			or(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}
	
	// Bitwise OR of xmm with a 128-bit memory operand.
	// With emulateSSE set, each dword at mem128 + 4*lane is OR-ed into
	// sse[xmm.reg] through a scratch register and -1 is returned; otherwise
	// forwards to Assembler::orps.
	int CodeGenerator::orps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::orps(xmm, mem128);
		}

		static int t0;   // key for the scratch register obtained via x32/r32
		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), (OperandMEM32&)(mem128+4*lane));
			or(dword_ptr [&sse[dst][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}
	
	// orps with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::orps.
	int CodeGenerator::orps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::orps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return orps(xmm, (OperandXMMREG&)r_m128);
		}

		return orps(xmm, (OperandMEM128&)r_m128);
	}

	// Packed byte average of two MMX registers, result in mmi.
	// With emulateSSE set, both registers are spilled to static byte buffers,
	// each of the eight bytes is combined with add + shift-right-by-one through
	// a scratch register, the result is reloaded into mmi and -1 is returned;
	// otherwise forwards to Assembler::pavgb.
	// NOTE(review): the sum is formed with an 8-bit add and then the full
	// 32-bit scratch register is shifted; this does not obviously reproduce
	// the hardware (a + b + 1) >> 1 rounding or carry -- confirm.
	int CodeGenerator::pavgb(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(!emulateSSE)
		{
			return Assembler::pavgb(mmi, mmj);
		}

		static char byte1[8];
		static char byte2[8];
		static int t0;   // key for the scratch register obtained via x8/r8/r32

		movq(qword_ptr [byte1], mmi);
		movq(qword_ptr [byte2], mmj);

		for(int k = 0; k < 8; k++)
		{
			mov(x8(&t0), byte_ptr [&byte1[k]]);
			add(r8(&t0), byte_ptr [&byte2[k]]);
			shr(r32(&t0), 1);
			mov(byte_ptr [&byte1[k]], r8(&t0));
		}

		movq(mmi, qword_ptr[byte1]);

		free(&t0);
		return -1;
	}

	// Packed byte average of an MMX register and a 64-bit memory operand,
	// result in mm. With emulateSSE set, mm is spilled to a static byte buffer,
	// each byte is combined with the byte at m64 + k using add +
	// shift-right-by-one, the result is reloaded into mm and -1 is returned;
	// otherwise forwards to Assembler::pavgb.
	// NOTE(review): the sum is formed with an 8-bit add and then the full
	// 32-bit scratch register is shifted; this does not obviously reproduce
	// the hardware (a + b + 1) >> 1 rounding or carry -- confirm.
	int CodeGenerator::pavgb(OperandMMREG mm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pavgb(mm, m64);
		}

		static char byte1[8];
		static int t0;   // key for the scratch register obtained via x8/r8/r32

		movq(qword_ptr [byte1], mm);

		for(int k = 0; k < 8; k++)
		{
			mov(x8(&t0), byte_ptr [&byte1[k]]);
			add(r8(&t0), (OperandMEM8&)(m64+k));
			shr(r32(&t0), 1);
			mov(byte_ptr [&byte1[k]], r8(&t0));
		}

		movq(mm, qword_ptr [byte1]);

		free(&t0);
		return -1;
	}

	// pavgb with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::pavgb.
	int CodeGenerator::pavgb(OperandMMREG mm, OperandR_M64 r_m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pavgb(mm, r_m64);
		}

		if(r_m64.type == Operand::MMREG)
		{
			return pavgb(mm, (OperandMMREG&)r_m64);
		}

		return pavgb(mm, (OperandMEM64&)r_m64);
	}

	// Packed word average of two MMX registers, result in mmi.
	// With emulateSSE set, both registers are spilled to static buffers, each of
	// the four words is combined with add + shift-right-by-one through a
	// scratch register, the result is reloaded into mmi and -1 is returned;
	// otherwise forwards to Assembler::pavgw.
	// NOTE(review): the sum is formed with a 16-bit add and then the full
	// 32-bit scratch register is shifted; this does not obviously reproduce
	// the hardware (a + b + 1) >> 1 rounding or carry -- confirm.
	int CodeGenerator::pavgw(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(!emulateSSE)
		{
			return Assembler::pavgw(mmi, mmj);
		}

		// The buffers receive a full 64-bit movq and are indexed per word, so
		// they must be four 16-bit elements (8 bytes), not four chars.
		static short word1[4];
		static short word2[4];
		static int t0;   // key for the scratch register obtained via x16/r16/r32

		movq(qword_ptr [word1], mmi);
		movq(qword_ptr [word2], mmj);

		for(int k = 0; k < 4; k++)
		{
			mov(x16(&t0), word_ptr [&word1[k]]);
			add(r16(&t0), word_ptr [&word2[k]]);
			shr(r32(&t0), 1);
			mov(word_ptr [&word1[k]], r16(&t0));
		}

		movq(mmi, qword_ptr [word1]);

		free(&t0);
		return -1;
	}

	// Packed word average of an MMX register and a 64-bit memory operand,
	// result in mm. With emulateSSE set, mm is spilled to a static buffer, each
	// of the four words is combined with the word at m64 + 2*k using add +
	// shift-right-by-one, the result is reloaded into mm and -1 is returned;
	// otherwise forwards to Assembler::pavgw.
	// NOTE(review): the sum is formed with a 16-bit add and then the full
	// 32-bit scratch register is shifted; this does not obviously reproduce
	// the hardware (a + b + 1) >> 1 rounding or carry -- confirm.
	int CodeGenerator::pavgw(OperandMMREG mm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pavgw(mm, m64);
		}

		// Declared as 16-bit elements so that &word1[k] addresses word k; a
		// char buffer would address byte k and overlap neighbouring words.
		static short word1[4];
		static int t0;   // key for the scratch register obtained via x16/r16/r32

		movq(qword_ptr [word1], mm);

		for(int k = 0; k < 4; k++)
		{
			mov(x16(&t0), word_ptr [&word1[k]]);
			add(r16(&t0), (OperandMEM16&)(m64+2*k));
			shr(r32(&t0), 1);
			mov(word_ptr [&word1[k]], r16(&t0));
		}

		movq(mm, qword_ptr [word1]);

		free(&t0);
		return -1;
	}

	// pavgw with a register-or-memory source. When emulating, dispatches on the
	// operand type to the register or memory overload; otherwise forwards to
	// Assembler::pavgw.
	int CodeGenerator::pavgw(OperandMMREG mm, OperandR_M64 r_m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pavgw(mm, r_m64);
		}

		if(r_m64.type == Operand::MMREG)
		{
			return pavgw(mm, (OperandMMREG&)r_m64);
		}

		return pavgw(mm, (OperandMEM64&)r_m64);
	}

	// Extracts word (c & 3) of an MMX register into a 32-bit register,
	// zero-extended. With emulateSSE set, mm is spilled to a static buffer, the
	// destination is cleared with xor and the selected word is loaded into its
	// low 16 bits; -1 is returned. Otherwise forwards to Assembler::pextrw.
	int CodeGenerator::pextrw(OperandREG32 r32, OperandMMREG mm, unsigned char c)
	{
		if(!emulateSSE)
		{
			return Assembler::pextrw(r32, mm, c);
		}

		static short word[4];
		const int lane = c & 0x03;   // only the low two selector bits are used

		movq(qword_ptr [word], mm);
		xor(r32, r32);
		mov((OperandREG16&)r32, word_ptr [&word[lane]]);

		return -1;
	}

	// Inserts a 16-bit register into word (c & 3) of an MMX register.
	// With emulateSSE set, mm is spilled to a static buffer, the selected word
	// is overwritten and the buffer is reloaded into mm; -1 is returned.
	// Otherwise forwards to Assembler::pinsrw.
	int CodeGenerator::pinsrw(OperandMMREG mm, OperandREG16 r16, unsigned char c)
	{
		if(!emulateSSE)
		{
			return Assembler::pinsrw(mm, r16, c);
		}

		static short word[4];
		const int lane = c & 0x03;   // only the low two selector bits are used

		movq(qword_ptr [word], mm);
		mov(word_ptr [&word[lane]], r16);
		movq(mm, qword_ptr [word]);

		return -1;
	}

	// Inserts a 16-bit memory operand into word (c & 3) of an MMX register.
	// With emulateSSE set, mm is spilled to a static buffer, the memory word is
	// copied into the selected slot through a scratch register and the buffer
	// is reloaded into mm; -1 is returned. Otherwise forwards to
	// Assembler::pinsrw.
	int CodeGenerator::pinsrw(OperandMMREG mm, OperandMEM16 m16, unsigned char c)
	{
		if(!emulateSSE)
		{
			return Assembler::pinsrw(mm, m16, c);
		}

		static short word[4];
		static int t0;   // key for the scratch register obtained via x16/r16
		const int lane = c & 0x03;   // only the low two selector bits are used

		movq(qword_ptr [word], mm);
		mov(x16(&t0), m16);
		mov(word_ptr [&word[lane]], r16(&t0));
		movq(mm, qword_ptr [word]);

		free(&t0);
		return -1;
	}

	// pinsrw with a register-or-memory source. When emulating, dispatches on
	// the operand type to the register or memory overload; otherwise forwards
	// to Assembler::pinsrw.
	int CodeGenerator::pinsrw(OperandMMREG mm, OperandR_M16 r_m16, unsigned char c)
	{
		if(!emulateSSE)
		{
			return Assembler::pinsrw(mm, r_m16, c);
		}

		if(r_m16.type == Operand::REG16)
		{
			return pinsrw(mm, (OperandREG16&)r_m16, c);
		}

		return pinsrw(mm, (OperandMEM16&)r_m16, c);
	}

	// pmaxsw, register source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pmaxsw.
	int CodeGenerator::pmaxsw(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(!emulateSSE)
		{
			return Assembler::pmaxsw(mmi, mmj);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pmaxsw, memory source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pmaxsw.
	int CodeGenerator::pmaxsw(OperandMMREG mm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pmaxsw(mm, m64);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pmaxsw, register-or-memory source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pmaxsw.
	int CodeGenerator::pmaxsw(OperandMMREG mm, OperandR_M64 r_m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pmaxsw(mm, r_m64);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pmaxub, register source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pmaxub.
	int CodeGenerator::pmaxub(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(!emulateSSE)
		{
			return Assembler::pmaxub(mmi, mmj);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pmaxub, memory source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pmaxub.
	int CodeGenerator::pmaxub(OperandMMREG mm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pmaxub(mm, m64);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pmaxub, register-or-memory source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pmaxub.
	int CodeGenerator::pmaxub(OperandMMREG mm, OperandR_M64 r_m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pmaxub(mm, r_m64);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pminsw, register source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pminsw.
	int CodeGenerator::pminsw(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(!emulateSSE)
		{
			return Assembler::pminsw(mmi, mmj);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pminsw, memory source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pminsw.
	int CodeGenerator::pminsw(OperandMMREG mm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pminsw(mm, m64);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pminsw, register-or-memory source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pminsw.
	int CodeGenerator::pminsw(OperandMMREG mm, OperandR_M64 r_m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pminsw(mm, r_m64);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pminub, register source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pminub.
	int CodeGenerator::pminub(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(!emulateSSE)
		{
			return Assembler::pminub(mmi, mmj);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pminub, memory source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pminub.
	int CodeGenerator::pminub(OperandMMREG mm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pminub(mm, m64);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// pminub, register-or-memory source. No emulation exists: throws Error when
	// emulateSSE is set, otherwise forwards to Assembler::pminub.
	int CodeGenerator::pminub(OperandMMREG mm, OperandR_M64 r_m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pminub(mm, r_m64);
		}

		throw Error("Unimplemented SSE instruction emulation");
	}

	// Packed unsigned 16-bit multiply keeping the high word of each product,
	// result in mmi. With emulateSSE set, both registers are spilled to static
	// buffers, eax/edx are saved on the stack, each word pair is multiplied
	// with the 16-bit mul (product high word in dx) and the result is reloaded
	// into mmi; -1 is returned. Otherwise forwards to Assembler::pmulhuw.
	int CodeGenerator::pmulhuw(OperandMMREG mmi, OperandMMREG mmj)
	{
		if(!emulateSSE)
		{
			return Assembler::pmulhuw(mmi, mmj);
		}

		static short word1[4];
		static short word2[4];

		movq(qword_ptr [word1], mmi);
		movq(qword_ptr [word2], mmj);
		push(eax);
		push(edx);

		for(int k = 0; k < 4; k++)
		{
			mov(ax, word_ptr [&word1[k]]);
			mul(word_ptr [&word2[k]]);
			mov(word_ptr [&word1[k]], dx);   // keep the high half of the product
		}

		pop(edx);
		pop(eax);
		movq(mmi, qword_ptr [word1]);

		return -1;
	}

	// Packed unsigned 16-bit multiply (high word) of an MMX register by a
	// 64-bit memory operand, result in mm. With emulateSSE set, mm is spilled,
	// then reused to carry the memory operand into a second buffer; eax/edx
	// are saved on the stack, each word pair is multiplied with the 16-bit mul
	// (product high word in dx) and the result is reloaded into mm; -1 is
	// returned. Otherwise forwards to Assembler::pmulhuw.
	int CodeGenerator::pmulhuw(OperandMMREG mm, OperandMEM64 m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pmulhuw(mm, m64);
		}

		static short word1[4];
		static short word2[4];

		movq(qword_ptr [word1], mm);
		movq(mm, m64);                 // mm is overwritten; restored from word1 below
		movq(qword_ptr [word2], mm);
		push(eax);
		push(edx);

		for(int k = 0; k < 4; k++)
		{
			mov(ax, word_ptr [&word1[k]]);
			mul(word_ptr [&word2[k]]);
			mov(word_ptr [&word1[k]], dx);   // keep the high half of the product
		}

		pop(edx);
		pop(eax);
		movq(mm, qword_ptr [word1]);

		return -1;
	}

	// pmulhuw with a register-or-memory source. When emulating, dispatches on
	// the operand type to the register or memory overload; otherwise forwards
	// to Assembler::pmulhuw.
	int CodeGenerator::pmulhuw(OperandMMREG mm, OperandR_M64 r_m64)
	{
		if(!emulateSSE)
		{
			return Assembler::pmulhuw(mm, r_m64);
		}

		if(r_m64.type == Operand::MMREG)
		{
			return pmulhuw(mm, (OperandMMREG&)r_m64);
		}

		return pmulhuw(mm, (OperandMEM64&)r_m64);
	}

	// prefetchnta hint. When emulating, nothing is emitted and -1 is returned;
	// otherwise the instruction is forwarded to the base Assembler.
	int CodeGenerator::prefetchnta(OperandMEM mem)
	{
		if(emulateSSE)
		{
			return -1;
		}
		else
		{
			// Must be qualified: an unqualified call resolves to this very
			// function and recurses without end.
			return Assembler::prefetchnta(mem);
		}
	}

	// prefetcht0 hint. When emulating, nothing is emitted and -1 is returned;
	// otherwise the instruction is forwarded to the base Assembler.
	int CodeGenerator::prefetcht0(OperandMEM mem)
	{
		if(emulateSSE)
		{
			return -1;
		}
		else
		{
			// Must be qualified: an unqualified call resolves to this very
			// function and recurses without end.
			return Assembler::prefetcht0(mem);
		}
	}

	// prefetcht1 hint. When emulating, nothing is emitted and -1 is returned;
	// otherwise the instruction is forwarded to the base Assembler.
	int CodeGenerator::prefetcht1(OperandMEM mem)
	{
		if(emulateSSE)
		{
			return -1;
		}
		else
		{
			// Must be qualified: an unqualified call resolves to this very
			// function and recurses without end.
			return Assembler::prefetcht1(mem);
		}
	}

	// prefetcht2 hint. When emulating, nothing is emitted and -1 is returned;
	// otherwise the instruction is forwarded to the base Assembler.
	int CodeGenerator::prefetcht2(OperandMEM mem)
	{
		if(emulateSSE)
		{
			return -1;
		}
		else
		{
			// Must be qualified: an unqualified call resolves to this very
			// function and recurses without end.
			return Assembler::prefetcht2(mem);
		}
	}

	// Word shuffle of mmj into mmi; each 2-bit field of c selects the source
	// word for one destination word. The selector 0xE4 (words 0,1,2,3 in order)
	// is handled as a plain move, or as nothing when both registers are equal.
	// With emulateSSE set, mmj is spilled to a static buffer, the selected
	// words are gathered into a second buffer through a scratch register and
	// loaded into mmi; -1 is returned. Otherwise forwards to
	// Assembler::pshufw.
	int CodeGenerator::pshufw(OperandMMREG mmi, OperandMMREG mmj, unsigned char c)
	{
		if(c == 0xE4)
		{
			if(mmi == mmj)
			{
				return -1;
			}

			return movq(mmi, mmj);
		}

		if(!emulateSSE)
		{
			return Assembler::pshufw(mmi, mmj, c);
		}

		static short word1[4];   // spilled source
		static short word2[4];   // shuffled result
		static int t0;           // key for the scratch register obtained via x16/r16

		movq(qword_ptr [word1], mmj);

		for(int k = 0; k < 4; k++)
		{
			mov(x16(&t0), word_ptr [&word1[(c >> (2 * k)) & 0x03]]);
			mov(word_ptr [&word2[k]], r16(&t0));
		}

		movq(mmi, qword_ptr [word2]);

		free(&t0);
		return -1;
	}

	// Word shuffle of a 64-bit memory operand into mm; each 2-bit field of c
	// selects the source word for one destination word. With emulateSSE set,
	// the selected words are read from m64 + 2*index through a scratch
	// register into a static buffer that is then loaded into mm; -1 is
	// returned. Otherwise forwards to Assembler::pshufw.
	int CodeGenerator::pshufw(OperandMMREG mm, OperandMEM64 m64, unsigned char c)
	{
		if(!emulateSSE)
		{
			return Assembler::pshufw(mm, m64, c);
		}

		static short word[4];   // shuffled result
		static int t0;          // key for the scratch register obtained via x16/r16

		for(int k = 0; k < 4; k++)
		{
			mov(x16(&t0), (OperandMEM16&)(m64+((c>>(2*k))&0x03)*2));
			mov(word_ptr [&word[k]], r16(&t0));
		}

		movq(mm, qword_ptr [word]);

		free(&t0);
		return -1;
	}

	// pshufw with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::pshufw(OperandMMREG mm, OperandR_M64 r_m64, unsigned char c)
	{
		if(!emulateSSE)
		{
			return Assembler::pshufw(mm, r_m64, c);
		}

		if(r_m64.type == Operand::MMREG)
		{
			return pshufw(mm, (OperandMMREG&)r_m64, c);
		}

		return pshufw(mm, (OperandMEM64&)r_m64, c);
	}

	// rcpps xmmi, xmmj: per-lane reciprocal. On the emulated path each of the
	// four lanes is computed with x87 code as 1.0f / sse[xmmj][lane] and stored
	// to sse[xmmi][lane]; -1 is returned.
	int CodeGenerator::rcpps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::rcpps(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		static float one = 1.0f;
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&one]);
			fdiv(dword_ptr [&sse[src][lane]]);
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// rcpps xmm, m128: per-lane reciprocal of a memory operand. On the emulated
	// path each lane is computed with x87 code as 1.0f divided by the float at
	// mem128 + 4 * lane and stored to sse[xmm][lane]; -1 is returned.
	int CodeGenerator::rcpps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::rcpps(xmm, mem128);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		static float one = 1.0f;
		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&one]);
			fdiv((OperandMEM32&)(mem128+4*lane));
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// rcpps with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::rcpps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::rcpps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return rcpps(xmm, (OperandXMMREG&)r_m128);
		}

		return rcpps(xmm, (OperandMEM128&)r_m128);
	}

	// rcpss xmmi, xmmj: scalar reciprocal. On the emulated path lane 0 is
	// computed with x87 code as 1.0f / sse[xmmj][0] and stored to sse[xmmi][0];
	// the other lanes are not touched and -1 is returned.
	int CodeGenerator::rcpss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::rcpss(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		static float one = 1.0f;
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&one]);
		fdiv(dword_ptr [&sse[src][0]]);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// rcpss xmm, m32: scalar reciprocal of a memory operand. On the emulated
	// path lane 0 is computed with x87 code as 1.0f / [mem32] and stored to
	// sse[xmm][0]; -1 is returned.
	int CodeGenerator::rcpss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::rcpss(xmm, mem32);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		static float one = 1.0f;
		const int dst = xmm.reg;

		fld(dword_ptr [&one]);
		fdiv((OperandMEM32&)mem32);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// rcpss with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::rcpss(OperandXMMREG xmm, OperandXMM32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::rcpss(xmm, r_m32);
		}

		if(r_m32.type == Operand::XMMREG)
		{
			return rcpss(xmm, (OperandXMMREG&)r_m32);
		}

		return rcpss(xmm, (OperandMEM32&)r_m32);
	}

	// rsqrtps xmmi, xmmj: per-lane reciprocal square root. On the emulated path
	// each lane is computed with x87 code as sqrt(1.0f / sse[xmmj][lane]) and
	// stored to sse[xmmi][lane]; -1 is returned.
	int CodeGenerator::rsqrtps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::rsqrtps(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		static float one = 1.0f;
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&one]);
			fdiv(dword_ptr [&sse[src][lane]]);
			fsqrt();
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// rsqrtps xmm, m128: per-lane reciprocal square root of a memory operand.
	// On the emulated path each lane is computed with x87 code as
	// sqrt(1.0f / float at mem128 + 4 * lane) and stored to sse[xmm][lane];
	// -1 is returned.
	int CodeGenerator::rsqrtps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::rsqrtps(xmm, mem128);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		static float one = 1.0f;
		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&one]);
			fdiv((OperandMEM32&)(mem128+4*lane));
			fsqrt();
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// rsqrtps with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::rsqrtps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::rsqrtps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return rsqrtps(xmm, (OperandXMMREG&)r_m128);
		}

		return rsqrtps(xmm, (OperandMEM128&)r_m128);
	}

	// rsqrtss xmmi, xmmj: scalar reciprocal square root. On the emulated path
	// lane 0 is computed with x87 code as sqrt(1.0f / sse[xmmj][0]) and stored
	// to sse[xmmi][0]; the other lanes are not touched and -1 is returned.
	int CodeGenerator::rsqrtss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::rsqrtss(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		static float one = 1.0f;
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&one]);
		fdiv(dword_ptr [&sse[src][0]]);
		fsqrt();
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// rsqrtss xmm, m32: scalar reciprocal square root of a memory operand. On
	// the emulated path lane 0 is computed with x87 code as sqrt(1.0f / [mem32])
	// and stored to sse[xmm][0]; -1 is returned.
	int CodeGenerator::rsqrtss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::rsqrtss(xmm, mem32);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		static float one = 1.0f;
		const int dst = xmm.reg;

		fld(dword_ptr [&one]);
		fdiv((OperandMEM32&)mem32);
		fsqrt();
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// rsqrtss with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::rsqrtss(OperandXMMREG xmm, OperandXMM32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::rsqrtss(xmm, r_m32);
		}

		if(r_m32.type == Operand::XMMREG)
		{
			return rsqrtss(xmm, (OperandXMMREG&)r_m32);
		}

		return rsqrtss(xmm, (OperandMEM32&)r_m32);
	}

	// Emits SFENCE. When SSE is emulated nothing is emitted and -1 is returned.
	int CodeGenerator::sfence()
	{
		if(emulateSSE) return -1;

		return Assembler::sfence();
	}

	// shufps xmmi, xmmj, c:
	//   result[0] = xmmi[(c >> 0) & 3]    result[1] = xmmi[(c >> 2) & 3]
	//   result[2] = xmmj[(c >> 4) & 3]    result[3] = xmmj[(c >> 6) & 3]
	//
	// With c == 0xE4 every selector names its own lane, so lanes 0 and 1 keep
	// the destination's values and lanes 2 and 3 come from the source. That is
	// a no-op only when both operands are the same register; for distinct
	// registers it is NOT a full copy, so the instruction is emitted normally.
	//
	// On the emulated path -1 is returned. The emitted code uses a static
	// scratch buffer, like the other emulation routines that use statics.
	int CodeGenerator::shufps(OperandXMMREG xmmi, OperandXMMREG xmmj, unsigned char c)
	{
		if(c == 0xE4 && xmmi == xmmj)
		{
			return -1;
		}

		if(!emulateSSE)
		{
			return Assembler::shufps(xmmi, xmmj, c);
		}

		const int i = xmmi.reg;
		const int j = xmmj.reg;
		static int t0;
		static float shuffled[4];

		// Gather all four lanes into the scratch buffer before touching the
		// destination. Writing sse[i] in place would corrupt later reads when a
		// selector names an already written lane, or when xmmi and xmmj match.
		mov(x32(&t0), dword_ptr [&sse[i][(c >> 0) & 0x03]]);
		mov(dword_ptr [&shuffled[0]], r32(&t0));

		mov(x32(&t0), dword_ptr [&sse[i][(c >> 2) & 0x03]]);
		mov(dword_ptr [&shuffled[1]], r32(&t0));

		mov(x32(&t0), dword_ptr [&sse[j][(c >> 4) & 0x03]]);
		mov(dword_ptr [&shuffled[2]], r32(&t0));

		mov(x32(&t0), dword_ptr [&sse[j][(c >> 6) & 0x03]]);
		mov(dword_ptr [&shuffled[3]], r32(&t0));

		// Commit the result to the destination register's storage.
		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&shuffled[lane]]);
			mov(dword_ptr [&sse[i][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// shufps xmm, m128, c:
	//   result[0] = xmm[(c >> 0) & 3]     result[1] = xmm[(c >> 2) & 3]
	//   result[2] = m128[(c >> 4) & 3]    result[3] = m128[(c >> 6) & 3]
	//
	// On the emulated path -1 is returned. The emitted code uses a static
	// scratch buffer, like the other emulation routines that use statics.
	int CodeGenerator::shufps(OperandXMMREG xmm, OperandMEM128 m128, unsigned char c)
	{
		if(!emulateSSE)
		{
			return Assembler::shufps(xmm, m128, c);
		}

		const int i = xmm.reg;
		static int t0;
		static float shuffled[4];

		// Gather all four lanes into the scratch buffer before touching the
		// destination. Writing sse[i][0] in place would corrupt the lane 1
		// read whenever its selector is 0 and lane 0 has changed.
		mov(x32(&t0), dword_ptr [&sse[i][(c >> 0) & 0x03]]);
		mov(dword_ptr [&shuffled[0]], r32(&t0));

		mov(x32(&t0), dword_ptr [&sse[i][(c >> 2) & 0x03]]);
		mov(dword_ptr [&shuffled[1]], r32(&t0));

		mov(x32(&t0), (OperandMEM32&)(m128+((c>>4)&0x03)*4));
		mov(dword_ptr [&shuffled[2]], r32(&t0));

		mov(x32(&t0), (OperandMEM32&)(m128+((c>>6)&0x03)*4));
		mov(dword_ptr [&shuffled[3]], r32(&t0));

		// Commit the result to the destination register's storage.
		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&t0), dword_ptr [&shuffled[lane]]);
			mov(dword_ptr [&sse[i][lane]], r32(&t0));
		}

		free(&t0);
		return -1;
	}

	// shufps with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::shufps(OperandXMMREG xmm, OperandR_M128 r_m128, unsigned char c)
	{
		if(!emulateSSE)
		{
			return Assembler::shufps(xmm, r_m128, c);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return shufps(xmm, (OperandXMMREG&)r_m128, c);
		}

		return shufps(xmm, (OperandMEM128&)r_m128, c);
	}

	// sqrtps xmmi, xmmj: per-lane square root. On the emulated path each lane
	// of sse[xmmj] is run through x87 fsqrt and stored to sse[xmmi]; -1 is
	// returned.
	int CodeGenerator::sqrtps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::sqrtps(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[src][lane]]);
			fsqrt();
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// sqrtps xmm, m128: per-lane square root of a memory operand. On the
	// emulated path the float at mem128 + 4 * lane is run through x87 fsqrt and
	// stored to sse[xmm][lane]; -1 is returned.
	int CodeGenerator::sqrtps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::sqrtps(xmm, mem128);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld((OperandMEM32&)(mem128+4*lane));
			fsqrt();
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// sqrtps with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::sqrtps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::sqrtps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return sqrtps(xmm, (OperandXMMREG&)r_m128);
		}

		return sqrtps(xmm, (OperandMEM128&)r_m128);
	}

	// sqrtss xmmi, xmmj: scalar square root. On the emulated path sse[xmmj][0]
	// is run through x87 fsqrt and stored to sse[xmmi][0]; the other lanes are
	// not touched and -1 is returned.
	int CodeGenerator::sqrtss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::sqrtss(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&sse[src][0]]);
		fsqrt();
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// sqrtss xmm, m32: scalar square root of a memory operand. On the emulated
	// path the float at mem32 is run through x87 fsqrt and stored to
	// sse[xmm][0]; the other lanes are not touched and -1 is returned.
	int CodeGenerator::sqrtss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(emulateSSE)
		{
			spillMMX();   // NOTE(review): presumably required because x87 code follows
			const int i = xmm.reg;
			fld(mem32);
			fsqrt();
			fstp(dword_ptr [&sse[i][0]]);
			return -1;
		}
		else
		{
			return Assembler::sqrtss(xmm, mem32);
		}
	}
	
	// sqrtss with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::sqrtss(OperandXMMREG xmm, OperandXMM32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::sqrtss(xmm, r_m32);
		}

		if(r_m32.type == Operand::XMMREG)
		{
			return sqrtss(xmm, (OperandXMMREG&)r_m32);
		}

		return sqrtss(xmm, (OperandMEM32&)r_m32);
	}

	// Emits STMXCSR m32. When SSE is emulated nothing is emitted (so m32 is
	// not written by generated code) and -1 is returned.
	int CodeGenerator::stmxcsr(OperandMEM32 m32)
	{
		if(emulateSSE) return -1;

		return Assembler::stmxcsr(m32);
	}

	// subps xmmi, xmmj: per-lane subtraction. On the emulated path each lane is
	// computed with x87 code as sse[xmmi][lane] - sse[xmmj][lane] and stored
	// back to sse[xmmi][lane]; -1 is returned.
	int CodeGenerator::subps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::subps(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[dst][lane]]);
			fsub(dword_ptr [&sse[src][lane]]);
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// subps xmm, m128: per-lane subtraction of a memory operand. On the
	// emulated path each lane is computed with x87 code as sse[xmm][lane] minus
	// the float at mem128 + 4 * lane and stored back to sse[xmm][lane]; -1 is
	// returned.
	int CodeGenerator::subps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::subps(xmm, mem128);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			fld(dword_ptr [&sse[dst][lane]]);
			fsub((OperandMEM32&)(mem128+4*lane));
			fstp(dword_ptr [&sse[dst][lane]]);
		}

		return -1;
	}
	
	// subps with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::subps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::subps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return subps(xmm, (OperandXMMREG&)r_m128);
		}

		return subps(xmm, (OperandMEM128&)r_m128);
	}

	// subss xmmi, xmmj: scalar subtraction. On the emulated path lane 0 is
	// computed with x87 code as sse[xmmi][0] - sse[xmmj][0] and stored back to
	// sse[xmmi][0]; the other lanes are not touched and -1 is returned.
	int CodeGenerator::subss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::subss(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		fld(dword_ptr [&sse[dst][0]]);
		fsub(dword_ptr [&sse[src][0]]);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// subss xmm, m32: scalar subtraction of a memory operand. On the emulated
	// path lane 0 is computed with x87 code as sse[xmm][0] - [mem32] and stored
	// back to sse[xmm][0]; -1 is returned.
	int CodeGenerator::subss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::subss(xmm, mem32);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int dst = xmm.reg;

		fld(dword_ptr [&sse[dst][0]]);
		fsub((OperandMEM32&)mem32);
		fstp(dword_ptr [&sse[dst][0]]);

		return -1;
	}
	
	// subss with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::subss(OperandXMMREG xmm, OperandXMM32 r_m32)
	{
		if(!emulateSSE)
		{
			return Assembler::subss(xmm, r_m32);
		}

		if(r_m32.type == Operand::XMMREG)
		{
			return subss(xmm, (OperandXMMREG&)r_m32);
		}

		return subss(xmm, (OperandMEM32&)r_m32);
	}

	// ucomiss xmmi, xmmj: scalar compare that sets EFLAGS. On the emulated path
	// sse[xmmj][0] and then sse[xmmi][0] are loaded onto the x87 stack, so that
	// fcomip compares st0 (xmmi) against st1 (xmmj) and pops once; ffree then
	// releases the remaining entry. -1 is returned.
	int CodeGenerator::ucomiss(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::ucomiss(xmmi, xmmj);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int lhs = xmmi.reg;
		const int rhs = xmmj.reg;

		fld(dword_ptr [&sse[rhs][0]]);
		fld(dword_ptr [&sse[lhs][0]]);
		fcomip(st0, st1);
		ffree(st0);

		return -1;
	}

	// ucomiss xmm, m32: scalar compare against memory that sets EFLAGS. On the
	// emulated path [mem32] and then sse[xmm][0] are loaded onto the x87 stack,
	// so that fcomip compares st0 (xmm) against st1 (memory) and pops once;
	// ffree then releases the remaining entry. -1 is returned.
	int CodeGenerator::ucomiss(OperandXMMREG xmm, OperandMEM32 mem32)
	{
		if(!emulateSSE)
		{
			return Assembler::ucomiss(xmm, mem32);
		}

		spillMMX();   // NOTE(review): presumably required because x87 code follows

		const int lhs = xmm.reg;

		fld(mem32);
		fld(dword_ptr [&sse[lhs][0]]);
		fcomip(st0, st1);
		ffree(st0);

		return -1;
	}

	// ucomiss with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::ucomiss(OperandXMMREG xmm, OperandXMM32 xmm32)
	{
		if(!emulateSSE)
		{
			return Assembler::ucomiss(xmm, xmm32);
		}

		if(xmm32.type == Operand::XMMREG)
		{
			return ucomiss(xmm, (OperandXMMREG&)xmm32);
		}

		return ucomiss(xmm, (OperandMEM32&)xmm32);
	}

	// unpckhps xmmi, xmmj: interleaves the two high lanes of both operands:
	//   result = { xmmi[2], xmmj[2], xmmi[3], xmmj[3] }
	// On the emulated path the lanes are moved within sse[][] through a
	// temporary general purpose register and -1 is returned.
	int CodeGenerator::unpckhps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(emulateSSE)
		{
			static int t0;
			const int i = xmmi.reg;
			const int j = xmmj.reg;

			// The stores go in ascending lane order so that every lane is read
			// before it is overwritten, including when xmmi and xmmj are the
			// same register (lanes 0 and 1 are never read; lane 2 is written
			// only after both of its reads).
			mov(x32(&t0), dword_ptr [&sse[i][2]]);
			mov(dword_ptr [&sse[i][0]], r32(&t0));

			mov(x32(&t0), dword_ptr [&sse[j][2]]);
			mov(dword_ptr [&sse[i][1]], r32(&t0));

			mov(x32(&t0), dword_ptr [&sse[i][3]]);
			mov(dword_ptr [&sse[i][2]], r32(&t0));

			mov(x32(&t0), dword_ptr [&sse[j][3]]);
			mov(dword_ptr [&sse[i][3]], r32(&t0));

			free(&t0);
			return -1;
		}
		else
		{
			return Assembler::unpckhps(xmmi, xmmj);
		}
	}
	
	// unpckhps xmm, m128: interleaves the two high lanes of xmm with the floats
	// at mem128 + 8 and mem128 + 12:
	//   result = { xmm[2], m128[2], xmm[3], m128[3] }
	// On the emulated path the lanes are moved through a temporary general
	// purpose register and -1 is returned.
	int CodeGenerator::unpckhps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::unpckhps(xmm, mem128);
		}

		static int temp;
		const int dst = xmm.reg;

		// Lane 0 <- old lane 2
		mov(x32(&temp), dword_ptr [&sse[dst][2]]);
		mov(dword_ptr [&sse[dst][0]], r32(&temp));

		// Lane 2 <- old lane 3
		mov(x32(&temp), dword_ptr [&sse[dst][3]]);
		mov(dword_ptr [&sse[dst][2]], r32(&temp));

		// Lane 1 <- third float of the memory operand
		mov(x32(&temp), (OperandMEM32&)(mem128+8));
		mov(dword_ptr [&sse[dst][1]], r32(&temp));

		// Lane 3 <- fourth float of the memory operand
		mov(x32(&temp), (OperandMEM32&)(mem128+12));
		mov(dword_ptr [&sse[dst][3]], r32(&temp));

		free(&temp);
		return -1;
	}
	
	// unpckhps with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::unpckhps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::unpckhps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return unpckhps(xmm, (OperandXMMREG&)r_m128);
		}

		return unpckhps(xmm, (OperandMEM128&)r_m128);
	}

	// unpcklps xmmi, xmmj: interleaves the two low lanes of both operands:
	//   result = { xmmi[0], xmmj[0], xmmi[1], xmmj[1] }
	// On the emulated path the lanes are moved within sse[][] through a
	// temporary general purpose register and -1 is returned.
	int CodeGenerator::unpcklps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(emulateSSE)
		{
			static int t0;
			const int i = xmmi.reg;
			const int j = xmmj.reg;

			// Lane 0 keeps its value, so no code is emitted for it. The other
			// stores go in descending lane order so that every lane is read
			// before it is overwritten, including when xmmi and xmmj are the
			// same register (lane 1 is written only after both of its reads).
			mov(x32(&t0), dword_ptr [&sse[j][1]]);
			mov(dword_ptr [&sse[i][3]], r32(&t0));

			mov(x32(&t0), dword_ptr [&sse[i][1]]);
			mov(dword_ptr [&sse[i][2]], r32(&t0));

			mov(x32(&t0), dword_ptr [&sse[j][0]]);
			mov(dword_ptr [&sse[i][1]], r32(&t0));

			free(&t0);
			return -1;
		}
		else
		{
			return Assembler::unpcklps(xmmi, xmmj);
		}
	}
	
	// unpcklps xmm, m128: interleaves the two low lanes of xmm with the floats
	// at mem128 + 0 and mem128 + 4:
	//   result = { xmm[0], m128[0], xmm[1], m128[1] }
	// On the emulated path the lanes are moved through a temporary general
	// purpose register and -1 is returned.
	int CodeGenerator::unpcklps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(emulateSSE)
		{
			static int t0;
			const int i = xmm.reg;

			// Lane 0 keeps its value, so no code is emitted for it (copying
			// sse[i][0] onto itself would only produce dead instructions).

			// Lane 2 <- old lane 1; must happen before lane 1 is overwritten.
			mov(x32(&t0), dword_ptr [&sse[i][1]]);
			mov(dword_ptr [&sse[i][2]], r32(&t0));

			mov(x32(&t0), (OperandMEM32&)(mem128+0));
			mov(dword_ptr [&sse[i][1]], r32(&t0));

			mov(x32(&t0), (OperandMEM32&)(mem128+4));
			mov(dword_ptr [&sse[i][3]], r32(&t0));

			free(&t0);
			return -1;
		}
		else
		{
			return Assembler::unpcklps(xmm, mem128);
		}
	}
	
	// unpcklps with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::unpcklps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::unpcklps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return unpcklps(xmm, (OperandXMMREG&)r_m128);
		}

		return unpcklps(xmm, (OperandMEM128&)r_m128);
	}

	// xorps xmmi, xmmj: bitwise exclusive-or of all 128 bits. On the emulated
	// path each 32-bit lane of sse[xmmj] is loaded into a temporary general
	// purpose register and xor'ed into the matching lane of sse[xmmi]; -1 is
	// returned.
	// NOTE(review): `xor` is an alternative operator token in ISO C++, so the
	// calls below rely on the compiler not treating it as one.
	int CodeGenerator::xorps(OperandXMMREG xmmi, OperandXMMREG xmmj)
	{
		if(!emulateSSE)
		{
			return Assembler::xorps(xmmi, xmmj);
		}

		static int temp;
		const int dst = xmmi.reg;
		const int src = xmmj.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&temp), dword_ptr [&sse[src][lane]]);
			xor(dword_ptr [&sse[dst][lane]], r32(&temp));
		}

		free(&temp);
		return -1;
	}
	
	// xorps xmm, m128: bitwise exclusive-or with a memory operand. On the
	// emulated path each dword at mem128 + 4 * lane is loaded into a temporary
	// general purpose register and xor'ed into sse[xmm][lane]; -1 is returned.
	// NOTE(review): `xor` is an alternative operator token in ISO C++, so the
	// calls below rely on the compiler not treating it as one.
	int CodeGenerator::xorps(OperandXMMREG xmm, OperandMEM128 mem128)
	{
		if(!emulateSSE)
		{
			return Assembler::xorps(xmm, mem128);
		}

		static int temp;
		const int dst = xmm.reg;

		for(int lane = 0; lane < 4; lane++)
		{
			mov(x32(&temp), (OperandMEM32&)(mem128+4*lane));
			xor(dword_ptr [&sse[dst][lane]], r32(&temp));
		}

		free(&temp);
		return -1;
	}
	
	// xorps with a register-or-memory source. When SSE is emulated the
	// operand's type tag selects the register or the memory overload; otherwise
	// the instruction is encoded directly.
	int CodeGenerator::xorps(OperandXMMREG xmm, OperandR_M128 r_m128)
	{
		if(!emulateSSE)
		{
			return Assembler::xorps(xmm, r_m128);
		}

		if(r_m128.type == Operand::XMMREG)
		{
			return xorps(xmm, (OperandXMMREG&)r_m128);
		}

		return xorps(xmm, (OperandMEM128&)r_m128);
	}

	void CodeGenerator::dumpSSE()
	{
		pushad();
		emms();

		static float sse[8][4];

		movups(xword_ptr [sse[0]], xmm0);
		movups(xword_ptr [sse[1]], xmm1);
		movups(xword_ptr [sse[2]], xmm2);
		movups(xword_ptr [sse[3]], xmm3);
		movups(xword_ptr [sse[4]], xmm4);
		movups(xword_ptr [sse[5]], xmm5);
		movups(xword_ptr [sse[6]], xmm6);
		movups(xword_ptr [sse[7]], xmm7);

		static FILE *file;
		static char *perm = "a";
		static char *name;

		if(emulateSSE)
		{
			name = "dumpEmulate.txt";
		}
		else
		{
			name = "dumpNative.txt";
		}

		mov(eax, dword_ptr [&perm]); 
		push(eax);
		mov(ecx, dword_ptr [&name]); 
		push(ecx);
		call((int)fopen);
		add(esp, 8);
		mov(dword_ptr [&file], eax);

		static char *string0 = "xmm0: %f, %f, %f, %f\n";
		static char *string1 = "xmm1: %f, %f, %f, %f\n";
		static char *string2 = "xmm2: %f, %f, %f, %f\n";
		static char *string3 = "xmm3: %f, %f, %f, %f\n";
		static char *string4 = "xmm4: %f, %f, %f, %f\n";
		static char *string5 = "xmm5: %f, %f, %f, %f\n";
		static char *string6 = "xmm6: %f, %f, %f, %f\n";
		static char *string7 = "xmm7: %f, %f, %f, %f\n";
		static char *newline = "\n";

		// fprintf(file, string0, sse[0][0], sse[0][1], sse[0][2], sse[0][3]);
		fld(dword_ptr [&sse[0][3]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[0][2]]);
		sub(esp, 8); 
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[0][1]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[0][0]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		mov(eax, dword_ptr [&string0]); 
		push(eax);
		mov(ecx, dword_ptr [&file]); 
		push(ecx);
		call((int)fprintf); 
		add(esp, 0x28); 

		// fprintf(file, string1, sse[1][0], sse[1][1], sse[1][2], sse[1][3]);
		fld(dword_ptr [&sse[1][3]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[1][2]]);
		sub(esp, 8); 
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[1][1]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[1][0]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		mov(eax, dword_ptr [&string1]); 
		push(eax);
		mov(ecx, dword_ptr [&file]); 
		push(ecx);
		call((int)fprintf); 
		add(esp, 0x28); 

		// fprintf(file, string2, sse[2][0], sse[2][1], sse[2][2], sse[2][3]);
		fld(dword_ptr [&sse[2][3]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[2][2]]);
		sub(esp, 8); 
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[2][1]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[2][0]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		mov(eax, dword_ptr [&string2]); 
		push(eax);
		mov(ecx, dword_ptr [&file]); 
		push(ecx);
		call((int)fprintf); 
		add(esp, 0x28); 

		// fprintf(file, string3, sse[3][0], sse[3][1], sse[3][2], sse[3][3]);
		fld(dword_ptr [&sse[3][3]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[3][2]]);
		sub(esp, 8); 
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[3][1]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[3][0]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		mov(eax, dword_ptr [&string3]); 
		push(eax);
		mov(ecx, dword_ptr [&file]); 
		push(ecx);
		call((int)fprintf); 
		add(esp, 0x28); 

		// fprintf(file, string4, sse[4][0], sse[4][1], sse[4][2], sse[4][3]);
		fld(dword_ptr [&sse[4][3]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[4][2]]);
		sub(esp, 8); 
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[4][1]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[4][0]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		mov(eax, dword_ptr [&string4]); 
		push(eax);
		mov(ecx, dword_ptr [&file]); 
		push(ecx);
		call((int)fprintf); 
		add(esp, 0x28); 

		// fprintf(file, string5, sse[5][0], sse[5][1], sse[5][2], sse[5][3]);
		fld(dword_ptr [&sse[5][3]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[5][2]]);
		sub(esp, 8); 
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[5][1]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[5][0]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		mov(eax, dword_ptr [&string5]); 
		push(eax);
		mov(ecx, dword_ptr [&file]); 
		push(ecx);
		call((int)fprintf); 
		add(esp, 0x28);

		// fprintf(file, string6, sse[6][0], sse[6][1], sse[6][2], sse[6][3]);
		fld(dword_ptr [&sse[6][3]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[6][2]]);
		sub(esp, 8); 
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[6][1]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[6][0]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		mov(eax, dword_ptr [&string6]); 
		push(eax);
		mov(ecx, dword_ptr [&file]); 
		push(ecx);
		call((int)fprintf); 
		add(esp, 0x28); 

		// fprintf(file, string7, sse[7][0], sse[7][1], sse[7][2], sse[7][3]);
		fld(dword_ptr [&sse[7][3]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[7][2]]);
		sub(esp, 8); 
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[7][1]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		fld(dword_ptr [&sse[7][0]]);
		sub(esp, 8);
		fstp(qword_ptr [esp]);
		mov(eax, dword_ptr [&string7]); 
		push(eax);
		mov(ecx, dword_ptr [&file]); 
		push(ecx);
		call((int)fprintf); 
		add(esp, 0x28); 

		// fprintf(file, newline);
		mov(eax, dword_ptr [&newline]);
		push(eax);
		mov(ecx, dword_ptr [&file]);
		push(ecx);
		call((int)fprintf); 
		add(esp, 8);

		// fclose(file);
		mov(eax, dword_ptr [&file]); 
		push(eax);
		call((int)fclose);
		add(esp, 4);

		popad();

	//	int3();
	}
}
