From 0609b89f74b85eef69beb49447e60629b5c18106 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Thu, 20 Aug 2015 03:17:06 -0400
Subject: [PATCH 1/6] emitter: Remove unnecessary inline specifiers

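Functions implemented in a class definition are already implicitly inline.
ABI_GetNumXMMRegs() is additionally made static, as it only returns a
constant and does not use any instance state.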
---
 src/common/x64/emitter.h | 66 ++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index e9c9241264..0c35af907f 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -361,10 +361,10 @@ private:
     void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
 
 protected:
-    inline void Write8(u8 value)   {*code++ = value;}
-    inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
-    inline void Write32(u32 value) {*(u32*)code = (value); code += 4;}
-    inline void Write64(u64 value) {*(u64*)code = (value); code += 8;}
+    void Write8(u8 value)   {*code++ = value;}
+    void Write16(u16 value) {*(u16*)code = (value); code += 2;}
+    void Write32(u32 value) {*(u32*)code = (value); code += 4;}
+    void Write64(u64 value) {*(u64*)code = (value); code += 8;}
 
 public:
     XEmitter() { code = nullptr; flags_locked = false; }
@@ -496,11 +496,11 @@ public:
 
     // Extend EAX into EDX in various ways
     void CWD(int bits = 16);
-    inline void CDQ() {CWD(32);}
-    inline void CQO() {CWD(64);}
+    void CDQ() {CWD(32);}
+    void CQO() {CWD(64);}
     void CBW(int bits = 8);
-    inline void CWDE() {CBW(16);}
-    inline void CDQE() {CBW(32);}
+    void CWDE() {CBW(16);}
+    void CDQE() {CBW(32);}
 
     // Load effective address
     void LEA(int bits, X64Reg dest, OpArg src);
@@ -596,13 +596,13 @@ public:
     void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
     void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
 
-    inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); }
-    inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); }
-    inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); }
-    inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); }
-    inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); }
-    inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
-    inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }
+    void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); }
+    void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); }
+    void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); }
+    void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); }
+    void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); }
+    void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
+    void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }
 
     // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
     void ADDPS(X64Reg regOp, OpArg arg);
@@ -859,25 +859,25 @@ public:
     void ROUNDPS(X64Reg dest, OpArg arg, u8 mode);
     void ROUNDPD(X64Reg dest, OpArg arg, u8 mode);
 
-    inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
-    inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
-    inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
-    inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
+    void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
+    void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
+    void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
 
-    inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
-    inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
-    inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
-    inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
+    void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
+    void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
+    void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
 
-    inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
-    inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
-    inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
-    inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
+    void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
+    void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
+    void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
 
-    inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
-    inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
-    inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
-    inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
+    void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
+    void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
+    void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
 
     // AVX
     void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
@@ -1048,9 +1048,9 @@ public:
     void ABI_EmitEpilogue(int maxCallParams);
 
     #ifdef _M_IX86
-    inline int ABI_GetNumXMMRegs() { return 8; }
+    static int ABI_GetNumXMMRegs() { return 8; }
     #else
-    inline int ABI_GetNumXMMRegs() { return 16; }
+    static int ABI_GetNumXMMRegs() { return 16; }
     #endif
 };  // class XEmitter
 

From 55461125ba913588280f440eb023c940d20a08a5 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Thu, 20 Aug 2015 04:14:51 -0400
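Subject: [PATCH 2/6] emitter: Pass OpArg by reference where possible

Signatures touched by this change also have their pointer/reference
declarator spacing normalized (e.g. "const u8 *addr" -> "const u8* addr").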
---
 src/common/x64/emitter.cpp | 666 +++++++++++++++----------------
 src/common/x64/emitter.h   | 776 ++++++++++++++++++-------------------
 2 files changed, 721 insertions(+), 721 deletions(-)

diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 4b79acd1f4..99c682a115 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -374,7 +374,7 @@ void XEmitter::Rex(int w, int r, int x, int b)
         Write8(rx);
 }
 
-void XEmitter::JMP(const u8 *addr, bool force5Bytes)
+void XEmitter::JMP(const u8* addr, bool force5Bytes)
 {
     u64 fn = (u64)addr;
     if (!force5Bytes)
@@ -398,7 +398,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes)
     }
 }
 
-void XEmitter::JMPptr(const OpArg &arg2)
+void XEmitter::JMPptr(const OpArg& arg2)
 {
     OpArg arg = arg2;
     if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument");
@@ -425,7 +425,7 @@ void XEmitter::CALLptr(OpArg arg)
     arg.WriteRest(this);
 }
 
-void XEmitter::CALL(const void *fnptr)
+void XEmitter::CALL(const void* fnptr)
 {
     u64 distance = u64(fnptr) - (u64(code) + 5);
     ASSERT_MSG(
@@ -496,7 +496,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes)
     }
 }
 
-void XEmitter::SetJumpTarget(const FixupBranch &branch)
+void XEmitter::SetJumpTarget(const FixupBranch& branch)
 {
     if (branch.type == 0)
     {
@@ -667,7 +667,7 @@ void XEmitter::CBW(int bits)
 void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);}
 void XEmitter::POP(X64Reg reg)  {WriteSimple1Byte(32, 0x58, reg);}
 
-void XEmitter::PUSH(int bits, const OpArg &reg)
+void XEmitter::PUSH(int bits, const OpArg& reg)
 {
     if (reg.IsSimpleReg())
         PUSH(reg.GetSimpleReg());
@@ -703,7 +703,7 @@ void XEmitter::PUSH(int bits, const OpArg &reg)
     }
 }
 
-void XEmitter::POP(int /*bits*/, const OpArg &reg)
+void XEmitter::POP(int /*bits*/, const OpArg& reg)
 {
     if (reg.IsSimpleReg())
         POP(reg.GetSimpleReg());
@@ -791,12 +791,12 @@ void XEmitter::WriteMulDivType(int bits, OpArg src, int ext)
     src.WriteRest(this);
 }
 
-void XEmitter::MUL(int bits, OpArg src)  {WriteMulDivType(bits, src, 4);}
-void XEmitter::DIV(int bits, OpArg src)  {WriteMulDivType(bits, src, 6);}
-void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);}
-void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
-void XEmitter::NEG(int bits, OpArg src)  {WriteMulDivType(bits, src, 3);}
-void XEmitter::NOT(int bits, OpArg src)  {WriteMulDivType(bits, src, 2);}
+void XEmitter::MUL(int bits, const OpArg& src)  {WriteMulDivType(bits, src, 4);}
+void XEmitter::DIV(int bits, const OpArg& src)  {WriteMulDivType(bits, src, 6);}
+void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);}
+void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);}
+void XEmitter::NEG(int bits, const OpArg& src)  {WriteMulDivType(bits, src, 3);}
+void XEmitter::NOT(int bits, const OpArg& src)  {WriteMulDivType(bits, src, 2);}
 
 void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
 {
@@ -813,24 +813,24 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo
     src.WriteRest(this);
 }
 
-void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
+void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src)
 {
     if (bits <= 16)
         ASSERT_MSG(0, "MOVNTI - bits<=16");
     WriteBitSearchType(bits, src, dest, 0xC3);
 }
 
-void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit
-void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit
+void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit
+void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit
 
-void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
+void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src)
 {
     CheckFlags();
     if (!Common::GetCPUCaps().bmi1)
         ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
     WriteBitSearchType(bits, dest, src, 0xBC, true);
 }
-void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
+void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src)
 {
     CheckFlags();
     if (!Common::GetCPUCaps().lzcnt)
@@ -950,7 +950,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
 }
 
 //shift can be either imm8 or cl
-void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
+void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext)
 {
     CheckFlags();
     bool writeImm = false;
@@ -991,16 +991,16 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
 
 // large rotates and shift are slower on intel than amd
 // intel likes to rotate by 1, and the op is smaller too
-void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);}
-void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);}
-void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);}
-void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);}
-void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);}
-void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);}
-void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);}
+void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);}
+void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);}
+void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);}
+void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);}
+void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);}
+void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 5);}
+void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);}
 
 // index can be either imm8 or register, don't use memory destination because it's slow
-void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
+void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext)
 {
     CheckFlags();
     if (dest.IsImm())
@@ -1029,13 +1029,13 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
     }
 }
 
-void XEmitter::BT(int bits, OpArg dest, OpArg index)  {WriteBitTest(bits, dest, index, 4);}
-void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);}
-void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);}
-void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);}
+void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index)  {WriteBitTest(bits, dest, index, 4);}
+void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);}
+void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);}
+void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);}
 
 //shift can be either imm8 or cl
-void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
+void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)
 {
     CheckFlags();
     if (dest.IsImm())
@@ -1067,7 +1067,7 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
     }
 }
 
-void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift)
+void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)
 {
     CheckFlags();
     if (dest.IsImm())
@@ -1111,7 +1111,7 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit
 }
 
 //operand can either be immediate or register
-void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const
+void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const
 {
     X64Reg _operandReg;
     if (IsImm())
@@ -1257,7 +1257,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o
     }
 }
 
-void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2)
+void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2)
 {
     if (a1.IsImm())
     {
@@ -1283,24 +1283,24 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg
     }
 }
 
-void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
-void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
-void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
-void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
-void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
-void XEmitter::OR  (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
-void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
-void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2)
+void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
+void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
+void XEmitter::SUB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
+void XEmitter::SBB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
+void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
+void XEmitter::OR  (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
+void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
+void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2)
 {
     if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
         LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
     WriteNormalOp(this, bits, nrmMOV, a1, a2);
 }
-void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
-void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
-void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
+void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
+void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
+void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
 
-void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
+void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)
 {
     CheckFlags();
     if (bits == 8)
@@ -1353,7 +1353,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
     }
 }
 
-void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a)
+void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a)
 {
     CheckFlags();
     if (bits == 8)
@@ -1390,7 +1390,7 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr
     arg.WriteRest(this, extrabytes);
 }
 
-void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
 {
     WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
 }
@@ -1418,7 +1418,7 @@ static int GetVEXpp(u8 opPrefix)
         return 0;
 }
 
-void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
 {
     if (!Common::GetCPUCaps().avx)
         ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
@@ -1431,7 +1431,7 @@ void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpA
 }
 
 // Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
-void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
 {
     if (size != 32 && size != 64)
         ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
@@ -1442,7 +1442,7 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r
     arg.WriteRest(this, extrabytes, regOp1);
 }
 
-void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
 {
     CheckFlags();
     if (!Common::GetCPUCaps().bmi1)
@@ -1450,7 +1450,7 @@ void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg
     WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
 }
 
-void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
 {
     CheckFlags();
     if (!Common::GetCPUCaps().bmi2)
@@ -1517,135 +1517,135 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext)
     arg.WriteRest(this);
 }
 
-void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);}
-void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);}
+void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);}
+void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);}
 
-void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
-void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
-void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
+void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
+void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
+void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
 
-void XEmitter::ADDSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseADD, regOp, arg);}
-void XEmitter::ADDSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseADD, regOp, arg);}
-void XEmitter::SUBSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
-void XEmitter::SUBSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
-void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::MULSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
-void XEmitter::MULSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
-void XEmitter::DIVSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
-void XEmitter::DIVSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
-void XEmitter::MINSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
-void XEmitter::MINSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
-void XEmitter::MAXSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
-void XEmitter::MAXSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
-void XEmitter::SQRTSS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
-void XEmitter::SQRTSD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
-void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
+void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseADD, regOp, arg);}
+void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseADD, regOp, arg);}
+void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
+void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
+void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare)   {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare)   {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::MULSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
+void XEmitter::MULSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
+void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
+void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
+void XEmitter::MINSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
+void XEmitter::MINSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
+void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
+void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
+void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
+void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
+void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
 
-void XEmitter::ADDPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseADD, regOp, arg);}
-void XEmitter::ADDPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseADD, regOp, arg);}
-void XEmitter::SUBPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseSUB, regOp, arg);}
-void XEmitter::SUBPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseSUB, regOp, arg);}
-void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare)   {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::ANDPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseAND, regOp, arg);}
-void XEmitter::ANDPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseAND, regOp, arg);}
-void XEmitter::ANDNPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseANDN, regOp, arg);}
-void XEmitter::ANDNPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseANDN, regOp, arg);}
-void XEmitter::ORPS(X64Reg regOp, OpArg arg)    {WriteSSEOp(0x00, sseOR, regOp, arg);}
-void XEmitter::ORPD(X64Reg regOp, OpArg arg)    {WriteSSEOp(0x66, sseOR, regOp, arg);}
-void XEmitter::XORPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseXOR, regOp, arg);}
-void XEmitter::XORPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseXOR, regOp, arg);}
-void XEmitter::MULPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMUL, regOp, arg);}
-void XEmitter::MULPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMUL, regOp, arg);}
-void XEmitter::DIVPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseDIV, regOp, arg);}
-void XEmitter::DIVPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseDIV, regOp, arg);}
-void XEmitter::MINPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMIN, regOp, arg);}
-void XEmitter::MINPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMIN, regOp, arg);}
-void XEmitter::MAXPS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x00, sseMAX, regOp, arg);}
-void XEmitter::MAXPD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0x66, sseMAX, regOp, arg);}
-void XEmitter::SQRTPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
-void XEmitter::SQRTPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
-void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
-void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
-void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
-void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
+void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseADD, regOp, arg);}
+void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseADD, regOp, arg);}
+void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseSUB, regOp, arg);}
+void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseSUB, regOp, arg);}
+void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare)   {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare)   {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseAND, regOp, arg);}
+void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseAND, regOp, arg);}
+void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseANDN, regOp, arg);}
+void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseANDN, regOp, arg);}
+void XEmitter::ORPS(X64Reg regOp, const OpArg& arg)    {WriteSSEOp(0x00, sseOR, regOp, arg);}
+void XEmitter::ORPD(X64Reg regOp, const OpArg& arg)    {WriteSSEOp(0x66, sseOR, regOp, arg);}
+void XEmitter::XORPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseXOR, regOp, arg);}
+void XEmitter::XORPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseXOR, regOp, arg);}
+void XEmitter::MULPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseMUL, regOp, arg);}
+void XEmitter::MULPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseMUL, regOp, arg);}
+void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseDIV, regOp, arg);}
+void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseDIV, regOp, arg);}
+void XEmitter::MINPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseMIN, regOp, arg);}
+void XEmitter::MINPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseMIN, regOp, arg);}
+void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x00, sseMAX, regOp, arg);}
+void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0x66, sseMAX, regOp, arg);}
+void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
+void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
+void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
+void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
+void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
+void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
 
-void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
+void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
 
-void XEmitter::COMISS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
-void XEmitter::COMISD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
-void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
-void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
+void XEmitter::COMISS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
+void XEmitter::COMISD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
+void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
+void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
 
-void XEmitter::MOVAPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
-void XEmitter::MOVAPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
-void XEmitter::MOVAPS(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
-void XEmitter::MOVAPD(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
+void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
+void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
+void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
+void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
 
-void XEmitter::MOVUPS(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVUPD(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVUPS(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
-void XEmitter::MOVUPD(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
+void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
+void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
 
-void XEmitter::MOVDQA(X64Reg regOp, OpArg arg)  {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
-void XEmitter::MOVDQA(OpArg arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
-void XEmitter::MOVDQU(X64Reg regOp, OpArg arg)  {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
-void XEmitter::MOVDQU(OpArg arg, X64Reg regOp)  {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
+void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
+void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
+void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg)  {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
+void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp)  {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
 
-void XEmitter::MOVSS(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVSD(X64Reg regOp, OpArg arg)   {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVSS(OpArg arg, X64Reg regOp)   {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
-void XEmitter::MOVSD(OpArg arg, X64Reg regOp)   {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
+void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg)   {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp)   {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
+void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp)   {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
 
-void XEmitter::MOVLPS(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
-void XEmitter::MOVLPD(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
-void XEmitter::MOVLPS(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
-void XEmitter::MOVLPD(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
+void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg)  { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
+void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg)  { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
+void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
+void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
 
-void XEmitter::MOVHPS(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
-void XEmitter::MOVHPD(X64Reg regOp, OpArg arg)  { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
-void XEmitter::MOVHPS(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
-void XEmitter::MOVHPD(OpArg arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
+void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg)  { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
+void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg)  { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
+void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp)  { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
+void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp)  { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
 
 void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));}
 void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));}
 
-void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
-void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
+void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
+void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
 
-void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
-void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
-void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
-void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
-void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
-void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
+void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
+void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
+void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
+void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
+void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
+void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
 
-void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
-void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
-void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
-void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
+void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
+void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
+void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
+void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
 
-void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
-void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
-void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
-void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
+void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
+void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
+void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
+void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
 
 void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src)  {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));}
 
-void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
-void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
+void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
+void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
 
-void XEmitter::LDDQU(X64Reg dest, OpArg arg)    {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
+void XEmitter::LDDQU(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
 
 // THESE TWO ARE UNTESTED.
-void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
-void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
+void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
+void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
 
-void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
-void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
+void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
+void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
 
-void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
+void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)
 {
     if (Common::GetCPUCaps().sse3)
     {
@@ -1663,9 +1663,9 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
 //There are a few more left
 
 // Also some integer instructions are missing
-void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
-void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
-void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
+void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
+void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
+void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
 
 void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
 void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
@@ -1690,7 +1690,7 @@ void XEmitter::PSRLQ(X64Reg reg, int shift)
     Write8(shift);
 }
 
-void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
+void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg)
 {
     WriteSSEOp(0x66, 0xd3, reg, arg);
 }
@@ -1735,212 +1735,212 @@ void XEmitter::PSRAD(X64Reg reg, int shift)
     Write8(shift);
 }
 
-void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
 {
     if (!Common::GetCPUCaps().ssse3)
         ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
     WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
 }
 
-void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
 {
     if (!Common::GetCPUCaps().sse4_1)
         ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
     WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
 }
 
-void XEmitter::PSHUFB(X64Reg dest, OpArg arg)   {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
-void XEmitter::PTEST(X64Reg dest, OpArg arg)    {WriteSSE41Op(0x66, 0x3817, dest, arg);}
-void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
-void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
+void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg)   {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
+void XEmitter::PTEST(X64Reg dest, const OpArg& arg)    {WriteSSE41Op(0x66, 0x3817, dest, arg);}
+void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
+void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
 
-void XEmitter::PMINSB(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x3838, dest, arg);}
-void XEmitter::PMINSD(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x3839, dest, arg);}
-void XEmitter::PMINUW(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383a, dest, arg);}
-void XEmitter::PMINUD(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383b, dest, arg);}
-void XEmitter::PMAXSB(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383c, dest, arg);}
-void XEmitter::PMAXSD(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383d, dest, arg);}
-void XEmitter::PMAXUW(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383e, dest, arg);}
-void XEmitter::PMAXUD(X64Reg dest, OpArg arg)   {WriteSSE41Op(0x66, 0x383f, dest, arg);}
+void XEmitter::PMINSB(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x3838, dest, arg);}
+void XEmitter::PMINSD(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x3839, dest, arg);}
+void XEmitter::PMINUW(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383a, dest, arg);}
+void XEmitter::PMINUD(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383b, dest, arg);}
+void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383c, dest, arg);}
+void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383d, dest, arg);}
+void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383e, dest, arg);}
+void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg)   {WriteSSE41Op(0x66, 0x383f, dest, arg);}
 
-void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
-void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
-void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
-void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
-void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
-void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
-void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
-void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
-void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
-void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
-void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
-void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
+void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
+void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
+void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
+void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
+void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
+void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
+void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
+void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
+void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
+void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
+void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
+void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
 
-void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
-void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
-void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
+void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
+void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
+void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
 void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); }
 void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); }
 
-void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
 
-void XEmitter::PAND(X64Reg dest, OpArg arg)     {WriteSSEOp(0x66, 0xDB, dest, arg);}
-void XEmitter::PANDN(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xDF, dest, arg);}
-void XEmitter::PXOR(X64Reg dest, OpArg arg)     {WriteSSEOp(0x66, 0xEF, dest, arg);}
-void XEmitter::POR(X64Reg dest, OpArg arg)      {WriteSSEOp(0x66, 0xEB, dest, arg);}
+void XEmitter::PAND(X64Reg dest, const OpArg& arg)     {WriteSSEOp(0x66, 0xDB, dest, arg);}
+void XEmitter::PANDN(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xDF, dest, arg);}
+void XEmitter::PXOR(X64Reg dest, const OpArg& arg)     {WriteSSEOp(0x66, 0xEF, dest, arg);}
+void XEmitter::POR(X64Reg dest, const OpArg& arg)      {WriteSSEOp(0x66, 0xEB, dest, arg);}
 
-void XEmitter::PADDB(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFC, dest, arg);}
-void XEmitter::PADDW(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFD, dest, arg);}
-void XEmitter::PADDD(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFE, dest, arg);}
-void XEmitter::PADDQ(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xD4, dest, arg);}
+void XEmitter::PADDB(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFC, dest, arg);}
+void XEmitter::PADDW(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFD, dest, arg);}
+void XEmitter::PADDD(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFE, dest, arg);}
+void XEmitter::PADDQ(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xD4, dest, arg);}
 
-void XEmitter::PADDSB(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xEC, dest, arg);}
-void XEmitter::PADDSW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xED, dest, arg);}
-void XEmitter::PADDUSB(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xDC, dest, arg);}
-void XEmitter::PADDUSW(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xDD, dest, arg);}
+void XEmitter::PADDSB(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xEC, dest, arg);}
+void XEmitter::PADDSW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xED, dest, arg);}
+void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xDC, dest, arg);}
+void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xDD, dest, arg);}
 
-void XEmitter::PSUBB(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xF8, dest, arg);}
-void XEmitter::PSUBW(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xF9, dest, arg);}
-void XEmitter::PSUBD(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFA, dest, arg);}
-void XEmitter::PSUBQ(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xFB, dest, arg);}
+void XEmitter::PSUBB(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xF8, dest, arg);}
+void XEmitter::PSUBW(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xF9, dest, arg);}
+void XEmitter::PSUBD(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFA, dest, arg);}
+void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xFB, dest, arg);}
 
-void XEmitter::PSUBSB(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xE8, dest, arg);}
-void XEmitter::PSUBSW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xE9, dest, arg);}
-void XEmitter::PSUBUSB(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xD8, dest, arg);}
-void XEmitter::PSUBUSW(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xD9, dest, arg);}
+void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xE8, dest, arg);}
+void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xE9, dest, arg);}
+void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xD8, dest, arg);}
+void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xD9, dest, arg);}
 
-void XEmitter::PAVGB(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xE0, dest, arg);}
-void XEmitter::PAVGW(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xE3, dest, arg);}
+void XEmitter::PAVGB(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xE0, dest, arg);}
+void XEmitter::PAVGW(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xE3, dest, arg);}
 
-void XEmitter::PCMPEQB(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x74, dest, arg);}
-void XEmitter::PCMPEQW(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x75, dest, arg);}
-void XEmitter::PCMPEQD(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x76, dest, arg);}
+void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x74, dest, arg);}
+void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x75, dest, arg);}
+void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x76, dest, arg);}
 
-void XEmitter::PCMPGTB(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x64, dest, arg);}
-void XEmitter::PCMPGTW(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x65, dest, arg);}
-void XEmitter::PCMPGTD(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0x66, dest, arg);}
+void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x64, dest, arg);}
+void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x65, dest, arg);}
+void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0x66, dest, arg);}
 
-void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg)    {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
-void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg)    {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
+void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg)    {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
+void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg)    {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
 
-void XEmitter::PMADDWD(X64Reg dest, OpArg arg)  {WriteSSEOp(0x66, 0xF5, dest, arg); }
-void XEmitter::PSADBW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xF6, dest, arg);}
+void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg)  {WriteSSEOp(0x66, 0xF5, dest, arg); }
+void XEmitter::PSADBW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xF6, dest, arg);}
 
-void XEmitter::PMAXSW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xEE, dest, arg); }
-void XEmitter::PMAXUB(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xDE, dest, arg); }
-void XEmitter::PMINSW(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xEA, dest, arg); }
-void XEmitter::PMINUB(X64Reg dest, OpArg arg)   {WriteSSEOp(0x66, 0xDA, dest, arg); }
+void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xEE, dest, arg); }
+void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xDE, dest, arg); }
+void XEmitter::PMINSW(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xEA, dest, arg); }
+void XEmitter::PMINUB(X64Reg dest, const OpArg& arg)   {WriteSSEOp(0x66, 0xDA, dest, arg); }
 
-void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg)    {WriteSSEOp(0x66, 0xD7, dest, arg); }
-void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle)    {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
-void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle)   {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
-void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle)   {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg)    {WriteSSEOp(0x66, 0xD7, dest, arg); }
+void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle)    {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle)   {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle)   {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
 
 // VEX
-void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
-void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
-void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
-void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
-void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
-void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
-void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
-void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
-void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg)  {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
-void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
-void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
-void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}
+void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
+void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
+void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
+void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
+void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
+void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
+void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
+void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
+void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)  {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
+void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
+void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
+void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}
 
-void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
-void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
-void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg)  { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
-void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)  { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
-void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
-void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
-void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
-void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
+void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
+void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
+void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)  { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
+void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)  { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
+void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
+void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
+void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
+void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
 
-void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
-void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
-void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg)     { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
-void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }
+void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
+void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
+void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)     { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
+void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }
 
-void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); }
-void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
-void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
-void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
-void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
-void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
-void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)    { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg)   { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } // FMA table: all entries use prefix 0x66 and a 0x38xx opcode; 0x9x/0xAx/0xBx in the low byte match the 132/213/231 forms
+void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
+void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
+void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } // NOTE(review): PD/SD entries reuse the PS/SS opcode and add a trailing 1, which binds to the parameter declared as 'extrabytes' in emitter.h; FMA has no immediate, so this may have been meant as VEX.W - confirm against WriteAVXOp
+void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } // scalar (SS/SD) entries use the opcode byte one above the packed (PS/PD) entry
+void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
+void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
+void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)    { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg)   { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } // ADDSUB/SUBADD entries exist only in packed (PS/PD) form in this table
+void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
+void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
+void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
 
-void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate)      {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);}
-void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);}
-void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg)                 {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
-void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg)               {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
-void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg)                 {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
-void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
+void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} // SARX/SHLX/SHRX share opcode 0x38F7 and differ only in prefix; regOp2 is declared last but forwarded before arg
+void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} // prefix 0x66
+void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} // prefix 0xF2
+void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate)      {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} // no second register; extrabytes = 1 accounts for the rotate count byte written right after
+void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} // PEXT/PDEP/BZHI share opcode 0x38F5 and differ only in prefix
+void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} // prefix 0xF2
+void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} // note: regOp2 and regOp1 are forwarded in swapped order
+void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} // prefix argument 0x00
+void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg)                 {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} // BLSR/BLSMSK/BLSI share opcode 0x38F3; the constant in the first register slot (1/2/3) selects the operation (presumably the ModRM opcode extension - confirm)
+void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg)               {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} // selector 2
+void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg)                 {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} // selector 3
+void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} // opcode 0x38F7 via the BMI1 writer; regOp2 is declared last but forwarded before arg
+void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} // opcode 0x38F2 via the BMI1 writer
 
 // Prefixes
 
@@ -1956,7 +1956,7 @@ void XEmitter::FWAIT()
 }
 
 // TODO: make this more generic
-void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg)
+void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg)
 {
     int mf = 0;
     ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction");
@@ -1974,9 +1974,9 @@ void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg a
     arg.WriteRest(this, 0, (X64Reg) op);
 }
 
-void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);}
-void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);}
-void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}
+void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} // x87 load; floatLD80 is supplied as the 80-bit op
+void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} // floatINVALID as the 80-bit op: WriteFloatLoadStore asserts if bits == 80
+void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} // floatSTP80 is supplied as the 80-bit op
 void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }
 
 void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); }
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index 0c35af907f..84a92f9895 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -344,19 +344,19 @@ private:
     void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
     void WriteMulDivType(int bits, OpArg src, int ext);
     void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
-    void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
-    void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
+    void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext);
+    void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext);
     void WriteMXCSR(OpArg arg, int ext);
     void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
-    void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
-    void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
-    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
-    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
-    void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
-    void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
-    void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
-    void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
-    void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
+    void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
+    void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
+    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
+    void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+    void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+    void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+    void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+    void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
+    void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
 
     void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
 
@@ -413,8 +413,8 @@ public:
     // Stack control
     void PUSH(X64Reg reg);
     void POP(X64Reg reg);
-    void PUSH(int bits, const OpArg &reg);
-    void POP(int bits, const OpArg &reg);
+    void PUSH(int bits, const OpArg& reg);
+    void POP(int bits, const OpArg& reg);
     void PUSHF();
     void POPF();
 
@@ -424,21 +424,21 @@ public:
     void UD2();
     FixupBranch J(bool force5bytes = false);
 
-    void JMP(const u8 * addr, bool force5Bytes = false);
+    void JMP(const u8* addr, bool force5Bytes = false);
     void JMP(OpArg arg);
-    void JMPptr(const OpArg &arg);
+    void JMPptr(const OpArg& arg);
     void JMPself(); //infinite loop!
 #ifdef CALL
 #undef CALL
 #endif
-    void CALL(const void *fnptr);
+    void CALL(const void* fnptr);
     void CALLptr(OpArg arg);
 
     FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
     //void J_CC(CCFlags conditionCode, JumpTarget target);
-    void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);
+    void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
 
-    void SetJumpTarget(const FixupBranch &branch);
+    void SetJumpTarget(const FixupBranch& branch);
 
     void SETcc(CCFlags flag, OpArg dest);
     // Note: CMOV brings small if any benefit on current cpus.
@@ -450,8 +450,8 @@ public:
     void SFENCE();
 
     // Bit scan
-    void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit
-    void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit
+    void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit
+    void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit
 
     // Cache control
     enum PrefetchLevel
@@ -462,37 +462,37 @@ public:
         PF_T2,  //Levels 3+ (aliased to T0 on AMD)
     };
     void PREFETCH(PrefetchLevel level, OpArg arg);
-    void MOVNTI(int bits, OpArg dest, X64Reg src);
-    void MOVNTDQ(OpArg arg, X64Reg regOp);
-    void MOVNTPS(OpArg arg, X64Reg regOp);
-    void MOVNTPD(OpArg arg, X64Reg regOp);
+    void MOVNTI(int bits, const OpArg& dest, X64Reg src);
+    void MOVNTDQ(const OpArg& arg, X64Reg regOp);
+    void MOVNTPS(const OpArg& arg, X64Reg regOp);
+    void MOVNTPD(const OpArg& arg, X64Reg regOp);
 
     // Multiplication / division
-    void MUL(int bits, OpArg src); //UNSIGNED
-    void IMUL(int bits, OpArg src); //SIGNED
-    void IMUL(int bits, X64Reg regOp, OpArg src);
-    void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm);
-    void DIV(int bits, OpArg src);
-    void IDIV(int bits, OpArg src);
+    void MUL(int bits, const OpArg& src); // Unsigned
+    void IMUL(int bits, const OpArg& src); // Signed
+    void IMUL(int bits, X64Reg regOp, const OpArg& src);
+    void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
+    void DIV(int bits, const OpArg& src);
+    void IDIV(int bits, const OpArg& src);
 
     // Shift
-    void ROL(int bits, OpArg dest, OpArg shift);
-    void ROR(int bits, OpArg dest, OpArg shift);
-    void RCL(int bits, OpArg dest, OpArg shift);
-    void RCR(int bits, OpArg dest, OpArg shift);
-    void SHL(int bits, OpArg dest, OpArg shift);
-    void SHR(int bits, OpArg dest, OpArg shift);
-    void SAR(int bits, OpArg dest, OpArg shift);
+    void ROL(int bits, const OpArg& dest, const OpArg& shift);
+    void ROR(int bits, const OpArg& dest, const OpArg& shift);
+    void RCL(int bits, const OpArg& dest, const OpArg& shift);
+    void RCR(int bits, const OpArg& dest, const OpArg& shift);
+    void SHL(int bits, const OpArg& dest, const OpArg& shift);
+    void SHR(int bits, const OpArg& dest, const OpArg& shift);
+    void SAR(int bits, const OpArg& dest, const OpArg& shift);
 
     // Bit Test
-    void BT(int bits, OpArg dest, OpArg index);
-    void BTS(int bits, OpArg dest, OpArg index);
-    void BTR(int bits, OpArg dest, OpArg index);
-    void BTC(int bits, OpArg dest, OpArg index);
+    void BT(int bits, const OpArg& dest, const OpArg& index);
+    void BTS(int bits, const OpArg& dest, const OpArg& index);
+    void BTR(int bits, const OpArg& dest, const OpArg& index);
+    void BTC(int bits, const OpArg& dest, const OpArg& index);
 
     // Double-Precision Shift
-    void SHRD(int bits, OpArg dest, OpArg src, OpArg shift);
-    void SHLD(int bits, OpArg dest, OpArg src, OpArg shift);
+    void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
+    void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
 
     // Extend EAX into EDX in various ways
     void CWD(int bits = 16);
@@ -506,23 +506,23 @@ public:
     void LEA(int bits, X64Reg dest, OpArg src);
 
     // Integer arithmetic
-    void NEG (int bits, OpArg src);
-    void ADD (int bits, const OpArg &a1, const OpArg &a2);
-    void ADC (int bits, const OpArg &a1, const OpArg &a2);
-    void SUB (int bits, const OpArg &a1, const OpArg &a2);
-    void SBB (int bits, const OpArg &a1, const OpArg &a2);
-    void AND (int bits, const OpArg &a1, const OpArg &a2);
-    void CMP (int bits, const OpArg &a1, const OpArg &a2);
+    void NEG(int bits, const OpArg& src);
+    void ADD(int bits, const OpArg& a1, const OpArg& a2);
+    void ADC(int bits, const OpArg& a1, const OpArg& a2);
+    void SUB(int bits, const OpArg& a1, const OpArg& a2);
+    void SBB(int bits, const OpArg& a1, const OpArg& a2);
+    void AND(int bits, const OpArg& a1, const OpArg& a2);
+    void CMP(int bits, const OpArg& a1, const OpArg& a2);
 
     // Bit operations
-    void NOT (int bits, OpArg src);
-    void OR  (int bits, const OpArg &a1, const OpArg &a2);
-    void XOR (int bits, const OpArg &a1, const OpArg &a2);
-    void MOV (int bits, const OpArg &a1, const OpArg &a2);
-    void TEST(int bits, const OpArg &a1, const OpArg &a2);
+    void NOT(int bits, const OpArg& src);
+    void OR(int bits, const OpArg& a1, const OpArg& a2);
+    void XOR(int bits, const OpArg& a1, const OpArg& a2);
+    void MOV(int bits, const OpArg& a1, const OpArg& a2);
+    void TEST(int bits, const OpArg& a1, const OpArg& a2);
 
     // Are these useful at all? Consider removing.
-    void XCHG(int bits, const OpArg &a1, const OpArg &a2);
+    void XCHG(int bits, const OpArg& a1, const OpArg& a2);
     void XCHG_AHAL();
 
     // Byte swapping (32 and 64-bit only).
@@ -536,13 +536,13 @@ public:
     void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
 
     // Available only on AMD >= Phenom or Intel >= Haswell
-    void LZCNT(int bits, X64Reg dest, OpArg src);
+    void LZCNT(int bits, X64Reg dest, const OpArg& src);
     // Note: this one is actually part of BMI1
-    void TZCNT(int bits, X64Reg dest, OpArg src);
+    void TZCNT(int bits, X64Reg dest, const OpArg& src);
 
     // WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
-    void STMXCSR(OpArg memloc);
-    void LDMXCSR(OpArg memloc);
+    void STMXCSR(const OpArg& memloc);
+    void LDMXCSR(const OpArg& memloc);
 
     // Prefixes
     void LOCK();
@@ -569,259 +569,259 @@ public:
         x87_FPUBusy = 0x8000,
     };
 
-    void FLD(int bits, OpArg src);
-    void FST(int bits, OpArg dest);
-    void FSTP(int bits, OpArg dest);
+    void FLD(int bits, const OpArg& src);
+    void FST(int bits, const OpArg& dest);
+    void FSTP(int bits, const OpArg& dest);
     void FNSTSW_AX();
     void FWAIT();
 
     // SSE/SSE2: Floating point arithmetic
-    void ADDSS(X64Reg regOp, OpArg arg);
-    void ADDSD(X64Reg regOp, OpArg arg);
-    void SUBSS(X64Reg regOp, OpArg arg);
-    void SUBSD(X64Reg regOp, OpArg arg);
-    void MULSS(X64Reg regOp, OpArg arg);
-    void MULSD(X64Reg regOp, OpArg arg);
-    void DIVSS(X64Reg regOp, OpArg arg);
-    void DIVSD(X64Reg regOp, OpArg arg);
-    void MINSS(X64Reg regOp, OpArg arg);
-    void MINSD(X64Reg regOp, OpArg arg);
-    void MAXSS(X64Reg regOp, OpArg arg);
-    void MAXSD(X64Reg regOp, OpArg arg);
-    void SQRTSS(X64Reg regOp, OpArg arg);
-    void SQRTSD(X64Reg regOp, OpArg arg);
-    void RSQRTSS(X64Reg regOp, OpArg arg);
+    void ADDSS(X64Reg regOp, const OpArg& arg);
+    void ADDSD(X64Reg regOp, const OpArg& arg);
+    void SUBSS(X64Reg regOp, const OpArg& arg);
+    void SUBSD(X64Reg regOp, const OpArg& arg);
+    void MULSS(X64Reg regOp, const OpArg& arg);
+    void MULSD(X64Reg regOp, const OpArg& arg);
+    void DIVSS(X64Reg regOp, const OpArg& arg);
+    void DIVSD(X64Reg regOp, const OpArg& arg);
+    void MINSS(X64Reg regOp, const OpArg& arg);
+    void MINSD(X64Reg regOp, const OpArg& arg);
+    void MAXSS(X64Reg regOp, const OpArg& arg);
+    void MAXSD(X64Reg regOp, const OpArg& arg);
+    void SQRTSS(X64Reg regOp, const OpArg& arg);
+    void SQRTSD(X64Reg regOp, const OpArg& arg);
+    void RSQRTSS(X64Reg regOp, const OpArg& arg);
 
     // SSE/SSE2: Floating point bitwise (yes)
-    void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
-    void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
+    void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
+    void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);
 
-    void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); }
-    void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); }
-    void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); }
-    void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); }
-    void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); }
-    void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
-    void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }
+    void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); }
+    void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); }
+    void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); }
+    void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); }
+    void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); }
+    void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); }
+    void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); }
 
     // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
-    void ADDPS(X64Reg regOp, OpArg arg);
-    void ADDPD(X64Reg regOp, OpArg arg);
-    void SUBPS(X64Reg regOp, OpArg arg);
-    void SUBPD(X64Reg regOp, OpArg arg);
-    void CMPPS(X64Reg regOp, OpArg arg, u8 compare);
-    void CMPPD(X64Reg regOp, OpArg arg, u8 compare);
-    void MULPS(X64Reg regOp, OpArg arg);
-    void MULPD(X64Reg regOp, OpArg arg);
-    void DIVPS(X64Reg regOp, OpArg arg);
-    void DIVPD(X64Reg regOp, OpArg arg);
-    void MINPS(X64Reg regOp, OpArg arg);
-    void MINPD(X64Reg regOp, OpArg arg);
-    void MAXPS(X64Reg regOp, OpArg arg);
-    void MAXPD(X64Reg regOp, OpArg arg);
-    void SQRTPS(X64Reg regOp, OpArg arg);
-    void SQRTPD(X64Reg regOp, OpArg arg);
-    void RCPPS(X64Reg regOp, OpArg arg);
-    void RSQRTPS(X64Reg regOp, OpArg arg);
+    void ADDPS(X64Reg regOp, const OpArg& arg);
+    void ADDPD(X64Reg regOp, const OpArg& arg);
+    void SUBPS(X64Reg regOp, const OpArg& arg);
+    void SUBPD(X64Reg regOp, const OpArg& arg);
+    void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare);
+    void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare);
+    void MULPS(X64Reg regOp, const OpArg& arg);
+    void MULPD(X64Reg regOp, const OpArg& arg);
+    void DIVPS(X64Reg regOp, const OpArg& arg);
+    void DIVPD(X64Reg regOp, const OpArg& arg);
+    void MINPS(X64Reg regOp, const OpArg& arg);
+    void MINPD(X64Reg regOp, const OpArg& arg);
+    void MAXPS(X64Reg regOp, const OpArg& arg);
+    void MAXPD(X64Reg regOp, const OpArg& arg);
+    void SQRTPS(X64Reg regOp, const OpArg& arg);
+    void SQRTPD(X64Reg regOp, const OpArg& arg);
+    void RCPPS(X64Reg regOp, const OpArg& arg);
+    void RSQRTPS(X64Reg regOp, const OpArg& arg);
 
     // SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
-    void ANDPS(X64Reg regOp, OpArg arg);
-    void ANDPD(X64Reg regOp, OpArg arg);
-    void ANDNPS(X64Reg regOp, OpArg arg);
-    void ANDNPD(X64Reg regOp, OpArg arg);
-    void ORPS(X64Reg regOp, OpArg arg);
-    void ORPD(X64Reg regOp, OpArg arg);
-    void XORPS(X64Reg regOp, OpArg arg);
-    void XORPD(X64Reg regOp, OpArg arg);
+    void ANDPS(X64Reg regOp, const OpArg& arg);
+    void ANDPD(X64Reg regOp, const OpArg& arg);
+    void ANDNPS(X64Reg regOp, const OpArg& arg);
+    void ANDNPD(X64Reg regOp, const OpArg& arg);
+    void ORPS(X64Reg regOp, const OpArg& arg);
+    void ORPD(X64Reg regOp, const OpArg& arg);
+    void XORPS(X64Reg regOp, const OpArg& arg);
+    void XORPD(X64Reg regOp, const OpArg& arg);
 
     // SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
-    void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle);
-    void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle);
+    void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
+    void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);
 
     // SSE/SSE2: Useful alternative to shuffle in some cases.
-    void MOVDDUP(X64Reg regOp, OpArg arg);
+    void MOVDDUP(X64Reg regOp, const OpArg& arg);
 
     // TODO: Actually implement
 #if 0
     // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
-    void ADDSUBPS(X64Reg dest, OpArg src);
-    void ADDSUBPD(X64Reg dest, OpArg src);
-    void HADDPD(X64Reg dest, OpArg src);
-    void HSUBPS(X64Reg dest, OpArg src);
-    void HSUBPD(X64Reg dest, OpArg src);
+    void ADDSUBPS(X64Reg dest, const OpArg& src);
+    void ADDSUBPD(X64Reg dest, const OpArg& src);
+    void HADDPD(X64Reg dest, const OpArg& src);
+    void HSUBPS(X64Reg dest, const OpArg& src);
+    void HSUBPD(X64Reg dest, const OpArg& src);
 
     // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
-    void DPPD(X64Reg dest, OpArg src, u8 arg);
+    void DPPD(X64Reg dest, const OpArg& src, u8 arg);
 
     // These are probably useful for VFPU emulation.
-    void INSERTPS(X64Reg dest, OpArg src, u8 arg);
-    void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
+    void INSERTPS(X64Reg dest, const OpArg& src, u8 arg);
+    void EXTRACTPS(const OpArg& dest, X64Reg src, u8 arg);
 #endif
 
     // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
-    void HADDPS(X64Reg dest, OpArg src);
+    void HADDPS(X64Reg dest, const OpArg& src);
 
     // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
-    void DPPS(X64Reg dest, OpArg src, u8 arg);
+    void DPPS(X64Reg dest, const OpArg& src, u8 arg);
 
-    void UNPCKLPS(X64Reg dest, OpArg src);
-    void UNPCKHPS(X64Reg dest, OpArg src);
-    void UNPCKLPD(X64Reg dest, OpArg src);
-    void UNPCKHPD(X64Reg dest, OpArg src);
+    void UNPCKLPS(X64Reg dest, const OpArg& src);
+    void UNPCKHPS(X64Reg dest, const OpArg& src);
+    void UNPCKLPD(X64Reg dest, const OpArg& src);
+    void UNPCKHPD(X64Reg dest, const OpArg& src);
 
     // SSE/SSE2: Compares.
-    void COMISS(X64Reg regOp, OpArg arg);
-    void COMISD(X64Reg regOp, OpArg arg);
-    void UCOMISS(X64Reg regOp, OpArg arg);
-    void UCOMISD(X64Reg regOp, OpArg arg);
+    void COMISS(X64Reg regOp, const OpArg& arg);
+    void COMISD(X64Reg regOp, const OpArg& arg);
+    void UCOMISS(X64Reg regOp, const OpArg& arg);
+    void UCOMISD(X64Reg regOp, const OpArg& arg);
 
     // SSE/SSE2: Moves. Use the right data type for your data, in most cases.
-    void MOVAPS(X64Reg regOp, OpArg arg);
-    void MOVAPD(X64Reg regOp, OpArg arg);
-    void MOVAPS(OpArg arg, X64Reg regOp);
-    void MOVAPD(OpArg arg, X64Reg regOp);
+    void MOVAPS(X64Reg regOp, const OpArg& arg);
+    void MOVAPD(X64Reg regOp, const OpArg& arg);
+    void MOVAPS(const OpArg& arg, X64Reg regOp);
+    void MOVAPD(const OpArg& arg, X64Reg regOp);
 
-    void MOVUPS(X64Reg regOp, OpArg arg);
-    void MOVUPD(X64Reg regOp, OpArg arg);
-    void MOVUPS(OpArg arg, X64Reg regOp);
-    void MOVUPD(OpArg arg, X64Reg regOp);
+    void MOVUPS(X64Reg regOp, const OpArg& arg);
+    void MOVUPD(X64Reg regOp, const OpArg& arg);
+    void MOVUPS(const OpArg& arg, X64Reg regOp);
+    void MOVUPD(const OpArg& arg, X64Reg regOp);
 
-    void MOVDQA(X64Reg regOp, OpArg arg);
-    void MOVDQA(OpArg arg, X64Reg regOp);
-    void MOVDQU(X64Reg regOp, OpArg arg);
-    void MOVDQU(OpArg arg, X64Reg regOp);
+    void MOVDQA(X64Reg regOp, const OpArg& arg);
+    void MOVDQA(const OpArg& arg, X64Reg regOp);
+    void MOVDQU(X64Reg regOp, const OpArg& arg);
+    void MOVDQU(const OpArg& arg, X64Reg regOp);
 
-    void MOVSS(X64Reg regOp, OpArg arg);
-    void MOVSD(X64Reg regOp, OpArg arg);
-    void MOVSS(OpArg arg, X64Reg regOp);
-    void MOVSD(OpArg arg, X64Reg regOp);
+    void MOVSS(X64Reg regOp, const OpArg& arg);
+    void MOVSD(X64Reg regOp, const OpArg& arg);
+    void MOVSS(const OpArg& arg, X64Reg regOp);
+    void MOVSD(const OpArg& arg, X64Reg regOp);
 
-    void MOVLPS(X64Reg regOp, OpArg arg);
-    void MOVLPD(X64Reg regOp, OpArg arg);
-    void MOVLPS(OpArg arg, X64Reg regOp);
-    void MOVLPD(OpArg arg, X64Reg regOp);
+    void MOVLPS(X64Reg regOp, const OpArg& arg);
+    void MOVLPD(X64Reg regOp, const OpArg& arg);
+    void MOVLPS(const OpArg& arg, X64Reg regOp);
+    void MOVLPD(const OpArg& arg, X64Reg regOp);
 
-    void MOVHPS(X64Reg regOp, OpArg arg);
-    void MOVHPD(X64Reg regOp, OpArg arg);
-    void MOVHPS(OpArg arg, X64Reg regOp);
-    void MOVHPD(OpArg arg, X64Reg regOp);
+    void MOVHPS(X64Reg regOp, const OpArg& arg);
+    void MOVHPD(X64Reg regOp, const OpArg& arg);
+    void MOVHPS(const OpArg& arg, X64Reg regOp);
+    void MOVHPD(const OpArg& arg, X64Reg regOp);
 
     void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
     void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
 
-    void MOVD_xmm(X64Reg dest, const OpArg &arg);
+    void MOVD_xmm(X64Reg dest, const OpArg& arg);
     void MOVQ_xmm(X64Reg dest, OpArg arg);
-    void MOVD_xmm(const OpArg &arg, X64Reg src);
+    void MOVD_xmm(const OpArg& arg, X64Reg src);
     void MOVQ_xmm(OpArg arg, X64Reg src);
 
     // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
-    void MOVMSKPS(X64Reg dest, OpArg arg);
-    void MOVMSKPD(X64Reg dest, OpArg arg);
+    void MOVMSKPS(X64Reg dest, const OpArg& arg);
+    void MOVMSKPD(X64Reg dest, const OpArg& arg);
 
     // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
     void MASKMOVDQU(X64Reg dest, X64Reg src);
-    void LDDQU(X64Reg dest, OpArg src);
+    void LDDQU(X64Reg dest, const OpArg& src);
 
     // SSE/SSE2: Data type conversions.
-    void CVTPS2PD(X64Reg dest, OpArg src);
-    void CVTPD2PS(X64Reg dest, OpArg src);
-    void CVTSS2SD(X64Reg dest, OpArg src);
-    void CVTSI2SS(X64Reg dest, OpArg src);
-    void CVTSD2SS(X64Reg dest, OpArg src);
-    void CVTSI2SD(X64Reg dest, OpArg src);
-    void CVTDQ2PD(X64Reg regOp, OpArg arg);
-    void CVTPD2DQ(X64Reg regOp, OpArg arg);
-    void CVTDQ2PS(X64Reg regOp, OpArg arg);
-    void CVTPS2DQ(X64Reg regOp, OpArg arg);
+    void CVTPS2PD(X64Reg dest, const OpArg& src);
+    void CVTPD2PS(X64Reg dest, const OpArg& src);
+    void CVTSS2SD(X64Reg dest, const OpArg& src);
+    void CVTSI2SS(X64Reg dest, const OpArg& src);
+    void CVTSD2SS(X64Reg dest, const OpArg& src);
+    void CVTSI2SD(X64Reg dest, const OpArg& src);
+    void CVTDQ2PD(X64Reg regOp, const OpArg& arg);
+    void CVTPD2DQ(X64Reg regOp, const OpArg& arg);
+    void CVTDQ2PS(X64Reg regOp, const OpArg& arg);
+    void CVTPS2DQ(X64Reg regOp, const OpArg& arg);
 
-    void CVTTPS2DQ(X64Reg regOp, OpArg arg);
-    void CVTTPD2DQ(X64Reg regOp, OpArg arg);
+    void CVTTPS2DQ(X64Reg regOp, const OpArg& arg);
+    void CVTTPD2DQ(X64Reg regOp, const OpArg& arg);
 
     // Destinations are X64 regs (rax, rbx, ...) for these instructions.
-    void CVTSS2SI(X64Reg xregdest, OpArg src);
-    void CVTSD2SI(X64Reg xregdest, OpArg src);
-    void CVTTSS2SI(X64Reg xregdest, OpArg arg);
-    void CVTTSD2SI(X64Reg xregdest, OpArg arg);
+    void CVTSS2SI(X64Reg xregdest, const OpArg& src);
+    void CVTSD2SI(X64Reg xregdest, const OpArg& src);
+    void CVTTSS2SI(X64Reg xregdest, const OpArg& arg);
+    void CVTTSD2SI(X64Reg xregdest, const OpArg& arg);
 
     // SSE2: Packed integer instructions
-    void PACKSSDW(X64Reg dest, OpArg arg);
-    void PACKSSWB(X64Reg dest, OpArg arg);
-    void PACKUSDW(X64Reg dest, OpArg arg);
-    void PACKUSWB(X64Reg dest, OpArg arg);
+    void PACKSSDW(X64Reg dest, const OpArg& arg);
+    void PACKSSWB(X64Reg dest, const OpArg& arg);
+    void PACKUSDW(X64Reg dest, const OpArg& arg);
+    void PACKUSWB(X64Reg dest, const OpArg& arg);
 
     void PUNPCKLBW(X64Reg dest, const OpArg &arg);
     void PUNPCKLWD(X64Reg dest, const OpArg &arg);
     void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
     void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
 
-    void PTEST(X64Reg dest, OpArg arg);
-    void PAND(X64Reg dest, OpArg arg);
-    void PANDN(X64Reg dest, OpArg arg);
-    void PXOR(X64Reg dest, OpArg arg);
-    void POR(X64Reg dest, OpArg arg);
+    void PTEST(X64Reg dest, const OpArg& arg);
+    void PAND(X64Reg dest, const OpArg& arg);
+    void PANDN(X64Reg dest, const OpArg& arg);
+    void PXOR(X64Reg dest, const OpArg& arg);
+    void POR(X64Reg dest, const OpArg& arg);
 
-    void PADDB(X64Reg dest, OpArg arg);
-    void PADDW(X64Reg dest, OpArg arg);
-    void PADDD(X64Reg dest, OpArg arg);
-    void PADDQ(X64Reg dest, OpArg arg);
+    void PADDB(X64Reg dest, const OpArg& arg);
+    void PADDW(X64Reg dest, const OpArg& arg);
+    void PADDD(X64Reg dest, const OpArg& arg);
+    void PADDQ(X64Reg dest, const OpArg& arg);
 
-    void PADDSB(X64Reg dest, OpArg arg);
-    void PADDSW(X64Reg dest, OpArg arg);
-    void PADDUSB(X64Reg dest, OpArg arg);
-    void PADDUSW(X64Reg dest, OpArg arg);
+    void PADDSB(X64Reg dest, const OpArg& arg);
+    void PADDSW(X64Reg dest, const OpArg& arg);
+    void PADDUSB(X64Reg dest, const OpArg& arg);
+    void PADDUSW(X64Reg dest, const OpArg& arg);
 
-    void PSUBB(X64Reg dest, OpArg arg);
-    void PSUBW(X64Reg dest, OpArg arg);
-    void PSUBD(X64Reg dest, OpArg arg);
-    void PSUBQ(X64Reg dest, OpArg arg);
+    void PSUBB(X64Reg dest, const OpArg& arg);
+    void PSUBW(X64Reg dest, const OpArg& arg);
+    void PSUBD(X64Reg dest, const OpArg& arg);
+    void PSUBQ(X64Reg dest, const OpArg& arg);
 
-    void PSUBSB(X64Reg dest, OpArg arg);
-    void PSUBSW(X64Reg dest, OpArg arg);
-    void PSUBUSB(X64Reg dest, OpArg arg);
-    void PSUBUSW(X64Reg dest, OpArg arg);
+    void PSUBSB(X64Reg dest, const OpArg& arg);
+    void PSUBSW(X64Reg dest, const OpArg& arg);
+    void PSUBUSB(X64Reg dest, const OpArg& arg);
+    void PSUBUSW(X64Reg dest, const OpArg& arg);
 
-    void PAVGB(X64Reg dest, OpArg arg);
-    void PAVGW(X64Reg dest, OpArg arg);
+    void PAVGB(X64Reg dest, const OpArg& arg);
+    void PAVGW(X64Reg dest, const OpArg& arg);
 
-    void PCMPEQB(X64Reg dest, OpArg arg);
-    void PCMPEQW(X64Reg dest, OpArg arg);
-    void PCMPEQD(X64Reg dest, OpArg arg);
+    void PCMPEQB(X64Reg dest, const OpArg& arg);
+    void PCMPEQW(X64Reg dest, const OpArg& arg);
+    void PCMPEQD(X64Reg dest, const OpArg& arg);
 
-    void PCMPGTB(X64Reg dest, OpArg arg);
-    void PCMPGTW(X64Reg dest, OpArg arg);
-    void PCMPGTD(X64Reg dest, OpArg arg);
+    void PCMPGTB(X64Reg dest, const OpArg& arg);
+    void PCMPGTW(X64Reg dest, const OpArg& arg);
+    void PCMPGTD(X64Reg dest, const OpArg& arg);
 
-    void PEXTRW(X64Reg dest, OpArg arg, u8 subreg);
-    void PINSRW(X64Reg dest, OpArg arg, u8 subreg);
+    void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg);
+    void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg);
 
-    void PMADDWD(X64Reg dest, OpArg arg);
-    void PSADBW(X64Reg dest, OpArg arg);
+    void PMADDWD(X64Reg dest, const OpArg& arg);
+    void PSADBW(X64Reg dest, const OpArg& arg);
 
-    void PMAXSW(X64Reg dest, OpArg arg);
-    void PMAXUB(X64Reg dest, OpArg arg);
-    void PMINSW(X64Reg dest, OpArg arg);
-    void PMINUB(X64Reg dest, OpArg arg);
+    void PMAXSW(X64Reg dest, const OpArg& arg);
+    void PMAXUB(X64Reg dest, const OpArg& arg);
+    void PMINSW(X64Reg dest, const OpArg& arg);
+    void PMINUB(X64Reg dest, const OpArg& arg);
     // SSE4: More MAX/MIN instructions.
-    void PMINSB(X64Reg dest, OpArg arg);
-    void PMINSD(X64Reg dest, OpArg arg);
-    void PMINUW(X64Reg dest, OpArg arg);
-    void PMINUD(X64Reg dest, OpArg arg);
-    void PMAXSB(X64Reg dest, OpArg arg);
-    void PMAXSD(X64Reg dest, OpArg arg);
-    void PMAXUW(X64Reg dest, OpArg arg);
-    void PMAXUD(X64Reg dest, OpArg arg);
+    void PMINSB(X64Reg dest, const OpArg& arg);
+    void PMINSD(X64Reg dest, const OpArg& arg);
+    void PMINUW(X64Reg dest, const OpArg& arg);
+    void PMINUD(X64Reg dest, const OpArg& arg);
+    void PMAXSB(X64Reg dest, const OpArg& arg);
+    void PMAXSD(X64Reg dest, const OpArg& arg);
+    void PMAXUW(X64Reg dest, const OpArg& arg);
+    void PMAXUD(X64Reg dest, const OpArg& arg);
 
-    void PMOVMSKB(X64Reg dest, OpArg arg);
-    void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle);
-    void PSHUFB(X64Reg dest, OpArg arg);
+    void PMOVMSKB(X64Reg dest, const OpArg& arg);
+    void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle);
+    void PSHUFB(X64Reg dest, const OpArg& arg);
 
-    void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle);
-    void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle);
+    void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle);
+    void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle);
 
     void PSRLW(X64Reg reg, int shift);
     void PSRLD(X64Reg reg, int shift);
     void PSRLQ(X64Reg reg, int shift);
-    void PSRLQ(X64Reg reg, OpArg arg);
+    void PSRLQ(X64Reg reg, const OpArg& arg);
     void PSRLDQ(X64Reg reg, int shift);
 
     void PSLLW(X64Reg reg, int shift);
@@ -833,198 +833,198 @@ public:
     void PSRAD(X64Reg reg, int shift);
 
     // SSE4: data type conversions
-    void PMOVSXBW(X64Reg dest, OpArg arg);
-    void PMOVSXBD(X64Reg dest, OpArg arg);
-    void PMOVSXBQ(X64Reg dest, OpArg arg);
-    void PMOVSXWD(X64Reg dest, OpArg arg);
-    void PMOVSXWQ(X64Reg dest, OpArg arg);
-    void PMOVSXDQ(X64Reg dest, OpArg arg);
-    void PMOVZXBW(X64Reg dest, OpArg arg);
-    void PMOVZXBD(X64Reg dest, OpArg arg);
-    void PMOVZXBQ(X64Reg dest, OpArg arg);
-    void PMOVZXWD(X64Reg dest, OpArg arg);
-    void PMOVZXWQ(X64Reg dest, OpArg arg);
-    void PMOVZXDQ(X64Reg dest, OpArg arg);
+    void PMOVSXBW(X64Reg dest, const OpArg& arg);
+    void PMOVSXBD(X64Reg dest, const OpArg& arg);
+    void PMOVSXBQ(X64Reg dest, const OpArg& arg);
+    void PMOVSXWD(X64Reg dest, const OpArg& arg);
+    void PMOVSXWQ(X64Reg dest, const OpArg& arg);
+    void PMOVSXDQ(X64Reg dest, const OpArg& arg);
+    void PMOVZXBW(X64Reg dest, const OpArg& arg);
+    void PMOVZXBD(X64Reg dest, const OpArg& arg);
+    void PMOVZXBQ(X64Reg dest, const OpArg& arg);
+    void PMOVZXWD(X64Reg dest, const OpArg& arg);
+    void PMOVZXWQ(X64Reg dest, const OpArg& arg);
+    void PMOVZXDQ(X64Reg dest, const OpArg& arg);
 
     // SSE4: variable blend instructions (xmm0 implicit argument)
-    void PBLENDVB(X64Reg dest, OpArg arg);
-    void BLENDVPS(X64Reg dest, OpArg arg);
-    void BLENDVPD(X64Reg dest, OpArg arg);
+    void PBLENDVB(X64Reg dest, const OpArg& arg);
+    void BLENDVPS(X64Reg dest, const OpArg& arg);
+    void BLENDVPD(X64Reg dest, const OpArg& arg);
     void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
     void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);
 
     // SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
-    void ROUNDSS(X64Reg dest, OpArg arg, u8 mode);
-    void ROUNDSD(X64Reg dest, OpArg arg, u8 mode);
-    void ROUNDPS(X64Reg dest, OpArg arg, u8 mode);
-    void ROUNDPD(X64Reg dest, OpArg arg, u8 mode);
+    void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode);
+    void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode);
+    void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
+    void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);
 
-    void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
-    void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
-    void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
-    void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
+    void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
+    void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
+    void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
 
-    void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
-    void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
-    void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
-    void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
+    void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
+    void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
+    void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
 
-    void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
-    void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
-    void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
-    void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
+    void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
+    void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
+    void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
 
-    void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
-    void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
-    void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
-    void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
+    void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
+    void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
+    void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
+    void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
 
     // AVX
-    void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle);
-    void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+    void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle);
+    void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
 
-    void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+    void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
 
-    void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+    void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
 
     // FMA3
-    void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+    void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
 
     // VEX GPR instructions
-    void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
-    void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
-    void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
-    void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate);
-    void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
-    void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
-    void BLSR(int bits, X64Reg regOp, OpArg arg);
-    void BLSMSK(int bits, X64Reg regOp, OpArg arg);
-    void BLSI(int bits, X64Reg regOp, OpArg arg);
-    void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
-    void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
+    void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+    void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+    void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+    void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate);
+    void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+    void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+    void BLSR(int bits, X64Reg regOp, const OpArg& arg);
+    void BLSMSK(int bits, X64Reg regOp, const OpArg& arg);
+    void BLSI(int bits, X64Reg regOp, const OpArg& arg);
+    void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+    void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
 
     void RDTSC();
 
     // Utility functions
     // The difference between this and CALL is that this aligns the stack
     // where appropriate.
-    void ABI_CallFunction(const void *func);
+    void ABI_CallFunction(const void* func);
     template <typename T>
     void ABI_CallFunction(T (*func)()) {
-        ABI_CallFunction((const void *)func);
+        ABI_CallFunction((const void*)func);
     }
 
-    void ABI_CallFunction(const u8 *func) {
-        ABI_CallFunction((const void *)func);
+    void ABI_CallFunction(const u8* func) {
+        ABI_CallFunction((const void*)func);
     }
-    void ABI_CallFunctionC16(const void *func, u16 param1);
-    void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2);
+    void ABI_CallFunctionC16(const void* func, u16 param1);
+    void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);
 
 
     // These only support u32 parameters, but that's enough for a lot of uses.
     // These will destroy the 1 or 2 first "parameter regs".
-    void ABI_CallFunctionC(const void *func, u32 param1);
-    void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2);
-    void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3);
-    void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3);
-    void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4);
-    void ABI_CallFunctionP(const void *func, void *param1);
-    void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2);
-    void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3);
-    void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3);
-    void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2);
-    void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3);
-    void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1);
-    void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2);
+    void ABI_CallFunctionC(const void* func, u32 param1);
+    void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2);
+    void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3);
+    void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3);
+    void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4);
+    void ABI_CallFunctionP(const void* func, void* param1);
+    void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2);
+    void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3);
+    void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3);
+    void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2);
+    void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3);
+    void ABI_CallFunctionA(const void* func, const OpArg& arg1);
+    void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2);
 
     // Pass a register as a parameter.
-    void ABI_CallFunctionR(const void *func, X64Reg reg1);
-    void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2);
+    void ABI_CallFunctionR(const void* func, X64Reg reg1);
+    void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2);
 
     template <typename Tr, typename T1>
     void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
-        ABI_CallFunctionC((const void *)func, param1);
+        ABI_CallFunctionC((const void*)func, param1);
     }
 
     // A function that doesn't have any control over what it will do to regs,

From 8738963bce37e0261c928548d4711c13c15735c9 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Thu, 20 Aug 2015 04:26:03 -0400
Subject: [PATCH 3/6] emitter: Remove unimplemented JMP prototype

---
 src/common/x64/emitter.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index 84a92f9895..aa1ead4cab 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -425,7 +425,6 @@ public:
     FixupBranch J(bool force5bytes = false);
 
     void JMP(const u8* addr, bool force5Bytes = false);
-    void JMP(OpArg arg);
     void JMPptr(const OpArg& arg);
     void JMPself(); //infinite loop!
 #ifdef CALL

From e88dc7fc6d329697908a54d14d0e53f40d038655 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Thu, 20 Aug 2015 04:28:39 -0400
Subject: [PATCH 4/6] emitter: Remove unused code

---
 src/common/x64/emitter.cpp | 24 ------------------------
 src/common/x64/emitter.h   | 20 --------------------
 2 files changed, 44 deletions(-)

diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 99c682a115..1229d0654e 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -512,30 +512,6 @@ void XEmitter::SetJumpTarget(const FixupBranch& branch)
     }
 }
 
-// INC/DEC considered harmful on newer CPUs due to partial flag set.
-// Use ADD, SUB instead.
-
-/*
-void XEmitter::INC(int bits, OpArg arg)
-{
-    if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument");
-    arg.operandReg = 0;
-    if (bits == 16) {Write8(0x66);}
-    arg.WriteRex(this, bits, bits);
-    Write8(bits == 8 ? 0xFE : 0xFF);
-    arg.WriteRest(this);
-}
-void XEmitter::DEC(int bits, OpArg arg)
-{
-    if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument");
-    arg.operandReg = 1;
-    if (bits == 16) {Write8(0x66);}
-    arg.WriteRex(this, bits, bits);
-    Write8(bits == 8 ? 0xFE : 0xFF);
-    arg.WriteRest(this);
-}
-*/
-
 //Single byte opcodes
 //There is no PUSHAD/POPAD in 64-bit mode.
 void XEmitter::INT3() {Write8(0xCC);}
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index aa1ead4cab..86f4a1fff0 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -328,8 +328,6 @@ enum SSECompare
     ORD,
 };
 
-typedef const u8* JumpTarget;
-
 class XEmitter
 {
     friend struct OpArg;  // for Write8 etc
@@ -434,7 +432,6 @@ public:
     void CALLptr(OpArg arg);
 
     FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
-    //void J_CC(CCFlags conditionCode, JumpTarget target);
     void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
 
     void SetJumpTarget(const FixupBranch& branch);
@@ -640,23 +637,6 @@ public:
     // SSE/SSE2: Useful alternative to shuffle in some cases.
     void MOVDDUP(X64Reg regOp, const OpArg& arg);
 
-    // TODO: Actually implement
-#if 0
-    // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
-    void ADDSUBPS(X64Reg dest, const OpArg& src);
-    void ADDSUBPD(X64Reg dest, const OpArg& src);
-    void HADDPD(X64Reg dest, const OpArg& src);
-    void HSUBPS(X64Reg dest, const OpArg& src);
-    void HSUBPD(X64Reg dest, const OpArg& src);
-
-    // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
-    void DPPD(X64Reg dest, const OpArg& src, u8 arg);
-
-    // These are probably useful for VFPU emulation.
-    void INSERTPS(X64Reg dest, const OpArg& src, u8 arg);
-    void EXTRACTPS(const OpArg& dest, X64Reg src, u8 arg);
-#endif
-
     // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
     void HADDPS(X64Reg dest, const OpArg& src);
 

From a796149c42f2ece4339ea08c106f02bc507cb609 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Thu, 20 Aug 2015 04:34:11 -0400
Subject: [PATCH 5/6] emitter: Remove unnecessary else keywords

---
 src/common/x64/emitter.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 1229d0654e..61fe197bd4 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -1376,22 +1376,22 @@ static int GetVEXmmmmm(u16 op)
     // Currently, only 0x38 and 0x3A are used as secondary escape byte.
     if ((op >> 8) == 0x3A)
         return 3;
-    else if ((op >> 8) == 0x38)
+    if ((op >> 8) == 0x38)
         return 2;
-    else
-        return 1;
+
+    return 1;
 }
 
 static int GetVEXpp(u8 opPrefix)
 {
     if (opPrefix == 0x66)
         return 1;
-    else if (opPrefix == 0xF3)
+    if (opPrefix == 0xF3)
         return 2;
-    else if (opPrefix == 0xF2)
+    if (opPrefix == 0xF2)
         return 3;
-    else
-        return 0;
+
+    return 0;
 }
 
 void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)

From e85c5dbb54e6b2f6d14eaca42fb4b9c630614ff0 Mon Sep 17 00:00:00 2001
From: Lioncash <mathew1800@gmail.com>
Date: Thu, 20 Aug 2015 04:36:25 -0400
Subject: [PATCH 6/6] emitter: Remove unnecessary defines

---
 src/common/x64/emitter.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 61fe197bd4..cf31f8d69f 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -15,6 +15,7 @@
 // Official SVN repository and contact information can be found at
 // http://code.google.com/p/dolphin-emu/
 
+#include <cinttypes>
 #include <cstring>
 
 #include "common/assert.h"
@@ -25,11 +26,6 @@
 #include "cpu_detect.h"
 #include "emitter.h"
 
-#define PRIx64 "llx"
-
-// Minimize the diff against Dolphin
-#define DYNA_REC JIT
-
 namespace Gen
 {