am 7fe10ded: am cadb10c9: Merge "Pixelflinger: Support for handling 64-bit addresses in GGL Assembler"

* commit '7fe10dedfdefe952602d3507db3109eaa94d4068':
  Pixelflinger: Support for handling 64-bit addresses in GGL Assembler
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
index 073633c..5041999 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.cpp
@@ -61,6 +61,29 @@
             ((W&1)<<21) | (((offset&0xF0)<<4)|(offset&0xF));
 }
 
+// The following four functions are required for address manipulation
+// These are virtual functions, which can be overridden by architectures
+// that need special handling of address values (e.g. 64-bit arch)
 
+void ARMAssemblerInterface::ADDR_LDR(int cc, int Rd,
+     int Rn, uint32_t offset)
+{
+    LDR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerInterface::ADDR_STR(int cc, int Rd,
+     int Rn, uint32_t offset)
+{
+    STR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerInterface::ADDR_ADD(int cc, int s,
+     int Rd, int Rn, uint32_t Op2)
+{
+    dataProcessing(opADD, cc, s, Rd, Rn, Op2);
+}
+void ARMAssemblerInterface::ADDR_SUB(int cc, int s,
+     int Rd, int Rn, uint32_t Op2)
+{
+    dataProcessing(opSUB, cc, s, Rd, Rn, Op2);
+}
 }; // namespace android
 
diff --git a/libpixelflinger/codeflinger/ARMAssemblerInterface.h b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
index 9991980..e5a9a26 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerInterface.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerInterface.h
@@ -331,6 +331,16 @@
     inline void
     SMLAWT(int cc, int Rd, int Rm, int Rs, int Rn) {
         SMLAW(cc, yT, Rd, Rm, Rs, Rn);    }
+
+    // Address loading/storing/manipulation
+    virtual void ADDR_LDR(int cc, int Rd,
+                int Rn, uint32_t offset = __immed12_pre(0));
+    virtual void ADDR_STR (int cc, int Rd,
+                int Rn, uint32_t offset = __immed12_pre(0));
+    virtual void ADDR_ADD(int cc, int s, int Rd,
+                int Rn, uint32_t Op2);
+    virtual void ADDR_SUB(int cc, int s, int Rd,
+                int Rn, uint32_t Op2);
 };
 
 }; // namespace android
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
index 1c7bc76..816de48 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.cpp
@@ -294,5 +294,18 @@
     mTarget->UBFX(cc, Rd, Rn, lsb, width);
 }
 
+void ARMAssemblerProxy::ADDR_LDR(int cc, int Rd, int Rn, uint32_t offset) {
+     mTarget->ADDR_LDR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::ADDR_STR(int cc, int Rd, int Rn, uint32_t offset) {
+     mTarget->ADDR_STR(cc, Rd, Rn, offset);
+}
+void ARMAssemblerProxy::ADDR_ADD(int cc, int s, int Rd, int Rn, uint32_t Op2){
+     mTarget->ADDR_ADD(cc, s, Rd, Rn, Op2);
+}
+void ARMAssemblerProxy::ADDR_SUB(int cc, int s, int Rd, int Rn, uint32_t Op2){
+     mTarget->ADDR_SUB(cc, s, Rd, Rn, Op2);
+}
+
 }; // namespace android
 
diff --git a/libpixelflinger/codeflinger/ARMAssemblerProxy.h b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
index 70cb464..b852794 100644
--- a/libpixelflinger/codeflinger/ARMAssemblerProxy.h
+++ b/libpixelflinger/codeflinger/ARMAssemblerProxy.h
@@ -146,6 +146,15 @@
     virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
     virtual void UBFX(int cc, int Rd, int Rn, int lsb, int width);
 
+    virtual void ADDR_LDR(int cc, int Rd,
+                int Rn, uint32_t offset = __immed12_pre(0));
+    virtual void ADDR_STR (int cc, int Rd,
+                int Rn, uint32_t offset = __immed12_pre(0));
+    virtual void ADDR_ADD(int cc, int s, int Rd,
+                int Rn, uint32_t Op2);
+    virtual void ADDR_SUB(int cc, int s, int Rd,
+                int Rn, uint32_t Op2);
+
 private:
     ARMAssemblerInterface*  mTarget;
 };
diff --git a/libpixelflinger/codeflinger/GGLAssembler.cpp b/libpixelflinger/codeflinger/GGLAssembler.cpp
index 0cb042e..725495f 100644
--- a/libpixelflinger/codeflinger/GGLAssembler.cpp
+++ b/libpixelflinger/codeflinger/GGLAssembler.cpp
@@ -263,7 +263,7 @@
                 const int mask = GGL_DITHER_SIZE-1;
                 parts.dither = reg_t(regs.obtain());
                 AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
-                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
+                ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
                 LDRB(AL, parts.dither.reg, parts.dither.reg,
                         immed12_pre(GGL_OFFSETOF(ditherMatrix)));
             }
@@ -336,7 +336,7 @@
         build_iterate_z(parts);
         build_iterate_f(parts);
         if (!mAllMasked) {
-            ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
+            ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
         }
         SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
         B(PL, "fragment_loop");
@@ -392,7 +392,7 @@
         int Rs = scratches.obtain();
         parts.cbPtr.setTo(obtainReg(), cb_bits);
         CONTEXT_LOAD(Rs, state.buffers.color.stride);
-        CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
+        CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
         SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
         base_offset(parts.cbPtr, parts.cbPtr, Rs);
         scratches.recycle(Rs);
@@ -428,11 +428,11 @@
         int Rs = dzdx;
         int zbase = scratches.obtain();
         CONTEXT_LOAD(Rs, state.buffers.depth.stride);
-        CONTEXT_LOAD(zbase, state.buffers.depth.data);
+        CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
         SMLABB(AL, Rs, Ry, Rs, Rx);
         ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
-        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
-        CONTEXT_STORE(zbase, generated_vars.zbase);
+        ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
+        CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
     }
 
     // init texture coordinates
@@ -445,8 +445,8 @@
     // init coverage factor application (anti-aliasing)
     if (mAA) {
         parts.covPtr.setTo(obtainReg(), 16);
-        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
-        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
+        CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
+        ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
     }
 }
 
@@ -765,8 +765,8 @@
         int depth = scratches.obtain();
         int z = parts.z.reg;
         
-        CONTEXT_LOAD(zbase, generated_vars.zbase);  // stall
-        SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
+        CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase);  // stall
+        ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
             // above does zbase = zbase + ((count >> 16) << 1)
 
         if (mask & Z_TEST) {
@@ -990,22 +990,22 @@
 {
     switch (b.size) {
     case 32:
-        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
+        ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
         break;
     case 24:
         if (d.reg == b.reg) {
-            ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
-            ADD(AL, 0, d.reg, d.reg, o.reg);
+            ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
+            ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
         } else {
-            ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
-            ADD(AL, 0, d.reg, d.reg, b.reg);
+            ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
+            ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
         }
         break;
     case 16:
-        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
+        ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
         break;
     case 8:
-        ADD(AL, 0, d.reg, b.reg, o.reg);
+        ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
         break;
     }
 }
diff --git a/libpixelflinger/codeflinger/GGLAssembler.h b/libpixelflinger/codeflinger/GGLAssembler.h
index d993684..9db20df 100644
--- a/libpixelflinger/codeflinger/GGLAssembler.h
+++ b/libpixelflinger/codeflinger/GGLAssembler.h
@@ -31,6 +31,12 @@
 
 // ----------------------------------------------------------------------------
 
+#define CONTEXT_ADDR_LOAD(REG, FIELD) \
+    ADDR_LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
+
+#define CONTEXT_ADDR_STORE(REG, FIELD) \
+    ADDR_STR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
+
 #define CONTEXT_LOAD(REG, FIELD) \
     LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
 
diff --git a/libpixelflinger/codeflinger/texturing.cpp b/libpixelflinger/codeflinger/texturing.cpp
index 9e3d217..b2cfbb3 100644
--- a/libpixelflinger/codeflinger/texturing.cpp
+++ b/libpixelflinger/codeflinger/texturing.cpp
@@ -356,7 +356,7 @@
             // merge base & offset
             CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
             SMLABB(AL, Rx, Ry, txPtr.reg, Rx);               // x+y*stride
-            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
+            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
             base_offset(txPtr, txPtr, Rx);
         } else {
             Scratch scratches(registerFile());
@@ -629,7 +629,7 @@
                 return;
 
             CONTEXT_LOAD(stride,    generated_vars.texture[i].stride);
-            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);
+            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
             SMLABB(AL, u, v, stride, u);    // u+v*stride 
             base_offset(txPtr, txPtr, u);