auto import from //depot/cupcake/@135843
diff --git a/libpixelflinger/fixed.cpp b/libpixelflinger/fixed.cpp
new file mode 100644
index 0000000..5b92062
--- /dev/null
+++ b/libpixelflinger/fixed.cpp
@@ -0,0 +1,339 @@
+/* libs/pixelflinger/fixed.cpp
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License"); 
+** you may not use this file except in compliance with the License. 
+** You may obtain a copy of the License at 
+**
+**     http://www.apache.org/licenses/LICENSE-2.0 
+**
+** Unless required by applicable law or agreed to in writing, software 
+** distributed under the License is distributed on an "AS IS" BASIS, 
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+** See the License for the specific language governing permissions and 
+** limitations under the License.
+*/
+
+#include <stdio.h>
+
+#include <private/pixelflinger/ggl_context.h>
+#include <private/pixelflinger/ggl_fixed.h>
+
+
+// ------------------------------------------------------------------------
+
+int32_t gglRecipQNormalized(int32_t x, int* exponent)
+{
+    const int32_t s = x>>31;
+    uint32_t a = s ? -x : x;
+
+    // the result will overflow, so just set it to the biggest/inf value
+    if (ggl_unlikely(a <= 2LU)) {
+        *exponent = 0;
+        return s ? FIXED_MIN : FIXED_MAX;
+    }
+
+    // Newton-Raphson iteration:
+    // x = r*(2 - a*r)
+
+    const int32_t lz = gglClz(a);
+    a <<= lz;  // 0.32
+    uint32_t r = a;
+    // note: if a == 0x80000000, this means x was a power-of-2, in this
+    // case we don't need to compute anything. We get the reciprocal for
+    // (almost) free.
+    if (a != 0x80000000) {
+        r = (0x2E800 << (30-16)) - (r>>(2-1)); // 2.30, r = 2.90625 - 2*a
+        // 0.32 + 2.30 = 2.62 -> 2.30
+        // 2.30 + 2.30 = 4.60 -> 2.30
+        r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
+        r = (((2LU<<30) - uint32_t((uint64_t(a)*r) >> 32)) * uint64_t(r)) >> 30;
+    }
+
+    // shift right 1-bit to make room for the sign bit
+    *exponent = 30-lz-1;
+    r >>= 1;
+    return s ? -r : r;
+}
+
+int32_t gglRecipQ(GGLfixed x, int q)
+{
+    int shift;
+    x = gglRecipQNormalized(x, &shift);
+    shift += 16-q;
+    x += 1L << (shift-1);   // rounding
+    x >>= shift;
+    return x;
+}    
+
+// ------------------------------------------------------------------------
+
+GGLfixed gglFastDivx(GGLfixed n, GGLfixed d)
+{
+    if ((d>>24) && ((d>>24)+1)) {
+        n >>= 8;
+        d >>= 8;
+    }
+    return gglMulx(n, gglRecip(d));
+}
+
+// ------------------------------------------------------------------------
+
+static const GGLfixed ggl_sqrt_reciproc_approx_tab[8] = {
+    // 1/sqrt(x) with x = 1-N/16, N=[8...1]
+    0x16A09, 0x15555, 0x143D1, 0x134BF, 0x1279A, 0x11C01, 0x111AC, 0x10865
+};
+
+GGLfixed gglSqrtRecipx(GGLfixed x)
+{
+    if (x == 0)         return FIXED_MAX;
+    if (x == FIXED_ONE) return x;
+    const GGLfixed a = x;
+    const int32_t lz = gglClz(x);
+    x = ggl_sqrt_reciproc_approx_tab[(a>>(28-lz))&0x7];
+    const int32_t exp = lz - 16;
+    if (exp <= 0)   x >>= -exp>>1;
+    else            x <<= (exp>>1) + (exp & 1);        
+    if (exp & 1) {
+        x = gglMulx(x, ggl_sqrt_reciproc_approx_tab[0])>>1;
+    }
+    // 2 Newton-Raphson iterations: x = x/2*(3-(a*x)*x)
+    x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
+    x = gglMulx((x>>1),(0x30000 - gglMulx(gglMulx(a,x),x)));
+    return x;
+}
+
+GGLfixed gglSqrtx(GGLfixed a)
+{
+    // Compute a full precision square-root (24 bits accuracy)
+    GGLfixed r = 0;
+    GGLfixed bit = 0x800000;
+    int32_t bshift = 15;
+    do {
+        GGLfixed temp = bit + (r<<1);
+        if (bshift >= 8)    temp <<= (bshift-8);
+        else                temp >>= (8-bshift);
+        if (a >= temp) {
+            r += bit;
+            a -= temp;
+        }
+        bshift--;
+    } while (bit>>=1);
+    return r;
+}
+
+// ------------------------------------------------------------------------
+
+static const GGLfixed ggl_log_approx_tab[] = {
+    // -ln(x)/ln(2) with x = N/16, N=[8...16]
+    0xFFFF, 0xd47f, 0xad96, 0x8a62, 0x6a3f, 0x4caf, 0x3151, 0x17d6, 0x0000
+};
+
+static const GGLfixed ggl_alog_approx_tab[] = { // domain [0 - 1.0]
+	0xffff, 0xeac0, 0xd744, 0xc567, 0xb504, 0xa5fe, 0x9837, 0x8b95, 0x8000
+};
+
+GGLfixed gglPowx(GGLfixed x, GGLfixed y)
+{
+    // prerequisite: 0 <= x <= 1, and y >=0
+
+    // pow(x,y) = 2^(y*log2(x))
+    // =  2^(y*log2(x*(2^exp)*(2^-exp))))
+    // =  2^(y*(log2(X)-exp))
+    // =  2^(log2(X)*y - y*exp)
+    // =  2^( - (-log2(X)*y + y*exp) )
+    
+    int32_t exp = gglClz(x) - 16;
+    GGLfixed f = x << exp;
+    x = (f & 0x0FFF)<<4;
+    f = (f >> 12) & 0x7;
+    GGLfixed p = gglMulAddx(
+            ggl_log_approx_tab[f+1] - ggl_log_approx_tab[f], x,
+            ggl_log_approx_tab[f]);
+    p = gglMulAddx(p, y, y*exp);
+    exp = gglFixedToIntFloor(p);
+    if (exp < 31) {
+        p = gglFracx(p);
+        x = (p & 0x1FFF)<<3;
+        p >>= 13;    
+        p = gglMulAddx(
+                ggl_alog_approx_tab[p+1] - ggl_alog_approx_tab[p], x,
+                ggl_alog_approx_tab[p]);
+        p >>= exp;
+    } else {
+        p = 0;
+    }
+    return p;
+        // ( powf((a*65536.0f), (b*65536.0f)) ) * 65536.0f;
+}
+
+// ------------------------------------------------------------------------
+
+int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i)
+{
+    //int32_t r =int32_t((int64_t(n)<<i)/d);
+    const int32_t ds = n^d;
+    if (n<0) n = -n;
+    if (d<0) d = -d;
+    int nd = gglClz(d) - gglClz(n);
+    i += nd + 1;
+    if (nd > 0) d <<= nd;
+    else        n <<= -nd;
+    uint32_t q = 0;
+
+    int j = i & 7;
+    i >>= 3;
+
+    // gcc deals with the code below pretty well.
+    // we get 3.75 cycles per bit in the main loop
+    // and 8 cycles per bit in the termination loop
+    if (ggl_likely(i)) {
+        n -= d;
+        do {
+            q <<= 8;
+            if (n>=0)   q |= 128;
+            else        n += d;
+            n = n*2 - d;
+            if (n>=0)   q |= 64;
+            else        n += d;
+            n = n*2 - d;
+            if (n>=0)   q |= 32;
+            else        n += d;
+            n = n*2 - d;
+            if (n>=0)   q |= 16;
+            else        n += d;
+            n = n*2 - d;
+            if (n>=0)   q |= 8;
+            else        n += d;
+            n = n*2 - d;
+            if (n>=0)   q |= 4;
+            else        n += d;
+            n = n*2 - d;
+            if (n>=0)   q |= 2;
+            else        n += d;
+            n = n*2 - d;
+            if (n>=0)   q |= 1;
+            else        n += d;
+            
+            if (--i == 0)
+                goto finish;
+
+            n = n*2 - d;
+        } while(true);
+        do {
+            q <<= 1;
+            n = n*2 - d;
+            if (n>=0)   q |= 1;
+            else        n += d;
+        finish: ;
+        } while (j--);
+        return (ds<0) ? -q : q;
+    }
+
+    n -= d;
+    if (n>=0)   q |= 1;
+    else        n += d;
+    j--;
+    goto finish;
+}
+
+// ------------------------------------------------------------------------
+
+// assumes that the int32_t values of a, b, and c are all positive
+// use when both a and b are larger than c
+
+template <typename T>
+static inline void swap(T& a, T& b) {
+    T t(a);
+    a = b;
+    b = t;
+}
+
+static __attribute__((noinline))
+int32_t slow_muldiv(uint32_t a, uint32_t b, uint32_t c)
+{
+	// first we compute a*b as a 64-bit integer
+    // (GCC generates umull with the code below)
+    uint64_t ab = uint64_t(a)*b;
+    uint32_t hi = ab>>32;
+    uint32_t lo = ab;
+    uint32_t result;
+
+	// now perform the division
+	if (hi >= c) {
+	overflow:
+		result = 0x7fffffff;  // basic overflow
+	} else if (hi == 0) {
+		result = lo/c;  // note: c can't be 0
+		if ((result >> 31) != 0)  // result must fit in 31 bits
+			goto overflow;
+	} else {
+		uint32_t r = hi;
+		int bits = 31;
+	    result = 0;
+		do {
+			r = (r << 1) | (lo >> 31);
+			lo <<= 1;
+			result <<= 1;
+			if (r >= c) {
+				r -= c;
+				result |= 1;
+			}
+		} while (bits--);
+	}
+	return int32_t(result);
+}
+
+// assumes a >= 0 and c >= b >= 0
+static inline
+int32_t quick_muldiv(int32_t a, int32_t b, int32_t c)
+{
+    int32_t r = 0, q = 0, i;
+    int leading = gglClz(a);
+    i = 32 - leading;
+    a <<= leading;
+    do {
+        r <<= 1;
+        if (a < 0)
+            r += b;
+        a <<= 1;
+        q <<= 1;
+        if (r >= c) {
+            r -= c;
+            q++;
+        }
+        asm(""::); // gcc generates better code this way
+        if (r >= c) {
+            r -= c;
+            q++;
+        }
+    }
+    while (--i);
+    return q;
+}
+
+// this function computes a*b/c with 64-bit intermediate accuracy
+// overflows (e.g. division by 0) are handled and return INT_MAX
+
+int32_t gglMulDivi(int32_t a, int32_t b, int32_t c)
+{
+	int32_t result;
+	int32_t sign = a^b^c;
+
+	if (a < 0) a = -a;
+	if (b < 0) b = -b;
+	if (c < 0) c = -c;
+
+    if (a < b) {
+        swap(a, b);
+    }
+    
+	if (b <= c) result = quick_muldiv(a, b, c);
+	else        result = slow_muldiv((uint32_t)a, (uint32_t)b, (uint32_t)c);
+	
+	if (sign < 0)
+		result = -result;
+	  
+    return result;
+}