Merge "Add Mips architecture to system/core/include"
diff --git a/include/arch/linux-mips/AndroidConfig.h b/include/arch/linux-mips/AndroidConfig.h
new file mode 100644
index 0000000..2d51dc7
--- /dev/null
+++ b/include/arch/linux-mips/AndroidConfig.h
@@ -0,0 +1,386 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Android config -- "android-mips".  Used for MIPS device builds.
+ */
+#ifndef _ANDROID_CONFIG_H
+#define _ANDROID_CONFIG_H
+
+/*
+ * ===========================================================================
+ *                              !!! IMPORTANT !!!
+ * ===========================================================================
+ *
+ * This file is included by ALL C/C++ source files.  Don't put anything in
+ * here unless you are absolutely certain it can't go anywhere else.
+ *
+ * Any C++ stuff must be wrapped with "#ifdef __cplusplus".  Do not use "//"
+ * comments.
+ */
+
+/*
+ * Threading model.  Choose one:
+ *
+ * HAVE_PTHREADS - use the pthreads library.
+ * HAVE_WIN32_THREADS - use Win32 thread primitives.
+ *  -- combine HAVE_CREATETHREAD, HAVE_CREATEMUTEX, and HAVE__BEGINTHREADEX
+ */
+#define HAVE_PTHREADS
+
+/*
+ * Do we have pthread_setname_np()?
+ *
+ * (HAVE_PTHREAD_SETNAME_NP is used by WebKit to enable a function with
+ * the same name but different parameters, so we can't use that here.)
+ */
+#define HAVE_ANDROID_PTHREAD_SETNAME_NP
+
+/*
+ * Do we have the futex syscall?
+ */
+#define HAVE_FUTEX
+
+/*
+ * Define if we already have the futex wrapper functions defined. Yes if
+ * compiling against bionic.
+ */
+#define HAVE_FUTEX_WRAPPERS 1
+
+/*
+ * Process creation model.  Choose one:
+ *
+ * HAVE_FORKEXEC - use fork() and exec()
+ * HAVE_WIN32_PROC - use CreateProcess()
+ */
+#define HAVE_FORKEXEC
+
+/*
+ * Process out-of-memory adjustment.  Set if running on Linux,
+ * where we can write to /proc/<pid>/oom_adj to modify the out-of-memory
+ * badness adjustment.
+ */
+#define HAVE_OOM_ADJ
+
+/*
+ * IPC model.  Choose one:
+ *
+ * HAVE_SYSV_IPC - use the classic SysV IPC mechanisms (semget, shmget).
+ * HAVE_MACOSX_IPC - use Macintosh IPC mechanisms (sem_open, mmap).
+ * HAVE_WIN32_IPC - use Win32 IPC (CreateSemaphore, CreateFileMapping).
+ * HAVE_ANDROID_IPC - use Android versions (?, mmap).
+ */
+#define HAVE_ANDROID_IPC
+
+/*
+ * Memory-mapping model. Choose one:
+ *
+ * HAVE_POSIX_FILEMAP - use the Posix sys/mmap.h
+ * HAVE_WIN32_FILEMAP - use Win32 filemaps
+ */
+#define  HAVE_POSIX_FILEMAP
+
+/*
+ * Define this if you have <termio.h>
+ */
+#define  HAVE_TERMIO_H 1
+
+/*
+ * Define this if you have <sys/sendfile.h>
+ */
+#define  HAVE_SYS_SENDFILE_H 1
+
+/*
+ * Define this if you build against MSVCRT.DLL
+ */
+/* #define HAVE_MS_C_RUNTIME */
+
+/*
+ * Define this if you have sys/uio.h
+ */
+#define  HAVE_SYS_UIO_H 1
+
+/*
+ * Define this if your platforms implements symbolic links
+ * in its filesystems
+ */
+#define HAVE_SYMLINKS
+
+/*
+ * Define this if we have localtime_r().
+ */
+/* #define HAVE_LOCALTIME_R */
+
+/*
+ * Define this if we have gethostbyname_r().
+ */
+/* #define HAVE_GETHOSTBYNAME_R */
+
+/*
+ * Define this if we have ioctl().
+ */
+#define HAVE_IOCTL
+
+/*
+ * Define this if we want to use WinSock.
+ */
+/* #define HAVE_WINSOCK */
+
+/*
+ * Define this if have clock_gettime() and friends
+ */
+#define HAVE_POSIX_CLOCKS
+
+/*
+ * Define this if we have pthread_cond_timedwait_monotonic() and
+ * clock_gettime(CLOCK_MONOTONIC).
+ */
+#define HAVE_TIMEDWAIT_MONOTONIC
+
+/*
+ * Define this if we have linux style epoll()
+ */
+#define HAVE_EPOLL
+
+/*
+ * Endianness of the target machine.  Choose one:
+ *
+ * HAVE_ENDIAN_H -- have endian.h header we can include.
+ * HAVE_LITTLE_ENDIAN -- we are little endian.
+ * HAVE_BIG_ENDIAN -- we are big endian.
+ */
+#define HAVE_ENDIAN_H
+#if defined(__MIPSEB__)
+#define HAVE_BIG_ENDIAN
+#endif
+#if defined(__MIPSEL__)
+#define HAVE_LITTLE_ENDIAN
+#endif
+
+/*
+ * We need to choose between 32-bit and 64-bit off_t.  All of our code should
+ * agree on the same size.  For desktop systems, use 64-bit values,
+ * because some of our libraries (e.g. wxWidgets) expect to be built that way.
+ */
+/* #define _FILE_OFFSET_BITS 64 */
+/* #define _LARGEFILE_SOURCE 1 */
+
+/*
+ * Define if platform has off64_t (and lseek64 and other xxx64 functions)
+ */
+#define HAVE_OFF64_T
+
+/*
+ * Defined if we have the backtrace() call for retrieving a stack trace.
+ * Needed for CallStack to operate; if not defined, CallStack is
+ * non-functional.
+ */
+#define HAVE_BACKTRACE 0
+
+/*
+ * Defined if we have the dladdr() call for retrieving the symbol associated
+ * with a memory address.  If not defined, stack crawls will not have symbolic
+ * information.
+ */
+#define HAVE_DLADDR 1
+
+/*
+ * Defined if we have the cxxabi.h header for demangling C++ symbols.  If
+ * not defined, stack crawls will be displayed with raw mangled symbols
+ */
+#define HAVE_CXXABI 0
+
+/*
+ * Defined if we have the gettid() system call.
+ */
+#define HAVE_GETTID
+
+/*
+ * Defined if we have the sched_setscheduler() call
+ */
+#define HAVE_SCHED_SETSCHEDULER
+
+/*
+ * Add any extra platform-specific defines here.
+ */
+#ifndef __linux__
+#define __linux__ 1
+#endif
+
+#ifndef __linux
+#define __linux 1
+#endif
+
+#ifdef __unix__
+#undef __unix__
+#endif
+
+#ifdef __unix
+#undef __unix
+#endif
+
+/*
+ * Define if we have <malloc.h> header
+ */
+#define HAVE_MALLOC_H
+
+/*
+ * Define if we're running on *our* linux on device or emulator.
+ */
+#define HAVE_ANDROID_OS 1
+
+/*
+ * Define if we have Linux-style non-filesystem Unix Domain Sockets
+ */
+#define HAVE_LINUX_LOCAL_SOCKET_NAMESPACE 1
+
+/*
+ * Define if we have Linux's inotify in <sys/inotify.h>.
+ */
+#define HAVE_INOTIFY 1
+
+/*
+ * Define if we have madvise() in <sys/mman.h>
+ */
+#define HAVE_MADVISE 1
+
+/*
+ * Define if tm struct has tm_gmtoff field
+ */
+#define HAVE_TM_GMTOFF 1
+
+/*
+ * Define if dirent struct has d_type field
+ */
+#define HAVE_DIRENT_D_TYPE 1
+
+/*
+ * Define if libc includes Android system properties implementation.
+ */
+#define HAVE_LIBC_SYSTEM_PROPERTIES 1
+
+/*
+ * Define if system provides a system property server (should be
+ * mutually exclusive with HAVE_LIBC_SYSTEM_PROPERTIES).
+ */
+/* #define HAVE_SYSTEM_PROPERTY_SERVER */
+
+/*
+ * What CPU architecture does this platform use?
+ */
+#define ARCH_MIPS 1
+
+/*
+ * Define if the size of enums is as short as possible,
+ */
+/* #define HAVE_SHORT_ENUMS */
+
+/*
+ * sprintf() format string for shared library naming.
+ */
+#define OS_SHARED_LIB_FORMAT_STR    "lib%s.so"
+
+/*
+ * Do we have __memcmp16()?
+ */
+#define HAVE__MEMCMP16  1
+
+/*
+ * type for the third argument to mincore().
+ */
+#define MINCORE_POINTER_TYPE unsigned char *
+
+/*
+ * Do we have the sigaction flag SA_NOCLDWAIT?
+ */
+#define HAVE_SA_NOCLDWAIT
+
+/*
+ * The default path separator for the platform
+ */
+#define OS_PATH_SEPARATOR '/'
+
+/*
+ * Is the filesystem case sensitive?
+ */
+#define OS_CASE_SENSITIVE
+
+/*
+ * Define if <sys/socket.h> exists.
+ */
+#define HAVE_SYS_SOCKET_H 1
+
+/*
+ * Define if the strlcpy() function exists on the system.
+ */
+#define HAVE_STRLCPY 1
+
+/*
+ * Define if the open_memstream() function exists on the system.
+ */
+/* #define HAVE_OPEN_MEMSTREAM 1 */
+
+/*
+ * Define if the BSD funopen() function exists on the system.
+ */
+#define HAVE_FUNOPEN 1
+
+/*
+ * Define if prctl() exists
+ */
+#define HAVE_PRCTL 1
+
+/*
+ * Define if writev() exists
+ */
+#define HAVE_WRITEV 1
+
+/*
+ * Define if <stdint.h> exists.
+ */
+#define HAVE_STDINT_H 1
+
+/*
+ * Define if <stdbool.h> exists.
+ */
+#define HAVE_STDBOOL_H 1
+
+/*
+ * Define if <sched.h> exists.
+ */
+#define HAVE_SCHED_H 1
+
+/*
+ * Define if pread() exists
+ */
+#define HAVE_PREAD 1
+
+/*
+ * Define if we have st_mtim in struct stat
+ */
+#define HAVE_STAT_ST_MTIM 1
+
+/*
+ * Define if printf() supports %zd for size_t arguments
+ */
+#define HAVE_PRINTF_ZD 1
+
+/*
+ * Whether or not _Unwind_Context is defined as a struct.
+ */
+#define HAVE_UNWIND_CONTEXT_STRUCT 1
+
+#endif /* _ANDROID_CONFIG_H */
diff --git a/include/cutils/atomic-inline.h b/include/cutils/atomic-inline.h
index 49f3e70..64cdd9d 100644
--- a/include/cutils/atomic-inline.h
+++ b/include/cutils/atomic-inline.h
@@ -49,6 +49,8 @@
 #include <cutils/atomic-x86.h>
 #elif defined(__sh__)
 /* implementation is in atomic-android-sh.c */
+#elif defined(__mips__)
+#include <cutils/atomic-mips.h>
 #else
 #error atomic operations are unsupported
 #endif
diff --git a/include/cutils/atomic-mips.h b/include/cutils/atomic-mips.h
new file mode 100644
index 0000000..49144a3
--- /dev/null
+++ b/include/cutils/atomic-mips.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_CUTILS_ATOMIC_MIPS_H
+#define ANDROID_CUTILS_ATOMIC_MIPS_H
+
+#include <stdint.h>
+
+extern inline void android_compiler_barrier(void)
+{
+    __asm__ __volatile__ ("" : : : "memory");
+}
+
+#if ANDROID_SMP == 0
+extern inline void android_memory_barrier(void)
+{
+    android_compiler_barrier();
+}
+extern inline void android_memory_store_barrier(void)
+{
+    android_compiler_barrier();
+}
+#else
+extern inline void android_memory_barrier(void)
+{
+    __asm__ __volatile__ ("sync" : : : "memory");
+}
+extern inline void android_memory_store_barrier(void)
+{
+    __asm__ __volatile__ ("sync" : : : "memory");
+}
+#endif
+
+extern inline int32_t android_atomic_acquire_load(volatile const int32_t *ptr)
+{
+    int32_t value = *ptr;
+    android_memory_barrier();
+    return value;
+}
+
+extern inline int32_t android_atomic_release_load(volatile const int32_t *ptr)
+{
+    android_memory_barrier();
+    return *ptr;
+}
+
+extern inline void android_atomic_acquire_store(int32_t value,
+                                                volatile int32_t *ptr)
+{
+    *ptr = value;
+    android_memory_barrier();
+}
+
+extern inline void android_atomic_release_store(int32_t value,
+                                                volatile int32_t *ptr)
+{
+    android_memory_barrier();
+    *ptr = value;
+}
+
+extern inline int android_atomic_cas(int32_t old_value, int32_t new_value,
+                                     volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    do {
+        __asm__ __volatile__ (
+            "    ll     %[prev], (%[ptr])\n"
+            "    li     %[status], 1\n"
+            "    bne    %[prev], %[old], 9f\n"
+            "    move   %[status], %[new_value]\n"
+            "    sc     %[status], (%[ptr])\n"
+            "9:\n"
+            : [prev] "=&r" (prev), [status] "=&r" (status)
+            : [ptr] "r" (ptr), [old] "r" (old_value), [new_value] "r" (new_value)
+            );
+    } while (__builtin_expect(status == 0, 0));
+    return prev != old_value;
+}
+
+extern inline int android_atomic_acquire_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{
+    int status = android_atomic_cas(old_value, new_value, ptr);
+    android_memory_barrier();
+    return status;
+}
+
+extern inline int android_atomic_release_cas(int32_t old_value,
+                                             int32_t new_value,
+                                             volatile int32_t *ptr)
+{
+    android_memory_barrier();
+    return android_atomic_cas(old_value, new_value, ptr);
+}
+
+
+extern inline int32_t android_atomic_swap(int32_t new_value,
+                                          volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    do {
+    __asm__ __volatile__ (
+        "    move %[status], %[new_value]\n"
+        "    ll %[prev], (%[ptr])\n"
+        "    sc %[status], (%[ptr])\n"
+        : [prev] "=&r" (prev), [status] "=&r" (status)
+        : [ptr] "r" (ptr), [new_value] "r" (new_value)
+        );
+    } while (__builtin_expect(status == 0, 0));
+    android_memory_barrier();
+    return prev;
+}
+
+extern inline int32_t android_atomic_add(int32_t increment,
+                                         volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    android_memory_barrier();
+    do {
+        __asm__ __volatile__ (
+        "    ll    %[prev], (%[ptr])\n"
+        "    addu  %[status], %[prev], %[inc]\n"
+        "    sc    %[status], (%[ptr])\n"
+        :  [status] "=&r" (status), [prev] "=&r" (prev)
+        :  [ptr] "r" (ptr), [inc] "Ir" (increment)
+        );
+    } while (__builtin_expect(status == 0, 0));
+    return prev;
+}
+
+extern inline int32_t android_atomic_inc(volatile int32_t *addr)
+{
+    return android_atomic_add(1, addr);
+}
+
+extern inline int32_t android_atomic_dec(volatile int32_t *addr)
+{
+    return android_atomic_add(-1, addr);
+}
+
+extern inline int32_t android_atomic_and(int32_t value, volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    android_memory_barrier();
+    do {
+        __asm__ __volatile__ (
+        "    ll    %[prev], (%[ptr])\n"
+        "    and   %[status], %[prev], %[value]\n"
+        "    sc    %[status], (%[ptr])\n"
+        : [prev] "=&r" (prev), [status] "=&r" (status)
+        : [ptr] "r" (ptr), [value] "Ir" (value)
+            );
+    } while (__builtin_expect(status == 0, 0));
+    return prev;
+}
+
+extern inline int32_t android_atomic_or(int32_t value, volatile int32_t *ptr)
+{
+    int32_t prev, status;
+    android_memory_barrier();
+    do {
+        __asm__ __volatile__ (
+        "    ll    %[prev], (%[ptr])\n"
+        "    or    %[status], %[prev], %[value]\n"
+        "    sc    %[status], (%[ptr])\n"
+        : [prev] "=&r" (prev), [status] "=&r" (status)
+        : [ptr] "r" (ptr), [value] "Ir" (value)
+            );
+    } while (__builtin_expect(status == 0, 0));
+    return prev;
+}
+
+#endif /* ANDROID_CUTILS_ATOMIC_MIPS_H */
diff --git a/include/private/pixelflinger/ggl_context.h b/include/private/pixelflinger/ggl_context.h
index 2d7fdcf..4864d5a 100644
--- a/include/private/pixelflinger/ggl_context.h
+++ b/include/private/pixelflinger/ggl_context.h
@@ -42,10 +42,30 @@
 #else
 
 inline uint32_t GGL_RGBA_TO_HOST(uint32_t v) {
+#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2
+    uint32_t r;
+    __asm__("wsbh %0, %1;"
+        "rotr %0, %0, 16"
+        : "=r" (r)
+        : "r" (v)
+        );
+    return r;
+#else
     return (v<<24) | (v>>24) | ((v<<8)&0xff0000) | ((v>>8)&0xff00);
+#endif
 }
 inline uint32_t GGL_HOST_TO_RGBA(uint32_t v) {
+#if defined(__mips__) && __mips==32 && __mips_isa_rev>=2
+    uint32_t r;
+    __asm__("wsbh %0, %1;"
+        "rotr %0, %0, 16"
+        : "=r" (r)
+        : "r" (v)
+        );
+    return r;
+#else
     return (v<<24) | (v>>24) | ((v<<8)&0xff0000) | ((v>>8)&0xff00);
+#endif
 }
 
 #endif
diff --git a/include/private/pixelflinger/ggl_fixed.h b/include/private/pixelflinger/ggl_fixed.h
index 96fdb32..217ec04 100644
--- a/include/private/pixelflinger/ggl_fixed.h
+++ b/include/private/pixelflinger/ggl_fixed.h
@@ -190,6 +190,272 @@
         );
     return res;
 }
+#elif defined(__mips__)
+
+/*inline MIPS implementations*/
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
+inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
+    GGLfixed result,tmp,tmp1,tmp2;
+
+    if (__builtin_constant_p(shift)) {
+        if (shift == 0) {
+            asm ("mult %[a], %[b] \t\n"
+              "mflo  %[res]   \t\n"
+            : [res]"=&r"(result),[tmp]"=&r"(tmp)
+            : [a]"r"(a),[b]"r"(b)
+            : "%hi","%lo"
+            );
+        } else if (shift == 32)
+        {
+            asm ("mult %[a], %[b] \t\n"
+            "li  %[tmp],1\t\n"
+            "sll  %[tmp],%[tmp],0x1f\t\n"
+            "mflo %[res]   \t\n"
+            "addu %[tmp1],%[tmp],%[res] \t\n"
+            "sltu %[tmp1],%[tmp1],%[tmp]\t\n"   /*obit*/
+            "sra %[tmp],%[tmp],0x1f \t\n"
+            "mfhi  %[res]   \t\n"
+            "addu %[res],%[res],%[tmp]\t\n"
+            "addu %[res],%[res],%[tmp1]\t\n"
+            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
+            : [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
+            : "%hi","%lo"
+            );
+        } else if ((shift >0) && (shift < 32))
+        {
+            asm ("mult %[a], %[b] \t\n"
+            "li  %[tmp],1 \t\n"
+            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
+            "mflo  %[res]   \t\n"
+            "addu %[tmp1],%[tmp],%[res] \t\n"
+            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
+            "addu  %[res],%[res],%[tmp] \t\n"
+            "mfhi  %[tmp]   \t\n"
+            "addu  %[tmp],%[tmp],%[tmp1] \t\n"
+            "sll   %[tmp],%[tmp],%[lshift] \t\n"
+            "srl   %[res],%[res],%[rshift]    \t\n"
+            "or    %[res],%[res],%[tmp] \t\n"
+            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+            : [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
+            : "%hi","%lo"
+            );
+        } else {
+            asm ("mult %[a], %[b] \t\n"
+            "li  %[tmp],1 \t\n"
+            "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
+            "mflo  %[res]   \t\n"
+            "addu %[tmp1],%[tmp],%[res] \t\n"
+            "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
+            "sra  %[tmp2],%[tmp],0x1f \t\n"
+            "addu  %[res],%[res],%[tmp] \t\n"
+            "mfhi  %[tmp]   \t\n"
+            "addu  %[tmp],%[tmp],%[tmp2] \t\n"
+            "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
+            "srl   %[tmp2],%[res],%[rshift]    \t\n"
+            "srav  %[res], %[tmp],%[rshift]\t\n"
+            "sll   %[tmp],%[tmp],1 \t\n"
+            "sll   %[tmp],%[tmp],%[norbits] \t\n"
+            "or    %[tmp],%[tmp],%[tmp2] \t\n"
+            "movz  %[res],%[tmp],%[bit5] \t\n"
+            : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+            : [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
+            : "%hi","%lo"
+            );
+        }
+    } else {
+        asm ("mult %[a], %[b] \t\n"
+        "li  %[tmp],1 \t\n"
+        "sll  %[tmp],%[tmp],%[shiftm1] \t\n"
+        "mflo  %[res]   \t\n"
+        "addu %[tmp1],%[tmp],%[res] \t\n"
+        "sltu %[tmp1],%[tmp1],%[tmp] \t\n"  /*obit?*/
+        "sra  %[tmp2],%[tmp],0x1f \t\n"
+        "addu  %[res],%[res],%[tmp] \t\n"
+        "mfhi  %[tmp]   \t\n"
+        "addu  %[tmp],%[tmp],%[tmp2] \t\n"
+        "addu  %[tmp],%[tmp],%[tmp1] \t\n"            /*tmp=hi*/
+        "srl   %[tmp2],%[res],%[rshift]    \t\n"
+        "srav  %[res], %[tmp],%[rshift]\t\n"
+        "sll   %[tmp],%[tmp],1 \t\n"
+        "sll   %[tmp],%[tmp],%[norbits] \t\n"
+        "or    %[tmp],%[tmp],%[tmp2] \t\n"
+        "movz  %[res],%[tmp],%[bit5] \t\n"
+         : [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+         : [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
+         : "%hi","%lo"
+         );
+        }
+
+        return result;
+}
+
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
+inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
+    GGLfixed result,t,tmp1,tmp2;
+
+    if (__builtin_constant_p(shift)) {
+        if (shift == 0) {
+                 asm ("mult %[a], %[b] \t\n"
+                 "mflo  %[lo]   \t\n"
+                 "addu  %[lo],%[lo],%[c]    \t\n"
+                 : [lo]"=&r"(result)
+                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+                 : "%hi","%lo"
+                 );
+                } else if (shift == 32) {
+                    asm ("mult %[a], %[b] \t\n"
+                    "mfhi  %[lo]   \t\n"
+                    "addu  %[lo],%[lo],%[c]    \t\n"
+                    : [lo]"=&r"(result)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+                    : "%hi","%lo"
+                    );
+                } else if ((shift>0) && (shift<32)) {
+                    asm ("mult %[a], %[b] \t\n"
+                    "mflo  %[res]   \t\n"
+                    "mfhi  %[t]   \t\n"
+                    "srl   %[res],%[res],%[rshift]    \t\n"
+                    "sll   %[t],%[t],%[lshift]     \t\n"
+                    "or  %[res],%[res],%[t]    \t\n"
+                    "addu  %[res],%[res],%[c]    \t\n"
+                    : [res]"=&r"(result),[t]"=&r"(t)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
+                    : "%hi","%lo"
+                    );
+                } else {
+                    asm ("mult %[a], %[b] \t\n"
+                    "nor %[tmp1],$zero,%[shift]\t\n"
+                    "mflo  %[res]   \t\n"
+                    "mfhi  %[t]   \t\n"
+                    "srl   %[res],%[res],%[shift]    \t\n"
+                    "sll   %[tmp2],%[t],1     \t\n"
+                    "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
+                    "or  %[tmp1],%[tmp2],%[res]    \t\n"
+                    "srav  %[res],%[t],%[shift]     \t\n"
+                    "andi %[tmp2],%[shift],0x20\t\n"
+                    "movz %[res],%[tmp1],%[tmp2]\t\n"
+                    "addu  %[res],%[res],%[c]    \t\n"
+                    : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
+                    : "%hi","%lo"
+                    );
+                }
+            } else {
+                asm ("mult %[a], %[b] \t\n"
+                "nor %[tmp1],$zero,%[shift]\t\n"
+                "mflo  %[res]   \t\n"
+                "mfhi  %[t]   \t\n"
+                "srl   %[res],%[res],%[shift]    \t\n"
+                "sll   %[tmp2],%[t],1     \t\n"
+                "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
+                "or  %[tmp1],%[tmp2],%[res]    \t\n"
+                "srav  %[res],%[t],%[shift]     \t\n"
+                "andi %[tmp2],%[shift],0x20\t\n"
+                "movz %[res],%[tmp1],%[tmp2]\t\n"
+                "addu  %[res],%[res],%[c]    \t\n"
+                : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+                : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
+                : "%hi","%lo"
+                );
+            }
+            return result;
+}
+
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
+inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
+    GGLfixed result,t,tmp1,tmp2;
+
+    if (__builtin_constant_p(shift)) {
+        if (shift == 0) {
+                 asm ("mult %[a], %[b] \t\n"
+                 "mflo  %[lo]   \t\n"
+                 "subu  %[lo],%[lo],%[c]    \t\n"
+                 : [lo]"=&r"(result)
+                 : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+                 : "%hi","%lo"
+                 );
+                } else if (shift == 32) {
+                    asm ("mult %[a], %[b] \t\n"
+                    "mfhi  %[lo]   \t\n"
+                    "subu  %[lo],%[lo],%[c]    \t\n"
+                    : [lo]"=&r"(result)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+                    : "%hi","%lo"
+                    );
+                } else if ((shift>0) && (shift<32)) {
+                    asm ("mult %[a], %[b] \t\n"
+                    "mflo  %[res]   \t\n"
+                    "mfhi  %[t]   \t\n"
+                    "srl   %[res],%[res],%[rshift]    \t\n"
+                    "sll   %[t],%[t],%[lshift]     \t\n"
+                    "or  %[res],%[res],%[t]    \t\n"
+                    "subu  %[res],%[res],%[c]    \t\n"
+                    : [res]"=&r"(result),[t]"=&r"(t)
+                    : [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
+                    : "%hi","%lo"
+                    );
+                } else {
+                    asm ("mult %[a], %[b] \t\n"
+                    "nor %[tmp1],$zero,%[shift]\t\n"
+                     "mflo  %[res]   \t\n"
+                     "mfhi  %[t]   \t\n"
+                     "srl   %[res],%[res],%[shift]    \t\n"
+                     "sll   %[tmp2],%[t],1     \t\n"
+                     "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
+                     "or  %[tmp1],%[tmp2],%[res]    \t\n"
+                     "srav  %[res],%[t],%[shift]     \t\n"
+                     "andi %[tmp2],%[shift],0x20\t\n"
+                     "movz %[res],%[tmp1],%[tmp2]\t\n"
+                     "subu  %[res],%[res],%[c]    \t\n"
+                     : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+                     : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
+                     : "%hi","%lo"
+                     );
+                    }
+                } else {
+                asm ("mult %[a], %[b] \t\n"
+                "nor %[tmp1],$zero,%[shift]\t\n"
+                "mflo  %[res]   \t\n"
+                "mfhi  %[t]   \t\n"
+                "srl   %[res],%[res],%[shift]    \t\n"
+                "sll   %[tmp2],%[t],1     \t\n"
+                "sllv  %[tmp2],%[tmp2],%[tmp1]     \t\n"
+                "or  %[tmp1],%[tmp2],%[res]    \t\n"
+                "srav  %[res],%[t],%[shift]     \t\n"
+                "andi %[tmp2],%[shift],0x20\t\n"
+                "movz %[res],%[tmp1],%[tmp2]\t\n"
+                "subu  %[res],%[res],%[c]    \t\n"
+                : [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
+                : [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
+                : "%hi","%lo"
+                );
+            }
+    return result;
+}
+
+inline int64_t gglMulii(int32_t x, int32_t y) CONST;
+inline int64_t gglMulii(int32_t x, int32_t y) {
+    union {
+        struct {
+#if defined(__MIPSEL__)
+            int32_t lo;
+            int32_t hi;
+#elif defined(__MIPSEB__)
+            int32_t hi;
+            int32_t lo;
+#endif
+        } s;
+        int64_t res;
+    }u;
+    asm("mult %2, %3 \t\n"
+        "mfhi %1   \t\n"
+        "mflo %0   \t\n"
+        : "=r"(u.s.lo), "=&r"(u.s.hi)
+        : "%r"(x), "r"(y)
+	: "%hi","%lo"
+        );
+    return u.res;
+}
 
 #else // ----------------------------------------------------------------------
 
@@ -232,7 +498,7 @@
 inline int32_t gglClz(int32_t x) CONST;
 inline int32_t gglClz(int32_t x)
 {
-#if defined(__arm__) && !defined(__thumb__)
+#if (defined(__arm__) && !defined(__thumb__)) || defined(__mips__)
     return __builtin_clz(x);
 #else
     if (!x) return 32;