Blame - include/private/pixelflinger/ggl_fixed.h - AOSPA/android_system_core

blob: 217ec04b294e2ed79efc4254af9428bb4780832c [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2005 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#ifndef ANDROID_GGL_FIXED_H
				18	#define ANDROID_GGL_FIXED_H
				19
				20	#include <math.h>
				21	#include <pixelflinger/pixelflinger.h>
				22
				23	// ----------------------------------------------------------------------------
				24
				// Shorthands for GCC-style function attributes applied to declarations in this header:
				// CONST expands to __attribute__((const)), ALWAYS_INLINE to __attribute__((always_inline)).
				25	#define CONST __attribute__((const))
				26	#define ALWAYS_INLINE __attribute__((always_inline))
				27
				// Fixed-point format constants. FIXED_BITS is the shift count used by the
				// int<->fixed helpers in this header, so FIXED_ONE (1<<FIXED_BITS) and
				// FIXED_HALF (1<<(FIXED_BITS-1)) are the raw encodings of 1.0 and 0.5.
				28	const GGLfixed FIXED_BITS = 16;
				// Raw value 1: the smallest step between two distinct raw values.
				29	const GGLfixed FIXED_EPSILON = 1;
				30	const GGLfixed FIXED_ONE = 1L<<FIXED_BITS;
				31	const GGLfixed FIXED_HALF = 1L<<(FIXED_BITS-1);
				// Bit patterns 0x80000000 / 0x7FFFFFFF; presumably the most negative and most positive
				// values of a 32-bit signed GGLfixed -- TODO confirm the typedef in pixelflinger.h.
				32	const GGLfixed FIXED_MIN = 0x80000000L;
				33	const GGLfixed FIXED_MAX = 0x7FFFFFFFL;
				34
				// Forward declarations whose only purpose is to attach `inline` and ALWAYS_INLINE
				// to the conversion/rounding helpers; the definitions follow immediately below.
				35	inline GGLfixed gglIntToFixed(GGLfixed i) ALWAYS_INLINE ;
				36	inline GGLfixed gglFixedToIntRound(GGLfixed f) ALWAYS_INLINE ;
				37	inline GGLfixed gglFixedToIntFloor(GGLfixed f) ALWAYS_INLINE ;
				38	inline GGLfixed gglFixedToIntCeil(GGLfixed f) ALWAYS_INLINE ;
				39	inline GGLfixed gglFracx(GGLfixed v) ALWAYS_INLINE ;
				40	inline GGLfixed gglFloorx(GGLfixed v) ALWAYS_INLINE ;
				41	inline GGLfixed gglCeilx(GGLfixed v) ALWAYS_INLINE ;
				42	inline GGLfixed gglCenterx(GGLfixed v) ALWAYS_INLINE ;
				43	inline GGLfixed gglRoundx(GGLfixed v) ALWAYS_INLINE ;
				44
				45	GGLfixed gglIntToFixed(GGLfixed i) {
				46	return i<<FIXED_BITS;
				47	}
				48	GGLfixed gglFixedToIntRound(GGLfixed f) {
				49	return (f + FIXED_HALF)>>FIXED_BITS;
				50	}
				51	GGLfixed gglFixedToIntFloor(GGLfixed f) {
				52	return f>>FIXED_BITS;
				53	}
				54	GGLfixed gglFixedToIntCeil(GGLfixed f) {
				55	return (f + ((1<<FIXED_BITS) - 1))>>FIXED_BITS;
				56	}
				57
				58	GGLfixed gglFracx(GGLfixed v) {
				59	return v & ((1<<FIXED_BITS)-1);
				60	}
				61	GGLfixed gglFloorx(GGLfixed v) {
				62	return gglFixedToIntFloor(v)<<FIXED_BITS;
				63	}
				64	GGLfixed gglCeilx(GGLfixed v) {
				65	return gglFixedToIntCeil(v)<<FIXED_BITS;
				66	}
				67	GGLfixed gglCenterx(GGLfixed v) {
				68	return gglFloorx(v + FIXED_HALF) \| FIXED_HALF;
				69	}
				70	GGLfixed gglRoundx(GGLfixed v) {
				71	return gglFixedToIntRound(v)<<FIXED_BITS;
				72	}
				73
				74	// conversion from (unsigned) int, short, byte to fixed...
				// Naming (presumably): B/S/I = byte/short/int source, leading U = unsigned source.
				// Each macro rescales _x using shifts and adds only; read the expression for the exact mapping.
				// The bodies use function-style casts (GGLfixed(...), int32_t(...)), so they compile only as C++.
				// NOTE(review): GGL_UB_TO_X and GGL_US_TO_X expand _x more than once, and GGL_US_TO_X /
				// GGL_UI_TO_X use _x without a cast -- pass side-effect-free arguments of the intended type.
				75	#define GGL_B_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<10 )
				76	#define GGL_S_TO_X(_x) GGLfixed( ((int32_t(_x)+1)>>1)<<2 )
				77	#define GGL_I_TO_X(_x) GGLfixed( ((int32_t(_x)>>1)+1)>>14 )
				78	#define GGL_UB_TO_X(_x) GGLfixed( uint32_t(_x) + \
				79	(uint32_t(_x)<<8) + \
				80	(uint32_t(_x)>>7) )
				81	#define GGL_US_TO_X(_x) GGLfixed( (_x) + ((_x)>>15) )
				82	#define GGL_UI_TO_X(_x) GGLfixed( (((_x)>>1)+1)>>15 )
				83
				84	// ----------------------------------------------------------------------------
				85
				// Out-of-line math routines; only declared in this header (definitions live elsewhere).
				// Those tagged CONST carry __attribute__((const)).
				86	GGLfixed gglPowx(GGLfixed x, GGLfixed y) CONST;
				87	GGLfixed gglSqrtx(GGLfixed a) CONST;
				88	GGLfixed gglSqrtRecipx(GGLfixed x) CONST;
				89	GGLfixed gglFastDivx(GGLfixed n, GGLfixed d) CONST;
				// Not tagged CONST. Presumably computes a*b/c -- TODO confirm against the definition.
				90	int32_t gglMulDivi(int32_t a, int32_t b, int32_t c);
				91
				// Takes an int* out-parameter (exponent) and is therefore not tagged CONST.
				92	int32_t gglRecipQNormalized(int32_t x, int* exponent);
				// The inline wrappers in this header call this with q = 16 (gglRecip) and q = 28 (gglRecip28).
				93	int32_t gglRecipQ(GGLfixed x, int q) CONST;
				94
				95	inline GGLfixed gglRecip(GGLfixed x) CONST;
				96	inline GGLfixed gglRecip(GGLfixed x) {
				97	return gglRecipQ(x, 16);
				98	}
				99
				100	inline GGLfixed gglRecip28(GGLfixed x) CONST;
				101	int32_t gglRecip28(GGLfixed x) {
				102	return gglRecipQ(x, 28);
				103	}
				104
				105	// ----------------------------------------------------------------------------
				106
				107	#if defined(__arm__) && !defined(__thumb__)
				108
				109	// inline ARM implementations
				// ARM (non-Thumb) fixed-point multiply: returns (x*y) >> shift, rounded.
				// smull forms the 64-bit product in lo/hi; "movs lo, lo, lsr shift" drops the low bits and
				// leaves the last bit shifted out in the carry flag; adc then adds hi << (32-shift) plus
				// that carry, which rounds the result instead of truncating it.
				// "cc" is listed as clobbered because movs updates the condition flags.
				110	inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) CONST;
				111	inline GGLfixed gglMulx(GGLfixed x, GGLfixed y, int shift) {
				112	GGLfixed result, t;
				// Compile-time-constant shift: both shift amounts are passed as immediates ("I").
				// NOTE(review): the outputs here use "=r" while the register-shift branch below uses
				// the early-clobber form "=&r" -- confirm the difference is intentional.
				113	if (__builtin_constant_p(shift)) {
				114	asm("smull %[lo], %[hi], %[x], %[y] \n"
				115	"movs %[lo], %[lo], lsr %[rshift] \n"
				116	"adc %[lo], %[lo], %[hi], lsl %[lshift] \n"
				117	: [lo]"=r"(result), [hi]"=r"(t), [x]"=r"(x)
				118	: "%[x]"(x), [y]"r"(y), [lshift] "I"(32-shift), [rshift] "I"(shift)
				119	: "cc"
				120	);
				// Run-time shift: same instruction sequence with the shift amounts held in registers ("r").
				121	} else {
				122	asm("smull %[lo], %[hi], %[x], %[y] \n"
				123	"movs %[lo], %[lo], lsr %[rshift] \n"
				124	"adc %[lo], %[lo], %[hi], lsl %[lshift] \n"
				125	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				126	: "%[x]"(x), [y]"r"(y), [lshift] "r"(32-shift), [rshift] "r"(shift)
				127	: "cc"
				128	);
				129	}
				130	return result;
				131	}
				132
				// ARM (non-Thumb) multiply-add: returns ((x*y) >> shift) + a.
				// smull forms the 64-bit product in lo/hi; the first add computes a + (lo >> shift)
				// (logical shift), the second adds hi << (32-shift). Plain add instructions are used,
				// so no rounding carry is folded in and no "cc" clobber is declared.
				133	inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
				134	inline GGLfixed gglMulAddx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
				135	GGLfixed result, t;
				// Compile-time-constant shift: shift amounts are immediates ("I").
				136	if (__builtin_constant_p(shift)) {
				137	asm("smull %[lo], %[hi], %[x], %[y] \n"
				138	"add %[lo], %[a], %[lo], lsr %[rshift] \n"
				139	"add %[lo], %[lo], %[hi], lsl %[lshift] \n"
				140	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				141	: "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
				142	);
				// Run-time shift: identical sequence with the shift amounts in registers ("r").
				143	} else {
				144	asm("smull %[lo], %[hi], %[x], %[y] \n"
				145	"add %[lo], %[a], %[lo], lsr %[rshift] \n"
				146	"add %[lo], %[lo], %[hi], lsl %[lshift] \n"
				147	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				148	: "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
				149	);
				150	}
				151	return result;
				152	}
				153
				// ARM (non-Thumb) multiply-subtract: returns ((x*y) >> shift) - a.
				// smull forms the 64-bit product in lo/hi; rsb (reverse subtract) computes
				// (lo >> shift) - a, then hi << (32-shift) is added. No rounding carry is folded in.
				154	inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) CONST;
				155	inline GGLfixed gglMulSubx(GGLfixed x, GGLfixed y, GGLfixed a, int shift) {
				156	GGLfixed result, t;
				// Compile-time-constant shift: shift amounts are immediates ("I").
				157	if (__builtin_constant_p(shift)) {
				158	asm("smull %[lo], %[hi], %[x], %[y] \n"
				159	"rsb %[lo], %[a], %[lo], lsr %[rshift] \n"
				160	"add %[lo], %[lo], %[hi], lsl %[lshift] \n"
				161	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				162	: "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "I"(32-shift), [rshift] "I"(shift)
				163	);
				// Run-time shift: identical sequence with the shift amounts in registers ("r").
				164	} else {
				165	asm("smull %[lo], %[hi], %[x], %[y] \n"
				166	"rsb %[lo], %[a], %[lo], lsr %[rshift] \n"
				167	"add %[lo], %[lo], %[hi], lsl %[lshift] \n"
				168	: [lo]"=&r"(result), [hi]"=&r"(t), [x]"=&r"(x)
				169	: "%[x]"(x), [y]"r"(y), [a]"r"(a), [lshift] "r"(32-shift), [rshift] "r"(shift)
				170	);
				171	}
				172	return result;
				173	}
				174
				// ARM (non-Thumb) full 32x32 -> 64-bit signed multiply.
				// smull writes the low word to s.lo and the high word to s.hi; the anonymous union
				// lets the two halves be read back as one int64_t (res). The lo-then-hi member order
				// assumes the low word sits at the lower address (little-endian layout).
				175	inline int64_t gglMulii(int32_t x, int32_t y) CONST;
				176	inline int64_t gglMulii(int32_t x, int32_t y)
				177	{
				178	// 64-bits result: r0=low, r1=high
				179	union {
				180	struct {
				181	int32_t lo;
				182	int32_t hi;
				183	} s;
				184	int64_t res;
				185	};
				// "%r" on x marks x and y as commutative operands for the register allocator.
				186	asm("smull %0, %1, %2, %3 \n"
				187	: "=r"(s.lo), "=&r"(s.hi)
				188	: "%r"(x), "r"(y)
				189	:
				190	);
				191	return res;
				192	}
Duane Sand	0960411	2012-05-24 17:40:21 -0700	[diff] [blame]	193	#elif defined(__mips__)
				194
				195	/* inline MIPS implementations */
				196	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
				197	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
				198	GGLfixed result,tmp,tmp1,tmp2;
				199
				200	if (__builtin_constant_p(shift)) {
				201	if (shift == 0) {
				202	asm ("mult %[a], %[b] \t\n"
				203	"mflo %[res] \t\n"
				204	: [res]"=&r"(result),[tmp]"=&r"(tmp)
				205	: [a]"r"(a),[b]"r"(b)
				206	: "%hi","%lo"
				207	);
				208	} else if (shift == 32)
				209	{
				210	asm ("mult %[a], %[b] \t\n"
				211	"li %[tmp],1\t\n"
				212	"sll %[tmp],%[tmp],0x1f\t\n"
				213	"mflo %[res] \t\n"
				214	"addu %[tmp1],%[tmp],%[res] \t\n"
				215	"sltu %[tmp1],%[tmp1],%[tmp]\t\n" /obit/
				216	"sra %[tmp],%[tmp],0x1f \t\n"
				217	"mfhi %[res] \t\n"
				218	"addu %[res],%[res],%[tmp]\t\n"
				219	"addu %[res],%[res],%[tmp1]\t\n"
				220	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1)
				221	: [a]"r"(a),[b]"r"(b),[shift]"I"(shift)
				222	: "%hi","%lo"
				223	);
				224	} else if ((shift >0) && (shift < 32))
				225	{
				226	asm ("mult %[a], %[b] \t\n"
				227	"li %[tmp],1 \t\n"
				228	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				229	"mflo %[res] \t\n"
				230	"addu %[tmp1],%[tmp],%[res] \t\n"
				231	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				232	"addu %[res],%[res],%[tmp] \t\n"
				233	"mfhi %[tmp] \t\n"
				234	"addu %[tmp],%[tmp],%[tmp1] \t\n"
				235	"sll %[tmp],%[tmp],%[lshift] \t\n"
				236	"srl %[res],%[res],%[rshift] \t\n"
				237	"or %[res],%[res],%[tmp] \t\n"
				238	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				239	: [a]"r"(a),[b]"r"(b),[lshift]"I"(32-shift),[rshift]"I"(shift),[shiftm1]"I"(shift-1)
				240	: "%hi","%lo"
				241	);
				242	} else {
				243	asm ("mult %[a], %[b] \t\n"
				244	"li %[tmp],1 \t\n"
				245	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				246	"mflo %[res] \t\n"
				247	"addu %[tmp1],%[tmp],%[res] \t\n"
				248	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				249	"sra %[tmp2],%[tmp],0x1f \t\n"
				250	"addu %[res],%[res],%[tmp] \t\n"
				251	"mfhi %[tmp] \t\n"
				252	"addu %[tmp],%[tmp],%[tmp2] \t\n"
				253	"addu %[tmp],%[tmp],%[tmp1] \t\n" /tmp=hi/
				254	"srl %[tmp2],%[res],%[rshift] \t\n"
				255	"srav %[res], %[tmp],%[rshift]\t\n"
				256	"sll %[tmp],%[tmp],1 \t\n"
				257	"sll %[tmp],%[tmp],%[norbits] \t\n"
				258	"or %[tmp],%[tmp],%[tmp2] \t\n"
				259	"movz %[res],%[tmp],%[bit5] \t\n"
				260	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				261	: [a]"r"(a),[b]"r"(b),[norbits]"I"(~(shift)),[rshift]"I"(shift),[shiftm1] "I"(shift-1),[bit5]"I"(shift & 0x20)
				262	: "%hi","%lo"
				263	);
				264	}
				265	} else {
				266	asm ("mult %[a], %[b] \t\n"
				267	"li %[tmp],1 \t\n"
				268	"sll %[tmp],%[tmp],%[shiftm1] \t\n"
				269	"mflo %[res] \t\n"
				270	"addu %[tmp1],%[tmp],%[res] \t\n"
				271	"sltu %[tmp1],%[tmp1],%[tmp] \t\n" /obit?/
				272	"sra %[tmp2],%[tmp],0x1f \t\n"
				273	"addu %[res],%[res],%[tmp] \t\n"
				274	"mfhi %[tmp] \t\n"
				275	"addu %[tmp],%[tmp],%[tmp2] \t\n"
				276	"addu %[tmp],%[tmp],%[tmp1] \t\n" /tmp=hi/
				277	"srl %[tmp2],%[res],%[rshift] \t\n"
				278	"srav %[res], %[tmp],%[rshift]\t\n"
				279	"sll %[tmp],%[tmp],1 \t\n"
				280	"sll %[tmp],%[tmp],%[norbits] \t\n"
				281	"or %[tmp],%[tmp],%[tmp2] \t\n"
				282	"movz %[res],%[tmp],%[bit5] \t\n"
				283	: [res]"=&r"(result),[tmp]"=&r"(tmp),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				284	: [a]"r"(a),[b]"r"(b),[norbits]"r"(~(shift)),[rshift] "r"(shift),[shiftm1]"r"(shift-1),[bit5] "r"(shift & 0x20)
				285	: "%hi","%lo"
				286	);
				287	}
				288
				289	return result;
				290	}
				291
				// MIPS multiply-add: returns ((a*b) >> shift) + c. No rounding term is added.
				// `mult` leaves the 64-bit product in HI/LO (both declared as clobbered); each branch
				// extracts the shifted 32-bit window from HI/LO and then adds c with addu.
				292	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				293	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				294	GGLfixed result,t,tmp1,tmp2;
				295
				296	if (__builtin_constant_p(shift)) {
				// shift == 0: low word of the product plus c.
				297	if (shift == 0) {
				298	asm ("mult %[a], %[b] \t\n"
				299	"mflo %[lo] \t\n"
				300	"addu %[lo],%[lo],%[c] \t\n"
				301	: [lo]"=&r"(result)
				302	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				303	: "%hi","%lo"
				304	);
				// shift == 32: high word of the product plus c (the operand is still named "lo").
				305	} else if (shift == 32) {
				306	asm ("mult %[a], %[b] \t\n"
				307	"mfhi %[lo] \t\n"
				308	"addu %[lo],%[lo],%[c] \t\n"
				309	: [lo]"=&r"(result)
				310	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				311	: "%hi","%lo"
				312	);
				// 0 < shift < 32: (LO >> shift) | (HI << (32-shift)), then add c.
				313	} else if ((shift>0) && (shift<32)) {
				314	asm ("mult %[a], %[b] \t\n"
				315	"mflo %[res] \t\n"
				316	"mfhi %[t] \t\n"
				317	"srl %[res],%[res],%[rshift] \t\n"
				318	"sll %[t],%[t],%[lshift] \t\n"
				319	"or %[res],%[res],%[t] \t\n"
				320	"addu %[res],%[res],%[c] \t\n"
				321	: [res]"=&r"(result),[t]"=&r"(t)
				322	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
				323	: "%hi","%lo"
				324	);
				// Constant shift outside [0, 32]: general sequence. movz picks the combined HI/LO
				// value when bit 5 of shift (0x20) is clear, else the arithmetic shift of HI.
				// NOTE(review): shift is passed with an immediate ("I") constraint to register-form
				// instructions (nor, sllv, srav) -- confirm this branch assembles if it is ever reached.
				325	} else {
				326	asm ("mult %[a], %[b] \t\n"
				327	"nor %[tmp1],$zero,%[shift]\t\n"
				328	"mflo %[res] \t\n"
				329	"mfhi %[t] \t\n"
				330	"srl %[res],%[res],%[shift] \t\n"
				331	"sll %[tmp2],%[t],1 \t\n"
				332	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				333	"or %[tmp1],%[tmp2],%[res] \t\n"
				334	"srav %[res],%[t],%[shift] \t\n"
				335	"andi %[tmp2],%[shift],0x20\t\n"
				336	"movz %[res],%[tmp1],%[tmp2]\t\n"
				337	"addu %[res],%[res],%[c] \t\n"
				338	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				339	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
				340	: "%hi","%lo"
				341	);
				342	}
				// Run-time shift: same general sequence with shift held in a register ("r").
				343	} else {
				344	asm ("mult %[a], %[b] \t\n"
				345	"nor %[tmp1],$zero,%[shift]\t\n"
				346	"mflo %[res] \t\n"
				347	"mfhi %[t] \t\n"
				348	"srl %[res],%[res],%[shift] \t\n"
				349	"sll %[tmp2],%[t],1 \t\n"
				350	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				351	"or %[tmp1],%[tmp2],%[res] \t\n"
				352	"srav %[res],%[t],%[shift] \t\n"
				353	"andi %[tmp2],%[shift],0x20\t\n"
				354	"movz %[res],%[tmp1],%[tmp2]\t\n"
				355	"addu %[res],%[res],%[c] \t\n"
				356	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				357	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
				358	: "%hi","%lo"
				359	);
				360	}
				361	return result;
				362	}
				363
				// MIPS multiply-subtract: returns ((a*b) >> shift) - c. No rounding term is added.
				// `mult` leaves the 64-bit product in HI/LO (both declared as clobbered); each branch
				// extracts the shifted 32-bit window from HI/LO and then subtracts c with subu.
				364	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				365	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				366	GGLfixed result,t,tmp1,tmp2;
				367
				368	if (__builtin_constant_p(shift)) {
				// shift == 0: low word of the product minus c.
				369	if (shift == 0) {
				370	asm ("mult %[a], %[b] \t\n"
				371	"mflo %[lo] \t\n"
				372	"subu %[lo],%[lo],%[c] \t\n"
				373	: [lo]"=&r"(result)
				374	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				375	: "%hi","%lo"
				376	);
				// shift == 32: high word of the product minus c (the operand is still named "lo").
				377	} else if (shift == 32) {
				378	asm ("mult %[a], %[b] \t\n"
				379	"mfhi %[lo] \t\n"
				380	"subu %[lo],%[lo],%[c] \t\n"
				381	: [lo]"=&r"(result)
				382	: [a]"r"(a),[b]"r"(b),[c]"r"(c)
				383	: "%hi","%lo"
				384	);
				// 0 < shift < 32: (LO >> shift) | (HI << (32-shift)), then subtract c.
				385	} else if ((shift>0) && (shift<32)) {
				386	asm ("mult %[a], %[b] \t\n"
				387	"mflo %[res] \t\n"
				388	"mfhi %[t] \t\n"
				389	"srl %[res],%[res],%[rshift] \t\n"
				390	"sll %[t],%[t],%[lshift] \t\n"
				391	"or %[res],%[res],%[t] \t\n"
				392	"subu %[res],%[res],%[c] \t\n"
				393	: [res]"=&r"(result),[t]"=&r"(t)
				394	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[lshift]"I"(32-shift),[rshift]"I"(shift)
				395	: "%hi","%lo"
				396	);
				// Constant shift outside [0, 32]: general sequence. movz picks the combined HI/LO
				// value when bit 5 of shift (0x20) is clear, else the arithmetic shift of HI.
				// NOTE(review): shift is passed with an immediate ("I") constraint to register-form
				// instructions (nor, sllv, srav) -- confirm this branch assembles if it is ever reached.
				397	} else {
				398	asm ("mult %[a], %[b] \t\n"
				399	"nor %[tmp1],$zero,%[shift]\t\n"
				400	"mflo %[res] \t\n"
				401	"mfhi %[t] \t\n"
				402	"srl %[res],%[res],%[shift] \t\n"
				403	"sll %[tmp2],%[t],1 \t\n"
				404	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				405	"or %[tmp1],%[tmp2],%[res] \t\n"
				406	"srav %[res],%[t],%[shift] \t\n"
				407	"andi %[tmp2],%[shift],0x20\t\n"
				408	"movz %[res],%[tmp1],%[tmp2]\t\n"
				409	"subu %[res],%[res],%[c] \t\n"
				410	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				411	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"I"(shift)
				412	: "%hi","%lo"
				413	);
				414	}
				// Run-time shift: same general sequence with shift held in a register ("r").
				415	} else {
				416	asm ("mult %[a], %[b] \t\n"
				417	"nor %[tmp1],$zero,%[shift]\t\n"
				418	"mflo %[res] \t\n"
				419	"mfhi %[t] \t\n"
				420	"srl %[res],%[res],%[shift] \t\n"
				421	"sll %[tmp2],%[t],1 \t\n"
				422	"sllv %[tmp2],%[tmp2],%[tmp1] \t\n"
				423	"or %[tmp1],%[tmp2],%[res] \t\n"
				424	"srav %[res],%[t],%[shift] \t\n"
				425	"andi %[tmp2],%[shift],0x20\t\n"
				426	"movz %[res],%[tmp1],%[tmp2]\t\n"
				427	"subu %[res],%[res],%[c] \t\n"
				428	: [res]"=&r"(result),[t]"=&r"(t),[tmp1]"=&r"(tmp1),[tmp2]"=&r"(tmp2)
				429	: [a]"r"(a),[b]"r"(b),[c]"r"(c),[shift]"r"(shift)
				430	: "%hi","%lo"
				431	);
				432	}
				433	return result;
				434	}
				435
				// MIPS full 32x32 -> 64-bit signed multiply.
				// `mult` leaves the product in HI/LO; mfhi/mflo copy the halves into u.s.hi / u.s.lo and
				// the union reads them back as one int64_t (u.res).
				436	inline int64_t gglMulii(int32_t x, int32_t y) CONST;
				437	inline int64_t gglMulii(int32_t x, int32_t y) {
				438	union {
				439	struct {
				// Member order follows the target byte order so that lo/hi overlay the correct
				// halves of res. If neither __MIPSEL__ nor __MIPSEB__ is defined the struct has
				// no members and the asm operands below will not compile.
				440	#if defined(__MIPSEL__)
				441	int32_t lo;
				442	int32_t hi;
				443	#elif defined(__MIPSEB__)
				444	int32_t hi;
				445	int32_t lo;
				446	#endif
				447	} s;
				448	int64_t res;
				449	}u;
				450	asm("mult %2, %3 \t\n"
				451	"mfhi %1 \t\n"
				452	"mflo %0 \t\n"
				453	: "=r"(u.s.lo), "=&r"(u.s.hi)
				454	: "%r"(x), "r"(y)
				455	: "%hi","%lo"
				456	);
				457	return u.res;
				458	}
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	459
				460	#else // ----------------------------------------------------------------------
				461
				462	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) CONST;
				463	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b, int shift) {
				464	return GGLfixed((int64_t(a)*b + (1<<(shift-1)))>>shift);
				465	}
				466	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				467	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				468	return GGLfixed((int64_t(a)*b)>>shift) + c;
				469	}
				470	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) CONST;
				471	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c, int shift) {
				472	return GGLfixed((int64_t(a)*b)>>shift) - c;
				473	}
				474	inline int64_t gglMulii(int32_t a, int32_t b) CONST;
				475	inline int64_t gglMulii(int32_t a, int32_t b) {
				476	return int64_t(a)*b;
				477	}
				478
				479	#endif
				480
				481	// ------------------------------------------------------------------------
				482
				483	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) CONST;
				484	inline GGLfixed gglMulx(GGLfixed a, GGLfixed b) {
				485	return gglMulx(a, b, 16);
				486	}
				487	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
				488	inline GGLfixed gglMulAddx(GGLfixed a, GGLfixed b, GGLfixed c) {
				489	return gglMulAddx(a, b, c, 16);
				490	}
				491	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) CONST;
				492	inline GGLfixed gglMulSubx(GGLfixed a, GGLfixed b, GGLfixed c) {
				493	return gglMulSubx(a, b, c, 16);
				494	}
				495
				496	// ------------------------------------------------------------------------
				497
				498	inline int32_t gglClz(int32_t x) CONST;
				499	inline int32_t gglClz(int32_t x)
				500	{
Duane Sand	0960411	2012-05-24 17:40:21 -0700	[diff] [blame]	501	#if (defined(__arm__) && !defined(__thumb__)) \|\| defined(__mips__)
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	502	return __builtin_clz(x);
				503	#else
				504	if (!x) return 32;
				505	int32_t exp = 31;
				506	if (x & 0xFFFF0000) { exp -=16; x >>= 16; }
				507	if (x & 0x0000ff00) { exp -= 8; x >>= 8; }
				508	if (x & 0x000000f0) { exp -= 4; x >>= 4; }
				509	if (x & 0x0000000c) { exp -= 2; x >>= 2; }
				510	if (x & 0x00000002) { exp -= 1; }
				511	return exp;
				512	#endif
				513	}
				514
				515	// ------------------------------------------------------------------------
				516
				// Out-of-line division routine, only declared here. The inline wrappers
				// gglDivQ16() and gglDivx() in this header both call it with i = 16.
				517	int32_t gglDivQ(GGLfixed n, GGLfixed d, int32_t i) CONST;
				518
				519	inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) CONST;
				520	inline int32_t gglDivQ16(GGLfixed n, GGLfixed d) {
				521	return gglDivQ(n, d, 16);
				522	}
				523
				524	inline int32_t gglDivx(GGLfixed n, GGLfixed d) CONST;
				525	inline int32_t gglDivx(GGLfixed n, GGLfixed d) {
				526	return gglDivQ(n, d, 16);
				527	}
				528
				529	// ------------------------------------------------------------------------
				530
				531	inline GGLfixed gglRecipFast(GGLfixed x) CONST;
				532	inline GGLfixed gglRecipFast(GGLfixed x)
				533	{
				534	// This is a really bad approximation of 1/x, but it's also
				535	// very fast. x must be strictly positive.
				536	// if x between [0.5, 1[ , then 1/x = 3-2*x
				537	// (we use 2.30 fixed-point)
				538	const int32_t lz = gglClz(x);
				539	return (0xC0000000 - (x << (lz - 1))) >> (30-lz);
				540	}
				541
				542	// ------------------------------------------------------------------------
				543
				544	inline GGLfixed gglClampx(GGLfixed c) CONST;
				545	inline GGLfixed gglClampx(GGLfixed c)
				546	{
				547	#if defined(__thumb__)
				548	// clamp without branches
				549	c &= ~(c>>31); c = FIXED_ONE - c;
				550	c &= ~(c>>31); c = FIXED_ONE - c;
				551	#else
				552	#if defined(__arm__)
				553	// I don't know why gcc thinks its smarter than me! The code below
				554	// clamps to zero in one instruction, but gcc won't generate it and
				555	// replace it by a cmp + movlt (it's quite amazing actually).
				556	asm("bic %0, %1, %1, asr #31\n" : "=r"(c) : "r"(c));
				557	#else
				558	c &= ~(c>>31);
				559	#endif
				560	if (c>FIXED_ONE)
				561	c = FIXED_ONE;
				562	#endif
				563	return c;
				564	}
				565
				566	// ------------------------------------------------------------------------
				567
				568	#endif // ANDROID_GGL_FIXED_H