/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
17
#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "codeflinger/GGLAssembler.h"

#ifdef __ARM_ARCH__
#include <machine/cpu-features.h>
#endif

The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080027namespace android {
28
29// ----------------------------------------------------------------------------
30
31void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
32{
33 const int bits = addr.size;
34 const int inc = (flags & WRITE_BACK)?1:0;
35 switch (bits) {
36 case 32:
37 if (inc) STR(AL, s.reg, addr.reg, immed12_post(4));
38 else STR(AL, s.reg, addr.reg);
39 break;
40 case 24:
41 // 24 bits formats are a little special and used only for RGB
42 // 0x00BBGGRR is unpacked as R,G,B
43 STRB(AL, s.reg, addr.reg, immed12_pre(0));
44 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
45 STRB(AL, s.reg, addr.reg, immed12_pre(1));
46 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
47 STRB(AL, s.reg, addr.reg, immed12_pre(2));
48 if (!(s.flags & CORRUPTIBLE)) {
49 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
50 }
51 if (inc)
52 ADD(AL, 0, addr.reg, addr.reg, imm(3));
53 break;
54 case 16:
55 if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2));
56 else STRH(AL, s.reg, addr.reg);
57 break;
58 case 8:
59 if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1));
60 else STRB(AL, s.reg, addr.reg);
61 break;
62 }
63}
64
65void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
66{
67 Scratch scratches(registerFile());
68 int s0;
69
70 const int bits = addr.size;
71 const int inc = (flags & WRITE_BACK)?1:0;
72 switch (bits) {
73 case 32:
74 if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4));
75 else LDR(AL, s.reg, addr.reg);
76 break;
77 case 24:
78 // 24 bits formats are a little special and used only for RGB
79 // R,G,B is packed as 0x00BBGGRR
80 s0 = scratches.obtain();
81 if (s.reg != addr.reg) {
82 LDRB(AL, s.reg, addr.reg, immed12_pre(0)); // R
83 LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
84 ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
85 LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
86 ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
87 } else {
88 int s1 = scratches.obtain();
89 LDRB(AL, s1, addr.reg, immed12_pre(0)); // R
90 LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
91 ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
92 LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
93 ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
94 }
95 if (inc)
96 ADD(AL, 0, addr.reg, addr.reg, imm(3));
97 break;
98 case 16:
99 if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2));
100 else LDRH(AL, s.reg, addr.reg);
101 break;
102 case 8:
103 if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1));
104 else LDRB(AL, s.reg, addr.reg);
105 break;
106 }
107}
108
// Emit code that extracts the bit-field [h-1 .. l] of register 's' into
// d.reg, right-justified.  'bits' is the total width of the packed pixel
// 's' comes from; knowing whether the field touches the top of the pixel
// (h == bits) lets us skip masking the high side.  On exit d.s records
// the width of the extracted component.
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    // component width; assumed to fit in 8 bits
    // (appears true for all GGL pixel formats — TODO confirm)
    assert(maskLen<=8);
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        // field is the whole (top-justified, l==0) value: plain move
        MOV(AL, 0, d.reg, s);                       // component = packed;
    } else if ((h == bits) && l) {
        // field reaches the top bit: a shift right is enough
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));      // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));            // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));           // component = packed & mask;
    } else {
        // general case: ARMv7 unsigned bit-field extract
        UBFX(AL, d.reg, s, l, maskLen);             // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        // field does not reach the top of the word: clear the high bits,
        // either with an immediate mask, or — when no immediate encoding
        // fits — by shifting the field up against bit 31 so the LSR
        // below clears them.
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    // right-justify the field
    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    // if no instruction was emitted above, copy into the destination
    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}
156
157void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
158{
159 extract(d, s.reg,
160 s.format.c[component].h,
161 s.format.c[component].l,
162 s.size());
163}
164
165void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
166{
167 integer_t r(d.reg, 32, d.flags);
168 extract(r, s.reg,
169 s.format.c[component].h,
170 s.format.c[component].l,
171 s.size());
172 d = component_t(r);
173}
174
175
176void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
177{
178 if (s.l || (s.flags & CLEAR_HI)) {
179 extract(d, s.reg, s.h, s.l, 32);
180 expand(d, d, dbits);
181 } else {
182 expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
183 }
184}
185
186void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
187{
188 integer_t r(d.reg, 32, d.flags);
189 expand(r, s, dbits);
190 d = component_t(r);
191}
192
193void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
194{
195 assert(src.size());
196
197 int sbits = src.size();
198 int s = src.reg;
199 int d = dst.reg;
200
201 // be sure to set 'dst' after we read 'src' as they may be identical
202 dst.s = dbits;
203 dst.flags = 0;
204
205 if (dbits<=sbits) {
206 if (s != d) {
207 MOV(AL, 0, d, s);
208 }
209 return;
210 }
211
212 if (sbits == 1) {
213 RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
214 // d = (s<<dbits) - s;
215 return;
216 }
217
218 if (dbits % sbits) {
219 MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
220 // d = s << (dbits-sbits);
221 dbits -= sbits;
222 do {
223 ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
224 // d |= d >> sbits;
225 dbits -= sbits;
226 sbits *= 2;
227 } while(dbits>0);
228 return;
229 }
230
231 dbits -= sbits;
232 do {
233 ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
234 // d |= d<<sbits;
235 s = d;
236 dbits -= sbits;
237 if (sbits*2 < dbits) {
238 sbits *= 2;
239 }
240 } while(dbits>0);
241}
242
243void GGLAssembler::downshift(
244 pixel_t& d, int component, component_t s, const reg_t& dither)
245{
246 const needs_t& needs = mBuilderContext.needs;
247 Scratch scratches(registerFile());
248
249 int sh = s.h;
250 int sl = s.l;
251 int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
252 int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0;
253 int sbits = sh - sl;
254
255 int dh = d.format.c[component].h;
256 int dl = d.format.c[component].l;
257 int dbits = dh - dl;
258 int dithering = 0;
259
260 LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
261
262 if (sbits>dbits) {
263 // see if we need to dither
264 dithering = mDithering;
265 }
266
267 int ireg = d.reg;
268 if (!(d.flags & FIRST)) {
269 if (s.flags & CORRUPTIBLE) {
270 ireg = s.reg;
271 } else {
272 ireg = scratches.obtain();
273 }
274 }
275 d.flags &= ~FIRST;
276
277 if (maskHiBits) {
278 // we need to mask the high bits (and possibly the lowbits too)
279 // and we might be able to use immediate mask.
280 if (!dithering) {
281 // we don't do this if we only have maskLoBits because we can
282 // do it more efficiently below (in the case where dl=0)
283 const int offset = sh - dbits;
284 if (dbits<=8 && offset >= 0) {
285 const uint32_t mask = ((1<<dbits)-1) << offset;
286 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
287 build_and_immediate(ireg, s.reg, mask, 32);
288 sl = offset;
289 s.reg = ireg;
290 sbits = dbits;
291 maskLoBits = maskHiBits = 0;
292 }
293 }
294 } else {
295 // in the dithering case though, we need to preserve the lower bits
296 const uint32_t mask = ((1<<sbits)-1) << sl;
297 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
298 build_and_immediate(ireg, s.reg, mask, 32);
299 s.reg = ireg;
300 maskLoBits = maskHiBits = 0;
301 }
302 }
303 }
304
305 // XXX: we could special case (maskHiBits & !maskLoBits)
306 // like we do for maskLoBits below, but it happens very rarely
307 // that we have maskHiBits only and the conditions necessary to lead
308 // to better code (like doing d |= s << 24)
309
310 if (maskHiBits) {
311 MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
312 sl += 32-sh;
313 sh = 32;
314 s.reg = ireg;
315 maskHiBits = 0;
316 }
317
318 // Downsampling should be performed as follows:
319 // V * ((1<<dbits)-1) / ((1<<sbits)-1)
320 // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
321 // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
322 // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
323 // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
324 //
325 // By approximating (1>>dbits) and (1>>sbits) to 0:
326 //
327 // V>>(sbits-dbits) - V>>sbits
328 //
329 // A good approximation is V>>(sbits-dbits),
330 // but better one (needed for dithering) is:
331 //
332 // (V>>(sbits-dbits)<<sbits - V)>>sbits
333 // (V<<dbits - V)>>sbits
334 // (V - V>>dbits)>>(sbits-dbits)
335
336 // Dithering is done here
337 if (dithering) {
338 comment("dithering");
339 if (sl) {
340 MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
341 sh -= sl;
342 sl = 0;
343 s.reg = ireg;
344 }
345 // scaling (V-V>>dbits)
346 SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
347 const int shift = (GGL_DITHER_BITS - (sbits-dbits));
348 if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
349 else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
350 else ADD(AL, 0, ireg, ireg, dither.reg);
351 s.reg = ireg;
352 }
353
354 if ((maskLoBits|dithering) && (sh > dbits)) {
355 int shift = sh-dbits;
356 if (dl) {
357 MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
358 if (ireg == d.reg) {
359 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
360 } else {
361 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
362 }
363 } else {
364 if (ireg == d.reg) {
365 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
366 } else {
367 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
368 }
369 }
370 } else {
371 int shift = sh-dh;
372 if (shift>0) {
373 if (ireg == d.reg) {
374 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
375 } else {
376 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
377 }
378 } else if (shift<0) {
379 if (ireg == d.reg) {
380 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
381 } else {
382 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
383 }
384 } else {
385 if (ireg == d.reg) {
386 if (s.reg != d.reg) {
387 MOV(AL, 0, d.reg, s.reg);
388 }
389 } else {
390 ORR(AL, 0, d.reg, d.reg, s.reg);
391 }
392 }
393 }
394}
395
396}; // namespace android