Blame - libpixelflinger/scanline.cpp - AOSPA/android_system_core

blob: a2f43eb0816dd1ac42678da700b556df89ef5af8 [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/* libs/pixelflinger/scanline.cpp
				2	**
				3	** Copyright 2006, The Android Open Source Project
				4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18
				19	#define LOG_TAG "pixelflinger"
				20
				21	#include <assert.h>
				22	#include <stdlib.h>
				23	#include <stdio.h>
				24	#include <string.h>
				25
				26	#include <cutils/memory.h>
				27	#include <cutils/log.h>
				28
				29	#include "buffer.h"
				30	#include "scanline.h"
				31
				32	#include "codeflinger/CodeCache.h"
				33	#include "codeflinger/GGLAssembler.h"
				34	#include "codeflinger/ARMAssembler.h"
				35	//#include "codeflinger/ARMAssemblerOptimizer.h"
				36
				37	// ----------------------------------------------------------------------------
				38
				39	#define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline
				40	#define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic
				41	#define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic
				42	#define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen
				43
				44	#ifdef NDEBUG
				45	# define ANDROID_RELEASE
				46	# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
				47	#else
				48	# define ANDROID_DEBUG
				49	# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
				50	#endif
				51
				52	#if defined(__arm__)
				53	# define ANDROID_ARM_CODEGEN 1
				54	#else
				55	# define ANDROID_ARM_CODEGEN 0
				56	#endif
				57
				58	#define DEBUG__CODEGEN_ONLY 0
				59
				60
				61	#define ASSEMBLY_SCRATCH_SIZE 2048
				62
				63	// ----------------------------------------------------------------------------
				64	namespace android {
				65	// ----------------------------------------------------------------------------
				66
				// Per-scanline (y) set-up routines; one of these is stored in c->init_y.
				67	static void init_y(context_t*, int32_t);
				68	static void init_y_noop(context_t*, int32_t);
				69	static void init_y_packed(context_t*, int32_t);
				70	static void init_y_error(context_t*, int32_t);
				71
				// Y steppers; one of these is stored in c->step_y.
				72	static void step_y__generic(context_t* c);
				73	static void step_y__nop(context_t*);
				74	static void step_y__smooth(context_t* c);
				75	static void step_y__tmu(context_t* c);
				76	static void step_y__w(context_t* c);
				77
				// Scanline routines; one of these is stored in c->scanline.
				78	static void scanline(context_t* c);
				79	static void scanline_perspective(context_t* c);
				80	static void scanline_perspective_single(context_t* c);
				81	static void scanline_t32cb16blend(context_t* c);
				82	static void scanline_t32cb16(context_t* c);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame^]	83	static void scanline_col32cb16blend(context_t* c);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	84	static void scanline_memcpy(context_t* c);
				85	static void scanline_memset8(context_t* c);
				86	static void scanline_memset16(context_t* c);
				87	static void scanline_memset32(context_t* c);
				88	static void scanline_noop(context_t* c);
				89	static void scanline_set(context_t* c);
				90	static void scanline_clear(context_t* c);
				91
				92	static void rect_generic(context_t* c, size_t yc);
				93	static void rect_memcpy(context_t* c, size_t yc);
				94
				// Externally defined kernels with C linkage (presumably the hand-written
				// ARM/NEON assembly -- confirm against the .S sources). The destination
				// and the texture/colour source are passed by pointer; only the ARM
				// fixed-colour variant takes its colour by value.
				95	extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t);
				96	extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame^]	97	extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct);
				98	extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	99
				100	// ----------------------------------------------------------------------------
				101
				// One special-case pipeline: if `filter` matches the context's needs,
				// pick_scanline() installs `scanline` and `init_y` into the context.
				102	struct shortcut_t {
				// filter tested with c->state.needs.match()
				103	needs_filter_t filter;
				// human-readable label for the entry
				104	const char* desc;
				// function pointers (not member functions): they are filled from the
				// aggregate initialisers of shortcuts[] and copied into c->scanline and
				// c->init_y.
				105	void (*scanline)(context_t*);
				106	void (*init_y)(context_t*, int32_t);
				107	};
				108
				109	// Keep in sync with needs
				// Table of special-case pipelines. Each entry gives a needs_filter_t, a
				// description string, the scanline routine and the init_y routine to
				// install. pick_scanline() walks the table from the first entry and takes
				// the first one whose filter matches c->state.needs, so order matters.
				// NOTE(review): each filter looks like a {value, mask} pair of needs
				// words -- confirm against needs_filter_t and needs_t::match().
				110	static shortcut_t shortcuts[] = {
				111	{ { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
				112	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				113	"565 fb, 8888 tx, blend", scanline_t32cb16blend, init_y_noop },
				114	{ { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
				115	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				116	"565 fb, 8888 tx", scanline_t32cb16, init_y_noop },
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame^]	117	{ { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
				118	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
				119	"565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed },
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	120	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				121	{ 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } },
				122	"(nop) alpha test", scanline_noop, init_y_noop },
				123	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				124	{ 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } },
				125	"(nop) depth test", scanline_noop, init_y_noop },
				126	{ { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } },
				127	{ 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } },
				128	"(nop) logic_op", scanline_noop, init_y_noop },
				129	{ { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } },
				130	{ 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } },
				131	"(nop) color mask", scanline_noop, init_y_noop },
				132	{ { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } },
				133	{ 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
				134	"(set) logic_op", scanline_set, init_y_noop },
				135	{ { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } },
				136	{ 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
				137	"(clear) logic_op", scanline_clear, init_y_noop },
				138	{ { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } },
				139	{ 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } },
				140	"(clear) blending 0/0", scanline_clear, init_y_noop },
				141	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				142	{ 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } },
				143	"(error) invalid color-buffer format", scanline_noop, init_y_error },
				144	};
				// Filter used by pick_scanline() for the straight-copy case: when it
				// matches and texture unit 0 has the same format as the colour buffer,
				// scanline_memcpy is installed.
				145	static const needs_filter_t noblend1to1 = {
				146	// (disregard dithering, see below)
				147	{ 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } },
				148	{ 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } }
				149	};
				// Filter used by pick_scanline() for solid fills: when it matches, one of
				// scanline_memset8/16/32 is chosen from the colour-buffer pixel size
				// (1, 2 or 4), despite the "16" in the name.
				150	static const needs_filter_t fill16noblend = {
				151	{ 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } },
				152	{ 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } }
				153	};
				154
				155	// ----------------------------------------------------------------------------
				156
				157	#if ANDROID_ARM_CODEGEN
				// Process-wide cache of generated scanline code, looked up by needs key in
				// pick_scanline(). NOTE(review): 12 * 1024 is presumably the cache size
				// in bytes -- confirm against CodeCache.
				158	static CodeCache gCodeCache(12 * 1024);
				159
				// An Assembly that also remembers the needs_t it was generated for, so
				// that key() can be handed to gCodeCache.cache().
				160	class ScanlineAssembly : public Assembly {
				161	AssemblyKey<needs_t> mKey;
				162	public:
				// needs: pipeline description used as the cache key.
				// size:  forwarded to the Assembly base constructor.
				163	ScanlineAssembly(needs_t needs, size_t size)
				164	: Assembly(size), mKey(needs) { }
				165	const AssemblyKey<needs_t>& key() const { return mKey; }
				166	};
				167	#endif
				168
				169	// ----------------------------------------------------------------------------
				170
				// Installs the default (generic) init_y, step_y and scanline routines in
				// the context.
				171	void ggl_init_scanline(context_t* c)
				172	{
				173	c->init_y = init_y;
				174	c->step_y = step_y__generic;
				175	c->scanline = scanline;
				176	}
				177
				// Releases what the scanline code holds on behalf of the context: the
				// coverage buffer and, on ARM codegen builds, the strong reference that
				// pick_scanline() took on the generated assembly.
				// Neither pointer is reset afterwards.
				178	void ggl_uninit_scanline(context_t* c)
				179	{
				180	if (c->state.buffers.coverage)
				181	free(c->state.buffers.coverage);
				182	#if ANDROID_ARM_CODEGEN
				183	if (c->scanline_as)
				// matches the incStrong(c) done in pick_scanline()
				184	c->scanline_as->decStrong(c);
				185	#endif
				186	}
				187
				188	// ----------------------------------------------------------------------------
				189
				// Chooses c->scanline, c->init_y and c->step_y for the current needs.
				//
				// Order of preference (skipped entirely when DEBUG__CODEGEN_ONLY is set):
				//   1. same-format texture copy     -> scanline_memcpy
				//   2. solid fill without blending  -> scanline_memset8/16/32
				//   3. first matching shortcuts[] entry
				// Otherwise, on ARM codegen builds, a generated pipeline is looked up in
				// (or generated and added to) gCodeCache; on other builds the generic
				// scanline() is used. If generation or caching fails, no-op routines are
				// installed.
				190	static void pick_scanline(context_t* c)
				191	{
				192	#if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0))
				193
				194	#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC
				195	c->init_y = init_y;
				196	c->step_y = step_y__generic;
				197	c->scanline = scanline;
				198	return;
				199	#endif
				200
				201	//printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n",
				202	// c->state.needs.n, c->state.needs.p,
				203	// c->state.needs.t[0], c->state.needs.t[1]);
				204
				205	// first handle the special case that we cannot test with a filter
				206	const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n);
				207	if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) {
				208	if (c->state.needs.match(noblend1to1)) {
				209	// this will match regardless of dithering state, since both
				210	// src and dest have the same format anyway, there is no dithering
				211	// to be done.
				212	const GGLFormat* f =
				213	&(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]);
				214	if ((f->components == GGL_RGB) ||
				215	(f->components == GGL_RGBA) ||
				216	(f->components == GGL_LUMINANCE) ||
				217	(f->components == GGL_LUMINANCE_ALPHA))
				218	{
				219	// format must have all of RGB components
				220	// (so the current color doesn't show through)
				221	c->scanline = scanline_memcpy;
				222	c->init_y = init_y_noop;
				223	return;
				224	}
				225	}
				226	}
				227
				228	if (c->state.needs.match(fill16noblend)) {
				229	c->init_y = init_y_packed;
				// any other pixel size falls through; init_y is reassigned below
				230	switch (c->formats[cb_format].size) {
				231	case 1: c->scanline = scanline_memset8; return;
				232	case 2: c->scanline = scanline_memset16; return;
				233	case 4: c->scanline = scanline_memset32; return;
				234	}
				235	}
				236
				237	const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t);
				238	for (int i=0 ; i<numFilters ; i++) {
				239	if (c->state.needs.match(shortcuts[i].filter)) {
				240	c->scanline = shortcuts[i].scanline;
				241	c->init_y = shortcuts[i].init_y;
				242	return;
				243	}
				244	}
				245
				246	#endif // DEBUG__CODEGEN_ONLY
				247
				248	c->init_y = init_y;
				249	c->step_y = step_y__generic;
				250
				251	#if ANDROID_ARM_CODEGEN
				252	// we're going to have to generate some code...
				253	// here, generate code for our pixel pipeline
				254	const AssemblyKey<needs_t> key(c->state.needs);
				255	sp<Assembly> assembly = gCodeCache.lookup(key);
				256	if (assembly == 0) {
				257	// create a new assembly region
				258	sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs,
				259	ASSEMBLY_SCRATCH_SIZE);
				260	// initialize our assembler
				261	GGLAssembler assembler( new ARMAssembler(a) );
				262	//GGLAssembler assembler(
				263	// new ARMAssemblerOptimizer(new ARMAssembler(a)) );
				264	// generate the scanline code for the given needs
				265	int err = assembler.scanline(c->state.needs, c);
				266	if (ggl_likely(!err)) {
				267	// finally, cache this assembly
				268	err = gCodeCache.cache(a->key(), a);
				269	}
				270	if (ggl_unlikely(err)) {
				271	LOGE("error generating or caching assembly. Reverting to NOP.");
				272	c->scanline = scanline_noop;
				273	c->init_y = init_y_noop;
				274	c->step_y = step_y__nop;
				275	return;
				276	}
				277	assembly = a;
				278	}
				279
				280	// release the previous assembly
				281	if (c->scanline_as) {
				282	c->scanline_as->decStrong(c);
				283	}
				284
				285	//LOGI("using generated pixel-pipeline");
				286	c->scanline_as = assembly.get();
				287	c->scanline_as->incStrong(c); // hold on to assembly
				// the generated code is entered through a scanline-shaped function pointer
				288	c->scanline = (void(*)(context_t* c))assembly->base();
				289	#else
				290	// LOGW("using generic (slow) pixel-pipeline");
				291	c->scanline = scanline;
				292	#endif
				293	}
				294
				// Public entry point: picks the scanline routine, then, when both
				// GGL_ENABLE_W and GGL_ENABLE_TMUS are set, saves the picked routine in
				// c->span and installs a perspective wrapper as c->scanline instead.
				295	void ggl_pick_scanline(context_t* c)
				296	{
				297	pick_scanline(c);
				298	if ((c->state.enables & GGL_ENABLE_W) &&
				299	(c->state.enables & GGL_ENABLE_TMUS))
				300	{
				301	c->span = c->scanline;
				302	c->scanline = scanline_perspective;
				// x & (x-1) is zero when x has at most one bit set
				303	if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) {
				304	// only one TMU enabled
				305	c->scanline = scanline_perspective_single;
				306	}
				307	}
				308	}
				309
				310	// ----------------------------------------------------------------------------
				311
				// Helpers of the generic pipeline; they are only defined in the #else
				// branch of the conditional below.
				312	static void blending(context_t* c, pixel_t* fragment, pixel_t* fb);
				313	static void blend_factor(context_t* c, pixel_t* r, uint32_t factor,
				314	const pixel_t* src, const pixel_t* dst);
				315	static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv);
				316
				317	#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
				318
				319	// no need to compile the generic-pipeline, it can't be reached
				// Empty stand-in so the references to scanline() in this file still resolve.
				320	void scanline(context_t*)
				321	{
				322	}
				323
				324	#else
				325
				// Brings two component values to a common bit size. u and v are values
				// with bit sizes su and sv; whichever has the smaller size is expanded
				// with ggl_expand() to the larger one and its size is updated. Nothing
				// happens when either size is zero or the sizes are already equal.
				// All four arguments are in/out references.
				326	void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv)
				327	{
				328	if (su && sv) {
				329	if (su > sv) {
				330	v = ggl_expand(v, sv, su);
				331	sv = su;
				332	} else if (su < sv) {
				333	u = ggl_expand(u, su, sv);
				334	su = sv;
				335	}
				336	}
				337	}
				338
				// Blends `fragment` with the framebuffer pixel `fb` and stores the result
				// in `fragment`. Index 0 is alpha and 1..3 are red, green, blue (this is
				// how scanline() fills the fragment).
				// Each result is fragment*src_factor + fb*dst_factor, clamped to
				// (1<<size)-1. Both pixels may be modified by rescale().
				339	void blending(context_t* c, pixel_t* fragment, pixel_t* fb)
				340	{
				// give fragment and fb the same bit size per component
				341	rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]);
				342	rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]);
				343	rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]);
				344	rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]);
				345
				346	pixel_t sf, df;
				347	blend_factor(c, &sf, c->state.blend.src, fragment, fb);
				348	blend_factor(c, &df, c->state.blend.dst, fragment, fb);
				349
				350	fragment->c[1] =
				351	gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1]));
				352	fragment->c[2] =
				353	gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2]));
				354	fragment->c[3] =
				355	gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3]));
				356
				// with separate alpha blending the factors are recomputed before the
				// alpha component is blended
				357	if (c->state.blend.alpha_separate) {
				358	blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb);
				359	blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb);
				360	}
				361
				362	fragment->c[0] =
				363	gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0]));
				364
				365	// clamp to 1.0
				366	if (fragment->c[0] >= (1LU<<fragment->s[0]))
				367	fragment->c[0] = (1<<fragment->s[0])-1;
				368	if (fragment->c[1] >= (1LU<<fragment->s[1]))
				369	fragment->c[1] = (1<<fragment->s[1])-1;
				370	if (fragment->c[2] >= (1LU<<fragment->s[2]))
				371	fragment->c[2] = (1<<fragment->s[2])-1;
				372	if (fragment->c[3] >= (1LU<<fragment->s[3]))
				373	fragment->c[3] = (1<<fragment->s[3])-1;
				374	}
				375
				// Converts a component value x of `size` bits into a 16-bit-scaled blend
				// factor. Returns `def` when size is zero (component absent).
				376	static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0)
				377	{
				378	if (!size)
				379	return def;
				380
				381	// scale to 16 bits
				382	if (size > 16) {
				383	x >>= (size - 16);
				384	} else if (size < 16) {
				385	x = ggl_expand(x, size, 16);
				386	}
				// adds 1 to values >= 0x8000, so a full-scale 0xFFFF becomes 0x10000
				387	x += x >> 15;
				388	return x;
				389	}
				390
				// Fills r->c[0..3] with the blend factors selected by `factor`, derived
				// from the source pixel `src` and the destination pixel `dst`.
				// Only r->c[] is written; r->s[] is never touched.
				// NOTE(review): GGL_SRC_ALPHA_SATURATE and any unlisted factor leave r
				// unmodified, so callers that pass an uninitialised pixel_t read
				// indeterminate values in that case.
				// `c` is not used.
				391	void blend_factor(context_t* c, pixel_t* r,
				392	uint32_t factor, const pixel_t* src, const pixel_t* dst)
				393	{
				394	switch (factor) {
				395	case GGL_ZERO:
				396	r->c[1] =
				397	r->c[2] =
				398	r->c[3] =
				399	r->c[0] = 0;
				400	break;
				401	case GGL_ONE:
				402	r->c[1] =
				403	r->c[2] =
				404	r->c[3] =
				405	r->c[0] = FIXED_ONE;
				406	break;
				407	case GGL_DST_COLOR:
				408	r->c[1] = blendfactor(dst->c[1], dst->s[1]);
				409	r->c[2] = blendfactor(dst->c[2], dst->s[2]);
				410	r->c[3] = blendfactor(dst->c[3], dst->s[3]);
				411	r->c[0] = blendfactor(dst->c[0], dst->s[0]);
				412	break;
				413	case GGL_SRC_COLOR:
				414	r->c[1] = blendfactor(src->c[1], src->s[1]);
				415	r->c[2] = blendfactor(src->c[2], src->s[2]);
				416	r->c[3] = blendfactor(src->c[3], src->s[3]);
				417	r->c[0] = blendfactor(src->c[0], src->s[0]);
				418	break;
				419	case GGL_ONE_MINUS_DST_COLOR:
				420	r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]);
				421	r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]);
				422	r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]);
				423	r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]);
				424	break;
				425	case GGL_ONE_MINUS_SRC_COLOR:
				426	r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]);
				427	r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]);
				428	r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]);
				429	r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]);
				430	break;
				// alpha-based factors: a missing alpha component (size 0) counts as
				// FIXED_ONE
				431	case GGL_SRC_ALPHA:
				432	r->c[1] =
				433	r->c[2] =
				434	r->c[3] =
				435	r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE);
				436	break;
				437	case GGL_ONE_MINUS_SRC_ALPHA:
				438	r->c[1] =
				439	r->c[2] =
				440	r->c[3] =
				441	r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE);
				442	break;
				443	case GGL_DST_ALPHA:
				444	r->c[1] =
				445	r->c[2] =
				446	r->c[3] =
				447	r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
				448	break;
				449	case GGL_ONE_MINUS_DST_ALPHA:
				450	r->c[1] =
				451	r->c[2] =
				452	r->c[3] =
				453	r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
				454	break;
				455	case GGL_SRC_ALPHA_SATURATE:
				456	// XXX: GGL_SRC_ALPHA_SATURATE
				457	break;
				458	}
				459	}
				460
				// Applies the texture wrap mode to one fixed-point texture coordinate.
				//   GGL_REPEAT: the upper 16 bits of coord (as unsigned) times size.
				//   GGL_CLAMP:  coord clamped to [FIXED_HALF, (size<<16) - FIXED_HALF].
				//   otherwise:  coord clamped to [0, size<<16].
				// NOTE(review): the REPEAT branch presumably expects coord to carry the
				// fraction in its full 32 bits -- confirm against the iterator scaling
				// done in init_y().
				461	static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap)
				462	{
				463	GGLfixed d;
				464	if (tx_wrap == GGL_REPEAT) {
				465	d = (uint32_t(coord)>>16) * size;
				466	} else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics
				467	const GGLfixed clamp_min = FIXED_HALF;
				468	const GGLfixed clamp_max = (size << 16) - FIXED_HALF;
				469	if (coord < clamp_min) coord = clamp_min;
				470	if (coord > clamp_max) coord = clamp_max;
				471	d = coord;
				472	} else { // 1:1
				473	const GGLfixed clamp_min = 0;
				474	const GGLfixed clamp_max = (size << 16);
				475	if (coord < clamp_min) coord = clamp_min;
				476	if (coord > clamp_max) coord = clamp_max;
				477	d = coord;
				478	}
				479	return d;
				480	}
				481
				// Adjusts the start value v of a colour iterator that advances by dvdx
				// per pixel over len pixels: if the value reached after len-1 steps would
				// be negative, v is raised by that amount; a v that is itself negative is
				// then forced to 0.
				482	static inline
				483	GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len)
				484	{
				485	const int32_t end = dvdx * (len-1) + v;
				486	if (end < 0)
				487	v -= end;
				// v>>31 is all ones for a negative v (assumes GGLcolor is a signed 32-bit
				// type with arithmetic shift -- TODO confirm), which clears v
				488	v &= ~(v>>31);
				489	return v;
				490	}
				491
				// Generic per-pixel pipeline for the span [c->iterators.xl,
				// c->iterators.xr) on line c->iterators.y.
				//
				// For every pixel, in this order: build the fragment from the colour
				// iterators, apply each enabled texture unit (fetch + texture
				// environment), apply anti-aliasing coverage to alpha, alpha test, depth
				// test (and depth write), fog, blending, then write to the colour
				// buffer. A failed alpha or depth test jumps to `discard`, which skips
				// the remaining stages but still advances the iterators.
				//
				// In pixel_t, index 0 is alpha and indices 1..3 are red, green, blue.
				492	void scanline(context_t* c)
				493	{
				494	const uint32_t enables = c->state.enables;
				495	const int xs = c->iterators.xl;
				496	const int x1 = c->iterators.xr;
				497	int xc = x1 - xs;
				498	const int16_t* covPtr = c->state.buffers.coverage + xs;
				499
				500	// All iterated values are sampled at the pixel center
				501
				502	// reset iterators for that scanline...
				503	GGLcolor r, g, b, a;
				504	iterators_t& ci = c->iterators;
				505	if (enables & GGL_ENABLE_SMOOTH) {
				506	r = (xs * c->shade.drdx) + ci.ydrdy;
				507	g = (xs * c->shade.dgdx) + ci.ydgdy;
				508	b = (xs * c->shade.dbdx) + ci.ydbdy;
				509	a = (xs * c->shade.dadx) + ci.ydady;
				510	r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc);
				511	g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc);
				512	b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc);
				513	a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc);
				514	} else {
				515	r = ci.ydrdy;
				516	g = ci.ydgdy;
				517	b = ci.ydbdy;
				518	a = ci.ydady;
				519	}
				520
				521	// z iterators are 1.31
				522	GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy;
				523	GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy;
				524
				525	struct {
				526	GGLfixed s, t;
				527	} tc[GGL_TEXTURE_UNIT_COUNT];
				528	if (enables & GGL_ENABLE_TMUS) {
				529	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				530	if (c->state.texture[i].enable) {
				531	texture_iterators_t& ti = c->state.texture[i].iterators;
				532	if (enables & GGL_ENABLE_W) {
				533	tc[i].s = ti.ydsdy;
				534	tc[i].t = ti.ydtdy;
				535	} else {
				536	tc[i].s = (xs * ti.dsdx) + ti.ydsdy;
				537	tc[i].t = (xs * ti.dtdx) + ti.ydtdy;
				538	}
				539	}
				540	}
				541	}
				542
				543	pixel_t fragment;
				544	pixel_t texel;
				545	pixel_t fb;
				546
				547	uint32_t x = xs;
				548	uint32_t y = c->iterators.y;
				549
				550	while (xc--) {
				551
				552	{ // just a scope
				553
				554	// read color (convert to 8 bits by keeping only the integer part)
				555	fragment.s[1] = fragment.s[2] =
				556	fragment.s[3] = fragment.s[0] = 8;
				557	fragment.c[1] = r >> (GGL_COLOR_BITS-8);
				558	fragment.c[2] = g >> (GGL_COLOR_BITS-8);
				559	fragment.c[3] = b >> (GGL_COLOR_BITS-8);
				560	fragment.c[0] = a >> (GGL_COLOR_BITS-8);
				561
				562	// texturing
				563	if (enables & GGL_ENABLE_TMUS) {
				564	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				565	texture_t& tx = c->state.texture[i];
				566	if (!tx.enable)
				567	continue;
				568	texture_iterators_t& ti = tx.iterators;
				569	int32_t u, v;
				570
				571	// s-coordinate
				572	if (tx.s_coord != GGL_ONE_TO_ONE) {
				573	const int w = tx.surface.width;
				574	u = wrapping(tc[i].s, w, tx.s_wrap);
				575	tc[i].s += ti.dsdx;
				576	} else {
				577	u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF;
				578	}
				579
				580	// t-coordinate
				581	if (tx.t_coord != GGL_ONE_TO_ONE) {
				582	const int h = tx.surface.height;
				583	v = wrapping(tc[i].t, h, tx.t_wrap);
				584	tc[i].t += ti.dtdx;
				585	} else {
				586	v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF;
				587	}
				588
				589	// read texture
				590	if (tx.mag_filter == GGL_NEAREST &&
				591	tx.min_filter == GGL_NEAREST)
				592	{
				593	u >>= 16;
				594	v >>= 16;
				595	tx.surface.read(&tx.surface, c, u, v, &texel);
				596	} else {
				// bilinear: read the 2x2 neighbourhood and weight it with 4-bit
				// fractions; the four weights sum to 0x100
				597	const int w = tx.surface.width;
				598	const int h = tx.surface.height;
				599	u -= FIXED_HALF;
				600	v -= FIXED_HALF;
				601	int u0 = u >> 16;
				602	int v0 = v >> 16;
				603	int u1 = u0 + 1;
				604	int v1 = v0 + 1;
				605	if (tx.s_wrap == GGL_REPEAT) {
				606	if (u0<0) u0 += w;
				607	if (u1<0) u1 += w;
				608	if (u0>=w) u0 -= w;
				609	if (u1>=w) u1 -= w;
				610	} else {
				611	if (u0<0) u0 = 0;
				612	if (u1<0) u1 = 0;
				613	if (u0>=w) u0 = w-1;
				614	if (u1>=w) u1 = w-1;
				615	}
				616	if (tx.t_wrap == GGL_REPEAT) {
				617	if (v0<0) v0 += h;
				618	if (v1<0) v1 += h;
				619	if (v0>=h) v0 -= h;
				620	if (v1>=h) v1 -= h;
				621	} else {
				622	if (v0<0) v0 = 0;
				623	if (v1<0) v1 = 0;
				624	if (v0>=h) v0 = h-1;
				625	if (v1>=h) v1 = h-1;
				626	}
				627	pixel_t texels[4];
				628	uint32_t mm[4];
				629	tx.surface.read(&tx.surface, c, u0, v0, &texels[0]);
				630	tx.surface.read(&tx.surface, c, u0, v1, &texels[1]);
				631	tx.surface.read(&tx.surface, c, u1, v0, &texels[2]);
				632	tx.surface.read(&tx.surface, c, u1, v1, &texels[3]);
				633	u = (u >> 12) & 0xF;
				634	v = (v >> 12) & 0xF;
				635	u += u>>3;
				636	v += v>>3;
				637	mm[0] = (0x10 - u) * (0x10 - v);
				638	mm[1] = (0x10 - u) * v;
				639	mm[2] = u * (0x10 - v);
				640	mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]);
				641	for (int j=0 ; j<4 ; j++) {
				642	texel.s[j] = texels[0].s[j];
				643	if (!texel.s[j]) continue;
				// the weighted sum carries 8 extra fractional bits
				644	texel.s[j] += 8;
				645	texel.c[j] = texels[0].c[j]*mm[0] +
				646	texels[1].c[j]*mm[1] +
				647	texels[2].c[j]*mm[2] +
				648	texels[3].c[j]*mm[3] ;
				649	}
				650	}
				651
				652	// Texture environnement...
				653	for (int j=0 ; j<4 ; j++) {
				654	uint32_t& Cf = fragment.c[j];
				655	uint32_t& Ct = texel.c[j];
				656	uint8_t& sf = fragment.s[j];
				657	uint8_t& st = texel.s[j];
				658	uint32_t At = texel.c[0];
				659	uint8_t sat = texel.s[0];
				660	switch (tx.env) {
				661	case GGL_REPLACE:
				662	if (st) {
				663	Cf = Ct;
				664	sf = st;
				665	}
				666	break;
				667	case GGL_MODULATE:
				668	if (st) {
				669	uint32_t factor = Ct + (Ct>>(st-1));
				670	Cf = (Cf * factor) >> st;
				671	}
				672	break;
				673	case GGL_DECAL:
				674	if (sat) {
				675	rescale(Cf, sf, Ct, st);
				676	Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat;
				677	}
				678	break;
				679	case GGL_BLEND:
				680	if (st) {
				681	uint32_t Cc = tx.env_color[i];
				682	if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8;
				683	else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf);
				684	uint32_t factor = Ct + (Ct>>(st-1));
				685	Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st;
				686	}
				687	break;
				688	case GGL_ADD:
				689	if (st) {
				690	rescale(Cf, sf, Ct, st);
				691	Cf += Ct;
				692	}
				693	break;
				694	}
				695	}
				696	}
				697	}
				698
				699	// coverage application
				700	if (enables & GGL_ENABLE_AA) {
				701	int16_t cf = *covPtr++;
				702	fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15;
				703	}
				704
				705	// alpha-test
				706	if (enables & GGL_ENABLE_ALPHA_TEST) {
				707	GGLcolor ref = c->state.alpha_test.ref;
				708	GGLcolor alpha = (uint64_t(fragment.c[0]) *
				709	((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1);
				710	switch (c->state.alpha_test.func) {
				711	case GGL_NEVER: goto discard;
				712	case GGL_LESS: if (alpha<ref) break; goto discard;
				713	case GGL_EQUAL: if (alpha==ref) break; goto discard;
				714	case GGL_LEQUAL: if (alpha<=ref) break; goto discard;
				715	case GGL_GREATER: if (alpha>ref) break; goto discard;
				716	case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard;
				717	case GGL_GEQUAL: if (alpha>=ref) break; goto discard;
				718	}
				719	}
				720
				721	// depth test
				722	if (c->state.buffers.depth.format) {
				723	if (enables & GGL_ENABLE_DEPTH_TEST) {
				724	surface_t* cb = &(c->state.buffers.depth);
				// address of the 16-bit depth sample at (x, y): base + x + stride*y,
				// counted in uint16_t elements
				725	uint16_t* p = (uint16_t*)(cb->data)+(x+(cb->stride*y));
				726	uint16_t zz = uint32_t(z)>>(16);
				727	uint16_t depth = *p;
				728	switch (c->state.depth_test.func) {
				729	case GGL_NEVER: goto discard;
				730	case GGL_LESS: if (zz<depth) break; goto discard;
				731	case GGL_EQUAL: if (zz==depth) break; goto discard;
				732	case GGL_LEQUAL: if (zz<=depth) break; goto discard;
				733	case GGL_GREATER: if (zz>depth) break; goto discard;
				734	case GGL_NOTEQUAL: if (zz!=depth) break; goto discard;
				735	case GGL_GEQUAL: if (zz>=depth) break; goto discard;
				736	}
				737	// depth buffer is not enabled, if depth-test is not enabled
				738	/*
				739	fragment.s[1] = fragment.s[2] =
				740	fragment.s[3] = fragment.s[0] = 8;
				741	fragment.c[1] =
				742	fragment.c[2] =
				743	fragment.c[3] =
				744	fragment.c[0] = 255 - (zz>>8);
				745	*/
				746	if (c->state.mask.depth) {
				747	*p = zz;
				748	}
				749	}
				750	}
				751
				752	// fog
				753	if (enables & GGL_ENABLE_FOG) {
				754	for (int i=1 ; i<=3 ; i++) {
				755	GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF;
				// note: this local `c` shadows the context pointer inside the loop body
				756	uint32_t& c = fragment.c[i];
				757	uint8_t& s = fragment.s[i];
				758	c = (c * 0x10000) / ((1<<s)-1);
				759	c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f));
				760	s = 16;
				761	}
				762	}
				763
				764	// blending
				765	if (enables & GGL_ENABLE_BLENDING) {
				766	fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind
				767	fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0;
				768	c->state.buffers.color.read(
				769	&(c->state.buffers.color), c, x, y, &fb);
				770	blending( c, &fragment, &fb );
				771	}
				772
				773	// write
				774	c->state.buffers.color.write(
				775	&(c->state.buffers.color), c, x, y, &fragment);
				776	}
				777
				778	discard:
				779	// iterate...
				780	x += 1;
				781	if (enables & GGL_ENABLE_SMOOTH) {
				782	r += c->shade.drdx;
				783	g += c->shade.dgdx;
				784	b += c->shade.dbdx;
				785	a += c->shade.dadx;
				786	}
				787	z += c->shade.dzdx;
				788	f += c->shade.dfdx;
				789	}
				790	}
				791
				792	#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
				793
				794	// ----------------------------------------------------------------------------
				795	#if 0
				796	#pragma mark -
				797	#pragma mark Scanline
				798	#endif
				799
				// Evaluates an iterated value at the centre of the first pixel of line y.
				//   y:    line index
				//   v0:   value at the origin
				//   dvdx: increment per pixel in x
				//   dvdy: increment per line in y
				// The half-pixel offsets in x and y are folded into one term,
				// (dvdy + dvdx) >> 1.
				800	template <typename T, typename U>
				801	static inline __attribute__((const))
				802	T interpolate(int y, T v0, U dvdx, U dvdy) {
				803	// interpolates in pixel's centers
				804	// v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx)
				805	return (y * dvdy) + (v0 + ((dvdy + dvdx) >> 1));
				806	}
				807
				808	// ----------------------------------------------------------------------------
				809	#if 0
				810	#pragma mark -
				811	#endif
				812
				813	void init_y(context_t* c, int32_t ys)
				814	{
				815	const uint32_t enables = c->state.enables;
				816
				817	// compute iterators...
				818	iterators_t& ci = c->iterators;
				819
				820	// sample in the center
				821	ci.y = ys;
				822
				823	if (enables & (GGL_ENABLE_DEPTH_TEST\|GGL_ENABLE_W\|GGL_ENABLE_FOG)) {
				824	ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy);
				825	ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy);
				826	ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy);
				827	}
				828
				829	if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) {
				830	ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy);
				831	ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy);
				832	ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy);
				833	ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady);
				834	c->step_y = step_y__smooth;
				835	} else {
				836	ci.ydrdy = c->shade.r0;
				837	ci.ydgdy = c->shade.g0;
				838	ci.ydbdy = c->shade.b0;
				839	ci.ydady = c->shade.a0;
				840	// XXX: do only if needed, or make sure this is fast
				841	c->packed = ggl_pack_color(c, c->state.buffers.color.format,
				842	ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
				843	c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
				844	ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
				845	}
				846
				847	// initialize the variables we need in the shader
				848	generated_vars_t& gen = c->generated_vars;
				849	gen.argb[GGLFormat::ALPHA].c = ci.ydady;
				850	gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx;
				851	gen.argb[GGLFormat::RED ].c = ci.ydrdy;
				852	gen.argb[GGLFormat::RED ].dx = c->shade.drdx;
				853	gen.argb[GGLFormat::GREEN].c = ci.ydgdy;
				854	gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx;
				855	gen.argb[GGLFormat::BLUE ].c = ci.ydbdy;
				856	gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx;
				857	gen.dzdx = c->shade.dzdx;
				858	gen.f = ci.ydfdy;
				859	gen.dfdx = c->shade.dfdx;
				860
				861	if (enables & GGL_ENABLE_TMUS) {
				862	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				863	texture_t& t = c->state.texture[i];
				864	if (!t.enable) continue;
				865
				866	texture_iterators_t& ti = t.iterators;
				867	if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) {
				868	// we need to set all of these to 0 because in some cases
				869	// step_y__generic() or step_y__tmu() will be used and
				870	// therefore will update dtdy, however, in 1:1 mode
				871	// this is always done by the scanline rasterizer.
				872	ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0;
				873	ti.ydsdy = t.shade.is0;
				874	ti.ydtdy = t.shade.it0;
				875	} else {
				876	const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16);
				877	const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16);
				878	ti.sscale = t.shade.sscale + adjustSWrap;
				879	ti.tscale = t.shade.tscale + adjustTWrap;
				880	if (!(enables & GGL_ENABLE_W)) {
				881	// S coordinate
				882	const int32_t sscale = ti.sscale;
				883	const int32_t sy = interpolate(ys,
				884	t.shade.is0, t.shade.idsdx, t.shade.idsdy);
				885	if (sscale>=0) {
				886	ti.ydsdy= sy << sscale;
				887	ti.dsdx = t.shade.idsdx << sscale;
				888	ti.dsdy = t.shade.idsdy << sscale;
				889	} else {
				890	ti.ydsdy= sy >> -sscale;
				891	ti.dsdx = t.shade.idsdx >> -sscale;
				892	ti.dsdy = t.shade.idsdy >> -sscale;
				893	}
				894	// T coordinate
				895	const int32_t tscale = ti.tscale;
				896	const int32_t ty = interpolate(ys,
				897	t.shade.it0, t.shade.idtdx, t.shade.idtdy);
				898	if (tscale>=0) {
				899	ti.ydtdy= ty << tscale;
				900	ti.dtdx = t.shade.idtdx << tscale;
				901	ti.dtdy = t.shade.idtdy << tscale;
				902	} else {
				903	ti.ydtdy= ty >> -tscale;
				904	ti.dtdx = t.shade.idtdx >> -tscale;
				905	ti.dtdy = t.shade.idtdy >> -tscale;
				906	}
				907	}
				908	}
				909	// mirror for generated code...
				910	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				911	gen.width = t.surface.width;
				912	gen.height = t.surface.height;
				913	gen.stride = t.surface.stride;
				914	gen.data = int32_t(t.surface.data);
				915	gen.dsdx = ti.dsdx;
				916	gen.dtdx = ti.dtdx;
				917	}
				918	}
				919
				920	// choose the y-stepper
				921	c->step_y = step_y__nop;
				922	if (enables & GGL_ENABLE_FOG) {
				923	c->step_y = step_y__generic;
				924	} else if (enables & GGL_ENABLE_TMUS) {
				925	if (enables & GGL_ENABLE_SMOOTH) {
				926	c->step_y = step_y__generic;
				927	} else if (enables & GGL_ENABLE_W) {
				928	c->step_y = step_y__w;
				929	} else {
				930	c->step_y = step_y__tmu;
				931	}
				932	} else {
				933	if (enables & GGL_ENABLE_SMOOTH) {
				934	c->step_y = step_y__smooth;
				935	}
				936	}
				937
				938	// choose the rectangle blitter
				939	c->rect = rect_generic;
				940	if ((c->step_y == step_y__nop) &&
				941	(c->scanline == scanline_memcpy))
				942	{
				943	c->rect = rect_memcpy;
				944	}
				945	}
				946
				947	void init_y_packed(context_t* c, int32_t y0)
				948	{
				949	uint8_t f = c->state.buffers.color.format;
				950	c->packed = ggl_pack_color(c, f,
				951	c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame^]	952	c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
				953	c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	954	c->iterators.y = y0;
				955	c->step_y = step_y__nop;
				956	// choose the rectangle blitter
				957	c->rect = rect_generic;
				958	if (c->scanline == scanline_memcpy) {
				959	c->rect = rect_memcpy;
				960	}
				961	}
				962
				963	void init_y_noop(context_t* c, int32_t y0)
				964	{
				965	c->iterators.y = y0;
				966	c->step_y = step_y__nop;
				967	// choose the rectangle blitter
				968	c->rect = rect_generic;
				969	if (c->scanline == scanline_memcpy) {
				970	c->rect = rect_memcpy;
				971	}
				972	}
				973
				974	void init_y_error(context_t* c, int32_t y0)
				975	{
				976	// woooops, shoud never happen,
				977	// fail gracefully (don't display anything)
				978	init_y_noop(c, y0);
				979	LOGE("color-buffer has an invalid format!");
				980	}
				981
				982	// ----------------------------------------------------------------------------
				983	#if 0
				984	#pragma mark -
				985	#endif
				986
				987	void step_y__generic(context_t* c)
				988	{
				989	const uint32_t enables = c->state.enables;
				990
				991	// iterate...
				992	iterators_t& ci = c->iterators;
				993	ci.y += 1;
				994
				995	if (enables & GGL_ENABLE_SMOOTH) {
				996	ci.ydrdy += c->shade.drdy;
				997	ci.ydgdy += c->shade.dgdy;
				998	ci.ydbdy += c->shade.dbdy;
				999	ci.ydady += c->shade.dady;
				1000	}
				1001
				1002	const uint32_t mask =
				1003	GGL_ENABLE_DEPTH_TEST \|
				1004	GGL_ENABLE_W \|
				1005	GGL_ENABLE_FOG;
				1006	if (enables & mask) {
				1007	ci.ydzdy += c->shade.dzdy;
				1008	ci.ydwdy += c->shade.dwdy;
				1009	ci.ydfdy += c->shade.dfdy;
				1010	}
				1011
				1012	if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
				1013	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1014	if (c->state.texture[i].enable) {
				1015	texture_iterators_t& ti = c->state.texture[i].iterators;
				1016	ti.ydsdy += ti.dsdy;
				1017	ti.ydtdy += ti.dtdy;
				1018	}
				1019	}
				1020	}
				1021	}
				1022
				1023	void step_y__nop(context_t* c)
				1024	{
				1025	c->iterators.y += 1;
				1026	c->iterators.ydzdy += c->shade.dzdy;
				1027	}
				1028
				1029	void step_y__smooth(context_t* c)
				1030	{
				1031	iterators_t& ci = c->iterators;
				1032	ci.y += 1;
				1033	ci.ydrdy += c->shade.drdy;
				1034	ci.ydgdy += c->shade.dgdy;
				1035	ci.ydbdy += c->shade.dbdy;
				1036	ci.ydady += c->shade.dady;
				1037	ci.ydzdy += c->shade.dzdy;
				1038	}
				1039
				1040	void step_y__w(context_t* c)
				1041	{
				1042	iterators_t& ci = c->iterators;
				1043	ci.y += 1;
				1044	ci.ydzdy += c->shade.dzdy;
				1045	ci.ydwdy += c->shade.dwdy;
				1046	}
				1047
				1048	void step_y__tmu(context_t* c)
				1049	{
				1050	iterators_t& ci = c->iterators;
				1051	ci.y += 1;
				1052	ci.ydzdy += c->shade.dzdy;
				1053	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1054	if (c->state.texture[i].enable) {
				1055	texture_iterators_t& ti = c->state.texture[i].iterators;
				1056	ti.ydsdy += ti.dsdy;
				1057	ti.ydtdy += ti.dtdy;
				1058	}
				1059	}
				1060	}
				1061
				1062	// ----------------------------------------------------------------------------
				1063	#if 0
				1064	#pragma mark -
				1065	#endif
				1066
				1067	void scanline_perspective(context_t* c)
				1068	{
				1069	struct {
				1070	union {
				1071	struct {
				1072	int32_t s, sq;
				1073	int32_t t, tq;
				1074	};
				1075	struct {
				1076	int32_t v, q;
				1077	} st[2];
				1078	};
				1079	} tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));
				1080
				1081	// XXX: we should have a special case when dwdx = 0
				1082
				1083	// 32 pixels spans works okay. 16 is a lot better,
				1084	// but hey, it's a software renderer...
				1085	const uint32_t SPAN_BITS = 5;
				1086	const uint32_t ys = c->iterators.y;
				1087	const uint32_t xs = c->iterators.xl;
				1088	const uint32_t x1 = c->iterators.xr;
				1089	const uint32_t xc = x1 - xs;
				1090	uint32_t remainder = xc & ((1<<SPAN_BITS)-1);
				1091	uint32_t numSpans = xc >> SPAN_BITS;
				1092
				1093	const iterators_t& ci = c->iterators;
				1094	int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
				1095	int32_t q0 = gglRecipQ(w0, 30);
				1096	const int iwscale = 32 - gglClz(q0);
				1097
				1098	const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
				1099	int32_t xl = c->iterators.xl;
				1100
				1101	// We process s & t with a loop to reduce the code size
				1102	// (and i-cache pressure).
				1103
				1104	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1105	const texture_t& tmu = c->state.texture[i];
				1106	if (!tmu.enable) continue;
				1107	int32_t s = tmu.shade.is0 +
				1108	(tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
				1109	((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
				1110	int32_t t = tmu.shade.it0 +
				1111	(tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
				1112	((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
				1113	tc[i].s = s;
				1114	tc[i].t = t;
				1115	tc[i].sq = gglMulx(s, q0, iwscale);
				1116	tc[i].tq = gglMulx(t, q0, iwscale);
				1117	}
				1118
				1119	int32_t span = 0;
				1120	do {
				1121	int32_t w1;
				1122	if (ggl_likely(numSpans)) {
				1123	w1 = w0 + dwdx;
				1124	} else {
				1125	if (remainder) {
				1126	// finish off the scanline...
				1127	span = remainder;
				1128	w1 = (c->shade.dwdx * span) + w0;
				1129	} else {
				1130	break;
				1131	}
				1132	}
				1133	int32_t q1 = gglRecipQ(w1, 30);
				1134	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1135	texture_t& tmu = c->state.texture[i];
				1136	if (!tmu.enable) continue;
				1137	texture_iterators_t& ti = tmu.iterators;
				1138
				1139	for (int j=0 ; j<2 ; j++) {
				1140	int32_t v = tc[i].st[j].v;
				1141	if (span) v += (tmu.shade.st[j].dx)*span;
				1142	else v += (tmu.shade.st[j].dx)<<SPAN_BITS;
				1143	const int32_t v0 = tc[i].st[j].q;
				1144	const int32_t v1 = gglMulx(v, q1, iwscale);
				1145	int32_t dvdx = v1 - v0;
				1146	if (span) dvdx /= span;
				1147	else dvdx >>= SPAN_BITS;
				1148	tc[i].st[j].v = v;
				1149	tc[i].st[j].q = v1;
				1150
				1151	const int scale = ti.st[j].scale + (iwscale - 30);
				1152	if (scale >= 0) {
				1153	ti.st[j].ydvdy = v0 << scale;
				1154	ti.st[j].dvdx = dvdx << scale;
				1155	} else {
				1156	ti.st[j].ydvdy = v0 >> -scale;
				1157	ti.st[j].dvdx = dvdx >> -scale;
				1158	}
				1159	}
				1160	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1161	gen.dsdx = ti.st[0].dvdx;
				1162	gen.dtdx = ti.st[1].dvdx;
				1163	}
				1164	c->iterators.xl = xl;
				1165	c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
				1166	w0 = w1;
				1167	q0 = q1;
				1168	c->span(c);
				1169	} while(numSpans--);
				1170	}
				1171
				1172	void scanline_perspective_single(context_t* c)
				1173	{
				1174	// 32 pixels spans works okay. 16 is a lot better,
				1175	// but hey, it's a software renderer...
				1176	const uint32_t SPAN_BITS = 5;
				1177	const uint32_t ys = c->iterators.y;
				1178	const uint32_t xs = c->iterators.xl;
				1179	const uint32_t x1 = c->iterators.xr;
				1180	const uint32_t xc = x1 - xs;
				1181
				1182	const iterators_t& ci = c->iterators;
				1183	int32_t w = (xs * c->shade.dwdx) + ci.ydwdy;
				1184	int32_t iw = gglRecipQ(w, 30);
				1185	const int iwscale = 32 - gglClz(iw);
				1186
				1187	const int i = 31 - gglClz(c->state.enabled_tmu);
				1188	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1189	texture_t& tmu = c->state.texture[i];
				1190	texture_iterators_t& ti = tmu.iterators;
				1191	const int sscale = ti.sscale + (iwscale - 30);
				1192	const int tscale = ti.tscale + (iwscale - 30);
				1193	int32_t s = tmu.shade.is0 +
				1194	(tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
				1195	((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
				1196	int32_t t = tmu.shade.it0 +
				1197	(tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
				1198	((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
				1199	int32_t s0 = gglMulx(s, iw, iwscale);
				1200	int32_t t0 = gglMulx(t, iw, iwscale);
				1201	int32_t xl = c->iterators.xl;
				1202
				1203	int32_t sq, tq, dsdx, dtdx;
				1204	int32_t premainder = xc & ((1<<SPAN_BITS)-1);
				1205	uint32_t numSpans = xc >> SPAN_BITS;
				1206	if (c->shade.dwdx == 0) {
				1207	// XXX: we could choose to do this if the error is small enough
				1208	numSpans = 0;
				1209	premainder = xc;
				1210	goto no_perspective;
				1211	}
				1212
				1213	if (premainder) {
				1214	w += c->shade.dwdx * premainder;
				1215	iw = gglRecipQ(w, 30);
				1216	no_perspective:
				1217	s += tmu.shade.idsdx * premainder;
				1218	t += tmu.shade.idtdx * premainder;
				1219	sq = gglMulx(s, iw, iwscale);
				1220	tq = gglMulx(t, iw, iwscale);
				1221	dsdx = (sq - s0) / premainder;
				1222	dtdx = (tq - t0) / premainder;
				1223	c->iterators.xl = xl;
				1224	c->iterators.xr = xl = xl + premainder;
				1225	goto finish;
				1226	}
				1227
				1228	while (numSpans--) {
				1229	w += c->shade.dwdx << SPAN_BITS;
				1230	s += tmu.shade.idsdx << SPAN_BITS;
				1231	t += tmu.shade.idtdx << SPAN_BITS;
				1232	iw = gglRecipQ(w, 30);
				1233	sq = gglMulx(s, iw, iwscale);
				1234	tq = gglMulx(t, iw, iwscale);
				1235	dsdx = (sq - s0) >> SPAN_BITS;
				1236	dtdx = (tq - t0) >> SPAN_BITS;
				1237	c->iterators.xl = xl;
				1238	c->iterators.xr = xl = xl + (1<<SPAN_BITS);
				1239	finish:
				1240	if (sscale >= 0) {
				1241	ti.ydsdy = s0 << sscale;
				1242	ti.dsdx = dsdx << sscale;
				1243	} else {
				1244	ti.ydsdy = s0 >>-sscale;
				1245	ti.dsdx = dsdx >>-sscale;
				1246	}
				1247	if (tscale >= 0) {
				1248	ti.ydtdy = t0 << tscale;
				1249	ti.dtdx = dtdx << tscale;
				1250	} else {
				1251	ti.ydtdy = t0 >>-tscale;
				1252	ti.dtdx = dtdx >>-tscale;
				1253	}
				1254	s0 = sq;
				1255	t0 = tq;
				1256	gen.dsdx = ti.dsdx;
				1257	gen.dtdx = ti.dtdx;
				1258	c->span(c);
				1259	}
				1260	}
				1261
				1262	// ----------------------------------------------------------------------------
				1263
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame^]	1264	void scanline_col32cb16blend(context_t* c)
				1265	{
				1266	int32_t x = c->iterators.xl;
				1267	size_t ct = c->iterators.xr - x;
				1268	int32_t y = c->iterators.y;
				1269	surface_t* cb = &(c->state.buffers.color);
				1270	union {
				1271	uint16_t* dst;
				1272	uint32_t* dst32;
				1273	};
				1274	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				1275
				1276	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
				1277	#if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				1278	scanline_col32cb16blend_neon(dst, &(c->packed8888), ct);
				1279	#else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				1280	scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
				1281	#endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				1282	#else
				1283	uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
				1284	int sA = (s>>24);
				1285	int f = 0x100 - (sA + (sA>>7));
				1286	while (ct--) {
				1287	uint16_t d = *dst;
				1288	int dR = (d>>11)&0x1f;
				1289	int dG = (d>>5)&0x3f;
				1290	int dB = (d)&0x1f;
				1291	int sR = (s >> ( 3))&0x1F;
				1292	int sG = (s >> ( 8+2))&0x3F;
				1293	int sB = (s >> (16+3))&0x1F;
				1294	sR += (f*dR)>>8;
				1295	sG += (f*dG)>>8;
				1296	sB += (f*dB)>>8;
				1297	*dst++ = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1298	}
				1299	#endif
				1300
				1301	}
				1302
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1303	void scanline_t32cb16(context_t* c)
				1304	{
				1305	int32_t x = c->iterators.xl;
				1306	size_t ct = c->iterators.xr - x;
				1307	int32_t y = c->iterators.y;
				1308	surface_t* cb = &(c->state.buffers.color);
				1309	union {
				1310	uint16_t* dst;
				1311	uint32_t* dst32;
				1312	};
				1313	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				1314
				1315	surface_t* tex = &(c->state.texture[0].surface);
				1316	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				1317	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				1318	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));
				1319	int sR, sG, sB;
				1320	uint32_t s, d;
				1321
				1322	if (ct==1 \|\| uint32_t(dst)&2) {
				1323	last_one:
				1324	s = GGL_RGBA_TO_HOST( *src++ );
				1325	sR = (s >> ( 3))&0x1F;
				1326	sG = (s >> ( 8+2))&0x3F;
				1327	sB = (s >> (16+3))&0x1F;
				1328	*dst++ = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1329	ct--;
				1330	}
				1331
				1332	while (ct >= 2) {
				1333	s = GGL_RGBA_TO_HOST( *src++ );
				1334	sR = (s >> ( 3))&0x1F;
				1335	sG = (s >> ( 8+2))&0x3F;
				1336	sB = (s >> (16+3))&0x1F;
				1337	d = (sR<<11)\|(sG<<5)\|sB;
				1338
				1339	s = GGL_RGBA_TO_HOST( *src++ );
				1340	sR = (s >> ( 3))&0x1F;
				1341	sG = (s >> ( 8+2))&0x3F;
				1342	sB = (s >> (16+3))&0x1F;
				1343	d \|= ((sR<<11)\|(sG<<5)\|sB)<<16;
				1344
				1345	#if BYTE_ORDER == BIG_ENDIAN
				1346	d = (d>>16) \| (d<<16);
				1347	#endif
				1348
				1349	*dst32++ = d;
				1350	ct -= 2;
				1351	}
				1352
				1353	if (ct > 0) {
				1354	goto last_one;
				1355	}
				1356	}
				1357
				1358	void scanline_t32cb16blend(context_t* c)
				1359	{
				1360	int32_t x = c->iterators.xl;
				1361	size_t ct = c->iterators.xr - x;
				1362	int32_t y = c->iterators.y;
				1363	surface_t* cb = &(c->state.buffers.color);
				1364	uint16_t* dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				1365
				1366	surface_t* tex = &(c->state.texture[0].surface);
				1367	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				1368	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				1369	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));
				1370
				1371	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
				1372	scanline_t32cb16blend_arm(dst, src, ct);
				1373	#else
				1374	while (ct--) {
				1375	uint32_t s = *src++;
				1376	if (!s) {
				1377	dst++;
				1378	continue;
				1379	}
				1380	uint16_t d = *dst;
				1381	s = GGL_RGBA_TO_HOST(s);
				1382	int sR = (s >> ( 3))&0x1F;
				1383	int sG = (s >> ( 8+2))&0x3F;
				1384	int sB = (s >> (16+3))&0x1F;
				1385	int sA = (s>>24);
				1386	int f = 0x100 - (sA + (sA>>7));
				1387	int dR = (d>>11)&0x1f;
				1388	int dG = (d>>5)&0x3f;
				1389	int dB = (d)&0x1f;
				1390	sR += (f*dR)>>8;
				1391	sG += (f*dG)>>8;
				1392	sB += (f*dB)>>8;
				1393	*dst++ = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1394	}
				1395	#endif
				1396	}
				1397
				1398	void scanline_memcpy(context_t* c)
				1399	{
				1400	int32_t x = c->iterators.xl;
				1401	size_t ct = c->iterators.xr - x;
				1402	int32_t y = c->iterators.y;
				1403	surface_t* cb = &(c->state.buffers.color);
				1404	const GGLFormat* fp = &(c->formats[cb->format]);
				1405	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				1406	(x + (cb->stride * y)) * fp->size;
				1407
				1408	surface_t* tex = &(c->state.texture[0].surface);
				1409	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				1410	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				1411	uint8_t src = reinterpret_cast<uint8_t>(tex->data) +
				1412	(u + (tex->stride * v)) * fp->size;
				1413
				1414	const size_t size = ct * fp->size;
				1415	memcpy(dst, src, size);
				1416	}
				1417
				1418	void scanline_memset8(context_t* c)
				1419	{
				1420	int32_t x = c->iterators.xl;
				1421	size_t ct = c->iterators.xr - x;
				1422	int32_t y = c->iterators.y;
				1423	surface_t* cb = &(c->state.buffers.color);
				1424	uint8_t* dst = reinterpret_cast<uint8_t>(cb->data) + (x+(cb->stridey));
				1425	uint32_t packed = c->packed;
				1426	memset(dst, packed, ct);
				1427	}
				1428
				1429	void scanline_memset16(context_t* c)
				1430	{
				1431	int32_t x = c->iterators.xl;
				1432	size_t ct = c->iterators.xr - x;
				1433	int32_t y = c->iterators.y;
				1434	surface_t* cb = &(c->state.buffers.color);
				1435	uint16_t* dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				1436	uint32_t packed = c->packed;
				1437	android_memset16(dst, packed, ct*2);
				1438	}
				1439
				1440	void scanline_memset32(context_t* c)
				1441	{
				1442	int32_t x = c->iterators.xl;
				1443	size_t ct = c->iterators.xr - x;
				1444	int32_t y = c->iterators.y;
				1445	surface_t* cb = &(c->state.buffers.color);
				1446	uint32_t* dst = reinterpret_cast<uint32_t>(cb->data) + (x+(cb->stridey));
				1447	uint32_t packed = GGL_HOST_TO_RGBA(c->packed);
				1448	android_memset32(dst, packed, ct*4);
				1449	}
				1450
				1451	void scanline_clear(context_t* c)
				1452	{
				1453	int32_t x = c->iterators.xl;
				1454	size_t ct = c->iterators.xr - x;
				1455	int32_t y = c->iterators.y;
				1456	surface_t* cb = &(c->state.buffers.color);
				1457	const GGLFormat* fp = &(c->formats[cb->format]);
				1458	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				1459	(x + (cb->stride * y)) * fp->size;
				1460	const size_t size = ct * fp->size;
				1461	memset(dst, 0, size);
				1462	}
				1463
				1464	void scanline_set(context_t* c)
				1465	{
				1466	int32_t x = c->iterators.xl;
				1467	size_t ct = c->iterators.xr - x;
				1468	int32_t y = c->iterators.y;
				1469	surface_t* cb = &(c->state.buffers.color);
				1470	const GGLFormat* fp = &(c->formats[cb->format]);
				1471	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				1472	(x + (cb->stride * y)) * fp->size;
				1473	const size_t size = ct * fp->size;
				1474	memset(dst, 0xFF, size);
				1475	}
				1476
				1477	void scanline_noop(context_t* c)
				1478	{
				1479	}
				1480
				1481	void rect_generic(context_t* c, size_t yc)
				1482	{
				1483	do {
				1484	c->scanline(c);
				1485	c->step_y(c);
				1486	} while (--yc);
				1487	}
				1488
				1489	void rect_memcpy(context_t* c, size_t yc)
				1490	{
				1491	int32_t x = c->iterators.xl;
				1492	size_t ct = c->iterators.xr - x;
				1493	int32_t y = c->iterators.y;
				1494	surface_t* cb = &(c->state.buffers.color);
				1495	const GGLFormat* fp = &(c->formats[cb->format]);
				1496	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				1497	(x + (cb->stride * y)) * fp->size;
				1498
				1499	surface_t* tex = &(c->state.texture[0].surface);
				1500	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				1501	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				1502	uint8_t src = reinterpret_cast<uint8_t>(tex->data) +
				1503	(u + (tex->stride * v)) * fp->size;
				1504
				1505	if (cb->stride == tex->stride && ct == size_t(cb->stride)) {
				1506	memcpy(dst, src, ct * fp->size * yc);
				1507	} else {
				1508	const size_t size = ct * fp->size;
				1509	const size_t dbpr = cb->stride * fp->size;
				1510	const size_t sbpr = tex->stride * fp->size;
				1511	do {
				1512	memcpy(dst, src, size);
				1513	dst += dbpr;
				1514	src += sbpr;
				1515	} while (--yc);
				1516	}
				1517	}
				1518	// ----------------------------------------------------------------------------
				1519	}; // namespace android
				1520
				1521	using namespace android;
				1522	extern "C" void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1)
				1523	{
				1524	#if ANDROID_ARM_CODEGEN
				1525	GGLContext* c;
				1526	gglInit(&c);
				1527	needs_t needs;
				1528	needs.n = n;
				1529	needs.p = p;
				1530	needs.t[0] = t0;
				1531	needs.t[1] = t1;
				1532	sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE));
				1533	GGLAssembler assembler( new ARMAssembler(a) );
				1534	int err = assembler.scanline(needs, (context_t*)c);
				1535	if (err != 0) {
				1536	printf("error %08x (%s)\n", err, strerror(-err));
				1537	}
				1538	gglUninit(c);
				1539	#else
				1540	printf("This test runs only on ARM\n");
				1541	#endif
				1542	}
				1543