Blame - libpixelflinger/scanline.cpp - AOSPA/android_system_core

blob: a5d28b276b28e49be4a0029c0a950d104614d342 [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/* libs/pixelflinger/scanline.cpp
				2	**
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	3	** Copyright 2006-2011, The Android Open Source Project
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18
				19	#define LOG_TAG "pixelflinger"
				20
				21	#include <assert.h>
				22	#include <stdlib.h>
				23	#include <stdio.h>
				24	#include <string.h>
				25
				26	#include <cutils/memory.h>
				27	#include <cutils/log.h>
				28
				29	#include "buffer.h"
				30	#include "scanline.h"
				31
				32	#include "codeflinger/CodeCache.h"
				33	#include "codeflinger/GGLAssembler.h"
				34	#include "codeflinger/ARMAssembler.h"
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame^]	35	#if defined(__mips__)
				36	#include "codeflinger/MIPSAssembler.h"
				37	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	38	//#include "codeflinger/ARMAssemblerOptimizer.h"
				39
				40	// ----------------------------------------------------------------------------
				41
				42	#define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline
				43	#define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic
				44	#define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic
				45	#define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen
				46
				47	#ifdef NDEBUG
				48	# define ANDROID_RELEASE
				49	# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
				50	#else
				51	# define ANDROID_DEBUG
				52	# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
				53	#endif
				54
// Run-time generation of scanline code is only compiled in for targets
// that have an assembler backend here: ARM directly, and MIPS through the
// ArmToMipsAssembler used further down in pick_scanline().
#if defined(__arm__) || defined(__mips__)
# define ANDROID_ARM_CODEGEN 1
#else
# define ANDROID_ARM_CODEGEN 0
#endif
				60
				61	#define DEBUG__CODEGEN_ONLY 0
				62
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	63	/* Set to 1 to dump to the log the states that need a new
				64	* code-generated scanline callback, i.e. those that don't
				65	* have a corresponding shortcut function.
				66	*/
				67	#define DEBUG_NEEDS 0
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	68
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame^]	69	#ifdef __mips__
				70	#define ASSEMBLY_SCRATCH_SIZE 4096
				71	#else
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	72	#define ASSEMBLY_SCRATCH_SIZE 2048
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame^]	73	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	74
				75	// ----------------------------------------------------------------------------
				76	namespace android {
				77	// ----------------------------------------------------------------------------
				78
				79	static void init_y(context_t*, int32_t);
				80	static void init_y_noop(context_t*, int32_t);
				81	static void init_y_packed(context_t*, int32_t);
				82	static void init_y_error(context_t*, int32_t);
				83
				84	static void step_y__generic(context_t* c);
				85	static void step_y__nop(context_t*);
				86	static void step_y__smooth(context_t* c);
				87	static void step_y__tmu(context_t* c);
				88	static void step_y__w(context_t* c);
				89
				90	static void scanline(context_t* c);
				91	static void scanline_perspective(context_t* c);
				92	static void scanline_perspective_single(context_t* c);
				93	static void scanline_t32cb16blend(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	94	static void scanline_t32cb16blend_dither(context_t* c);
				95	static void scanline_t32cb16blend_srca(context_t* c);
				96	static void scanline_t32cb16blend_clamp(context_t* c);
				97	static void scanline_t32cb16blend_clamp_dither(context_t* c);
				98	static void scanline_t32cb16blend_clamp_mod(context_t* c);
				99	static void scanline_x32cb16blend_clamp_mod(context_t* c);
				100	static void scanline_t32cb16blend_clamp_mod_dither(context_t* c);
				101	static void scanline_x32cb16blend_clamp_mod_dither(context_t* c);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	102	static void scanline_t32cb16(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	103	static void scanline_t32cb16_dither(context_t* c);
				104	static void scanline_t32cb16_clamp(context_t* c);
				105	static void scanline_t32cb16_clamp_dither(context_t* c);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	106	static void scanline_col32cb16blend(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	107	static void scanline_t16cb16_clamp(context_t* c);
				108	static void scanline_t16cb16blend_clamp_mod(context_t* c);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	109	static void scanline_memcpy(context_t* c);
				110	static void scanline_memset8(context_t* c);
				111	static void scanline_memset16(context_t* c);
				112	static void scanline_memset32(context_t* c);
				113	static void scanline_noop(context_t* c);
				114	static void scanline_set(context_t* c);
				115	static void scanline_clear(context_t* c);
				116
				117	static void rect_generic(context_t* c, size_t yc);
				118	static void rect_memcpy(context_t* c, size_t yc);
				119
Duane Sand	068f9f3	2012-05-24 22:09:24 -0700	[diff] [blame]	120	#if defined( __arm__)
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	121	extern "C" void scanline_t32cb16blend_arm(uint16_t, uint32_t, size_t);
				122	extern "C" void scanline_t32cb16_arm(uint16_t dst, uint32_t src, size_t ct);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	123	extern "C" void scanline_col32cb16blend_neon(uint16_t dst, uint32_t col, size_t ct);
				124	extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
Duane Sand	068f9f3	2012-05-24 22:09:24 -0700	[diff] [blame]	125	#elif defined(__mips__)
				126	extern "C" void scanline_t32cb16blend_mips(uint16_t, uint32_t, size_t);
				127	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	128
				129	// ----------------------------------------------------------------------------
				130
/*
 * Packs a 32-bit ABGR8888 pixel (red in bits 7..0, green in bits 15..8,
 * blue in bits 23..16, alpha in bits 31..24) into a 16-bit RGB565 value.
 * Only the most significant 5/6/5 bits of each colour channel are kept;
 * alpha is discarded.
 */
static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix)
{
    const uint32_t red   = (pix << 8)  & 0xf800;  /* R[7:3] -> bits 15..11 */
    const uint32_t green = (pix >> 5)  & 0x07e0;  /* G[7:2] -> bits 10..5  */
    const uint32_t blue  = (pix >> 19) & 0x001f;  /* B[7:3] -> bits 4..0   */
    return (uint16_t)(red | green | blue);
}
				137
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	138	struct shortcut_t {
				139	needs_filter_t filter;
				140	const char* desc;
				141	void (scanline)(context_t);
				142	void (init_y)(context_t, int32_t);
				143	};
				144
				145	// Keep in sync with needs
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	146
				147	/* To understand the values here, have a look at:
				148	* system/core/include/private/pixelflinger/ggl_context.h
				149	*
				150	* Especially the lines defining and using GGL_RESERVE_NEEDS
				151	*
				152	* Quick reminders:
				153	* - the last nibble of the first value is the destination buffer format.
				154	* - the last nibble of the third value is the source texture format
				155	* - formats: 4=rgb565 1=abgr8888 2=xbgr8888
				156	*
				157	* In the descriptions below:
				158	*
				159	* SRC means we copy the source pixels to the destination
				160	*
				161	* SRC_OVER means we blend the source pixels to the destination
				162	* with dstFactor = 1-srcA, srcFactor=1 (premultiplied source).
				163	* This mode is otherwise called 'blend'.
				164	*
				165	* SRCA_OVER means we blend the source pixels to the destination
				166	* with dstFactor=1-srcA, srcFactor=srcA (non-premul source).
				167	* This mode is otherwise called 'blend_srca'
				168	*
				169	* clamp means we fetch source pixels from a texture with u/v clamping
				170	*
				171	* mod means the source pixels are modulated (multiplied) by the
				172	* a/r/g/b of the current context's color. Typically used for
				173	* fade-in / fade-out.
				174	*
				175	* dither means we dither 32 bit values to 16 bits
				176	*/
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	177	static shortcut_t shortcuts[] = {
				178	{ { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
				179	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	180	"565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop },
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	181	{ { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
				182	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	183	"565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop },
				184	/* same as first entry, but with dithering */
				185	{ { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } },
				186	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				187	"565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop },
				188	/* same as second entry, but with dithering */
				189	{ { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } },
				190	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				191	"565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop },
				192	/* this is used during the boot animation - CHEAT: ignore dithering */
				193	{ { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } },
				194	{ 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } },
				195	"565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop },
				196	/* special case for arbitrary texture coordinates (think scaling) */
				197	{ { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } },
				198	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				199	"565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y },
				200	{ { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } },
				201	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				202	"565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y },
				203	/* another case used during emulation */
				204	{ { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } },
				205	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				206	"565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y },
				207	/* and this */
				208	{ { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } },
				209	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				210	"565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y },
				211	{ { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } },
				212	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				213	"565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y },
				214	{ { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } },
				215	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				216	"565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y },
				217	{ { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } },
				218	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				219	"565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
				220	{ { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } },
				221	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				222	"565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
				223	{ { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } },
				224	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				225	"565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
				226	{ { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } },
				227	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				228	"565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
				229	{ { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } },
				230	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				231	"565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y },
				232	{ { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } },
				233	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				234	"565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y },
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	235	{ { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
				236	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
				237	"565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed },
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	238	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				239	{ 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } },
				240	"(nop) alpha test", scanline_noop, init_y_noop },
				241	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				242	{ 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } },
				243	"(nop) depth test", scanline_noop, init_y_noop },
				244	{ { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } },
				245	{ 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } },
				246	"(nop) logic_op", scanline_noop, init_y_noop },
				247	{ { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } },
				248	{ 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } },
				249	"(nop) color mask", scanline_noop, init_y_noop },
				250	{ { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } },
				251	{ 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
				252	"(set) logic_op", scanline_set, init_y_noop },
				253	{ { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } },
				254	{ 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
				255	"(clear) logic_op", scanline_clear, init_y_noop },
				256	{ { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } },
				257	{ 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } },
				258	"(clear) blending 0/0", scanline_clear, init_y_noop },
				259	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				260	{ 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } },
				261	"(error) invalid color-buffer format", scanline_noop, init_y_error },
				262	};
				263	static const needs_filter_t noblend1to1 = {
				264	// (disregard dithering, see below)
				265	{ 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } },
				266	{ 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } }
				267	};
				268	static const needs_filter_t fill16noblend = {
				269	{ 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } },
				270	{ 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } }
				271	};
				272
				273	// ----------------------------------------------------------------------------
				274
				275	#if ANDROID_ARM_CODEGEN
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame^]	276
				277	#if defined(__mips__)
				278	static CodeCache gCodeCache(32 * 1024);
				279	#else
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	280	static CodeCache gCodeCache(12 * 1024);
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame^]	281	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	282
				283	class ScanlineAssembly : public Assembly {
				284	AssemblyKey<needs_t> mKey;
				285	public:
				286	ScanlineAssembly(needs_t needs, size_t size)
				287	: Assembly(size), mKey(needs) { }
				288	const AssemblyKey<needs_t>& key() const { return mKey; }
				289	};
				290	#endif
				291
				292	// ----------------------------------------------------------------------------
				293
				294	void ggl_init_scanline(context_t* c)
				295	{
				296	c->init_y = init_y;
				297	c->step_y = step_y__generic;
				298	c->scanline = scanline;
				299	}
				300
				301	void ggl_uninit_scanline(context_t* c)
				302	{
				303	if (c->state.buffers.coverage)
				304	free(c->state.buffers.coverage);
				305	#if ANDROID_ARM_CODEGEN
				306	if (c->scanline_as)
				307	c->scanline_as->decStrong(c);
				308	#endif
				309	}
				310
				311	// ----------------------------------------------------------------------------
				312
				313	static void pick_scanline(context_t* c)
				314	{
				315	#if (!defined(DEBUG__CODEGEN_ONLY) \|\| (DEBUG__CODEGEN_ONLY == 0))
				316
				317	#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC
				318	c->init_y = init_y;
				319	c->step_y = step_y__generic;
				320	c->scanline = scanline;
				321	return;
				322	#endif
				323
				324	//printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n",
				325	// c->state.needs.n, c->state.needs.p,
				326	// c->state.needs.t[0], c->state.needs.t[1]);
				327
				328	// first handle the special case that we cannot test with a filter
				329	const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n);
				330	if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) {
				331	if (c->state.needs.match(noblend1to1)) {
				332	// this will match regardless of dithering state, since both
				333	// src and dest have the same format anyway, there is no dithering
				334	// to be done.
				335	const GGLFormat* f =
				336	&(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]);
				337	if ((f->components == GGL_RGB) \|\|
				338	(f->components == GGL_RGBA) \|\|
				339	(f->components == GGL_LUMINANCE) \|\|
				340	(f->components == GGL_LUMINANCE_ALPHA))
				341	{
				342	// format must have all of RGB components
				343	// (so the current color doesn't show through)
				344	c->scanline = scanline_memcpy;
				345	c->init_y = init_y_noop;
				346	return;
				347	}
				348	}
				349	}
				350
				351	if (c->state.needs.match(fill16noblend)) {
				352	c->init_y = init_y_packed;
				353	switch (c->formats[cb_format].size) {
				354	case 1: c->scanline = scanline_memset8; return;
				355	case 2: c->scanline = scanline_memset16; return;
				356	case 4: c->scanline = scanline_memset32; return;
				357	}
				358	}
				359
				360	const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t);
				361	for (int i=0 ; i<numFilters ; i++) {
				362	if (c->state.needs.match(shortcuts[i].filter)) {
				363	c->scanline = shortcuts[i].scanline;
				364	c->init_y = shortcuts[i].init_y;
				365	return;
				366	}
				367	}
				368
Vladimir Chtchetkine	dccddee	2011-08-29 10:02:24 -0700	[diff] [blame]	369	#if DEBUG_NEEDS
Steve Block	4163b45	2012-01-04 19:19:03 +0000	[diff] [blame]	370	ALOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x",
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	371	c->state.needs.n, c->state.needs.p,
				372	c->state.needs.t[0], c->state.needs.t[1]);
				373	#endif
				374
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	375	#endif // DEBUG__CODEGEN_ONLY
				376
				377	c->init_y = init_y;
				378	c->step_y = step_y__generic;
				379
				380	#if ANDROID_ARM_CODEGEN
				381	// we're going to have to generate some code...
				382	// here, generate code for our pixel pipeline
				383	const AssemblyKey<needs_t> key(c->state.needs);
				384	sp<Assembly> assembly = gCodeCache.lookup(key);
				385	if (assembly == 0) {
				386	// create a new assembly region
				387	sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs,
				388	ASSEMBLY_SCRATCH_SIZE);
				389	// initialize our assembler
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame^]	390	#if defined(__arm__)
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	391	GGLAssembler assembler( new ARMAssembler(a) );
				392	//GGLAssembler assembler(
				393	// new ARMAssemblerOptimizer(new ARMAssembler(a)) );
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame^]	394	#endif
				395	#if defined(__mips__)
				396	GGLAssembler assembler( new ArmToMipsAssembler(a) );
				397	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	398	// generate the scanline code for the given needs
				399	int err = assembler.scanline(c->state.needs, c);
				400	if (ggl_likely(!err)) {
				401	// finally, cache this assembly
				402	err = gCodeCache.cache(a->key(), a);
				403	}
				404	if (ggl_unlikely(err)) {
Steve Block	8aeb6e2	2012-01-06 14:13:42 +0000	[diff] [blame]	405	ALOGE("error generating or caching assembly. Reverting to NOP.");
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	406	c->scanline = scanline_noop;
				407	c->init_y = init_y_noop;
				408	c->step_y = step_y__nop;
				409	return;
				410	}
				411	assembly = a;
				412	}
				413
				414	// release the previous assembly
				415	if (c->scanline_as) {
				416	c->scanline_as->decStrong(c);
				417	}
				418
Steve Block	4163b45	2012-01-04 19:19:03 +0000	[diff] [blame]	419	//ALOGI("using generated pixel-pipeline");
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	420	c->scanline_as = assembly.get();
				421	c->scanline_as->incStrong(c); // hold on to assembly
				422	c->scanline = (void()(context_t c))assembly->base();
				423	#else
Steve Block	4f07a1f	2012-01-05 22:25:38 +0000	[diff] [blame]	424	// ALOGW("using generic (slow) pixel-pipeline");
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	425	c->scanline = scanline;
				426	#endif
				427	}
				428
				429	void ggl_pick_scanline(context_t* c)
				430	{
				431	pick_scanline(c);
				432	if ((c->state.enables & GGL_ENABLE_W) &&
				433	(c->state.enables & GGL_ENABLE_TMUS))
				434	{
				435	c->span = c->scanline;
				436	c->scanline = scanline_perspective;
				437	if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) {
				438	// only one TMU enabled
				439	c->scanline = scanline_perspective_single;
				440	}
				441	}
				442	}
				443
				444	// ----------------------------------------------------------------------------
				445
				446	static void blending(context_t* c, pixel_t* fragment, pixel_t* fb);
				447	static void blend_factor(context_t* c, pixel_t* r, uint32_t factor,
				448	const pixel_t* src, const pixel_t* dst);
				449	static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv);
				450
				451	#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
				452
				453	// no need to compile the generic-pipeline, it can't be reached
				454	void scanline(context_t*)
				455	{
				456	}
				457
				458	#else
				459
				460	void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv)
				461	{
				462	if (su && sv) {
				463	if (su > sv) {
				464	v = ggl_expand(v, sv, su);
				465	sv = su;
				466	} else if (su < sv) {
				467	u = ggl_expand(u, su, sv);
				468	su = sv;
				469	}
				470	}
				471	}
				472
				473	void blending(context_t* c, pixel_t* fragment, pixel_t* fb)
				474	{
				475	rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]);
				476	rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]);
				477	rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]);
				478	rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]);
				479
				480	pixel_t sf, df;
				481	blend_factor(c, &sf, c->state.blend.src, fragment, fb);
				482	blend_factor(c, &df, c->state.blend.dst, fragment, fb);
				483
				484	fragment->c[1] =
				485	gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1]));
				486	fragment->c[2] =
				487	gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2]));
				488	fragment->c[3] =
				489	gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3]));
				490
				491	if (c->state.blend.alpha_separate) {
				492	blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb);
				493	blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb);
				494	}
				495
				496	fragment->c[0] =
				497	gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0]));
				498
				499	// clamp to 1.0
				500	if (fragment->c[0] >= (1LU<<fragment->s[0]))
				501	fragment->c[0] = (1<<fragment->s[0])-1;
				502	if (fragment->c[1] >= (1LU<<fragment->s[1]))
				503	fragment->c[1] = (1<<fragment->s[1])-1;
				504	if (fragment->c[2] >= (1LU<<fragment->s[2]))
				505	fragment->c[2] = (1<<fragment->s[2])-1;
				506	if (fragment->c[3] >= (1LU<<fragment->s[3]))
				507	fragment->c[3] = (1<<fragment->s[3])-1;
				508	}
				509
				510	static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0)
				511	{
				512	if (!size)
				513	return def;
				514
				515	// scale to 16 bits
				516	if (size > 16) {
				517	x >>= (size - 16);
				518	} else if (size < 16) {
				519	x = ggl_expand(x, size, 16);
				520	}
				521	x += x >> 15;
				522	return x;
				523	}
				524
				525	void blend_factor(context_t* c, pixel_t* r,
				526	uint32_t factor, const pixel_t* src, const pixel_t* dst)
				527	{
				528	switch (factor) {
				529	case GGL_ZERO:
				530	r->c[1] =
				531	r->c[2] =
				532	r->c[3] =
				533	r->c[0] = 0;
				534	break;
				535	case GGL_ONE:
				536	r->c[1] =
				537	r->c[2] =
				538	r->c[3] =
				539	r->c[0] = FIXED_ONE;
				540	break;
				541	case GGL_DST_COLOR:
				542	r->c[1] = blendfactor(dst->c[1], dst->s[1]);
				543	r->c[2] = blendfactor(dst->c[2], dst->s[2]);
				544	r->c[3] = blendfactor(dst->c[3], dst->s[3]);
				545	r->c[0] = blendfactor(dst->c[0], dst->s[0]);
				546	break;
				547	case GGL_SRC_COLOR:
				548	r->c[1] = blendfactor(src->c[1], src->s[1]);
				549	r->c[2] = blendfactor(src->c[2], src->s[2]);
				550	r->c[3] = blendfactor(src->c[3], src->s[3]);
				551	r->c[0] = blendfactor(src->c[0], src->s[0]);
				552	break;
				553	case GGL_ONE_MINUS_DST_COLOR:
				554	r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]);
				555	r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]);
				556	r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]);
				557	r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]);
				558	break;
				559	case GGL_ONE_MINUS_SRC_COLOR:
				560	r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]);
				561	r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]);
				562	r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]);
				563	r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]);
				564	break;
				565	case GGL_SRC_ALPHA:
				566	r->c[1] =
				567	r->c[2] =
				568	r->c[3] =
				569	r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE);
				570	break;
				571	case GGL_ONE_MINUS_SRC_ALPHA:
				572	r->c[1] =
				573	r->c[2] =
				574	r->c[3] =
				575	r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE);
				576	break;
				577	case GGL_DST_ALPHA:
				578	r->c[1] =
				579	r->c[2] =
				580	r->c[3] =
				581	r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
				582	break;
				583	case GGL_ONE_MINUS_DST_ALPHA:
				584	r->c[1] =
				585	r->c[2] =
				586	r->c[3] =
				587	r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
				588	break;
				589	case GGL_SRC_ALPHA_SATURATE:
				590	// XXX: GGL_SRC_ALPHA_SATURATE
				591	break;
				592	}
				593	}
				594
				595	static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap)
				596	{
				597	GGLfixed d;
				598	if (tx_wrap == GGL_REPEAT) {
				599	d = (uint32_t(coord)>>16) * size;
				600	} else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics
				601	const GGLfixed clamp_min = FIXED_HALF;
				602	const GGLfixed clamp_max = (size << 16) - FIXED_HALF;
				603	if (coord < clamp_min) coord = clamp_min;
				604	if (coord > clamp_max) coord = clamp_max;
				605	d = coord;
				606	} else { // 1:1
				607	const GGLfixed clamp_min = 0;
				608	const GGLfixed clamp_max = (size << 16);
				609	if (coord < clamp_min) coord = clamp_min;
				610	if (coord > clamp_max) coord = clamp_max;
				611	d = coord;
				612	}
				613	return d;
				614	}
				615
				616	static inline
				617	GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len)
				618	{
				619	const int32_t end = dvdx * (len-1) + v;
				620	if (end < 0)
				621	v -= end;
				622	v &= ~(v>>31);
				623	return v;
				624	}
				625
				626	void scanline(context_t* c)
				627	{
				628	const uint32_t enables = c->state.enables;
				629	const int xs = c->iterators.xl;
				630	const int x1 = c->iterators.xr;
				631	int xc = x1 - xs;
				632	const int16_t* covPtr = c->state.buffers.coverage + xs;
				633
				634	// All iterated values are sampled at the pixel center
				635
				636	// reset iterators for that scanline...
				637	GGLcolor r, g, b, a;
				638	iterators_t& ci = c->iterators;
				639	if (enables & GGL_ENABLE_SMOOTH) {
				640	r = (xs * c->shade.drdx) + ci.ydrdy;
				641	g = (xs * c->shade.dgdx) + ci.ydgdy;
				642	b = (xs * c->shade.dbdx) + ci.ydbdy;
				643	a = (xs * c->shade.dadx) + ci.ydady;
				644	r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc);
				645	g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc);
				646	b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc);
				647	a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc);
				648	} else {
				649	r = ci.ydrdy;
				650	g = ci.ydgdy;
				651	b = ci.ydbdy;
				652	a = ci.ydady;
				653	}
				654
				655	// z iterators are 1.31
				656	GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy;
				657	GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy;
				658
				659	struct {
				660	GGLfixed s, t;
				661	} tc[GGL_TEXTURE_UNIT_COUNT];
				662	if (enables & GGL_ENABLE_TMUS) {
				663	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				664	if (c->state.texture[i].enable) {
				665	texture_iterators_t& ti = c->state.texture[i].iterators;
				666	if (enables & GGL_ENABLE_W) {
				667	tc[i].s = ti.ydsdy;
				668	tc[i].t = ti.ydtdy;
				669	} else {
				670	tc[i].s = (xs * ti.dsdx) + ti.ydsdy;
				671	tc[i].t = (xs * ti.dtdx) + ti.ydtdy;
				672	}
				673	}
				674	}
				675	}
				676
				677	pixel_t fragment;
				678	pixel_t texel;
				679	pixel_t fb;
				680
				681	uint32_t x = xs;
				682	uint32_t y = c->iterators.y;
				683
				684	while (xc--) {
				685
				686	{ // just a scope
				687
				688	// read color (convert to 8 bits by keeping only the integer part)
				689	fragment.s[1] = fragment.s[2] =
				690	fragment.s[3] = fragment.s[0] = 8;
				691	fragment.c[1] = r >> (GGL_COLOR_BITS-8);
				692	fragment.c[2] = g >> (GGL_COLOR_BITS-8);
				693	fragment.c[3] = b >> (GGL_COLOR_BITS-8);
				694	fragment.c[0] = a >> (GGL_COLOR_BITS-8);
				695
				696	// texturing
				697	if (enables & GGL_ENABLE_TMUS) {
				698	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				699	texture_t& tx = c->state.texture[i];
				700	if (!tx.enable)
				701	continue;
				702	texture_iterators_t& ti = tx.iterators;
				703	int32_t u, v;
				704
				705	// s-coordinate
				706	if (tx.s_coord != GGL_ONE_TO_ONE) {
				707	const int w = tx.surface.width;
				708	u = wrapping(tc[i].s, w, tx.s_wrap);
				709	tc[i].s += ti.dsdx;
				710	} else {
				711	u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF;
				712	}
				713
				714	// t-coordinate
				715	if (tx.t_coord != GGL_ONE_TO_ONE) {
				716	const int h = tx.surface.height;
				717	v = wrapping(tc[i].t, h, tx.t_wrap);
				718	tc[i].t += ti.dtdx;
				719	} else {
				720	v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF;
				721	}
				722
				723	// read texture
				724	if (tx.mag_filter == GGL_NEAREST &&
				725	tx.min_filter == GGL_NEAREST)
				726	{
				727	u >>= 16;
				728	v >>= 16;
				729	tx.surface.read(&tx.surface, c, u, v, &texel);
				730	} else {
				731	const int w = tx.surface.width;
				732	const int h = tx.surface.height;
				733	u -= FIXED_HALF;
				734	v -= FIXED_HALF;
				735	int u0 = u >> 16;
				736	int v0 = v >> 16;
				737	int u1 = u0 + 1;
				738	int v1 = v0 + 1;
				739	if (tx.s_wrap == GGL_REPEAT) {
				740	if (u0<0) u0 += w;
				741	if (u1<0) u1 += w;
				742	if (u0>=w) u0 -= w;
				743	if (u1>=w) u1 -= w;
				744	} else {
				745	if (u0<0) u0 = 0;
				746	if (u1<0) u1 = 0;
				747	if (u0>=w) u0 = w-1;
				748	if (u1>=w) u1 = w-1;
				749	}
				750	if (tx.t_wrap == GGL_REPEAT) {
				751	if (v0<0) v0 += h;
				752	if (v1<0) v1 += h;
				753	if (v0>=h) v0 -= h;
				754	if (v1>=h) v1 -= h;
				755	} else {
				756	if (v0<0) v0 = 0;
				757	if (v1<0) v1 = 0;
				758	if (v0>=h) v0 = h-1;
				759	if (v1>=h) v1 = h-1;
				760	}
				761	pixel_t texels[4];
				762	uint32_t mm[4];
				763	tx.surface.read(&tx.surface, c, u0, v0, &texels[0]);
				764	tx.surface.read(&tx.surface, c, u0, v1, &texels[1]);
				765	tx.surface.read(&tx.surface, c, u1, v0, &texels[2]);
				766	tx.surface.read(&tx.surface, c, u1, v1, &texels[3]);
				767	u = (u >> 12) & 0xF;
				768	v = (v >> 12) & 0xF;
				769	u += u>>3;
				770	v += v>>3;
				771	mm[0] = (0x10 - u) * (0x10 - v);
				772	mm[1] = (0x10 - u) * v;
				773	mm[2] = u * (0x10 - v);
				774	mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]);
				775	for (int j=0 ; j<4 ; j++) {
				776	texel.s[j] = texels[0].s[j];
				777	if (!texel.s[j]) continue;
				778	texel.s[j] += 8;
				779	texel.c[j] = texels[0].c[j]*mm[0] +
				780	texels[1].c[j]*mm[1] +
				781	texels[2].c[j]*mm[2] +
				782	texels[3].c[j]*mm[3] ;
				783	}
				784	}
				785
				786	// Texture environnement...
				787	for (int j=0 ; j<4 ; j++) {
				788	uint32_t& Cf = fragment.c[j];
				789	uint32_t& Ct = texel.c[j];
				790	uint8_t& sf = fragment.s[j];
				791	uint8_t& st = texel.s[j];
				792	uint32_t At = texel.c[0];
				793	uint8_t sat = texel.s[0];
				794	switch (tx.env) {
				795	case GGL_REPLACE:
				796	if (st) {
				797	Cf = Ct;
				798	sf = st;
				799	}
				800	break;
				801	case GGL_MODULATE:
				802	if (st) {
				803	uint32_t factor = Ct + (Ct>>(st-1));
				804	Cf = (Cf * factor) >> st;
				805	}
				806	break;
				807	case GGL_DECAL:
				808	if (sat) {
				809	rescale(Cf, sf, Ct, st);
				810	Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat;
				811	}
				812	break;
				813	case GGL_BLEND:
				814	if (st) {
				815	uint32_t Cc = tx.env_color[i];
				816	if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8;
				817	else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf);
				818	uint32_t factor = Ct + (Ct>>(st-1));
				819	Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st;
				820	}
				821	break;
				822	case GGL_ADD:
				823	if (st) {
				824	rescale(Cf, sf, Ct, st);
				825	Cf += Ct;
				826	}
				827	break;
				828	}
				829	}
				830	}
				831	}
				832
				833	// coverage application
				834	if (enables & GGL_ENABLE_AA) {
				835	int16_t cf = *covPtr++;
				836	fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15;
				837	}
				838
				839	// alpha-test
				840	if (enables & GGL_ENABLE_ALPHA_TEST) {
				841	GGLcolor ref = c->state.alpha_test.ref;
				842	GGLcolor alpha = (uint64_t(fragment.c[0]) *
				843	((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1);
				844	switch (c->state.alpha_test.func) {
				845	case GGL_NEVER: goto discard;
				846	case GGL_LESS: if (alpha<ref) break; goto discard;
				847	case GGL_EQUAL: if (alpha==ref) break; goto discard;
				848	case GGL_LEQUAL: if (alpha<=ref) break; goto discard;
				849	case GGL_GREATER: if (alpha>ref) break; goto discard;
				850	case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard;
				851	case GGL_GEQUAL: if (alpha>=ref) break; goto discard;
				852	}
				853	}
				854
				855	// depth test
				856	if (c->state.buffers.depth.format) {
				857	if (enables & GGL_ENABLE_DEPTH_TEST) {
				858	surface_t* cb = &(c->state.buffers.depth);
				859	uint16_t* p = (uint16_t)(cb->data)+(x+(cb->stridey));
				860	uint16_t zz = uint32_t(z)>>(16);
				861	uint16_t depth = *p;
				862	switch (c->state.depth_test.func) {
				863	case GGL_NEVER: goto discard;
				864	case GGL_LESS: if (zz<depth) break; goto discard;
				865	case GGL_EQUAL: if (zz==depth) break; goto discard;
				866	case GGL_LEQUAL: if (zz<=depth) break; goto discard;
				867	case GGL_GREATER: if (zz>depth) break; goto discard;
				868	case GGL_NOTEQUAL: if (zz!=depth) break; goto discard;
				869	case GGL_GEQUAL: if (zz>=depth) break; goto discard;
				870	}
				871	// depth buffer is not enabled, if depth-test is not enabled
				872	/*
				873	fragment.s[1] = fragment.s[2] =
				874	fragment.s[3] = fragment.s[0] = 8;
				875	fragment.c[1] =
				876	fragment.c[2] =
				877	fragment.c[3] =
				878	fragment.c[0] = 255 - (zz>>8);
				879	*/
				880	if (c->state.mask.depth) {
				881	*p = zz;
				882	}
				883	}
				884	}
				885
				886	// fog
				887	if (enables & GGL_ENABLE_FOG) {
				888	for (int i=1 ; i<=3 ; i++) {
				889	GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF;
				890	uint32_t& c = fragment.c[i];
				891	uint8_t& s = fragment.s[i];
				892	c = (c * 0x10000) / ((1<<s)-1);
				893	c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f));
				894	s = 16;
				895	}
				896	}
				897
				898	// blending
				899	if (enables & GGL_ENABLE_BLENDING) {
				900	fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind
				901	fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0;
				902	c->state.buffers.color.read(
				903	&(c->state.buffers.color), c, x, y, &fb);
				904	blending( c, &fragment, &fb );
				905	}
				906
				907	// write
				908	c->state.buffers.color.write(
				909	&(c->state.buffers.color), c, x, y, &fragment);
				910	}
				911
				912	discard:
				913	// iterate...
				914	x += 1;
				915	if (enables & GGL_ENABLE_SMOOTH) {
				916	r += c->shade.drdx;
				917	g += c->shade.dgdx;
				918	b += c->shade.dbdx;
				919	a += c->shade.dadx;
				920	}
				921	z += c->shade.dzdx;
				922	f += c->shade.dfdx;
				923	}
				924	}
				925
				926	#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
				927
				928	// ----------------------------------------------------------------------------
				929	#if 0
				930	#pragma mark -
				931	#pragma mark Scanline
				932	#endif
				933
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	934	/* Used to parse a 32-bit source texture linearly. Usage is:
				935	*
				936	* horz_iterator32 hi(context);
				937	* while (...) {
				938	* uint32_t src_pixel = hi.get_pixel32();
				939	* ...
				940	* }
				941	*
				942	* Use only for one-to-one texture mapping.
				943	*/
				944	struct horz_iterator32 {
				945	horz_iterator32(context_t* c) {
				946	const int x = c->iterators.xl;
				947	const int y = c->iterators.y;
				948	texture_t& tx = c->state.texture[0];
				949	const int32_t u = (tx.shade.is0>>16) + x;
				950	const int32_t v = (tx.shade.it0>>16) + y;
				951	m_src = reinterpret_cast<uint32_t>(tx.surface.data)+(u+(tx.surface.stridev));
				952	}
				953	uint32_t get_pixel32() {
				954	return *m_src++;
				955	}
				956	protected:
				957	uint32_t* m_src;
				958	};
				959
				960	/* A variant for 16-bit source textures. */
				961	struct horz_iterator16 {
				962	horz_iterator16(context_t* c) {
				963	const int x = c->iterators.xl;
				964	const int y = c->iterators.y;
				965	texture_t& tx = c->state.texture[0];
				966	const int32_t u = (tx.shade.is0>>16) + x;
				967	const int32_t v = (tx.shade.it0>>16) + y;
				968	m_src = reinterpret_cast<uint16_t>(tx.surface.data)+(u+(tx.surface.stridev));
				969	}
				970	uint16_t get_pixel16() {
				971	return *m_src++;
				972	}
				973	protected:
				974	uint16_t* m_src;
				975	};
				976
				977	/* A clamp iterator is used to iterate inside a texture with GGL_CLAMP.
				978	* After initialization, call get_src16() or get_src32() to get the current
				979	* texture pixel value.
				980	*/
				981	struct clamp_iterator {
				982	clamp_iterator(context_t* c) {
				983	const int xs = c->iterators.xl;
				984	texture_t& tx = c->state.texture[0];
				985	texture_iterators_t& ti = tx.iterators;
				986	m_s = (xs * ti.dsdx) + ti.ydsdy;
				987	m_t = (xs * ti.dtdx) + ti.ydtdy;
				988	m_ds = ti.dsdx;
				989	m_dt = ti.dtdx;
				990	m_width_m1 = tx.surface.width - 1;
				991	m_height_m1 = tx.surface.height - 1;
				992	m_data = tx.surface.data;
				993	m_stride = tx.surface.stride;
				994	}
				995	uint16_t get_pixel16() {
				996	int u, v;
				997	get_uv(u, v);
				998	uint16_t* src = reinterpret_cast<uint16_t>(m_data) + (u + (m_stridev));
				999	return src[0];
				1000	}
				1001	uint32_t get_pixel32() {
				1002	int u, v;
				1003	get_uv(u, v);
				1004	uint32_t* src = reinterpret_cast<uint32_t>(m_data) + (u + (m_stridev));
				1005	return src[0];
				1006	}
				1007	private:
				1008	void get_uv(int& u, int& v) {
				1009	int uu = m_s >> 16;
				1010	int vv = m_t >> 16;
				1011	if (uu < 0)
				1012	uu = 0;
				1013	if (uu > m_width_m1)
				1014	uu = m_width_m1;
				1015	if (vv < 0)
				1016	vv = 0;
				1017	if (vv > m_height_m1)
				1018	vv = m_height_m1;
				1019	u = uu;
				1020	v = vv;
				1021	m_s += m_ds;
				1022	m_t += m_dt;
				1023	}
				1024
				1025	GGLfixed m_s, m_t;
				1026	GGLfixed m_ds, m_dt;
				1027	int m_width_m1, m_height_m1;
				1028	uint8_t* m_data;
				1029	int m_stride;
				1030	};
				1031
				1032	/*
				1033	* The 'horizontal clamp iterator' variant corresponds to the case where
				1034	* the 'v' coordinate doesn't change. This is useful to avoid one mult and
				1035	* extra adds / checks per pixels, if the blending/processing operation after
				1036	* this is very fast.
				1037	*/
				1038	static int is_context_horizontal(const context_t* c) {
				1039	return (c->state.texture[0].iterators.dtdx == 0);
				1040	}
				1041
				1042	struct horz_clamp_iterator {
				1043	uint16_t get_pixel16() {
				1044	int u = m_s >> 16;
				1045	m_s += m_ds;
				1046	if (u < 0)
				1047	u = 0;
				1048	if (u > m_width_m1)
				1049	u = m_width_m1;
				1050	const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data);
				1051	return src[u];
				1052	}
				1053	uint32_t get_pixel32() {
				1054	int u = m_s >> 16;
				1055	m_s += m_ds;
				1056	if (u < 0)
				1057	u = 0;
				1058	if (u > m_width_m1)
				1059	u = m_width_m1;
				1060	const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data);
				1061	return src[u];
				1062	}
				1063	protected:
				1064	void init(const context_t* c, int shift);
				1065	GGLfixed m_s;
				1066	GGLfixed m_ds;
				1067	int m_width_m1;
				1068	const uint8_t* m_data;
				1069	};
				1070
				1071	void horz_clamp_iterator::init(const context_t* c, int shift)
				1072	{
				1073	const int xs = c->iterators.xl;
				1074	const texture_t& tx = c->state.texture[0];
				1075	const texture_iterators_t& ti = tx.iterators;
				1076	m_s = (xs * ti.dsdx) + ti.ydsdy;
				1077	m_ds = ti.dsdx;
				1078	m_width_m1 = tx.surface.width-1;
				1079	m_data = tx.surface.data;
				1080
				1081	GGLfixed t = (xs * ti.dtdx) + ti.ydtdy;
				1082	int v = t >> 16;
				1083	if (v < 0)
				1084	v = 0;
				1085	else if (v >= (int)tx.surface.height)
				1086	v = (int)tx.surface.height-1;
				1087
				1088	m_data += (tx.surface.stride*v) << shift;
				1089	}
				1090
				1091	struct horz_clamp_iterator16 : horz_clamp_iterator {
				1092	horz_clamp_iterator16(const context_t* c) {
				1093	init(c,1);
				1094	};
				1095	};
				1096
				1097	struct horz_clamp_iterator32 : horz_clamp_iterator {
				1098	horz_clamp_iterator32(context_t* c) {
				1099	init(c,2);
				1100	};
				1101	};
				1102
				1103	/* This is used to perform dithering operations.
				1104	*/
				1105	struct ditherer {
				1106	ditherer(const context_t* c) {
				1107	const int x = c->iterators.xl;
				1108	const int y = c->iterators.y;
				1109	m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ];
				1110	m_index = x & GGL_DITHER_MASK;
				1111	}
				1112	void step(void) {
				1113	m_index++;
				1114	}
				1115	int get_value(void) {
				1116	int ret = m_line[m_index & GGL_DITHER_MASK];
				1117	m_index++;
				1118	return ret;
				1119	}
				1120	uint16_t abgr8888ToRgb565(uint32_t s) {
				1121	uint32_t r = s & 0xff;
				1122	uint32_t g = (s >> 8) & 0xff;
				1123	uint32_t b = (s >> 16) & 0xff;
				1124	return rgb888ToRgb565(r,g,b);
				1125	}
				1126	/* The following assumes that r/g/b are in the 0..255 range each */
				1127	uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) {
				1128	int threshold = get_value();
				1129	/* dither in on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */
				1130	r += (threshold >> (GGL_DITHER_BITS-8 +5));
				1131	g += (threshold >> (GGL_DITHER_BITS-8 +6));
				1132	b += (threshold >> (GGL_DITHER_BITS-8 +5));
				1133	if (r > 0xff)
				1134	r = 0xff;
				1135	if (g > 0xff)
				1136	g = 0xff;
				1137	if (b > 0xff)
				1138	b = 0xff;
				1139	return uint16_t(((r & 0xf8) << 8) \| ((g & 0xfc) << 3) \| (b >> 3));
				1140	}
				1141	protected:
				1142	const uint8_t* m_line;
				1143	int m_index;
				1144	};
				1145
				1146	/* This structure is used to blend (SRC_OVER) 32-bit source pixels
				1147	* onto 16-bit destination ones. Usage is simply:
				1148	*
				1149	* blender.blend(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
				1150	*/
				1151	struct blender_32to16 {
				1152	blender_32to16(context_t* c) { }
				1153	void write(uint32_t s, uint16_t* dst) {
				1154	if (s == 0)
				1155	return;
				1156	s = GGL_RGBA_TO_HOST(s);
				1157	int sA = (s>>24);
				1158	if (sA == 0xff) {
				1159	*dst = convertAbgr8888ToRgb565(s);
				1160	} else {
				1161	int f = 0x100 - (sA + (sA>>7));
				1162	int sR = (s >> ( 3))&0x1F;
				1163	int sG = (s >> ( 8+2))&0x3F;
				1164	int sB = (s >> (16+3))&0x1F;
				1165	uint16_t d = *dst;
				1166	int dR = (d>>11)&0x1f;
				1167	int dG = (d>>5)&0x3f;
				1168	int dB = (d)&0x1f;
				1169	sR += (f*dR)>>8;
				1170	sG += (f*dG)>>8;
				1171	sB += (f*dB)>>8;
				1172	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1173	}
				1174	}
				1175	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1176	if (s == 0) {
				1177	di.step();
				1178	return;
				1179	}
				1180	s = GGL_RGBA_TO_HOST(s);
				1181	int sA = (s>>24);
				1182	if (sA == 0xff) {
				1183	*dst = di.abgr8888ToRgb565(s);
				1184	} else {
				1185	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1186	int f = 0x100 - (sA + (sA>>7));
				1187	int sR = (s >> ( 3))&0x1F;
				1188	int sG = (s >> ( 8+2))&0x3F;
				1189	int sB = (s >> (16+3))&0x1F;
				1190	uint16_t d = *dst;
				1191	int dR = (d>>11)&0x1f;
				1192	int dG = (d>>5)&0x3f;
				1193	int dB = (d)&0x1f;
				1194	sR = ((sR << 8) + f*dR + threshold)>>8;
				1195	sG = ((sG << 8) + f*dG + threshold)>>8;
				1196	sB = ((sB << 8) + f*dB + threshold)>>8;
				1197	if (sR > 0x1f) sR = 0x1f;
				1198	if (sG > 0x3f) sG = 0x3f;
				1199	if (sB > 0x1f) sB = 0x1f;
				1200	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1201	}
				1202	}
				1203	};
				1204
				1205	/* This blender does the same for the 'blend_srca' operation.
				1206	* where dstFactor=srcA*(1-srcA) srcFactor=srcA
				1207	*/
				1208	struct blender_32to16_srcA {
				1209	blender_32to16_srcA(const context_t* c) { }
				1210	void write(uint32_t s, uint16_t* dst) {
				1211	if (!s) {
				1212	return;
				1213	}
				1214	uint16_t d = *dst;
				1215	s = GGL_RGBA_TO_HOST(s);
				1216	int sR = (s >> ( 3))&0x1F;
				1217	int sG = (s >> ( 8+2))&0x3F;
				1218	int sB = (s >> (16+3))&0x1F;
				1219	int sA = (s>>24);
				1220	int f1 = (sA + (sA>>7));
				1221	int f2 = 0x100-f1;
				1222	int dR = (d>>11)&0x1f;
				1223	int dG = (d>>5)&0x3f;
				1224	int dB = (d)&0x1f;
				1225	sR = (f1sR + f2dR)>>8;
				1226	sG = (f1sG + f2dG)>>8;
				1227	sB = (f1sB + f2dB)>>8;
				1228	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1229	}
				1230	};
				1231
				1232	/* Common init code the modulating blenders */
				1233	struct blender_modulate {
				1234	void init(const context_t* c) {
				1235	const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8);
				1236	const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8);
				1237	const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8);
				1238	const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
				1239	m_r = r + (r >> 7);
				1240	m_g = g + (g >> 7);
				1241	m_b = b + (b >> 7);
				1242	m_a = a + (a >> 7);
				1243	}
				1244	protected:
				1245	int m_r, m_g, m_b, m_a;
				1246	};
				1247
				1248	/* This blender does a normal blend after modulation.
				1249	*/
				1250	struct blender_32to16_modulate : blender_modulate {
				1251	blender_32to16_modulate(const context_t* c) {
				1252	init(c);
				1253	}
				1254	void write(uint32_t s, uint16_t* dst) {
				1255	// blend source and destination
				1256	if (!s) {
				1257	return;
				1258	}
				1259	s = GGL_RGBA_TO_HOST(s);
				1260
				1261	/* We need to modulate s */
				1262	uint32_t sA = (s >> 24);
				1263	uint32_t sB = (s >> 16) & 0xff;
				1264	uint32_t sG = (s >> 8) & 0xff;
				1265	uint32_t sR = s & 0xff;
				1266
				1267	sA = (sA*m_a) >> 8;
				1268	/* Keep R/G/B scaled to 5.8 or 6.8 fixed float format */
				1269	sR = (sR*m_r) >> (8 - 5);
				1270	sG = (sG*m_g) >> (8 - 6);
				1271	sB = (sB*m_b) >> (8 - 5);
				1272
				1273	/* Now do a normal blend */
				1274	int f = 0x100 - (sA + (sA>>7));
				1275	uint16_t d = *dst;
				1276	int dR = (d>>11)&0x1f;
				1277	int dG = (d>>5)&0x3f;
				1278	int dB = (d)&0x1f;
				1279	sR = (sR + f*dR)>>8;
				1280	sG = (sG + f*dG)>>8;
				1281	sB = (sB + f*dB)>>8;
				1282	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1283	}
				1284	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1285	// blend source and destination
				1286	if (!s) {
				1287	di.step();
				1288	return;
				1289	}
				1290	s = GGL_RGBA_TO_HOST(s);
				1291
				1292	/* We need to modulate s */
				1293	uint32_t sA = (s >> 24);
				1294	uint32_t sB = (s >> 16) & 0xff;
				1295	uint32_t sG = (s >> 8) & 0xff;
				1296	uint32_t sR = s & 0xff;
				1297
				1298	sA = (sA*m_a) >> 8;
				1299	/* keep R/G/B scaled to 5.8 or 6.8 fixed float format */
				1300	sR = (sR*m_r) >> (8 - 5);
				1301	sG = (sG*m_g) >> (8 - 6);
				1302	sB = (sB*m_b) >> (8 - 5);
				1303
				1304	/* Scale threshold to 0.8 fixed float format */
				1305	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1306	int f = 0x100 - (sA + (sA>>7));
				1307	uint16_t d = *dst;
				1308	int dR = (d>>11)&0x1f;
				1309	int dG = (d>>5)&0x3f;
				1310	int dB = (d)&0x1f;
				1311	sR = (sR + f*dR + threshold)>>8;
				1312	sG = (sG + f*dG + threshold)>>8;
				1313	sB = (sB + f*dB + threshold)>>8;
				1314	if (sR > 0x1f) sR = 0x1f;
				1315	if (sG > 0x3f) sG = 0x3f;
				1316	if (sB > 0x1f) sB = 0x1f;
				1317	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1318	}
				1319	};
				1320
				1321	/* same as 32to16_modulate, except that the input is xRGB, instead of ARGB */
				1322	struct blender_x32to16_modulate : blender_modulate {
				1323	blender_x32to16_modulate(const context_t* c) {
				1324	init(c);
				1325	}
				1326	void write(uint32_t s, uint16_t* dst) {
				1327	s = GGL_RGBA_TO_HOST(s);
				1328
				1329	uint32_t sB = (s >> 16) & 0xff;
				1330	uint32_t sG = (s >> 8) & 0xff;
				1331	uint32_t sR = s & 0xff;
				1332
				1333	/* Keep R/G/B in 5.8 or 6.8 format */
				1334	sR = (sR*m_r) >> (8 - 5);
				1335	sG = (sG*m_g) >> (8 - 6);
				1336	sB = (sB*m_b) >> (8 - 5);
				1337
				1338	int f = 0x100 - m_a;
				1339	uint16_t d = *dst;
				1340	int dR = (d>>11)&0x1f;
				1341	int dG = (d>>5)&0x3f;
				1342	int dB = (d)&0x1f;
				1343	sR = (sR + f*dR)>>8;
				1344	sG = (sG + f*dG)>>8;
				1345	sB = (sB + f*dB)>>8;
				1346	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1347	}
				1348	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1349	s = GGL_RGBA_TO_HOST(s);
				1350
				1351	uint32_t sB = (s >> 16) & 0xff;
				1352	uint32_t sG = (s >> 8) & 0xff;
				1353	uint32_t sR = s & 0xff;
				1354
				1355	sR = (sR*m_r) >> (8 - 5);
				1356	sG = (sG*m_g) >> (8 - 6);
				1357	sB = (sB*m_b) >> (8 - 5);
				1358
				1359	/* Now do a normal blend */
				1360	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1361	int f = 0x100 - m_a;
				1362	uint16_t d = *dst;
				1363	int dR = (d>>11)&0x1f;
				1364	int dG = (d>>5)&0x3f;
				1365	int dB = (d)&0x1f;
				1366	sR = (sR + f*dR + threshold)>>8;
				1367	sG = (sG + f*dG + threshold)>>8;
				1368	sB = (sB + f*dB + threshold)>>8;
				1369	if (sR > 0x1f) sR = 0x1f;
				1370	if (sG > 0x3f) sG = 0x3f;
				1371	if (sB > 0x1f) sB = 0x1f;
				1372	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1373	}
				1374	};
				1375
				1376	/* Same as above, but source is 16bit rgb565 */
				1377	struct blender_16to16_modulate : blender_modulate {
				1378	blender_16to16_modulate(const context_t* c) {
				1379	init(c);
				1380	}
				1381	void write(uint16_t s16, uint16_t* dst) {
				1382	uint32_t s = s16;
				1383
				1384	uint32_t sR = s >> 11;
				1385	uint32_t sG = (s >> 5) & 0x3f;
				1386	uint32_t sB = s & 0x1f;
				1387
				1388	sR = (sR*m_r);
				1389	sG = (sG*m_g);
				1390	sB = (sB*m_b);
				1391
				1392	int f = 0x100 - m_a;
				1393	uint16_t d = *dst;
				1394	int dR = (d>>11)&0x1f;
				1395	int dG = (d>>5)&0x3f;
				1396	int dB = (d)&0x1f;
				1397	sR = (sR + f*dR)>>8;
				1398	sG = (sG + f*dG)>>8;
				1399	sB = (sB + f*dB)>>8;
				1400	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1401	}
				1402	};
				1403
				1404	/* This is used to iterate over a 16-bit destination color buffer.
				1405	* Usage is:
				1406	*
				1407	* dst_iterator16 di(context);
				1408	* while (di.count--) {
				1409	* <do stuff with dest pixel at di.dst>
				1410	* di.dst++;
				1411	* }
				1412	*/
				1413	struct dst_iterator16 {
				1414	dst_iterator16(const context_t* c) {
				1415	const int x = c->iterators.xl;
				1416	const int width = c->iterators.xr - x;
				1417	const int32_t y = c->iterators.y;
				1418	const surface_t* cb = &(c->state.buffers.color);
				1419	count = width;
				1420	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				1421	}
				1422	int count;
				1423	uint16_t* dst;
				1424	};
				1425
				1426
				1427	static void scanline_t32cb16_clamp(context_t* c)
				1428	{
				1429	dst_iterator16 di(c);
				1430
				1431	if (is_context_horizontal(c)) {
				1432	/* Special case for simple horizontal scaling */
				1433	horz_clamp_iterator32 ci(c);
				1434	while (di.count--) {
				1435	uint32_t s = ci.get_pixel32();
				1436	*di.dst++ = convertAbgr8888ToRgb565(s);
				1437	}
				1438	} else {
				1439	/* General case */
				1440	clamp_iterator ci(c);
				1441	while (di.count--) {
				1442	uint32_t s = ci.get_pixel32();
				1443	*di.dst++ = convertAbgr8888ToRgb565(s);
				1444	}
				1445	}
				1446	}
				1447
				1448	static void scanline_t32cb16_dither(context_t* c)
				1449	{
				1450	horz_iterator32 si(c);
				1451	dst_iterator16 di(c);
				1452	ditherer dither(c);
				1453
				1454	while (di.count--) {
				1455	uint32_t s = si.get_pixel32();
				1456	*di.dst++ = dither.abgr8888ToRgb565(s);
				1457	}
				1458	}
				1459
				1460	static void scanline_t32cb16_clamp_dither(context_t* c)
				1461	{
				1462	dst_iterator16 di(c);
				1463	ditherer dither(c);
				1464
				1465	if (is_context_horizontal(c)) {
				1466	/* Special case for simple horizontal scaling */
				1467	horz_clamp_iterator32 ci(c);
				1468	while (di.count--) {
				1469	uint32_t s = ci.get_pixel32();
				1470	*di.dst++ = dither.abgr8888ToRgb565(s);
				1471	}
				1472	} else {
				1473	/* General case */
				1474	clamp_iterator ci(c);
				1475	while (di.count--) {
				1476	uint32_t s = ci.get_pixel32();
				1477	*di.dst++ = dither.abgr8888ToRgb565(s);
				1478	}
				1479	}
				1480	}
				1481
				1482	static void scanline_t32cb16blend_dither(context_t* c)
				1483	{
				1484	dst_iterator16 di(c);
				1485	ditherer dither(c);
				1486	blender_32to16 bl(c);
				1487	horz_iterator32 hi(c);
				1488	while (di.count--) {
				1489	uint32_t s = hi.get_pixel32();
				1490	bl.write(s, di.dst, dither);
				1491	di.dst++;
				1492	}
				1493	}
				1494
				1495	static void scanline_t32cb16blend_clamp(context_t* c)
				1496	{
				1497	dst_iterator16 di(c);
				1498	blender_32to16 bl(c);
				1499
				1500	if (is_context_horizontal(c)) {
				1501	horz_clamp_iterator32 ci(c);
				1502	while (di.count--) {
				1503	uint32_t s = ci.get_pixel32();
				1504	bl.write(s, di.dst);
				1505	di.dst++;
				1506	}
				1507	} else {
				1508	clamp_iterator ci(c);
				1509	while (di.count--) {
				1510	uint32_t s = ci.get_pixel32();
				1511	bl.write(s, di.dst);
				1512	di.dst++;
				1513	}
				1514	}
				1515	}
				1516
				1517	static void scanline_t32cb16blend_clamp_dither(context_t* c)
				1518	{
				1519	dst_iterator16 di(c);
				1520	ditherer dither(c);
				1521	blender_32to16 bl(c);
				1522
				1523	clamp_iterator ci(c);
				1524	while (di.count--) {
				1525	uint32_t s = ci.get_pixel32();
				1526	bl.write(s, di.dst, dither);
				1527	di.dst++;
				1528	}
				1529	}
				1530
				1531	void scanline_t32cb16blend_clamp_mod(context_t* c)
				1532	{
				1533	dst_iterator16 di(c);
				1534	blender_32to16_modulate bl(c);
				1535
				1536	clamp_iterator ci(c);
				1537	while (di.count--) {
				1538	uint32_t s = ci.get_pixel32();
				1539	bl.write(s, di.dst);
				1540	di.dst++;
				1541	}
				1542	}
				1543
				1544	void scanline_t32cb16blend_clamp_mod_dither(context_t* c)
				1545	{
				1546	dst_iterator16 di(c);
				1547	blender_32to16_modulate bl(c);
				1548	ditherer dither(c);
				1549
				1550	clamp_iterator ci(c);
				1551	while (di.count--) {
				1552	uint32_t s = ci.get_pixel32();
				1553	bl.write(s, di.dst, dither);
				1554	di.dst++;
				1555	}
				1556	}
				1557
				1558	/* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture */
				1559	void scanline_x32cb16blend_clamp_mod(context_t* c)
				1560	{
				1561	dst_iterator16 di(c);
				1562	blender_x32to16_modulate bl(c);
				1563
				1564	clamp_iterator ci(c);
				1565	while (di.count--) {
				1566	uint32_t s = ci.get_pixel32();
				1567	bl.write(s, di.dst);
				1568	di.dst++;
				1569	}
				1570	}
				1571
				1572	void scanline_x32cb16blend_clamp_mod_dither(context_t* c)
				1573	{
				1574	dst_iterator16 di(c);
				1575	blender_x32to16_modulate bl(c);
				1576	ditherer dither(c);
				1577
				1578	clamp_iterator ci(c);
				1579	while (di.count--) {
				1580	uint32_t s = ci.get_pixel32();
				1581	bl.write(s, di.dst, dither);
				1582	di.dst++;
				1583	}
				1584	}
				1585
				1586	void scanline_t16cb16_clamp(context_t* c)
				1587	{
				1588	dst_iterator16 di(c);
				1589
				1590	/* Special case for simple horizontal scaling */
				1591	if (is_context_horizontal(c)) {
				1592	horz_clamp_iterator16 ci(c);
				1593	while (di.count--) {
				1594	*di.dst++ = ci.get_pixel16();
				1595	}
				1596	} else {
				1597	clamp_iterator ci(c);
				1598	while (di.count--) {
				1599	*di.dst++ = ci.get_pixel16();
				1600	}
				1601	}
				1602	}
				1603
				1604
				1605
/* Evaluates an interpolated quantity at the center of the first pixel of
 * row 'y':
 *
 *   v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx)
 *
 * v0 is the base value, dvdx / dvdy the per-pixel steps along x and y.
 * The two half steps are folded into one shift of (dvdx + dvdy).
 */
template <typename T, typename U>
static inline __attribute__((const))
T interpolate(int y, T v0, U dvdx, U dvdy) {
    return (v0 + ((dvdx + dvdy) >> 1)) + (dvdy * y);
}
				1613
				1614	// ----------------------------------------------------------------------------
				1615	#if 0
				1616	#pragma mark -
				1617	#endif
				1618
				1619	void init_y(context_t* c, int32_t ys)
				1620	{
				1621	const uint32_t enables = c->state.enables;
				1622
				1623	// compute iterators...
				1624	iterators_t& ci = c->iterators;
				1625
				1626	// sample in the center
				1627	ci.y = ys;
				1628
				1629	if (enables & (GGL_ENABLE_DEPTH_TEST\|GGL_ENABLE_W\|GGL_ENABLE_FOG)) {
				1630	ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy);
				1631	ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy);
				1632	ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy);
				1633	}
				1634
				1635	if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) {
				1636	ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy);
				1637	ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy);
				1638	ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy);
				1639	ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady);
				1640	c->step_y = step_y__smooth;
				1641	} else {
				1642	ci.ydrdy = c->shade.r0;
				1643	ci.ydgdy = c->shade.g0;
				1644	ci.ydbdy = c->shade.b0;
				1645	ci.ydady = c->shade.a0;
				1646	// XXX: do only if needed, or make sure this is fast
				1647	c->packed = ggl_pack_color(c, c->state.buffers.color.format,
				1648	ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
				1649	c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
				1650	ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
				1651	}
				1652
				1653	// initialize the variables we need in the shader
				1654	generated_vars_t& gen = c->generated_vars;
				1655	gen.argb[GGLFormat::ALPHA].c = ci.ydady;
				1656	gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx;
				1657	gen.argb[GGLFormat::RED ].c = ci.ydrdy;
				1658	gen.argb[GGLFormat::RED ].dx = c->shade.drdx;
				1659	gen.argb[GGLFormat::GREEN].c = ci.ydgdy;
				1660	gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx;
				1661	gen.argb[GGLFormat::BLUE ].c = ci.ydbdy;
				1662	gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx;
				1663	gen.dzdx = c->shade.dzdx;
				1664	gen.f = ci.ydfdy;
				1665	gen.dfdx = c->shade.dfdx;
				1666
				1667	if (enables & GGL_ENABLE_TMUS) {
				1668	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1669	texture_t& t = c->state.texture[i];
				1670	if (!t.enable) continue;
				1671
				1672	texture_iterators_t& ti = t.iterators;
				1673	if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) {
				1674	// we need to set all of these to 0 because in some cases
				1675	// step_y__generic() or step_y__tmu() will be used and
				1676	// therefore will update dtdy, however, in 1:1 mode
				1677	// this is always done by the scanline rasterizer.
				1678	ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0;
				1679	ti.ydsdy = t.shade.is0;
				1680	ti.ydtdy = t.shade.it0;
				1681	} else {
				1682	const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16);
				1683	const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16);
				1684	ti.sscale = t.shade.sscale + adjustSWrap;
				1685	ti.tscale = t.shade.tscale + adjustTWrap;
				1686	if (!(enables & GGL_ENABLE_W)) {
				1687	// S coordinate
				1688	const int32_t sscale = ti.sscale;
				1689	const int32_t sy = interpolate(ys,
				1690	t.shade.is0, t.shade.idsdx, t.shade.idsdy);
				1691	if (sscale>=0) {
				1692	ti.ydsdy= sy << sscale;
				1693	ti.dsdx = t.shade.idsdx << sscale;
				1694	ti.dsdy = t.shade.idsdy << sscale;
				1695	} else {
				1696	ti.ydsdy= sy >> -sscale;
				1697	ti.dsdx = t.shade.idsdx >> -sscale;
				1698	ti.dsdy = t.shade.idsdy >> -sscale;
				1699	}
				1700	// T coordinate
				1701	const int32_t tscale = ti.tscale;
				1702	const int32_t ty = interpolate(ys,
				1703	t.shade.it0, t.shade.idtdx, t.shade.idtdy);
				1704	if (tscale>=0) {
				1705	ti.ydtdy= ty << tscale;
				1706	ti.dtdx = t.shade.idtdx << tscale;
				1707	ti.dtdy = t.shade.idtdy << tscale;
				1708	} else {
				1709	ti.ydtdy= ty >> -tscale;
				1710	ti.dtdx = t.shade.idtdx >> -tscale;
				1711	ti.dtdy = t.shade.idtdy >> -tscale;
				1712	}
				1713	}
				1714	}
				1715	// mirror for generated code...
				1716	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1717	gen.width = t.surface.width;
				1718	gen.height = t.surface.height;
				1719	gen.stride = t.surface.stride;
				1720	gen.data = int32_t(t.surface.data);
				1721	gen.dsdx = ti.dsdx;
				1722	gen.dtdx = ti.dtdx;
				1723	}
				1724	}
				1725
				1726	// choose the y-stepper
				1727	c->step_y = step_y__nop;
				1728	if (enables & GGL_ENABLE_FOG) {
				1729	c->step_y = step_y__generic;
				1730	} else if (enables & GGL_ENABLE_TMUS) {
				1731	if (enables & GGL_ENABLE_SMOOTH) {
				1732	c->step_y = step_y__generic;
				1733	} else if (enables & GGL_ENABLE_W) {
				1734	c->step_y = step_y__w;
				1735	} else {
				1736	c->step_y = step_y__tmu;
				1737	}
				1738	} else {
				1739	if (enables & GGL_ENABLE_SMOOTH) {
				1740	c->step_y = step_y__smooth;
				1741	}
				1742	}
				1743
				1744	// choose the rectangle blitter
				1745	c->rect = rect_generic;
				1746	if ((c->step_y == step_y__nop) &&
				1747	(c->scanline == scanline_memcpy))
				1748	{
				1749	c->rect = rect_memcpy;
				1750	}
				1751	}
				1752
				1753	void init_y_packed(context_t* c, int32_t y0)
				1754	{
				1755	uint8_t f = c->state.buffers.color.format;
				1756	c->packed = ggl_pack_color(c, f,
				1757	c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	1758	c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
				1759	c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1760	c->iterators.y = y0;
				1761	c->step_y = step_y__nop;
				1762	// choose the rectangle blitter
				1763	c->rect = rect_generic;
				1764	if (c->scanline == scanline_memcpy) {
				1765	c->rect = rect_memcpy;
				1766	}
				1767	}
				1768
				1769	void init_y_noop(context_t* c, int32_t y0)
				1770	{
				1771	c->iterators.y = y0;
				1772	c->step_y = step_y__nop;
				1773	// choose the rectangle blitter
				1774	c->rect = rect_generic;
				1775	if (c->scanline == scanline_memcpy) {
				1776	c->rect = rect_memcpy;
				1777	}
				1778	}
				1779
				1780	void init_y_error(context_t* c, int32_t y0)
				1781	{
				1782	// woooops, shoud never happen,
				1783	// fail gracefully (don't display anything)
				1784	init_y_noop(c, y0);
Steve Block	8aeb6e2	2012-01-06 14:13:42 +0000	[diff] [blame]	1785	ALOGE("color-buffer has an invalid format!");
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1786	}
				1787
				1788	// ----------------------------------------------------------------------------
				1789	#if 0
				1790	#pragma mark -
				1791	#endif
				1792
				1793	void step_y__generic(context_t* c)
				1794	{
				1795	const uint32_t enables = c->state.enables;
				1796
				1797	// iterate...
				1798	iterators_t& ci = c->iterators;
				1799	ci.y += 1;
				1800
				1801	if (enables & GGL_ENABLE_SMOOTH) {
				1802	ci.ydrdy += c->shade.drdy;
				1803	ci.ydgdy += c->shade.dgdy;
				1804	ci.ydbdy += c->shade.dbdy;
				1805	ci.ydady += c->shade.dady;
				1806	}
				1807
				1808	const uint32_t mask =
				1809	GGL_ENABLE_DEPTH_TEST \|
				1810	GGL_ENABLE_W \|
				1811	GGL_ENABLE_FOG;
				1812	if (enables & mask) {
				1813	ci.ydzdy += c->shade.dzdy;
				1814	ci.ydwdy += c->shade.dwdy;
				1815	ci.ydfdy += c->shade.dfdy;
				1816	}
				1817
				1818	if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
				1819	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1820	if (c->state.texture[i].enable) {
				1821	texture_iterators_t& ti = c->state.texture[i].iterators;
				1822	ti.ydsdy += ti.dsdy;
				1823	ti.ydtdy += ti.dtdy;
				1824	}
				1825	}
				1826	}
				1827	}
				1828
				1829	void step_y__nop(context_t* c)
				1830	{
				1831	c->iterators.y += 1;
				1832	c->iterators.ydzdy += c->shade.dzdy;
				1833	}
				1834
				1835	void step_y__smooth(context_t* c)
				1836	{
				1837	iterators_t& ci = c->iterators;
				1838	ci.y += 1;
				1839	ci.ydrdy += c->shade.drdy;
				1840	ci.ydgdy += c->shade.dgdy;
				1841	ci.ydbdy += c->shade.dbdy;
				1842	ci.ydady += c->shade.dady;
				1843	ci.ydzdy += c->shade.dzdy;
				1844	}
				1845
				1846	void step_y__w(context_t* c)
				1847	{
				1848	iterators_t& ci = c->iterators;
				1849	ci.y += 1;
				1850	ci.ydzdy += c->shade.dzdy;
				1851	ci.ydwdy += c->shade.dwdy;
				1852	}
				1853
				1854	void step_y__tmu(context_t* c)
				1855	{
				1856	iterators_t& ci = c->iterators;
				1857	ci.y += 1;
				1858	ci.ydzdy += c->shade.dzdy;
				1859	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1860	if (c->state.texture[i].enable) {
				1861	texture_iterators_t& ti = c->state.texture[i].iterators;
				1862	ti.ydsdy += ti.dsdy;
				1863	ti.ydtdy += ti.dtdy;
				1864	}
				1865	}
				1866	}
				1867
				1868	// ----------------------------------------------------------------------------
				1869	#if 0
				1870	#pragma mark -
				1871	#endif
				1872
				1873	void scanline_perspective(context_t* c)
				1874	{
				1875	struct {
				1876	union {
				1877	struct {
				1878	int32_t s, sq;
				1879	int32_t t, tq;
				1880	};
				1881	struct {
				1882	int32_t v, q;
				1883	} st[2];
				1884	};
				1885	} tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));
				1886
				1887	// XXX: we should have a special case when dwdx = 0
				1888
				1889	// 32 pixels spans works okay. 16 is a lot better,
				1890	// but hey, it's a software renderer...
				1891	const uint32_t SPAN_BITS = 5;
				1892	const uint32_t ys = c->iterators.y;
				1893	const uint32_t xs = c->iterators.xl;
				1894	const uint32_t x1 = c->iterators.xr;
				1895	const uint32_t xc = x1 - xs;
				1896	uint32_t remainder = xc & ((1<<SPAN_BITS)-1);
				1897	uint32_t numSpans = xc >> SPAN_BITS;
				1898
				1899	const iterators_t& ci = c->iterators;
				1900	int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
				1901	int32_t q0 = gglRecipQ(w0, 30);
				1902	const int iwscale = 32 - gglClz(q0);
				1903
				1904	const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
				1905	int32_t xl = c->iterators.xl;
				1906
				1907	// We process s & t with a loop to reduce the code size
				1908	// (and i-cache pressure).
				1909
				1910	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1911	const texture_t& tmu = c->state.texture[i];
				1912	if (!tmu.enable) continue;
				1913	int32_t s = tmu.shade.is0 +
				1914	(tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
				1915	((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
				1916	int32_t t = tmu.shade.it0 +
				1917	(tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
				1918	((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
				1919	tc[i].s = s;
				1920	tc[i].t = t;
				1921	tc[i].sq = gglMulx(s, q0, iwscale);
				1922	tc[i].tq = gglMulx(t, q0, iwscale);
				1923	}
				1924
				1925	int32_t span = 0;
				1926	do {
				1927	int32_t w1;
				1928	if (ggl_likely(numSpans)) {
				1929	w1 = w0 + dwdx;
				1930	} else {
				1931	if (remainder) {
				1932	// finish off the scanline...
				1933	span = remainder;
				1934	w1 = (c->shade.dwdx * span) + w0;
				1935	} else {
				1936	break;
				1937	}
				1938	}
				1939	int32_t q1 = gglRecipQ(w1, 30);
				1940	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1941	texture_t& tmu = c->state.texture[i];
				1942	if (!tmu.enable) continue;
				1943	texture_iterators_t& ti = tmu.iterators;
				1944
				1945	for (int j=0 ; j<2 ; j++) {
				1946	int32_t v = tc[i].st[j].v;
				1947	if (span) v += (tmu.shade.st[j].dx)*span;
				1948	else v += (tmu.shade.st[j].dx)<<SPAN_BITS;
				1949	const int32_t v0 = tc[i].st[j].q;
				1950	const int32_t v1 = gglMulx(v, q1, iwscale);
				1951	int32_t dvdx = v1 - v0;
				1952	if (span) dvdx /= span;
				1953	else dvdx >>= SPAN_BITS;
				1954	tc[i].st[j].v = v;
				1955	tc[i].st[j].q = v1;
				1956
				1957	const int scale = ti.st[j].scale + (iwscale - 30);
				1958	if (scale >= 0) {
				1959	ti.st[j].ydvdy = v0 << scale;
				1960	ti.st[j].dvdx = dvdx << scale;
				1961	} else {
				1962	ti.st[j].ydvdy = v0 >> -scale;
				1963	ti.st[j].dvdx = dvdx >> -scale;
				1964	}
				1965	}
				1966	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1967	gen.dsdx = ti.st[0].dvdx;
				1968	gen.dtdx = ti.st[1].dvdx;
				1969	}
				1970	c->iterators.xl = xl;
				1971	c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
				1972	w0 = w1;
				1973	q0 = q1;
				1974	c->span(c);
				1975	} while(numSpans--);
				1976	}
				1977
				1978	void scanline_perspective_single(context_t* c)
				1979	{
				1980	// 32 pixels spans works okay. 16 is a lot better,
				1981	// but hey, it's a software renderer...
				1982	const uint32_t SPAN_BITS = 5;
				1983	const uint32_t ys = c->iterators.y;
				1984	const uint32_t xs = c->iterators.xl;
				1985	const uint32_t x1 = c->iterators.xr;
				1986	const uint32_t xc = x1 - xs;
				1987
				1988	const iterators_t& ci = c->iterators;
				1989	int32_t w = (xs * c->shade.dwdx) + ci.ydwdy;
				1990	int32_t iw = gglRecipQ(w, 30);
				1991	const int iwscale = 32 - gglClz(iw);
				1992
				1993	const int i = 31 - gglClz(c->state.enabled_tmu);
				1994	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1995	texture_t& tmu = c->state.texture[i];
				1996	texture_iterators_t& ti = tmu.iterators;
				1997	const int sscale = ti.sscale + (iwscale - 30);
				1998	const int tscale = ti.tscale + (iwscale - 30);
				1999	int32_t s = tmu.shade.is0 +
				2000	(tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
				2001	((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
				2002	int32_t t = tmu.shade.it0 +
				2003	(tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
				2004	((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
				2005	int32_t s0 = gglMulx(s, iw, iwscale);
				2006	int32_t t0 = gglMulx(t, iw, iwscale);
				2007	int32_t xl = c->iterators.xl;
				2008
				2009	int32_t sq, tq, dsdx, dtdx;
				2010	int32_t premainder = xc & ((1<<SPAN_BITS)-1);
				2011	uint32_t numSpans = xc >> SPAN_BITS;
				2012	if (c->shade.dwdx == 0) {
				2013	// XXX: we could choose to do this if the error is small enough
				2014	numSpans = 0;
				2015	premainder = xc;
				2016	goto no_perspective;
				2017	}
				2018
				2019	if (premainder) {
				2020	w += c->shade.dwdx * premainder;
				2021	iw = gglRecipQ(w, 30);
				2022	no_perspective:
				2023	s += tmu.shade.idsdx * premainder;
				2024	t += tmu.shade.idtdx * premainder;
				2025	sq = gglMulx(s, iw, iwscale);
				2026	tq = gglMulx(t, iw, iwscale);
				2027	dsdx = (sq - s0) / premainder;
				2028	dtdx = (tq - t0) / premainder;
				2029	c->iterators.xl = xl;
				2030	c->iterators.xr = xl = xl + premainder;
				2031	goto finish;
				2032	}
				2033
				2034	while (numSpans--) {
				2035	w += c->shade.dwdx << SPAN_BITS;
				2036	s += tmu.shade.idsdx << SPAN_BITS;
				2037	t += tmu.shade.idtdx << SPAN_BITS;
				2038	iw = gglRecipQ(w, 30);
				2039	sq = gglMulx(s, iw, iwscale);
				2040	tq = gglMulx(t, iw, iwscale);
				2041	dsdx = (sq - s0) >> SPAN_BITS;
				2042	dtdx = (tq - t0) >> SPAN_BITS;
				2043	c->iterators.xl = xl;
				2044	c->iterators.xr = xl = xl + (1<<SPAN_BITS);
				2045	finish:
				2046	if (sscale >= 0) {
				2047	ti.ydsdy = s0 << sscale;
				2048	ti.dsdx = dsdx << sscale;
				2049	} else {
				2050	ti.ydsdy = s0 >>-sscale;
				2051	ti.dsdx = dsdx >>-sscale;
				2052	}
				2053	if (tscale >= 0) {
				2054	ti.ydtdy = t0 << tscale;
				2055	ti.dtdx = dtdx << tscale;
				2056	} else {
				2057	ti.ydtdy = t0 >>-tscale;
				2058	ti.dtdx = dtdx >>-tscale;
				2059	}
				2060	s0 = sq;
				2061	t0 = tq;
				2062	gen.dsdx = ti.dsdx;
				2063	gen.dtdx = ti.dtdx;
				2064	c->span(c);
				2065	}
				2066	}
				2067
				2068	// ----------------------------------------------------------------------------
				2069
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	2070	void scanline_col32cb16blend(context_t* c)
				2071	{
				2072	int32_t x = c->iterators.xl;
				2073	size_t ct = c->iterators.xr - x;
				2074	int32_t y = c->iterators.y;
				2075	surface_t* cb = &(c->state.buffers.color);
				2076	union {
				2077	uint16_t* dst;
				2078	uint32_t* dst32;
				2079	};
				2080	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2081
				2082	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
				2083	#if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				2084	scanline_col32cb16blend_neon(dst, &(c->packed8888), ct);
				2085	#else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				2086	scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
				2087	#endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				2088	#else
				2089	uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
				2090	int sA = (s>>24);
				2091	int f = 0x100 - (sA + (sA>>7));
				2092	while (ct--) {
				2093	uint16_t d = *dst;
				2094	int dR = (d>>11)&0x1f;
				2095	int dG = (d>>5)&0x3f;
				2096	int dB = (d)&0x1f;
				2097	int sR = (s >> ( 3))&0x1F;
				2098	int sG = (s >> ( 8+2))&0x3F;
				2099	int sB = (s >> (16+3))&0x1F;
				2100	sR += (f*dR)>>8;
				2101	sG += (f*dG)>>8;
				2102	sB += (f*dB)>>8;
				2103	*dst++ = uint16_t((sR<<11)\|(sG<<5)\|sB);
				2104	}
				2105	#endif
				2106
				2107	}
				2108
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2109	void scanline_t32cb16(context_t* c)
				2110	{
				2111	int32_t x = c->iterators.xl;
				2112	size_t ct = c->iterators.xr - x;
				2113	int32_t y = c->iterators.y;
				2114	surface_t* cb = &(c->state.buffers.color);
				2115	union {
				2116	uint16_t* dst;
				2117	uint32_t* dst32;
				2118	};
				2119	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2120
				2121	surface_t* tex = &(c->state.texture[0].surface);
				2122	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2123	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2124	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));
				2125	int sR, sG, sB;
				2126	uint32_t s, d;
				2127
				2128	if (ct==1 \|\| uint32_t(dst)&2) {
				2129	last_one:
				2130	s = GGL_RGBA_TO_HOST( *src++ );
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2131	*dst++ = convertAbgr8888ToRgb565(s);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2132	ct--;
				2133	}
				2134
				2135	while (ct >= 2) {
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2136	#if BYTE_ORDER == BIG_ENDIAN
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2137	s = GGL_RGBA_TO_HOST( *src++ );
				2138	d = convertAbgr8888ToRgb565_hi16(s);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2139
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2140	s = GGL_RGBA_TO_HOST( *src++ );
				2141	d \|= convertAbgr8888ToRgb565(s);
				2142	#else
				2143	s = GGL_RGBA_TO_HOST( *src++ );
				2144	d = convertAbgr8888ToRgb565(s);
				2145
				2146	s = GGL_RGBA_TO_HOST( *src++ );
				2147	d \|= convertAbgr8888ToRgb565(s) << 16;
				2148	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2149	*dst32++ = d;
				2150	ct -= 2;
				2151	}
				2152
				2153	if (ct > 0) {
				2154	goto last_one;
				2155	}
				2156	}
				2157
				2158	void scanline_t32cb16blend(context_t* c)
				2159	{
Duane Sand	068f9f3	2012-05-24 22:09:24 -0700	[diff] [blame]	2160	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) \|\| defined(__mips)))
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2161	int32_t x = c->iterators.xl;
				2162	size_t ct = c->iterators.xr - x;
				2163	int32_t y = c->iterators.y;
				2164	surface_t* cb = &(c->state.buffers.color);
				2165	uint16_t* dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2166
				2167	surface_t* tex = &(c->state.texture[0].surface);
				2168	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2169	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2170	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));
				2171
Duane Sand	068f9f3	2012-05-24 22:09:24 -0700	[diff] [blame]	2172	#ifdef __arm__
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2173	scanline_t32cb16blend_arm(dst, src, ct);
				2174	#else
Duane Sand	068f9f3	2012-05-24 22:09:24 -0700	[diff] [blame]	2175	scanline_t32cb16blend_mips(dst, src, ct);
				2176	#endif
				2177	#else
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2178	dst_iterator16 di(c);
				2179	horz_iterator32 hi(c);
				2180	blender_32to16 bl(c);
				2181	while (di.count--) {
				2182	uint32_t s = hi.get_pixel32();
				2183	bl.write(s, di.dst);
				2184	di.dst++;
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2185	}
				2186	#endif
				2187	}
				2188
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2189	void scanline_t32cb16blend_srca(context_t* c)
				2190	{
				2191	dst_iterator16 di(c);
				2192	horz_iterator32 hi(c);
				2193	blender_32to16_srcA blender(c);
				2194
				2195	while (di.count--) {
				2196	uint32_t s = hi.get_pixel32();
				2197	blender.write(s,di.dst);
				2198	di.dst++;
				2199	}
				2200	}
				2201
				2202	void scanline_t16cb16blend_clamp_mod(context_t* c)
				2203	{
				2204	const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
				2205	if (a == 0) {
				2206	return;
				2207	}
				2208
				2209	if (a == 255) {
				2210	scanline_t16cb16_clamp(c);
				2211	return;
				2212	}
				2213
				2214	dst_iterator16 di(c);
				2215	blender_16to16_modulate blender(c);
				2216	clamp_iterator ci(c);
				2217
				2218	while (di.count--) {
				2219	uint16_t s = ci.get_pixel16();
				2220	blender.write(s, di.dst);
				2221	di.dst++;
				2222	}
				2223	}
				2224
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2225	void scanline_memcpy(context_t* c)
				2226	{
				2227	int32_t x = c->iterators.xl;
				2228	size_t ct = c->iterators.xr - x;
				2229	int32_t y = c->iterators.y;
				2230	surface_t* cb = &(c->state.buffers.color);
				2231	const GGLFormat* fp = &(c->formats[cb->format]);
				2232	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2233	(x + (cb->stride * y)) * fp->size;
				2234
				2235	surface_t* tex = &(c->state.texture[0].surface);
				2236	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2237	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2238	uint8_t src = reinterpret_cast<uint8_t>(tex->data) +
				2239	(u + (tex->stride * v)) * fp->size;
				2240
				2241	const size_t size = ct * fp->size;
				2242	memcpy(dst, src, size);
				2243	}
				2244
				2245	void scanline_memset8(context_t* c)
				2246	{
				2247	int32_t x = c->iterators.xl;
				2248	size_t ct = c->iterators.xr - x;
				2249	int32_t y = c->iterators.y;
				2250	surface_t* cb = &(c->state.buffers.color);
				2251	uint8_t* dst = reinterpret_cast<uint8_t>(cb->data) + (x+(cb->stridey));
				2252	uint32_t packed = c->packed;
				2253	memset(dst, packed, ct);
				2254	}
				2255
				2256	void scanline_memset16(context_t* c)
				2257	{
				2258	int32_t x = c->iterators.xl;
				2259	size_t ct = c->iterators.xr - x;
				2260	int32_t y = c->iterators.y;
				2261	surface_t* cb = &(c->state.buffers.color);
				2262	uint16_t* dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2263	uint32_t packed = c->packed;
				2264	android_memset16(dst, packed, ct*2);
				2265	}
				2266
				2267	void scanline_memset32(context_t* c)
				2268	{
				2269	int32_t x = c->iterators.xl;
				2270	size_t ct = c->iterators.xr - x;
				2271	int32_t y = c->iterators.y;
				2272	surface_t* cb = &(c->state.buffers.color);
				2273	uint32_t* dst = reinterpret_cast<uint32_t>(cb->data) + (x+(cb->stridey));
				2274	uint32_t packed = GGL_HOST_TO_RGBA(c->packed);
				2275	android_memset32(dst, packed, ct*4);
				2276	}
				2277
				2278	void scanline_clear(context_t* c)
				2279	{
				2280	int32_t x = c->iterators.xl;
				2281	size_t ct = c->iterators.xr - x;
				2282	int32_t y = c->iterators.y;
				2283	surface_t* cb = &(c->state.buffers.color);
				2284	const GGLFormat* fp = &(c->formats[cb->format]);
				2285	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2286	(x + (cb->stride * y)) * fp->size;
				2287	const size_t size = ct * fp->size;
				2288	memset(dst, 0, size);
				2289	}
				2290
				2291	void scanline_set(context_t* c)
				2292	{
				2293	int32_t x = c->iterators.xl;
				2294	size_t ct = c->iterators.xr - x;
				2295	int32_t y = c->iterators.y;
				2296	surface_t* cb = &(c->state.buffers.color);
				2297	const GGLFormat* fp = &(c->formats[cb->format]);
				2298	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2299	(x + (cb->stride * y)) * fp->size;
				2300	const size_t size = ct * fp->size;
				2301	memset(dst, 0xFF, size);
				2302	}
				2303
				2304	void scanline_noop(context_t* c)
				2305	{
				2306	}
				2307
				2308	void rect_generic(context_t* c, size_t yc)
				2309	{
				2310	do {
				2311	c->scanline(c);
				2312	c->step_y(c);
				2313	} while (--yc);
				2314	}
				2315
				2316	void rect_memcpy(context_t* c, size_t yc)
				2317	{
				2318	int32_t x = c->iterators.xl;
				2319	size_t ct = c->iterators.xr - x;
				2320	int32_t y = c->iterators.y;
				2321	surface_t* cb = &(c->state.buffers.color);
				2322	const GGLFormat* fp = &(c->formats[cb->format]);
				2323	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2324	(x + (cb->stride * y)) * fp->size;
				2325
				2326	surface_t* tex = &(c->state.texture[0].surface);
				2327	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2328	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2329	uint8_t src = reinterpret_cast<uint8_t>(tex->data) +
				2330	(u + (tex->stride * v)) * fp->size;
				2331
				2332	if (cb->stride == tex->stride && ct == size_t(cb->stride)) {
				2333	memcpy(dst, src, ct * fp->size * yc);
				2334	} else {
				2335	const size_t size = ct * fp->size;
				2336	const size_t dbpr = cb->stride * fp->size;
				2337	const size_t sbpr = tex->stride * fp->size;
				2338	do {
				2339	memcpy(dst, src, size);
				2340	dst += dbpr;
				2341	src += sbpr;
				2342	} while (--yc);
				2343	}
				2344	}
				2345	// ----------------------------------------------------------------------------
				2346	}; // namespace android
				2347