Blame - libpixelflinger/scanline.cpp - AOSPA/android_system_core

blob: bc774f37e9f6beb036a5ead157d1f20789e5c5ba [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/* libs/pixelflinger/scanline.cpp
				2	**
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	3	** Copyright 2006-2011, The Android Open Source Project
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18
				19	#define LOG_TAG "pixelflinger"
				20
				21	#include <assert.h>
				22	#include <stdlib.h>
				23	#include <stdio.h>
				24	#include <string.h>
				25
				26	#include <cutils/memory.h>
				27	#include <cutils/log.h>
				28
				29	#include "buffer.h"
				30	#include "scanline.h"
				31
				32	#include "codeflinger/CodeCache.h"
				33	#include "codeflinger/GGLAssembler.h"
Ashok Bhat	658f89d	2013-02-28 18:32:03 +0000	[diff] [blame]	34	#if defined(__arm__)
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	35	#include "codeflinger/ARMAssembler.h"
Ashok Bhat	658f89d	2013-02-28 18:32:03 +0000	[diff] [blame]	36	#elif defined(__aarch64__)
				37	#include "codeflinger/Aarch64Assembler.h"
				38	#elif defined(__mips__)
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	39	#include "codeflinger/MIPSAssembler.h"
				40	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	41	//#include "codeflinger/ARMAssemblerOptimizer.h"
				42
				43	// ----------------------------------------------------------------------------
				44
				45	#define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline
				46	#define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic
				47	#define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic
				48	#define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen
				49
				50	#ifdef NDEBUG
				51	# define ANDROID_RELEASE
				52	# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
				53	#else
				54	# define ANDROID_DEBUG
				55	# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
				56	#endif
				57
// ANDROID_ARM_CODEGEN is 1 when building for one of the architectures tested
// below (ARM, MIPS or AArch64) and 0 otherwise. It gates the code cache, the
// ScanlineAssembly class and the run-time code generation in pick_scanline().
#if defined(__arm__) || defined(__mips__) || defined(__aarch64__)
#   define ANDROID_ARM_CODEGEN  1
#else
#   define ANDROID_ARM_CODEGEN  0
#endif
				63
				64	#define DEBUG__CODEGEN_ONLY 0
				65
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	66	/* Set to 1 to dump to the log the states that need a new
				67	* code-generated scanline callback, i.e. those that don't
				68	* have a corresponding shortcut function.
				69	*/
				70	#define DEBUG_NEEDS 0
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	71
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	72	#ifdef __mips__
				73	#define ASSEMBLY_SCRATCH_SIZE 4096
Ashok Bhat	658f89d	2013-02-28 18:32:03 +0000	[diff] [blame]	74	#elif defined(__aarch64__)
				75	#define ASSEMBLY_SCRATCH_SIZE 8192
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	76	#else
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	77	#define ASSEMBLY_SCRATCH_SIZE 2048
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	78	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	79
				80	// ----------------------------------------------------------------------------
				81	namespace android {
				82	// ----------------------------------------------------------------------------
				83
				84	static void init_y(context_t*, int32_t);
				85	static void init_y_noop(context_t*, int32_t);
				86	static void init_y_packed(context_t*, int32_t);
				87	static void init_y_error(context_t*, int32_t);
				88
				89	static void step_y__generic(context_t* c);
				90	static void step_y__nop(context_t*);
				91	static void step_y__smooth(context_t* c);
				92	static void step_y__tmu(context_t* c);
				93	static void step_y__w(context_t* c);
				94
				95	static void scanline(context_t* c);
				96	static void scanline_perspective(context_t* c);
				97	static void scanline_perspective_single(context_t* c);
				98	static void scanline_t32cb16blend(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	99	static void scanline_t32cb16blend_dither(context_t* c);
				100	static void scanline_t32cb16blend_srca(context_t* c);
				101	static void scanline_t32cb16blend_clamp(context_t* c);
				102	static void scanline_t32cb16blend_clamp_dither(context_t* c);
				103	static void scanline_t32cb16blend_clamp_mod(context_t* c);
				104	static void scanline_x32cb16blend_clamp_mod(context_t* c);
				105	static void scanline_t32cb16blend_clamp_mod_dither(context_t* c);
				106	static void scanline_x32cb16blend_clamp_mod_dither(context_t* c);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	107	static void scanline_t32cb16(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	108	static void scanline_t32cb16_dither(context_t* c);
				109	static void scanline_t32cb16_clamp(context_t* c);
				110	static void scanline_t32cb16_clamp_dither(context_t* c);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	111	static void scanline_col32cb16blend(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	112	static void scanline_t16cb16_clamp(context_t* c);
				113	static void scanline_t16cb16blend_clamp_mod(context_t* c);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	114	static void scanline_memcpy(context_t* c);
				115	static void scanline_memset8(context_t* c);
				116	static void scanline_memset16(context_t* c);
				117	static void scanline_memset32(context_t* c);
				118	static void scanline_noop(context_t* c);
				119	static void scanline_set(context_t* c);
				120	static void scanline_clear(context_t* c);
				121
				122	static void rect_generic(context_t* c, size_t yc);
				123	static void rect_memcpy(context_t* c, size_t yc);
				124
// Prototypes of the scanline routines implemented outside this file with C
// linkage, one set per target architecture.
// NOTE(review): the pixel buffer parameters are declared as pointers because
// a void routine cannot return pixels through a by-value integer; verify each
// signature against the matching assembly implementation before relying on it.
#if defined(__arm__)
extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t);
extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct);
extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct);
extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
#elif defined(__aarch64__)
extern "C" void scanline_t32cb16blend_aarch64(uint16_t*, uint32_t*, size_t);
extern "C" void scanline_col32cb16blend_aarch64(uint16_t *dst, uint32_t col, size_t ct);
#elif defined(__mips__)
extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t);
#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	136
				137	// ----------------------------------------------------------------------------
				138
/*
 * Packs a 32-bit pixel into a 16-bit RGB565 value.
 *
 * The input is read with red in bits 0-7, green in bits 8-15 and blue in
 * bits 16-23; bits 24-31 are ignored. Only the most significant bits of
 * each channel are kept (5 for red and blue, 6 for green): the value is
 * truncated, with no rounding or dithering.
 *
 * Returns red in bits 11-15, green in bits 5-10 and blue in bits 0-4.
 */
static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix)
{
    const uint32_t red   = (pix <<  8) & 0xf800;
    const uint32_t green = (pix >>  5) & 0x07e0;
    const uint32_t blue  = (pix >> 19) & 0x001f;
    return (uint16_t)(red | green | blue);
}
				145
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	146	struct shortcut_t {
				147	needs_filter_t filter;
				148	const char* desc;
				149	void (scanline)(context_t);
				150	void (init_y)(context_t, int32_t);
				151	};
				152
				153	// Keep in sync with needs
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	154
				155	/* To understand the values here, have a look at:
				156	* system/core/include/private/pixelflinger/ggl_context.h
				157	*
				158	* Especially the lines defining and using GGL_RESERVE_NEEDS
				159	*
				160	* Quick reminders:
				161	* - the last nibble of the first value is the destination buffer format.
				162	* - the last nibble of the third value is the source texture format
				163	* - formats: 4=rgb565 1=abgr8888 2=xbgr8888
				164	*
				165	* In the descriptions below:
				166	*
				167	* SRC means we copy the source pixels to the destination
				168	*
				169	* SRC_OVER means we blend the source pixels to the destination
				170	* with dstFactor = 1-srcA, srcFactor=1 (premultiplied source).
				171	* This mode is otherwise called 'blend'.
				172	*
				173	* SRCA_OVER means we blend the source pixels to the destination
				174	* with dstFactor=1-srcA srcFactor=srcA (non-premul source).
				175	* This mode is otherwise called 'blend_srca'
				176	*
				177	* clamp means we fetch source pixels from a texture with u/v clamping
				178	*
				179	* mod means the source pixels are modulated (multiplied) by the
				180	* a/r/g/b of the current context's color. Typically used for
				181	* fade-in / fade-out.
				182	*
				183	* dither means we dither 32 bit values to 16 bits
				184	*/
// Table of hand-written fast paths. pick_scanline() walks it from the first
// entry to the last and installs the scanline/init_y pair of the first entry
// whose filter matches the current needs, so earlier entries take precedence.
// Each entry is { filter, description, scanline callback, init_y callback }.
// NOTE(review): each filter looks like a { value, mask } pair of needs words;
// confirm against the needs_filter_t definition.
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	185	static shortcut_t shortcuts[] = {
				186	{ { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
				187	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	188	"565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop },
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	189	{ { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
				190	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	191	"565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop },
				192	/* same as first entry, but with dithering */
				193	{ { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } },
				194	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				195	"565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop },
				196	/* same as second entry, but with dithering */
				197	{ { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } },
				198	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				199	"565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop },
				200	/* this is used during the boot animation - CHEAT: ignore dithering */
				201	{ { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } },
				202	{ 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } },
				203	"565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop },
				204	/* special case for arbitrary texture coordinates (think scaling) */
				205	{ { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } },
				206	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				207	"565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y },
				208	{ { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } },
				209	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				210	"565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y },
				211	/* another case used during emulation */
				212	{ { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } },
				213	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				214	"565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y },
				215	/* and this */
				216	{ { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } },
				217	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				218	"565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y },
				219	{ { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } },
				220	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				221	"565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y },
				222	{ { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } },
				223	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				224	"565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y },
				225	{ { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } },
				226	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				227	"565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
				228	{ { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } },
				229	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				230	"565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
				231	{ { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } },
				232	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				233	"565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
				234	{ { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } },
				235	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				236	"565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
				237	{ { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } },
				238	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				239	"565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y },
				240	{ { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } },
				241	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				242	"565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y },
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	243	{ { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
				244	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
				245	"565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed },
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	246	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				247	{ 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } },
				248	"(nop) alpha test", scanline_noop, init_y_noop },
				249	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				250	{ 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } },
				251	"(nop) depth test", scanline_noop, init_y_noop },
				252	{ { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } },
				253	{ 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } },
				254	"(nop) logic_op", scanline_noop, init_y_noop },
				255	{ { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } },
				256	{ 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } },
				257	"(nop) color mask", scanline_noop, init_y_noop },
				258	{ { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } },
				259	{ 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
				260	"(set) logic_op", scanline_set, init_y_noop },
				261	{ { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } },
				262	{ 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
				263	"(clear) logic_op", scanline_clear, init_y_noop },
				264	{ { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } },
				265	{ 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } },
				266	"(clear) blending 0/0", scanline_clear, init_y_noop },
				267	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				268	{ 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } },
				269	"(error) invalid color-buffer format", scanline_noop, init_y_error },
				270	};
				// Filter used by pick_scanline() to select scanline_memcpy. It is only
				// tested after the texture-0 and color-buffer formats were found equal.
				271	static const needs_filter_t noblend1to1 = {
				272	// (disregard dithering, see below)
				273	{ 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } },
				274	{ 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } }
				275	};
				// Filter used by pick_scanline() to select one of the memset scanlines
				// (scanline_memset8/16/32, chosen by the color-buffer pixel size).
				276	static const needs_filter_t fill16noblend = {
				277	{ 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } },
				278	{ 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } }
				279	};
				280
				281	// ----------------------------------------------------------------------------
				282
				283	#if ANDROID_ARM_CODEGEN
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	284
				// File-wide cache of generated scanline assemblies; pick_scanline() looks
				// needs keys up in it and stores newly generated code into it.
				// The constructor argument depends on the target: 32K on MIPS, 48K on
				// AArch64, 12K otherwise (presumably the cache capacity in bytes; confirm
				// against the CodeCache constructor).
				285	#if defined(__mips__)
				286	static CodeCache gCodeCache(32 * 1024);
Ashok Bhat	658f89d	2013-02-28 18:32:03 +0000	[diff] [blame]	287	#elif defined(__aarch64__)
				288	static CodeCache gCodeCache(48 * 1024);
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	289	#else
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	290	static CodeCache gCodeCache(12 * 1024);
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	291	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	292
				293	class ScanlineAssembly : public Assembly {
				294	AssemblyKey<needs_t> mKey;
				295	public:
				296	ScanlineAssembly(needs_t needs, size_t size)
				297	: Assembly(size), mKey(needs) { }
				298	const AssemblyKey<needs_t>& key() const { return mKey; }
				299	};
				300	#endif
				301
				302	// ----------------------------------------------------------------------------
				303
				304	void ggl_init_scanline(context_t* c)
				305	{
				306	c->init_y = init_y;
				307	c->step_y = step_y__generic;
				308	c->scanline = scanline;
				309	}
				310
				311	void ggl_uninit_scanline(context_t* c)
				312	{
				313	if (c->state.buffers.coverage)
				314	free(c->state.buffers.coverage);
				315	#if ANDROID_ARM_CODEGEN
				316	if (c->scanline_as)
				317	c->scanline_as->decStrong(c);
				318	#endif
				319	}
				320
				321	// ----------------------------------------------------------------------------
				322
				323	static void pick_scanline(context_t* c)
				324	{
				325	#if (!defined(DEBUG__CODEGEN_ONLY) \|\| (DEBUG__CODEGEN_ONLY == 0))
				326
				327	#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC
				328	c->init_y = init_y;
				329	c->step_y = step_y__generic;
				330	c->scanline = scanline;
				331	return;
				332	#endif
				333
				334	//printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n",
				335	// c->state.needs.n, c->state.needs.p,
				336	// c->state.needs.t[0], c->state.needs.t[1]);
				337
				338	// first handle the special case that we cannot test with a filter
				339	const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n);
				340	if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) {
				341	if (c->state.needs.match(noblend1to1)) {
				342	// this will match regardless of dithering state, since both
				343	// src and dest have the same format anyway, there is no dithering
				344	// to be done.
				345	const GGLFormat* f =
				346	&(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]);
				347	if ((f->components == GGL_RGB) \|\|
				348	(f->components == GGL_RGBA) \|\|
				349	(f->components == GGL_LUMINANCE) \|\|
				350	(f->components == GGL_LUMINANCE_ALPHA))
				351	{
				352	// format must have all of RGB components
				353	// (so the current color doesn't show through)
				354	c->scanline = scanline_memcpy;
				355	c->init_y = init_y_noop;
				356	return;
				357	}
				358	}
				359	}
				360
				361	if (c->state.needs.match(fill16noblend)) {
				362	c->init_y = init_y_packed;
				363	switch (c->formats[cb_format].size) {
				364	case 1: c->scanline = scanline_memset8; return;
				365	case 2: c->scanline = scanline_memset16; return;
				366	case 4: c->scanline = scanline_memset32; return;
				367	}
				368	}
				369
				370	const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t);
				371	for (int i=0 ; i<numFilters ; i++) {
				372	if (c->state.needs.match(shortcuts[i].filter)) {
				373	c->scanline = shortcuts[i].scanline;
				374	c->init_y = shortcuts[i].init_y;
				375	return;
				376	}
				377	}
				378
Vladimir Chtchetkine	dccddee	2011-08-29 10:02:24 -0700	[diff] [blame]	379	#if DEBUG_NEEDS
Steve Block	4163b45	2012-01-04 19:19:03 +0000	[diff] [blame]	380	ALOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x",
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	381	c->state.needs.n, c->state.needs.p,
				382	c->state.needs.t[0], c->state.needs.t[1]);
				383	#endif
				384
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	385	#endif // DEBUG__CODEGEN_ONLY
				386
				387	c->init_y = init_y;
				388	c->step_y = step_y__generic;
				389
				390	#if ANDROID_ARM_CODEGEN
				391	// we're going to have to generate some code...
				392	// here, generate code for our pixel pipeline
				393	const AssemblyKey<needs_t> key(c->state.needs);
				394	sp<Assembly> assembly = gCodeCache.lookup(key);
				395	if (assembly == 0) {
				396	// create a new assembly region
				397	sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs,
				398	ASSEMBLY_SCRATCH_SIZE);
				399	// initialize our assembler
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	400	#if defined(__arm__)
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	401	GGLAssembler assembler( new ARMAssembler(a) );
				402	//GGLAssembler assembler(
				403	// new ARMAssemblerOptimizer(new ARMAssembler(a)) );
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	404	#endif
				405	#if defined(__mips__)
				406	GGLAssembler assembler( new ArmToMipsAssembler(a) );
Ashok Bhat	658f89d	2013-02-28 18:32:03 +0000	[diff] [blame]	407	#elif defined(__aarch64__)
				408	GGLAssembler assembler( new ArmToAarch64Assembler(a) );
Paul Lind	2bc2b79	2012-02-01 10:54:19 -0800	[diff] [blame]	409	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	410	// generate the scanline code for the given needs
				411	int err = assembler.scanline(c->state.needs, c);
				412	if (ggl_likely(!err)) {
				413	// finally, cache this assembly
				414	err = gCodeCache.cache(a->key(), a);
				415	}
				416	if (ggl_unlikely(err)) {
Steve Block	8aeb6e2	2012-01-06 14:13:42 +0000	[diff] [blame]	417	ALOGE("error generating or caching assembly. Reverting to NOP.");
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	418	c->scanline = scanline_noop;
				419	c->init_y = init_y_noop;
				420	c->step_y = step_y__nop;
				421	return;
				422	}
				423	assembly = a;
				424	}
				425
				426	// release the previous assembly
				427	if (c->scanline_as) {
				428	c->scanline_as->decStrong(c);
				429	}
				430
Steve Block	4163b45	2012-01-04 19:19:03 +0000	[diff] [blame]	431	//ALOGI("using generated pixel-pipeline");
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	432	c->scanline_as = assembly.get();
				433	c->scanline_as->incStrong(c); // hold on to assembly
				434	c->scanline = (void()(context_t c))assembly->base();
				435	#else
Steve Block	4f07a1f	2012-01-05 22:25:38 +0000	[diff] [blame]	436	// ALOGW("using generic (slow) pixel-pipeline");
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	437	c->scanline = scanline;
				438	#endif
				439	}
				440
				441	void ggl_pick_scanline(context_t* c)
				442	{
				443	pick_scanline(c);
				444	if ((c->state.enables & GGL_ENABLE_W) &&
				445	(c->state.enables & GGL_ENABLE_TMUS))
				446	{
				447	c->span = c->scanline;
				448	c->scanline = scanline_perspective;
				449	if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) {
				450	// only one TMU enabled
				451	c->scanline = scanline_perspective_single;
				452	}
				453	}
				454	}
				455
				456	// ----------------------------------------------------------------------------
				457
				458	static void blending(context_t* c, pixel_t* fragment, pixel_t* fb);
				459	static void blend_factor(context_t* c, pixel_t* r, uint32_t factor,
				460	const pixel_t* src, const pixel_t* dst);
				461	static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv);
				462
				463	#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
				464
				465	// no need to compile the generic-pipeline, it can't be reached
				// Empty definition of the generic scanline, compiled when
				// ANDROID_ARM_CODEGEN is set and ANDROID_CODEGEN is
				// ANDROID_CODEGEN_GENERATED. It keeps the symbol defined for the code that
				// still names it (e.g. ggl_init_scanline) without building the pipeline.
				466	void scanline(context_t*)
				467	{
				468	}
				469
				470	#else
				471
				472	void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv)
				473	{
				474	if (su && sv) {
				475	if (su > sv) {
				476	v = ggl_expand(v, sv, su);
				477	sv = su;
				478	} else if (su < sv) {
				479	u = ggl_expand(u, su, sv);
				480	su = sv;
				481	}
				482	}
				483	}
				484
				485	void blending(context_t* c, pixel_t* fragment, pixel_t* fb)
				486	{
				487	rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]);
				488	rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]);
				489	rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]);
				490	rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]);
				491
				492	pixel_t sf, df;
				493	blend_factor(c, &sf, c->state.blend.src, fragment, fb);
				494	blend_factor(c, &df, c->state.blend.dst, fragment, fb);
				495
				496	fragment->c[1] =
				497	gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1]));
				498	fragment->c[2] =
				499	gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2]));
				500	fragment->c[3] =
				501	gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3]));
				502
				503	if (c->state.blend.alpha_separate) {
				504	blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb);
				505	blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb);
				506	}
				507
				508	fragment->c[0] =
				509	gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0]));
				510
				511	// clamp to 1.0
				512	if (fragment->c[0] >= (1LU<<fragment->s[0]))
				513	fragment->c[0] = (1<<fragment->s[0])-1;
				514	if (fragment->c[1] >= (1LU<<fragment->s[1]))
				515	fragment->c[1] = (1<<fragment->s[1])-1;
				516	if (fragment->c[2] >= (1LU<<fragment->s[2]))
				517	fragment->c[2] = (1<<fragment->s[2])-1;
				518	if (fragment->c[3] >= (1LU<<fragment->s[3]))
				519	fragment->c[3] = (1<<fragment->s[3])-1;
				520	}
				521
				522	static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0)
				523	{
				524	if (!size)
				525	return def;
				526
				527	// scale to 16 bits
				528	if (size > 16) {
				529	x >>= (size - 16);
				530	} else if (size < 16) {
				531	x = ggl_expand(x, size, 16);
				532	}
				533	x += x >> 15;
				534	return x;
				535	}
				536
				537	void blend_factor(context_t* c, pixel_t* r,
				538	uint32_t factor, const pixel_t* src, const pixel_t* dst)
				539	{
				540	switch (factor) {
				541	case GGL_ZERO:
				542	r->c[1] =
				543	r->c[2] =
				544	r->c[3] =
				545	r->c[0] = 0;
				546	break;
				547	case GGL_ONE:
				548	r->c[1] =
				549	r->c[2] =
				550	r->c[3] =
				551	r->c[0] = FIXED_ONE;
				552	break;
				553	case GGL_DST_COLOR:
				554	r->c[1] = blendfactor(dst->c[1], dst->s[1]);
				555	r->c[2] = blendfactor(dst->c[2], dst->s[2]);
				556	r->c[3] = blendfactor(dst->c[3], dst->s[3]);
				557	r->c[0] = blendfactor(dst->c[0], dst->s[0]);
				558	break;
				559	case GGL_SRC_COLOR:
				560	r->c[1] = blendfactor(src->c[1], src->s[1]);
				561	r->c[2] = blendfactor(src->c[2], src->s[2]);
				562	r->c[3] = blendfactor(src->c[3], src->s[3]);
				563	r->c[0] = blendfactor(src->c[0], src->s[0]);
				564	break;
				565	case GGL_ONE_MINUS_DST_COLOR:
				566	r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]);
				567	r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]);
				568	r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]);
				569	r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]);
				570	break;
				571	case GGL_ONE_MINUS_SRC_COLOR:
				572	r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]);
				573	r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]);
				574	r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]);
				575	r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]);
				576	break;
				577	case GGL_SRC_ALPHA:
				578	r->c[1] =
				579	r->c[2] =
				580	r->c[3] =
				581	r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE);
				582	break;
				583	case GGL_ONE_MINUS_SRC_ALPHA:
				584	r->c[1] =
				585	r->c[2] =
				586	r->c[3] =
				587	r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE);
				588	break;
				589	case GGL_DST_ALPHA:
				590	r->c[1] =
				591	r->c[2] =
				592	r->c[3] =
				593	r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
				594	break;
				595	case GGL_ONE_MINUS_DST_ALPHA:
				596	r->c[1] =
				597	r->c[2] =
				598	r->c[3] =
				599	r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
				600	break;
				601	case GGL_SRC_ALPHA_SATURATE:
				602	// XXX: GGL_SRC_ALPHA_SATURATE
				603	break;
				604	}
				605	}
				606
				607	static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap)
				608	{
				609	GGLfixed d;
				610	if (tx_wrap == GGL_REPEAT) {
				611	d = (uint32_t(coord)>>16) * size;
				612	} else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics
				613	const GGLfixed clamp_min = FIXED_HALF;
				614	const GGLfixed clamp_max = (size << 16) - FIXED_HALF;
				615	if (coord < clamp_min) coord = clamp_min;
				616	if (coord > clamp_max) coord = clamp_max;
				617	d = coord;
				618	} else { // 1:1
				619	const GGLfixed clamp_min = 0;
				620	const GGLfixed clamp_max = (size << 16);
				621	if (coord < clamp_min) coord = clamp_min;
				622	if (coord > clamp_max) coord = clamp_max;
				623	d = coord;
				624	}
				625	return d;
				626	}
				627
				628	static inline
				629	GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len)
				630	{
				631	const int32_t end = dvdx * (len-1) + v;
				632	if (end < 0)
				633	v -= end;
				634	v &= ~(v>>31);
				635	return v;
				636	}
				637
				638	void scanline(context_t* c)
				639	{
				640	const uint32_t enables = c->state.enables;
				641	const int xs = c->iterators.xl;
				642	const int x1 = c->iterators.xr;
				643	int xc = x1 - xs;
				644	const int16_t* covPtr = c->state.buffers.coverage + xs;
				645
				646	// All iterated values are sampled at the pixel center
				647
				648	// reset iterators for that scanline...
				649	GGLcolor r, g, b, a;
				650	iterators_t& ci = c->iterators;
				651	if (enables & GGL_ENABLE_SMOOTH) {
				652	r = (xs * c->shade.drdx) + ci.ydrdy;
				653	g = (xs * c->shade.dgdx) + ci.ydgdy;
				654	b = (xs * c->shade.dbdx) + ci.ydbdy;
				655	a = (xs * c->shade.dadx) + ci.ydady;
				656	r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc);
				657	g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc);
				658	b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc);
				659	a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc);
				660	} else {
				661	r = ci.ydrdy;
				662	g = ci.ydgdy;
				663	b = ci.ydbdy;
				664	a = ci.ydady;
				665	}
				666
				667	// z iterators are 1.31
				668	GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy;
				669	GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy;
				670
				671	struct {
				672	GGLfixed s, t;
				673	} tc[GGL_TEXTURE_UNIT_COUNT];
				674	if (enables & GGL_ENABLE_TMUS) {
				675	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				676	if (c->state.texture[i].enable) {
				677	texture_iterators_t& ti = c->state.texture[i].iterators;
				678	if (enables & GGL_ENABLE_W) {
				679	tc[i].s = ti.ydsdy;
				680	tc[i].t = ti.ydtdy;
				681	} else {
				682	tc[i].s = (xs * ti.dsdx) + ti.ydsdy;
				683	tc[i].t = (xs * ti.dtdx) + ti.ydtdy;
				684	}
				685	}
				686	}
				687	}
				688
				689	pixel_t fragment;
				690	pixel_t texel;
				691	pixel_t fb;
				692
				693	uint32_t x = xs;
				694	uint32_t y = c->iterators.y;
				695
				696	while (xc--) {
				697
				698	{ // just a scope
				699
				700	// read color (convert to 8 bits by keeping only the integer part)
				701	fragment.s[1] = fragment.s[2] =
				702	fragment.s[3] = fragment.s[0] = 8;
				703	fragment.c[1] = r >> (GGL_COLOR_BITS-8);
				704	fragment.c[2] = g >> (GGL_COLOR_BITS-8);
				705	fragment.c[3] = b >> (GGL_COLOR_BITS-8);
				706	fragment.c[0] = a >> (GGL_COLOR_BITS-8);
				707
				708	// texturing
				709	if (enables & GGL_ENABLE_TMUS) {
				710	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				711	texture_t& tx = c->state.texture[i];
				712	if (!tx.enable)
				713	continue;
				714	texture_iterators_t& ti = tx.iterators;
				715	int32_t u, v;
				716
				717	// s-coordinate
				718	if (tx.s_coord != GGL_ONE_TO_ONE) {
				719	const int w = tx.surface.width;
				720	u = wrapping(tc[i].s, w, tx.s_wrap);
				721	tc[i].s += ti.dsdx;
				722	} else {
				723	u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF;
				724	}
				725
				726	// t-coordinate
				727	if (tx.t_coord != GGL_ONE_TO_ONE) {
				728	const int h = tx.surface.height;
				729	v = wrapping(tc[i].t, h, tx.t_wrap);
				730	tc[i].t += ti.dtdx;
				731	} else {
				732	v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF;
				733	}
				734
				735	// read texture
				736	if (tx.mag_filter == GGL_NEAREST &&
				737	tx.min_filter == GGL_NEAREST)
				738	{
				739	u >>= 16;
				740	v >>= 16;
				741	tx.surface.read(&tx.surface, c, u, v, &texel);
				742	} else {
				743	const int w = tx.surface.width;
				744	const int h = tx.surface.height;
				745	u -= FIXED_HALF;
				746	v -= FIXED_HALF;
				747	int u0 = u >> 16;
				748	int v0 = v >> 16;
				749	int u1 = u0 + 1;
				750	int v1 = v0 + 1;
				751	if (tx.s_wrap == GGL_REPEAT) {
				752	if (u0<0) u0 += w;
				753	if (u1<0) u1 += w;
				754	if (u0>=w) u0 -= w;
				755	if (u1>=w) u1 -= w;
				756	} else {
				757	if (u0<0) u0 = 0;
				758	if (u1<0) u1 = 0;
				759	if (u0>=w) u0 = w-1;
				760	if (u1>=w) u1 = w-1;
				761	}
				762	if (tx.t_wrap == GGL_REPEAT) {
				763	if (v0<0) v0 += h;
				764	if (v1<0) v1 += h;
				765	if (v0>=h) v0 -= h;
				766	if (v1>=h) v1 -= h;
				767	} else {
				768	if (v0<0) v0 = 0;
				769	if (v1<0) v1 = 0;
				770	if (v0>=h) v0 = h-1;
				771	if (v1>=h) v1 = h-1;
				772	}
				773	pixel_t texels[4];
				774	uint32_t mm[4];
				775	tx.surface.read(&tx.surface, c, u0, v0, &texels[0]);
				776	tx.surface.read(&tx.surface, c, u0, v1, &texels[1]);
				777	tx.surface.read(&tx.surface, c, u1, v0, &texels[2]);
				778	tx.surface.read(&tx.surface, c, u1, v1, &texels[3]);
				779	u = (u >> 12) & 0xF;
				780	v = (v >> 12) & 0xF;
				781	u += u>>3;
				782	v += v>>3;
				783	mm[0] = (0x10 - u) * (0x10 - v);
				784	mm[1] = (0x10 - u) * v;
				785	mm[2] = u * (0x10 - v);
				786	mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]);
				787	for (int j=0 ; j<4 ; j++) {
				788	texel.s[j] = texels[0].s[j];
				789	if (!texel.s[j]) continue;
				790	texel.s[j] += 8;
				791	texel.c[j] = texels[0].c[j]*mm[0] +
				792	texels[1].c[j]*mm[1] +
				793	texels[2].c[j]*mm[2] +
				794	texels[3].c[j]*mm[3] ;
				795	}
				796	}
				797
				798	// Texture environnement...
				799	for (int j=0 ; j<4 ; j++) {
				800	uint32_t& Cf = fragment.c[j];
				801	uint32_t& Ct = texel.c[j];
				802	uint8_t& sf = fragment.s[j];
				803	uint8_t& st = texel.s[j];
				804	uint32_t At = texel.c[0];
				805	uint8_t sat = texel.s[0];
				806	switch (tx.env) {
				807	case GGL_REPLACE:
				808	if (st) {
				809	Cf = Ct;
				810	sf = st;
				811	}
				812	break;
				813	case GGL_MODULATE:
				814	if (st) {
				815	uint32_t factor = Ct + (Ct>>(st-1));
				816	Cf = (Cf * factor) >> st;
				817	}
				818	break;
				819	case GGL_DECAL:
				820	if (sat) {
				821	rescale(Cf, sf, Ct, st);
				822	Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat;
				823	}
				824	break;
				825	case GGL_BLEND:
				826	if (st) {
				827	uint32_t Cc = tx.env_color[i];
				828	if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8;
				829	else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf);
				830	uint32_t factor = Ct + (Ct>>(st-1));
				831	Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st;
				832	}
				833	break;
				834	case GGL_ADD:
				835	if (st) {
				836	rescale(Cf, sf, Ct, st);
				837	Cf += Ct;
				838	}
				839	break;
				840	}
				841	}
				842	}
				843	}
				844
				845	// coverage application
				846	if (enables & GGL_ENABLE_AA) {
				847	int16_t cf = *covPtr++;
				848	fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15;
				849	}
				850
				851	// alpha-test
				852	if (enables & GGL_ENABLE_ALPHA_TEST) {
				853	GGLcolor ref = c->state.alpha_test.ref;
				854	GGLcolor alpha = (uint64_t(fragment.c[0]) *
				855	((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1);
				856	switch (c->state.alpha_test.func) {
				857	case GGL_NEVER: goto discard;
				858	case GGL_LESS: if (alpha<ref) break; goto discard;
				859	case GGL_EQUAL: if (alpha==ref) break; goto discard;
				860	case GGL_LEQUAL: if (alpha<=ref) break; goto discard;
				861	case GGL_GREATER: if (alpha>ref) break; goto discard;
				862	case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard;
				863	case GGL_GEQUAL: if (alpha>=ref) break; goto discard;
				864	}
				865	}
				866
				867	// depth test
				868	if (c->state.buffers.depth.format) {
				869	if (enables & GGL_ENABLE_DEPTH_TEST) {
				870	surface_t* cb = &(c->state.buffers.depth);
				871	uint16_t* p = (uint16_t)(cb->data)+(x+(cb->stridey));
				872	uint16_t zz = uint32_t(z)>>(16);
				873	uint16_t depth = *p;
				874	switch (c->state.depth_test.func) {
				875	case GGL_NEVER: goto discard;
				876	case GGL_LESS: if (zz<depth) break; goto discard;
				877	case GGL_EQUAL: if (zz==depth) break; goto discard;
				878	case GGL_LEQUAL: if (zz<=depth) break; goto discard;
				879	case GGL_GREATER: if (zz>depth) break; goto discard;
				880	case GGL_NOTEQUAL: if (zz!=depth) break; goto discard;
				881	case GGL_GEQUAL: if (zz>=depth) break; goto discard;
				882	}
				883	// depth buffer is not enabled, if depth-test is not enabled
				884	/*
				885	fragment.s[1] = fragment.s[2] =
				886	fragment.s[3] = fragment.s[0] = 8;
				887	fragment.c[1] =
				888	fragment.c[2] =
				889	fragment.c[3] =
				890	fragment.c[0] = 255 - (zz>>8);
				891	*/
				892	if (c->state.mask.depth) {
				893	*p = zz;
				894	}
				895	}
				896	}
				897
				898	// fog
				899	if (enables & GGL_ENABLE_FOG) {
				900	for (int i=1 ; i<=3 ; i++) {
				901	GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF;
				902	uint32_t& c = fragment.c[i];
				903	uint8_t& s = fragment.s[i];
				904	c = (c * 0x10000) / ((1<<s)-1);
				905	c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f));
				906	s = 16;
				907	}
				908	}
				909
				910	// blending
				911	if (enables & GGL_ENABLE_BLENDING) {
				912	fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind
				913	fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0;
				914	c->state.buffers.color.read(
				915	&(c->state.buffers.color), c, x, y, &fb);
				916	blending( c, &fragment, &fb );
				917	}
				918
				919	// write
				920	c->state.buffers.color.write(
				921	&(c->state.buffers.color), c, x, y, &fragment);
				922	}
				923
				924	discard:
				925	// iterate...
				926	x += 1;
				927	if (enables & GGL_ENABLE_SMOOTH) {
				928	r += c->shade.drdx;
				929	g += c->shade.dgdx;
				930	b += c->shade.dbdx;
				931	a += c->shade.dadx;
				932	}
				933	z += c->shade.dzdx;
				934	f += c->shade.dfdx;
				935	}
				936	}
				937
				938	#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
				939
				940	// ----------------------------------------------------------------------------
				941	#if 0
				942	#pragma mark -
				943	#pragma mark Scanline
				944	#endif
				945
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	946	/* Used to parse a 32-bit source texture linearly. Usage is:
				947	*
				948	* horz_iterator32 hi(context);
				949	* while (...) {
				950	* uint32_t src_pixel = hi.get_pixel32();
				951	* ...
				952	* }
				953	*
				954	* Use only for one-to-one texture mapping.
				955	*/
				956	struct horz_iterator32 {
				957	horz_iterator32(context_t* c) {
				958	const int x = c->iterators.xl;
				959	const int y = c->iterators.y;
				960	texture_t& tx = c->state.texture[0];
				961	const int32_t u = (tx.shade.is0>>16) + x;
				962	const int32_t v = (tx.shade.it0>>16) + y;
				963	m_src = reinterpret_cast<uint32_t>(tx.surface.data)+(u+(tx.surface.stridev));
				964	}
				965	uint32_t get_pixel32() {
				966	return *m_src++;
				967	}
				968	protected:
				969	uint32_t* m_src;
				970	};
				971
				972	/* A variant for 16-bit source textures. */
				973	struct horz_iterator16 {
				974	horz_iterator16(context_t* c) {
				975	const int x = c->iterators.xl;
				976	const int y = c->iterators.y;
				977	texture_t& tx = c->state.texture[0];
				978	const int32_t u = (tx.shade.is0>>16) + x;
				979	const int32_t v = (tx.shade.it0>>16) + y;
				980	m_src = reinterpret_cast<uint16_t>(tx.surface.data)+(u+(tx.surface.stridev));
				981	}
				982	uint16_t get_pixel16() {
				983	return *m_src++;
				984	}
				985	protected:
				986	uint16_t* m_src;
				987	};
				988
				989	/* A clamp iterator is used to iterate inside a texture with GGL_CLAMP.
				990	* After initialization, call get_src16() or get_src32() to get the current
				991	* texture pixel value.
				992	*/
				993	struct clamp_iterator {
				994	clamp_iterator(context_t* c) {
				995	const int xs = c->iterators.xl;
				996	texture_t& tx = c->state.texture[0];
				997	texture_iterators_t& ti = tx.iterators;
				998	m_s = (xs * ti.dsdx) + ti.ydsdy;
				999	m_t = (xs * ti.dtdx) + ti.ydtdy;
				1000	m_ds = ti.dsdx;
				1001	m_dt = ti.dtdx;
				1002	m_width_m1 = tx.surface.width - 1;
				1003	m_height_m1 = tx.surface.height - 1;
				1004	m_data = tx.surface.data;
				1005	m_stride = tx.surface.stride;
				1006	}
				1007	uint16_t get_pixel16() {
				1008	int u, v;
				1009	get_uv(u, v);
				1010	uint16_t* src = reinterpret_cast<uint16_t>(m_data) + (u + (m_stridev));
				1011	return src[0];
				1012	}
				1013	uint32_t get_pixel32() {
				1014	int u, v;
				1015	get_uv(u, v);
				1016	uint32_t* src = reinterpret_cast<uint32_t>(m_data) + (u + (m_stridev));
				1017	return src[0];
				1018	}
				1019	private:
				1020	void get_uv(int& u, int& v) {
				1021	int uu = m_s >> 16;
				1022	int vv = m_t >> 16;
				1023	if (uu < 0)
				1024	uu = 0;
				1025	if (uu > m_width_m1)
				1026	uu = m_width_m1;
				1027	if (vv < 0)
				1028	vv = 0;
				1029	if (vv > m_height_m1)
				1030	vv = m_height_m1;
				1031	u = uu;
				1032	v = vv;
				1033	m_s += m_ds;
				1034	m_t += m_dt;
				1035	}
				1036
				1037	GGLfixed m_s, m_t;
				1038	GGLfixed m_ds, m_dt;
				1039	int m_width_m1, m_height_m1;
				1040	uint8_t* m_data;
				1041	int m_stride;
				1042	};
				1043
				1044	/*
				1045	* The 'horizontal clamp iterator' variant corresponds to the case where
				1046	* the 'v' coordinate doesn't change. This is useful to avoid one mult and
				1047	* extra adds / checks per pixels, if the blending/processing operation after
				1048	* this is very fast.
				1049	*/
				1050	static int is_context_horizontal(const context_t* c) {
				1051	return (c->state.texture[0].iterators.dtdx == 0);
				1052	}
				1053
				1054	struct horz_clamp_iterator {
				1055	uint16_t get_pixel16() {
				1056	int u = m_s >> 16;
				1057	m_s += m_ds;
				1058	if (u < 0)
				1059	u = 0;
				1060	if (u > m_width_m1)
				1061	u = m_width_m1;
				1062	const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data);
				1063	return src[u];
				1064	}
				1065	uint32_t get_pixel32() {
				1066	int u = m_s >> 16;
				1067	m_s += m_ds;
				1068	if (u < 0)
				1069	u = 0;
				1070	if (u > m_width_m1)
				1071	u = m_width_m1;
				1072	const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data);
				1073	return src[u];
				1074	}
				1075	protected:
				1076	void init(const context_t* c, int shift);
				1077	GGLfixed m_s;
				1078	GGLfixed m_ds;
				1079	int m_width_m1;
				1080	const uint8_t* m_data;
				1081	};
				1082
				1083	void horz_clamp_iterator::init(const context_t* c, int shift)
				1084	{
				1085	const int xs = c->iterators.xl;
				1086	const texture_t& tx = c->state.texture[0];
				1087	const texture_iterators_t& ti = tx.iterators;
				1088	m_s = (xs * ti.dsdx) + ti.ydsdy;
				1089	m_ds = ti.dsdx;
				1090	m_width_m1 = tx.surface.width-1;
				1091	m_data = tx.surface.data;
				1092
				1093	GGLfixed t = (xs * ti.dtdx) + ti.ydtdy;
				1094	int v = t >> 16;
				1095	if (v < 0)
				1096	v = 0;
				1097	else if (v >= (int)tx.surface.height)
				1098	v = (int)tx.surface.height-1;
				1099
				1100	m_data += (tx.surface.stride*v) << shift;
				1101	}
				1102
				1103	struct horz_clamp_iterator16 : horz_clamp_iterator {
				1104	horz_clamp_iterator16(const context_t* c) {
				1105	init(c,1);
				1106	};
				1107	};
				1108
				1109	struct horz_clamp_iterator32 : horz_clamp_iterator {
				1110	horz_clamp_iterator32(context_t* c) {
				1111	init(c,2);
				1112	};
				1113	};
				1114
				1115	/* This is used to perform dithering operations.
				1116	*/
				1117	struct ditherer {
				1118	ditherer(const context_t* c) {
				1119	const int x = c->iterators.xl;
				1120	const int y = c->iterators.y;
				1121	m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ];
				1122	m_index = x & GGL_DITHER_MASK;
				1123	}
				1124	void step(void) {
				1125	m_index++;
				1126	}
				1127	int get_value(void) {
				1128	int ret = m_line[m_index & GGL_DITHER_MASK];
				1129	m_index++;
				1130	return ret;
				1131	}
				1132	uint16_t abgr8888ToRgb565(uint32_t s) {
				1133	uint32_t r = s & 0xff;
				1134	uint32_t g = (s >> 8) & 0xff;
				1135	uint32_t b = (s >> 16) & 0xff;
				1136	return rgb888ToRgb565(r,g,b);
				1137	}
				1138	/* The following assumes that r/g/b are in the 0..255 range each */
				1139	uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) {
				1140	int threshold = get_value();
				1141	/* dither in on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */
				1142	r += (threshold >> (GGL_DITHER_BITS-8 +5));
				1143	g += (threshold >> (GGL_DITHER_BITS-8 +6));
				1144	b += (threshold >> (GGL_DITHER_BITS-8 +5));
				1145	if (r > 0xff)
				1146	r = 0xff;
				1147	if (g > 0xff)
				1148	g = 0xff;
				1149	if (b > 0xff)
				1150	b = 0xff;
				1151	return uint16_t(((r & 0xf8) << 8) \| ((g & 0xfc) << 3) \| (b >> 3));
				1152	}
				1153	protected:
				1154	const uint8_t* m_line;
				1155	int m_index;
				1156	};
				1157
				1158	/* This structure is used to blend (SRC_OVER) 32-bit source pixels
				1159	* onto 16-bit destination ones. Usage is simply:
				1160	*
				1161	* blender.blend(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
				1162	*/
				1163	struct blender_32to16 {
				1164	blender_32to16(context_t* c) { }
				1165	void write(uint32_t s, uint16_t* dst) {
				1166	if (s == 0)
				1167	return;
				1168	s = GGL_RGBA_TO_HOST(s);
				1169	int sA = (s>>24);
				1170	if (sA == 0xff) {
				1171	*dst = convertAbgr8888ToRgb565(s);
				1172	} else {
				1173	int f = 0x100 - (sA + (sA>>7));
				1174	int sR = (s >> ( 3))&0x1F;
				1175	int sG = (s >> ( 8+2))&0x3F;
				1176	int sB = (s >> (16+3))&0x1F;
				1177	uint16_t d = *dst;
				1178	int dR = (d>>11)&0x1f;
				1179	int dG = (d>>5)&0x3f;
				1180	int dB = (d)&0x1f;
				1181	sR += (f*dR)>>8;
				1182	sG += (f*dG)>>8;
				1183	sB += (f*dB)>>8;
				1184	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1185	}
				1186	}
				1187	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1188	if (s == 0) {
				1189	di.step();
				1190	return;
				1191	}
				1192	s = GGL_RGBA_TO_HOST(s);
				1193	int sA = (s>>24);
				1194	if (sA == 0xff) {
				1195	*dst = di.abgr8888ToRgb565(s);
				1196	} else {
				1197	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1198	int f = 0x100 - (sA + (sA>>7));
				1199	int sR = (s >> ( 3))&0x1F;
				1200	int sG = (s >> ( 8+2))&0x3F;
				1201	int sB = (s >> (16+3))&0x1F;
				1202	uint16_t d = *dst;
				1203	int dR = (d>>11)&0x1f;
				1204	int dG = (d>>5)&0x3f;
				1205	int dB = (d)&0x1f;
				1206	sR = ((sR << 8) + f*dR + threshold)>>8;
				1207	sG = ((sG << 8) + f*dG + threshold)>>8;
				1208	sB = ((sB << 8) + f*dB + threshold)>>8;
				1209	if (sR > 0x1f) sR = 0x1f;
				1210	if (sG > 0x3f) sG = 0x3f;
				1211	if (sB > 0x1f) sB = 0x1f;
				1212	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1213	}
				1214	}
				1215	};
				1216
				1217	/* This blender does the same for the 'blend_srca' operation.
				1218	* where dstFactor=srcA*(1-srcA) srcFactor=srcA
				1219	*/
				1220	struct blender_32to16_srcA {
				1221	blender_32to16_srcA(const context_t* c) { }
				1222	void write(uint32_t s, uint16_t* dst) {
				1223	if (!s) {
				1224	return;
				1225	}
				1226	uint16_t d = *dst;
				1227	s = GGL_RGBA_TO_HOST(s);
				1228	int sR = (s >> ( 3))&0x1F;
				1229	int sG = (s >> ( 8+2))&0x3F;
				1230	int sB = (s >> (16+3))&0x1F;
				1231	int sA = (s>>24);
				1232	int f1 = (sA + (sA>>7));
				1233	int f2 = 0x100-f1;
				1234	int dR = (d>>11)&0x1f;
				1235	int dG = (d>>5)&0x3f;
				1236	int dB = (d)&0x1f;
				1237	sR = (f1sR + f2dR)>>8;
				1238	sG = (f1sG + f2dG)>>8;
				1239	sB = (f1sB + f2dB)>>8;
				1240	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1241	}
				1242	};
				1243
				1244	/* Common init code the modulating blenders */
				1245	struct blender_modulate {
				1246	void init(const context_t* c) {
				1247	const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8);
				1248	const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8);
				1249	const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8);
				1250	const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
				1251	m_r = r + (r >> 7);
				1252	m_g = g + (g >> 7);
				1253	m_b = b + (b >> 7);
				1254	m_a = a + (a >> 7);
				1255	}
				1256	protected:
				1257	int m_r, m_g, m_b, m_a;
				1258	};
				1259
				1260	/* This blender does a normal blend after modulation.
				1261	*/
				1262	struct blender_32to16_modulate : blender_modulate {
				1263	blender_32to16_modulate(const context_t* c) {
				1264	init(c);
				1265	}
				1266	void write(uint32_t s, uint16_t* dst) {
				1267	// blend source and destination
				1268	if (!s) {
				1269	return;
				1270	}
				1271	s = GGL_RGBA_TO_HOST(s);
				1272
				1273	/* We need to modulate s */
				1274	uint32_t sA = (s >> 24);
				1275	uint32_t sB = (s >> 16) & 0xff;
				1276	uint32_t sG = (s >> 8) & 0xff;
				1277	uint32_t sR = s & 0xff;
				1278
				1279	sA = (sA*m_a) >> 8;
				1280	/* Keep R/G/B scaled to 5.8 or 6.8 fixed float format */
				1281	sR = (sR*m_r) >> (8 - 5);
				1282	sG = (sG*m_g) >> (8 - 6);
				1283	sB = (sB*m_b) >> (8 - 5);
				1284
				1285	/* Now do a normal blend */
				1286	int f = 0x100 - (sA + (sA>>7));
				1287	uint16_t d = *dst;
				1288	int dR = (d>>11)&0x1f;
				1289	int dG = (d>>5)&0x3f;
				1290	int dB = (d)&0x1f;
				1291	sR = (sR + f*dR)>>8;
				1292	sG = (sG + f*dG)>>8;
				1293	sB = (sB + f*dB)>>8;
				1294	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1295	}
				1296	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1297	// blend source and destination
				1298	if (!s) {
				1299	di.step();
				1300	return;
				1301	}
				1302	s = GGL_RGBA_TO_HOST(s);
				1303
				1304	/* We need to modulate s */
				1305	uint32_t sA = (s >> 24);
				1306	uint32_t sB = (s >> 16) & 0xff;
				1307	uint32_t sG = (s >> 8) & 0xff;
				1308	uint32_t sR = s & 0xff;
				1309
				1310	sA = (sA*m_a) >> 8;
				1311	/* keep R/G/B scaled to 5.8 or 6.8 fixed float format */
				1312	sR = (sR*m_r) >> (8 - 5);
				1313	sG = (sG*m_g) >> (8 - 6);
				1314	sB = (sB*m_b) >> (8 - 5);
				1315
				1316	/* Scale threshold to 0.8 fixed float format */
				1317	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1318	int f = 0x100 - (sA + (sA>>7));
				1319	uint16_t d = *dst;
				1320	int dR = (d>>11)&0x1f;
				1321	int dG = (d>>5)&0x3f;
				1322	int dB = (d)&0x1f;
				1323	sR = (sR + f*dR + threshold)>>8;
				1324	sG = (sG + f*dG + threshold)>>8;
				1325	sB = (sB + f*dB + threshold)>>8;
				1326	if (sR > 0x1f) sR = 0x1f;
				1327	if (sG > 0x3f) sG = 0x3f;
				1328	if (sB > 0x1f) sB = 0x1f;
				1329	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1330	}
				1331	};
				1332
				1333	/* same as 32to16_modulate, except that the input is xRGB, instead of ARGB */
				1334	struct blender_x32to16_modulate : blender_modulate {
				1335	blender_x32to16_modulate(const context_t* c) {
				1336	init(c);
				1337	}
				1338	void write(uint32_t s, uint16_t* dst) {
				1339	s = GGL_RGBA_TO_HOST(s);
				1340
				1341	uint32_t sB = (s >> 16) & 0xff;
				1342	uint32_t sG = (s >> 8) & 0xff;
				1343	uint32_t sR = s & 0xff;
				1344
				1345	/* Keep R/G/B in 5.8 or 6.8 format */
				1346	sR = (sR*m_r) >> (8 - 5);
				1347	sG = (sG*m_g) >> (8 - 6);
				1348	sB = (sB*m_b) >> (8 - 5);
				1349
				1350	int f = 0x100 - m_a;
				1351	uint16_t d = *dst;
				1352	int dR = (d>>11)&0x1f;
				1353	int dG = (d>>5)&0x3f;
				1354	int dB = (d)&0x1f;
				1355	sR = (sR + f*dR)>>8;
				1356	sG = (sG + f*dG)>>8;
				1357	sB = (sB + f*dB)>>8;
				1358	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1359	}
				1360	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1361	s = GGL_RGBA_TO_HOST(s);
				1362
				1363	uint32_t sB = (s >> 16) & 0xff;
				1364	uint32_t sG = (s >> 8) & 0xff;
				1365	uint32_t sR = s & 0xff;
				1366
				1367	sR = (sR*m_r) >> (8 - 5);
				1368	sG = (sG*m_g) >> (8 - 6);
				1369	sB = (sB*m_b) >> (8 - 5);
				1370
				1371	/* Now do a normal blend */
				1372	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1373	int f = 0x100 - m_a;
				1374	uint16_t d = *dst;
				1375	int dR = (d>>11)&0x1f;
				1376	int dG = (d>>5)&0x3f;
				1377	int dB = (d)&0x1f;
				1378	sR = (sR + f*dR + threshold)>>8;
				1379	sG = (sG + f*dG + threshold)>>8;
				1380	sB = (sB + f*dB + threshold)>>8;
				1381	if (sR > 0x1f) sR = 0x1f;
				1382	if (sG > 0x3f) sG = 0x3f;
				1383	if (sB > 0x1f) sB = 0x1f;
				1384	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1385	}
				1386	};
				1387
				1388	/* Same as above, but source is 16bit rgb565 */
				1389	struct blender_16to16_modulate : blender_modulate {
				1390	blender_16to16_modulate(const context_t* c) {
				1391	init(c);
				1392	}
				1393	void write(uint16_t s16, uint16_t* dst) {
				1394	uint32_t s = s16;
				1395
				1396	uint32_t sR = s >> 11;
				1397	uint32_t sG = (s >> 5) & 0x3f;
				1398	uint32_t sB = s & 0x1f;
				1399
				1400	sR = (sR*m_r);
				1401	sG = (sG*m_g);
				1402	sB = (sB*m_b);
				1403
				1404	int f = 0x100 - m_a;
				1405	uint16_t d = *dst;
				1406	int dR = (d>>11)&0x1f;
				1407	int dG = (d>>5)&0x3f;
				1408	int dB = (d)&0x1f;
				1409	sR = (sR + f*dR)>>8;
				1410	sG = (sG + f*dG)>>8;
				1411	sB = (sB + f*dB)>>8;
				1412	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1413	}
				1414	};
				1415
				1416	/* This is used to iterate over a 16-bit destination color buffer.
				1417	* Usage is:
				1418	*
				1419	* dst_iterator16 di(context);
				1420	* while (di.count--) {
				1421	* <do stuff with dest pixel at di.dst>
				1422	* di.dst++;
				1423	* }
				1424	*/
				1425	struct dst_iterator16 {
				1426	dst_iterator16(const context_t* c) {
				1427	const int x = c->iterators.xl;
				1428	const int width = c->iterators.xr - x;
				1429	const int32_t y = c->iterators.y;
				1430	const surface_t* cb = &(c->state.buffers.color);
				1431	count = width;
				1432	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				1433	}
				1434	int count;
				1435	uint16_t* dst;
				1436	};
				1437
				1438
				1439	static void scanline_t32cb16_clamp(context_t* c)
				1440	{
				1441	dst_iterator16 di(c);
				1442
				1443	if (is_context_horizontal(c)) {
				1444	/* Special case for simple horizontal scaling */
				1445	horz_clamp_iterator32 ci(c);
				1446	while (di.count--) {
				1447	uint32_t s = ci.get_pixel32();
				1448	*di.dst++ = convertAbgr8888ToRgb565(s);
				1449	}
				1450	} else {
				1451	/* General case */
				1452	clamp_iterator ci(c);
				1453	while (di.count--) {
				1454	uint32_t s = ci.get_pixel32();
				1455	*di.dst++ = convertAbgr8888ToRgb565(s);
				1456	}
				1457	}
				1458	}
				1459
				1460	static void scanline_t32cb16_dither(context_t* c)
				1461	{
				1462	horz_iterator32 si(c);
				1463	dst_iterator16 di(c);
				1464	ditherer dither(c);
				1465
				1466	while (di.count--) {
				1467	uint32_t s = si.get_pixel32();
				1468	*di.dst++ = dither.abgr8888ToRgb565(s);
				1469	}
				1470	}
				1471
				1472	static void scanline_t32cb16_clamp_dither(context_t* c)
				1473	{
				1474	dst_iterator16 di(c);
				1475	ditherer dither(c);
				1476
				1477	if (is_context_horizontal(c)) {
				1478	/* Special case for simple horizontal scaling */
				1479	horz_clamp_iterator32 ci(c);
				1480	while (di.count--) {
				1481	uint32_t s = ci.get_pixel32();
				1482	*di.dst++ = dither.abgr8888ToRgb565(s);
				1483	}
				1484	} else {
				1485	/* General case */
				1486	clamp_iterator ci(c);
				1487	while (di.count--) {
				1488	uint32_t s = ci.get_pixel32();
				1489	*di.dst++ = dither.abgr8888ToRgb565(s);
				1490	}
				1491	}
				1492	}
				1493
				1494	static void scanline_t32cb16blend_dither(context_t* c)
				1495	{
				1496	dst_iterator16 di(c);
				1497	ditherer dither(c);
				1498	blender_32to16 bl(c);
				1499	horz_iterator32 hi(c);
				1500	while (di.count--) {
				1501	uint32_t s = hi.get_pixel32();
				1502	bl.write(s, di.dst, dither);
				1503	di.dst++;
				1504	}
				1505	}
				1506
				1507	static void scanline_t32cb16blend_clamp(context_t* c)
				1508	{
				1509	dst_iterator16 di(c);
				1510	blender_32to16 bl(c);
				1511
				1512	if (is_context_horizontal(c)) {
				1513	horz_clamp_iterator32 ci(c);
				1514	while (di.count--) {
				1515	uint32_t s = ci.get_pixel32();
				1516	bl.write(s, di.dst);
				1517	di.dst++;
				1518	}
				1519	} else {
				1520	clamp_iterator ci(c);
				1521	while (di.count--) {
				1522	uint32_t s = ci.get_pixel32();
				1523	bl.write(s, di.dst);
				1524	di.dst++;
				1525	}
				1526	}
				1527	}
				1528
				1529	static void scanline_t32cb16blend_clamp_dither(context_t* c)
				1530	{
				1531	dst_iterator16 di(c);
				1532	ditherer dither(c);
				1533	blender_32to16 bl(c);
				1534
				1535	clamp_iterator ci(c);
				1536	while (di.count--) {
				1537	uint32_t s = ci.get_pixel32();
				1538	bl.write(s, di.dst, dither);
				1539	di.dst++;
				1540	}
				1541	}
				1542
				1543	void scanline_t32cb16blend_clamp_mod(context_t* c)
				1544	{
				1545	dst_iterator16 di(c);
				1546	blender_32to16_modulate bl(c);
				1547
				1548	clamp_iterator ci(c);
				1549	while (di.count--) {
				1550	uint32_t s = ci.get_pixel32();
				1551	bl.write(s, di.dst);
				1552	di.dst++;
				1553	}
				1554	}
				1555
				1556	void scanline_t32cb16blend_clamp_mod_dither(context_t* c)
				1557	{
				1558	dst_iterator16 di(c);
				1559	blender_32to16_modulate bl(c);
				1560	ditherer dither(c);
				1561
				1562	clamp_iterator ci(c);
				1563	while (di.count--) {
				1564	uint32_t s = ci.get_pixel32();
				1565	bl.write(s, di.dst, dither);
				1566	di.dst++;
				1567	}
				1568	}
				1569
				1570	/* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture */
				1571	void scanline_x32cb16blend_clamp_mod(context_t* c)
				1572	{
				1573	dst_iterator16 di(c);
				1574	blender_x32to16_modulate bl(c);
				1575
				1576	clamp_iterator ci(c);
				1577	while (di.count--) {
				1578	uint32_t s = ci.get_pixel32();
				1579	bl.write(s, di.dst);
				1580	di.dst++;
				1581	}
				1582	}
				1583
				1584	void scanline_x32cb16blend_clamp_mod_dither(context_t* c)
				1585	{
				1586	dst_iterator16 di(c);
				1587	blender_x32to16_modulate bl(c);
				1588	ditherer dither(c);
				1589
				1590	clamp_iterator ci(c);
				1591	while (di.count--) {
				1592	uint32_t s = ci.get_pixel32();
				1593	bl.write(s, di.dst, dither);
				1594	di.dst++;
				1595	}
				1596	}
				1597
				1598	void scanline_t16cb16_clamp(context_t* c)
				1599	{
				1600	dst_iterator16 di(c);
				1601
				1602	/* Special case for simple horizontal scaling */
				1603	if (is_context_horizontal(c)) {
				1604	horz_clamp_iterator16 ci(c);
				1605	while (di.count--) {
				1606	*di.dst++ = ci.get_pixel16();
				1607	}
				1608	} else {
				1609	clamp_iterator ci(c);
				1610	while (di.count--) {
				1611	*di.dst++ = ci.get_pixel16();
				1612	}
				1613	}
				1614	}
				1615
				1616
				1617
/* Evaluates an iterated value at the pixel center of row 'y':
 *     v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx)
 * The two half steps are summed before the shift, so a single >> 1 is
 * applied to (dvdy + dvdx).
 */
template <typename T, typename U>
static inline __attribute__((const))
T interpolate(int y, T v0, U dvdx, U dvdy) {
    // half-pixel offset first, then the whole-row contribution
    return (v0 + ((dvdx + dvdy) >> 1)) + (y * dvdy);
}
				1625
				1626	// ----------------------------------------------------------------------------
				1627	#if 0
				1628	#pragma mark -
				1629	#endif
				1630
				1631	void init_y(context_t* c, int32_t ys)
				1632	{
				1633	const uint32_t enables = c->state.enables;
				1634
				1635	// compute iterators...
				1636	iterators_t& ci = c->iterators;
				1637
				1638	// sample in the center
				1639	ci.y = ys;
				1640
				1641	if (enables & (GGL_ENABLE_DEPTH_TEST\|GGL_ENABLE_W\|GGL_ENABLE_FOG)) {
				1642	ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy);
				1643	ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy);
				1644	ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy);
				1645	}
				1646
				1647	if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) {
				1648	ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy);
				1649	ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy);
				1650	ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy);
				1651	ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady);
				1652	c->step_y = step_y__smooth;
				1653	} else {
				1654	ci.ydrdy = c->shade.r0;
				1655	ci.ydgdy = c->shade.g0;
				1656	ci.ydbdy = c->shade.b0;
				1657	ci.ydady = c->shade.a0;
				1658	// XXX: do only if needed, or make sure this is fast
				1659	c->packed = ggl_pack_color(c, c->state.buffers.color.format,
				1660	ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
				1661	c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
				1662	ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
				1663	}
				1664
				1665	// initialize the variables we need in the shader
				1666	generated_vars_t& gen = c->generated_vars;
				1667	gen.argb[GGLFormat::ALPHA].c = ci.ydady;
				1668	gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx;
				1669	gen.argb[GGLFormat::RED ].c = ci.ydrdy;
				1670	gen.argb[GGLFormat::RED ].dx = c->shade.drdx;
				1671	gen.argb[GGLFormat::GREEN].c = ci.ydgdy;
				1672	gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx;
				1673	gen.argb[GGLFormat::BLUE ].c = ci.ydbdy;
				1674	gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx;
				1675	gen.dzdx = c->shade.dzdx;
				1676	gen.f = ci.ydfdy;
				1677	gen.dfdx = c->shade.dfdx;
				1678
				1679	if (enables & GGL_ENABLE_TMUS) {
				1680	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1681	texture_t& t = c->state.texture[i];
				1682	if (!t.enable) continue;
				1683
				1684	texture_iterators_t& ti = t.iterators;
				1685	if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) {
				1686	// we need to set all of these to 0 because in some cases
				1687	// step_y__generic() or step_y__tmu() will be used and
				1688	// therefore will update dtdy, however, in 1:1 mode
				1689	// this is always done by the scanline rasterizer.
				1690	ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0;
				1691	ti.ydsdy = t.shade.is0;
				1692	ti.ydtdy = t.shade.it0;
				1693	} else {
				1694	const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16);
				1695	const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16);
				1696	ti.sscale = t.shade.sscale + adjustSWrap;
				1697	ti.tscale = t.shade.tscale + adjustTWrap;
				1698	if (!(enables & GGL_ENABLE_W)) {
				1699	// S coordinate
				1700	const int32_t sscale = ti.sscale;
				1701	const int32_t sy = interpolate(ys,
				1702	t.shade.is0, t.shade.idsdx, t.shade.idsdy);
				1703	if (sscale>=0) {
				1704	ti.ydsdy= sy << sscale;
				1705	ti.dsdx = t.shade.idsdx << sscale;
				1706	ti.dsdy = t.shade.idsdy << sscale;
				1707	} else {
				1708	ti.ydsdy= sy >> -sscale;
				1709	ti.dsdx = t.shade.idsdx >> -sscale;
				1710	ti.dsdy = t.shade.idsdy >> -sscale;
				1711	}
				1712	// T coordinate
				1713	const int32_t tscale = ti.tscale;
				1714	const int32_t ty = interpolate(ys,
				1715	t.shade.it0, t.shade.idtdx, t.shade.idtdy);
				1716	if (tscale>=0) {
				1717	ti.ydtdy= ty << tscale;
				1718	ti.dtdx = t.shade.idtdx << tscale;
				1719	ti.dtdy = t.shade.idtdy << tscale;
				1720	} else {
				1721	ti.ydtdy= ty >> -tscale;
				1722	ti.dtdx = t.shade.idtdx >> -tscale;
				1723	ti.dtdy = t.shade.idtdy >> -tscale;
				1724	}
				1725	}
				1726	}
				1727	// mirror for generated code...
				1728	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1729	gen.width = t.surface.width;
				1730	gen.height = t.surface.height;
				1731	gen.stride = t.surface.stride;
Ashok Bhat	d10afb1	2013-11-14 11:13:41 +0000	[diff] [blame]	1732	gen.data = uintptr_t(t.surface.data);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1733	gen.dsdx = ti.dsdx;
				1734	gen.dtdx = ti.dtdx;
				1735	}
				1736	}
				1737
				1738	// choose the y-stepper
				1739	c->step_y = step_y__nop;
				1740	if (enables & GGL_ENABLE_FOG) {
				1741	c->step_y = step_y__generic;
				1742	} else if (enables & GGL_ENABLE_TMUS) {
				1743	if (enables & GGL_ENABLE_SMOOTH) {
				1744	c->step_y = step_y__generic;
				1745	} else if (enables & GGL_ENABLE_W) {
				1746	c->step_y = step_y__w;
				1747	} else {
				1748	c->step_y = step_y__tmu;
				1749	}
				1750	} else {
				1751	if (enables & GGL_ENABLE_SMOOTH) {
				1752	c->step_y = step_y__smooth;
				1753	}
				1754	}
				1755
				1756	// choose the rectangle blitter
				1757	c->rect = rect_generic;
				1758	if ((c->step_y == step_y__nop) &&
				1759	(c->scanline == scanline_memcpy))
				1760	{
				1761	c->rect = rect_memcpy;
				1762	}
				1763	}
				1764
				1765	void init_y_packed(context_t* c, int32_t y0)
				1766	{
				1767	uint8_t f = c->state.buffers.color.format;
				1768	c->packed = ggl_pack_color(c, f,
				1769	c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	1770	c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
				1771	c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1772	c->iterators.y = y0;
				1773	c->step_y = step_y__nop;
				1774	// choose the rectangle blitter
				1775	c->rect = rect_generic;
				1776	if (c->scanline == scanline_memcpy) {
				1777	c->rect = rect_memcpy;
				1778	}
				1779	}
				1780
				1781	void init_y_noop(context_t* c, int32_t y0)
				1782	{
				1783	c->iterators.y = y0;
				1784	c->step_y = step_y__nop;
				1785	// choose the rectangle blitter
				1786	c->rect = rect_generic;
				1787	if (c->scanline == scanline_memcpy) {
				1788	c->rect = rect_memcpy;
				1789	}
				1790	}
				1791
				1792	void init_y_error(context_t* c, int32_t y0)
				1793	{
				1794	// woooops, shoud never happen,
				1795	// fail gracefully (don't display anything)
				1796	init_y_noop(c, y0);
Steve Block	8aeb6e2	2012-01-06 14:13:42 +0000	[diff] [blame]	1797	ALOGE("color-buffer has an invalid format!");
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1798	}
				1799
				1800	// ----------------------------------------------------------------------------
				1801	#if 0
				1802	#pragma mark -
				1803	#endif
				1804
				1805	void step_y__generic(context_t* c)
				1806	{
				1807	const uint32_t enables = c->state.enables;
				1808
				1809	// iterate...
				1810	iterators_t& ci = c->iterators;
				1811	ci.y += 1;
				1812
				1813	if (enables & GGL_ENABLE_SMOOTH) {
				1814	ci.ydrdy += c->shade.drdy;
				1815	ci.ydgdy += c->shade.dgdy;
				1816	ci.ydbdy += c->shade.dbdy;
				1817	ci.ydady += c->shade.dady;
				1818	}
				1819
				1820	const uint32_t mask =
				1821	GGL_ENABLE_DEPTH_TEST \|
				1822	GGL_ENABLE_W \|
				1823	GGL_ENABLE_FOG;
				1824	if (enables & mask) {
				1825	ci.ydzdy += c->shade.dzdy;
				1826	ci.ydwdy += c->shade.dwdy;
				1827	ci.ydfdy += c->shade.dfdy;
				1828	}
				1829
				1830	if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
				1831	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1832	if (c->state.texture[i].enable) {
				1833	texture_iterators_t& ti = c->state.texture[i].iterators;
				1834	ti.ydsdy += ti.dsdy;
				1835	ti.ydtdy += ti.dtdy;
				1836	}
				1837	}
				1838	}
				1839	}
				1840
				1841	void step_y__nop(context_t* c)
				1842	{
				1843	c->iterators.y += 1;
				1844	c->iterators.ydzdy += c->shade.dzdy;
				1845	}
				1846
				1847	void step_y__smooth(context_t* c)
				1848	{
				1849	iterators_t& ci = c->iterators;
				1850	ci.y += 1;
				1851	ci.ydrdy += c->shade.drdy;
				1852	ci.ydgdy += c->shade.dgdy;
				1853	ci.ydbdy += c->shade.dbdy;
				1854	ci.ydady += c->shade.dady;
				1855	ci.ydzdy += c->shade.dzdy;
				1856	}
				1857
				1858	void step_y__w(context_t* c)
				1859	{
				1860	iterators_t& ci = c->iterators;
				1861	ci.y += 1;
				1862	ci.ydzdy += c->shade.dzdy;
				1863	ci.ydwdy += c->shade.dwdy;
				1864	}
				1865
				1866	void step_y__tmu(context_t* c)
				1867	{
				1868	iterators_t& ci = c->iterators;
				1869	ci.y += 1;
				1870	ci.ydzdy += c->shade.dzdy;
				1871	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1872	if (c->state.texture[i].enable) {
				1873	texture_iterators_t& ti = c->state.texture[i].iterators;
				1874	ti.ydsdy += ti.dsdy;
				1875	ti.ydtdy += ti.dtdy;
				1876	}
				1877	}
				1878	}
				1879
				1880	// ----------------------------------------------------------------------------
				1881	#if 0
				1882	#pragma mark -
				1883	#endif
				1884
				1885	void scanline_perspective(context_t* c)
				1886	{
				1887	struct {
				1888	union {
				1889	struct {
				1890	int32_t s, sq;
				1891	int32_t t, tq;
synergy dev	cd2fe3b	2013-11-06 16:30:06 -0800	[diff] [blame]	1892	} sqtq;
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1893	struct {
				1894	int32_t v, q;
				1895	} st[2];
				1896	};
				1897	} tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));
				1898
				1899	// XXX: we should have a special case when dwdx = 0
				1900
				1901	// 32 pixels spans works okay. 16 is a lot better,
				1902	// but hey, it's a software renderer...
				1903	const uint32_t SPAN_BITS = 5;
				1904	const uint32_t ys = c->iterators.y;
				1905	const uint32_t xs = c->iterators.xl;
				1906	const uint32_t x1 = c->iterators.xr;
				1907	const uint32_t xc = x1 - xs;
				1908	uint32_t remainder = xc & ((1<<SPAN_BITS)-1);
				1909	uint32_t numSpans = xc >> SPAN_BITS;
				1910
				1911	const iterators_t& ci = c->iterators;
				1912	int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
				1913	int32_t q0 = gglRecipQ(w0, 30);
				1914	const int iwscale = 32 - gglClz(q0);
				1915
				1916	const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
				1917	int32_t xl = c->iterators.xl;
				1918
				1919	// We process s & t with a loop to reduce the code size
				1920	// (and i-cache pressure).
				1921
				1922	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1923	const texture_t& tmu = c->state.texture[i];
				1924	if (!tmu.enable) continue;
				1925	int32_t s = tmu.shade.is0 +
				1926	(tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
				1927	((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
				1928	int32_t t = tmu.shade.it0 +
				1929	(tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
				1930	((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
synergy dev	cd2fe3b	2013-11-06 16:30:06 -0800	[diff] [blame]	1931	tc[i].sqtq.s = s;
				1932	tc[i].sqtq.t = t;
				1933	tc[i].sqtq.sq = gglMulx(s, q0, iwscale);
				1934	tc[i].sqtq.tq = gglMulx(t, q0, iwscale);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1935	}
				1936
				1937	int32_t span = 0;
				1938	do {
				1939	int32_t w1;
				1940	if (ggl_likely(numSpans)) {
				1941	w1 = w0 + dwdx;
				1942	} else {
				1943	if (remainder) {
				1944	// finish off the scanline...
				1945	span = remainder;
				1946	w1 = (c->shade.dwdx * span) + w0;
				1947	} else {
				1948	break;
				1949	}
				1950	}
				1951	int32_t q1 = gglRecipQ(w1, 30);
				1952	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1953	texture_t& tmu = c->state.texture[i];
				1954	if (!tmu.enable) continue;
				1955	texture_iterators_t& ti = tmu.iterators;
				1956
				1957	for (int j=0 ; j<2 ; j++) {
				1958	int32_t v = tc[i].st[j].v;
				1959	if (span) v += (tmu.shade.st[j].dx)*span;
				1960	else v += (tmu.shade.st[j].dx)<<SPAN_BITS;
				1961	const int32_t v0 = tc[i].st[j].q;
				1962	const int32_t v1 = gglMulx(v, q1, iwscale);
				1963	int32_t dvdx = v1 - v0;
				1964	if (span) dvdx /= span;
				1965	else dvdx >>= SPAN_BITS;
				1966	tc[i].st[j].v = v;
				1967	tc[i].st[j].q = v1;
				1968
				1969	const int scale = ti.st[j].scale + (iwscale - 30);
				1970	if (scale >= 0) {
				1971	ti.st[j].ydvdy = v0 << scale;
				1972	ti.st[j].dvdx = dvdx << scale;
				1973	} else {
				1974	ti.st[j].ydvdy = v0 >> -scale;
				1975	ti.st[j].dvdx = dvdx >> -scale;
				1976	}
				1977	}
				1978	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1979	gen.dsdx = ti.st[0].dvdx;
				1980	gen.dtdx = ti.st[1].dvdx;
				1981	}
				1982	c->iterators.xl = xl;
				1983	c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
				1984	w0 = w1;
				1985	q0 = q1;
				1986	c->span(c);
				1987	} while(numSpans--);
				1988	}
				1989
				1990	void scanline_perspective_single(context_t* c)
				1991	{
				1992	// 32 pixels spans works okay. 16 is a lot better,
				1993	// but hey, it's a software renderer...
				1994	const uint32_t SPAN_BITS = 5;
				1995	const uint32_t ys = c->iterators.y;
				1996	const uint32_t xs = c->iterators.xl;
				1997	const uint32_t x1 = c->iterators.xr;
				1998	const uint32_t xc = x1 - xs;
				1999
				2000	const iterators_t& ci = c->iterators;
				2001	int32_t w = (xs * c->shade.dwdx) + ci.ydwdy;
				2002	int32_t iw = gglRecipQ(w, 30);
				2003	const int iwscale = 32 - gglClz(iw);
				2004
				2005	const int i = 31 - gglClz(c->state.enabled_tmu);
				2006	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				2007	texture_t& tmu = c->state.texture[i];
				2008	texture_iterators_t& ti = tmu.iterators;
				2009	const int sscale = ti.sscale + (iwscale - 30);
				2010	const int tscale = ti.tscale + (iwscale - 30);
				2011	int32_t s = tmu.shade.is0 +
				2012	(tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
				2013	((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
				2014	int32_t t = tmu.shade.it0 +
				2015	(tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
				2016	((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
				2017	int32_t s0 = gglMulx(s, iw, iwscale);
				2018	int32_t t0 = gglMulx(t, iw, iwscale);
				2019	int32_t xl = c->iterators.xl;
				2020
				2021	int32_t sq, tq, dsdx, dtdx;
				2022	int32_t premainder = xc & ((1<<SPAN_BITS)-1);
				2023	uint32_t numSpans = xc >> SPAN_BITS;
				2024	if (c->shade.dwdx == 0) {
				2025	// XXX: we could choose to do this if the error is small enough
				2026	numSpans = 0;
				2027	premainder = xc;
				2028	goto no_perspective;
				2029	}
				2030
				2031	if (premainder) {
				2032	w += c->shade.dwdx * premainder;
				2033	iw = gglRecipQ(w, 30);
				2034	no_perspective:
				2035	s += tmu.shade.idsdx * premainder;
				2036	t += tmu.shade.idtdx * premainder;
				2037	sq = gglMulx(s, iw, iwscale);
				2038	tq = gglMulx(t, iw, iwscale);
				2039	dsdx = (sq - s0) / premainder;
				2040	dtdx = (tq - t0) / premainder;
				2041	c->iterators.xl = xl;
				2042	c->iterators.xr = xl = xl + premainder;
				2043	goto finish;
				2044	}
				2045
				2046	while (numSpans--) {
				2047	w += c->shade.dwdx << SPAN_BITS;
				2048	s += tmu.shade.idsdx << SPAN_BITS;
				2049	t += tmu.shade.idtdx << SPAN_BITS;
				2050	iw = gglRecipQ(w, 30);
				2051	sq = gglMulx(s, iw, iwscale);
				2052	tq = gglMulx(t, iw, iwscale);
				2053	dsdx = (sq - s0) >> SPAN_BITS;
				2054	dtdx = (tq - t0) >> SPAN_BITS;
				2055	c->iterators.xl = xl;
				2056	c->iterators.xr = xl = xl + (1<<SPAN_BITS);
				2057	finish:
				2058	if (sscale >= 0) {
				2059	ti.ydsdy = s0 << sscale;
				2060	ti.dsdx = dsdx << sscale;
				2061	} else {
				2062	ti.ydsdy = s0 >>-sscale;
				2063	ti.dsdx = dsdx >>-sscale;
				2064	}
				2065	if (tscale >= 0) {
				2066	ti.ydtdy = t0 << tscale;
				2067	ti.dtdx = dtdx << tscale;
				2068	} else {
				2069	ti.ydtdy = t0 >>-tscale;
				2070	ti.dtdx = dtdx >>-tscale;
				2071	}
				2072	s0 = sq;
				2073	t0 = tq;
				2074	gen.dsdx = ti.dsdx;
				2075	gen.dtdx = ti.dtdx;
				2076	c->span(c);
				2077	}
				2078	}
				2079
				2080	// ----------------------------------------------------------------------------
				2081
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	2082	void scanline_col32cb16blend(context_t* c)
				2083	{
				2084	int32_t x = c->iterators.xl;
				2085	size_t ct = c->iterators.xr - x;
				2086	int32_t y = c->iterators.y;
				2087	surface_t* cb = &(c->state.buffers.color);
				2088	union {
				2089	uint16_t* dst;
				2090	uint32_t* dst32;
				2091	};
				2092	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2093
				2094	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
				2095	#if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				2096	scanline_col32cb16blend_neon(dst, &(c->packed8888), ct);
				2097	#else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				2098	scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
				2099	#endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
Ashok Bhat	658f89d	2013-02-28 18:32:03 +0000	[diff] [blame]	2100	#elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__aarch64__))
				2101	scanline_col32cb16blend_aarch64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	2102	#else
				2103	uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
				2104	int sA = (s>>24);
				2105	int f = 0x100 - (sA + (sA>>7));
				2106	while (ct--) {
				2107	uint16_t d = *dst;
				2108	int dR = (d>>11)&0x1f;
				2109	int dG = (d>>5)&0x3f;
				2110	int dB = (d)&0x1f;
				2111	int sR = (s >> ( 3))&0x1F;
				2112	int sG = (s >> ( 8+2))&0x3F;
				2113	int sB = (s >> (16+3))&0x1F;
				2114	sR += (f*dR)>>8;
				2115	sG += (f*dG)>>8;
				2116	sB += (f*dB)>>8;
				2117	*dst++ = uint16_t((sR<<11)\|(sG<<5)\|sB);
				2118	}
				2119	#endif
				2120
				2121	}
				2122
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2123	void scanline_t32cb16(context_t* c)
				2124	{
				2125	int32_t x = c->iterators.xl;
				2126	size_t ct = c->iterators.xr - x;
				2127	int32_t y = c->iterators.y;
				2128	surface_t* cb = &(c->state.buffers.color);
				2129	union {
				2130	uint16_t* dst;
				2131	uint32_t* dst32;
				2132	};
				2133	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2134
				2135	surface_t* tex = &(c->state.texture[0].surface);
				2136	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2137	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2138	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));
				2139	int sR, sG, sB;
				2140	uint32_t s, d;
				2141
Ashok Bhat	d10afb1	2013-11-14 11:13:41 +0000	[diff] [blame]	2142	if (ct==1 \|\| uintptr_t(dst)&2) {
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2143	last_one:
				2144	s = GGL_RGBA_TO_HOST( *src++ );
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2145	*dst++ = convertAbgr8888ToRgb565(s);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2146	ct--;
				2147	}
				2148
				2149	while (ct >= 2) {
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2150	#if BYTE_ORDER == BIG_ENDIAN
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2151	s = GGL_RGBA_TO_HOST( *src++ );
				2152	d = convertAbgr8888ToRgb565_hi16(s);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2153
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2154	s = GGL_RGBA_TO_HOST( *src++ );
				2155	d \|= convertAbgr8888ToRgb565(s);
				2156	#else
				2157	s = GGL_RGBA_TO_HOST( *src++ );
				2158	d = convertAbgr8888ToRgb565(s);
				2159
				2160	s = GGL_RGBA_TO_HOST( *src++ );
				2161	d \|= convertAbgr8888ToRgb565(s) << 16;
				2162	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2163	*dst32++ = d;
				2164	ct -= 2;
				2165	}
				2166
				2167	if (ct > 0) {
				2168	goto last_one;
				2169	}
				2170	}
				2171
				2172	void scanline_t32cb16blend(context_t* c)
				2173	{
Ashok Bhat	658f89d	2013-02-28 18:32:03 +0000	[diff] [blame]	2174	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) \|\| defined(__mips__) \|\| defined(__aarch64__)))
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2175	int32_t x = c->iterators.xl;
				2176	size_t ct = c->iterators.xr - x;
				2177	int32_t y = c->iterators.y;
				2178	surface_t* cb = &(c->state.buffers.color);
				2179	uint16_t* dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2180
				2181	surface_t* tex = &(c->state.texture[0].surface);
				2182	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2183	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2184	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));
				2185
Duane Sand	068f9f3	2012-05-24 22:09:24 -0700	[diff] [blame]	2186	#ifdef __arm__
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2187	scanline_t32cb16blend_arm(dst, src, ct);
Ashok Bhat	658f89d	2013-02-28 18:32:03 +0000	[diff] [blame]	2188	#elif defined(__aarch64__)
				2189	scanline_t32cb16blend_aarch64(dst, src, ct);
				2190	#elif defined(__mips__)
Duane Sand	068f9f3	2012-05-24 22:09:24 -0700	[diff] [blame]	2191	scanline_t32cb16blend_mips(dst, src, ct);
				2192	#endif
				2193	#else
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2194	dst_iterator16 di(c);
				2195	horz_iterator32 hi(c);
				2196	blender_32to16 bl(c);
				2197	while (di.count--) {
				2198	uint32_t s = hi.get_pixel32();
				2199	bl.write(s, di.dst);
				2200	di.dst++;
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2201	}
				2202	#endif
				2203	}
				2204
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2205	void scanline_t32cb16blend_srca(context_t* c)
				2206	{
				2207	dst_iterator16 di(c);
				2208	horz_iterator32 hi(c);
				2209	blender_32to16_srcA blender(c);
				2210
				2211	while (di.count--) {
				2212	uint32_t s = hi.get_pixel32();
				2213	blender.write(s,di.dst);
				2214	di.dst++;
				2215	}
				2216	}
				2217
				2218	void scanline_t16cb16blend_clamp_mod(context_t* c)
				2219	{
				2220	const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
				2221	if (a == 0) {
				2222	return;
				2223	}
				2224
				2225	if (a == 255) {
				2226	scanline_t16cb16_clamp(c);
				2227	return;
				2228	}
				2229
				2230	dst_iterator16 di(c);
				2231	blender_16to16_modulate blender(c);
				2232	clamp_iterator ci(c);
				2233
				2234	while (di.count--) {
				2235	uint16_t s = ci.get_pixel16();
				2236	blender.write(s, di.dst);
				2237	di.dst++;
				2238	}
				2239	}
				2240
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2241	void scanline_memcpy(context_t* c)
				2242	{
				2243	int32_t x = c->iterators.xl;
				2244	size_t ct = c->iterators.xr - x;
				2245	int32_t y = c->iterators.y;
				2246	surface_t* cb = &(c->state.buffers.color);
				2247	const GGLFormat* fp = &(c->formats[cb->format]);
				2248	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2249	(x + (cb->stride * y)) * fp->size;
				2250
				2251	surface_t* tex = &(c->state.texture[0].surface);
				2252	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2253	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2254	uint8_t src = reinterpret_cast<uint8_t>(tex->data) +
				2255	(u + (tex->stride * v)) * fp->size;
				2256
				2257	const size_t size = ct * fp->size;
				2258	memcpy(dst, src, size);
				2259	}
				2260
				2261	void scanline_memset8(context_t* c)
				2262	{
				2263	int32_t x = c->iterators.xl;
				2264	size_t ct = c->iterators.xr - x;
				2265	int32_t y = c->iterators.y;
				2266	surface_t* cb = &(c->state.buffers.color);
				2267	uint8_t* dst = reinterpret_cast<uint8_t>(cb->data) + (x+(cb->stridey));
				2268	uint32_t packed = c->packed;
				2269	memset(dst, packed, ct);
				2270	}
				2271
				2272	void scanline_memset16(context_t* c)
				2273	{
				2274	int32_t x = c->iterators.xl;
				2275	size_t ct = c->iterators.xr - x;
				2276	int32_t y = c->iterators.y;
				2277	surface_t* cb = &(c->state.buffers.color);
				2278	uint16_t* dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2279	uint32_t packed = c->packed;
				2280	android_memset16(dst, packed, ct*2);
				2281	}
				2282
				2283	void scanline_memset32(context_t* c)
				2284	{
				2285	int32_t x = c->iterators.xl;
				2286	size_t ct = c->iterators.xr - x;
				2287	int32_t y = c->iterators.y;
				2288	surface_t* cb = &(c->state.buffers.color);
				2289	uint32_t* dst = reinterpret_cast<uint32_t>(cb->data) + (x+(cb->stridey));
				2290	uint32_t packed = GGL_HOST_TO_RGBA(c->packed);
				2291	android_memset32(dst, packed, ct*4);
				2292	}
				2293
				2294	void scanline_clear(context_t* c)
				2295	{
				2296	int32_t x = c->iterators.xl;
				2297	size_t ct = c->iterators.xr - x;
				2298	int32_t y = c->iterators.y;
				2299	surface_t* cb = &(c->state.buffers.color);
				2300	const GGLFormat* fp = &(c->formats[cb->format]);
				2301	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2302	(x + (cb->stride * y)) * fp->size;
				2303	const size_t size = ct * fp->size;
				2304	memset(dst, 0, size);
				2305	}
				2306
				2307	void scanline_set(context_t* c)
				2308	{
				2309	int32_t x = c->iterators.xl;
				2310	size_t ct = c->iterators.xr - x;
				2311	int32_t y = c->iterators.y;
				2312	surface_t* cb = &(c->state.buffers.color);
				2313	const GGLFormat* fp = &(c->formats[cb->format]);
				2314	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2315	(x + (cb->stride * y)) * fp->size;
				2316	const size_t size = ct * fp->size;
				2317	memset(dst, 0xFF, size);
				2318	}
				2319
				2320	void scanline_noop(context_t* c)
				2321	{
				2322	}
				2323
				2324	void rect_generic(context_t* c, size_t yc)
				2325	{
				2326	do {
				2327	c->scanline(c);
				2328	c->step_y(c);
				2329	} while (--yc);
				2330	}
				2331
				2332	void rect_memcpy(context_t* c, size_t yc)
				2333	{
				2334	int32_t x = c->iterators.xl;
				2335	size_t ct = c->iterators.xr - x;
				2336	int32_t y = c->iterators.y;
				2337	surface_t* cb = &(c->state.buffers.color);
				2338	const GGLFormat* fp = &(c->formats[cb->format]);
				2339	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2340	(x + (cb->stride * y)) * fp->size;
				2341
				2342	surface_t* tex = &(c->state.texture[0].surface);
				2343	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2344	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2345	uint8_t src = reinterpret_cast<uint8_t>(tex->data) +
				2346	(u + (tex->stride * v)) * fp->size;
				2347
				2348	if (cb->stride == tex->stride && ct == size_t(cb->stride)) {
				2349	memcpy(dst, src, ct * fp->size * yc);
				2350	} else {
				2351	const size_t size = ct * fp->size;
				2352	const size_t dbpr = cb->stride * fp->size;
				2353	const size_t sbpr = tex->stride * fp->size;
				2354	do {
				2355	memcpy(dst, src, size);
				2356	dst += dbpr;
				2357	src += sbpr;
				2358	} while (--yc);
				2359	}
				2360	}
				2361	// ----------------------------------------------------------------------------
				2362	}; // namespace android
				2363