blob: d1f3d9677192906297e2c6c5f5915de74126fa26 [file] [log] [blame]
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08001/* libs/pixelflinger/scanline.cpp
2**
David 'Digit' Turner39764f42011-04-15 20:12:07 +02003** Copyright 2006-2011, The Android Open Source Project
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08004**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18
19#define LOG_TAG "pixelflinger"
20
21#include <assert.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25
26#include <cutils/memory.h>
27#include <cutils/log.h>
28
29#include "buffer.h"
30#include "scanline.h"
31
32#include "codeflinger/CodeCache.h"
33#include "codeflinger/GGLAssembler.h"
34#include "codeflinger/ARMAssembler.h"
35//#include "codeflinger/ARMAssemblerOptimizer.h"
36
37// ----------------------------------------------------------------------------
38
39#define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline
40#define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic
41#define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic
42#define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen
43
44#ifdef NDEBUG
45# define ANDROID_RELEASE
46# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
47#else
48# define ANDROID_DEBUG
49# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
50#endif
51
52#if defined(__arm__)
53# define ANDROID_ARM_CODEGEN 1
54#else
55# define ANDROID_ARM_CODEGEN 0
56#endif
57
58#define DEBUG__CODEGEN_ONLY 0
59
David 'Digit' Turner39764f42011-04-15 20:12:07 +020060/* Set to 1 to dump to the log the states that need a new
61 * code-generated scanline callback, i.e. those that don't
62 * have a corresponding shortcut function.
63 */
64#define DEBUG_NEEDS 0
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080065
66#define ASSEMBLY_SCRATCH_SIZE 2048
67
68// ----------------------------------------------------------------------------
69namespace android {
70// ----------------------------------------------------------------------------
71
72static void init_y(context_t*, int32_t);
73static void init_y_noop(context_t*, int32_t);
74static void init_y_packed(context_t*, int32_t);
75static void init_y_error(context_t*, int32_t);
76
77static void step_y__generic(context_t* c);
78static void step_y__nop(context_t*);
79static void step_y__smooth(context_t* c);
80static void step_y__tmu(context_t* c);
81static void step_y__w(context_t* c);
82
83static void scanline(context_t* c);
84static void scanline_perspective(context_t* c);
85static void scanline_perspective_single(context_t* c);
86static void scanline_t32cb16blend(context_t* c);
David 'Digit' Turner39764f42011-04-15 20:12:07 +020087static void scanline_t32cb16blend_dither(context_t* c);
88static void scanline_t32cb16blend_srca(context_t* c);
89static void scanline_t32cb16blend_clamp(context_t* c);
90static void scanline_t32cb16blend_clamp_dither(context_t* c);
91static void scanline_t32cb16blend_clamp_mod(context_t* c);
92static void scanline_x32cb16blend_clamp_mod(context_t* c);
93static void scanline_t32cb16blend_clamp_mod_dither(context_t* c);
94static void scanline_x32cb16blend_clamp_mod_dither(context_t* c);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080095static void scanline_t32cb16(context_t* c);
David 'Digit' Turner39764f42011-04-15 20:12:07 +020096static void scanline_t32cb16_dither(context_t* c);
97static void scanline_t32cb16_clamp(context_t* c);
98static void scanline_t32cb16_clamp_dither(context_t* c);
Martyn Capewellf9e8ab02009-12-07 15:00:19 +000099static void scanline_col32cb16blend(context_t* c);
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200100static void scanline_t16cb16_clamp(context_t* c);
101static void scanline_t16cb16blend_clamp_mod(context_t* c);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800102static void scanline_memcpy(context_t* c);
103static void scanline_memset8(context_t* c);
104static void scanline_memset16(context_t* c);
105static void scanline_memset32(context_t* c);
106static void scanline_noop(context_t* c);
107static void scanline_set(context_t* c);
108static void scanline_clear(context_t* c);
109
110static void rect_generic(context_t* c, size_t yc);
111static void rect_memcpy(context_t* c, size_t yc);
112
Duane Sand068f9f32012-05-24 22:09:24 -0700113#if defined( __arm__)
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800114extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t);
115extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct);
Martyn Capewellf9e8ab02009-12-07 15:00:19 +0000116extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct);
117extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
Duane Sand068f9f32012-05-24 22:09:24 -0700118#elif defined(__mips__)
119extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t);
120#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800121
122// ----------------------------------------------------------------------------
123
/* Pack a 32-bit pixel into RGB565.
 *
 * Bits 3..7 of the input (top five bits of the low byte) become the red
 * field, bits 10..15 the green field and bits 19..23 the blue field.
 * The top byte of the input is ignored.
 */
static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix)
{
    const uint32_t red   = (pix & 0x000000f8) << 8;    /* -> bits 11..15 */
    const uint32_t green = (pix & 0x0000fc00) >> 5;    /* -> bits  5..10 */
    const uint32_t blue  = (pix & 0x00f80000) >> 19;   /* -> bits  0..4  */
    return (uint16_t)(red | green | blue);
}
130
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800131struct shortcut_t {
132 needs_filter_t filter;
133 const char* desc;
134 void (*scanline)(context_t*);
135 void (*init_y)(context_t*, int32_t);
136};
137
138// Keep in sync with needs
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200139
140/* To understand the values here, have a look at:
141 * system/core/include/private/pixelflinger/ggl_context.h
142 *
143 * Especially the lines defining and using GGL_RESERVE_NEEDS
144 *
145 * Quick reminders:
146 * - the last nibble of the first value is the destination buffer format.
147 * - the last nibble of the third value is the source texture format
148 * - formats: 4=rgb565 1=abgr8888 2=xbgr8888
149 *
150 * In the descriptions below:
151 *
152 * SRC means we copy the source pixels to the destination
153 *
154 * SRC_OVER means we blend the source pixels to the destination
155 * with dstFactor = 1-srcA, srcFactor=1 (premultiplied source).
156 * This mode is otherwise called 'blend'.
157 *
 * SRCA_OVER means we blend the source pixels to the destination
 * with dstFactor=1-srcA, srcFactor=srcA (non-premul source).
 * This mode is otherwise called 'blend_srca'
161 *
162 * clamp means we fetch source pixels from a texture with u/v clamping
163 *
164 * mod means the source pixels are modulated (multiplied) by the
165 * a/r/g/b of the current context's color. Typically used for
166 * fade-in / fade-out.
167 *
168 * dither means we dither 32 bit values to 16 bits
169 */
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800170static shortcut_t shortcuts[] = {
171 { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
172 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200173 "565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop },
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800174 { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
175 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200176 "565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop },
177 /* same as first entry, but with dithering */
178 { { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } },
179 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
180 "565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop },
181 /* same as second entry, but with dithering */
182 { { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } },
183 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
184 "565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop },
185 /* this is used during the boot animation - CHEAT: ignore dithering */
186 { { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } },
187 { 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } },
188 "565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop },
189 /* special case for arbitrary texture coordinates (think scaling) */
190 { { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } },
191 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
192 "565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y },
193 { { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } },
194 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
195 "565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y },
196 /* another case used during emulation */
197 { { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } },
198 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
199 "565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y },
200 /* and this */
201 { { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } },
202 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
203 "565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y },
204 { { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } },
205 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
206 "565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y },
207 { { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } },
208 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
209 "565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y },
210 { { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } },
211 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
212 "565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
213 { { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } },
214 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
215 "565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
216 { { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } },
217 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
218 "565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
219 { { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } },
220 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
221 "565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
222 { { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } },
223 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
224 "565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y },
225 { { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } },
226 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
227 "565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y },
Martyn Capewellf9e8ab02009-12-07 15:00:19 +0000228 { { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
229 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
230 "565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed },
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800231 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
232 { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } },
233 "(nop) alpha test", scanline_noop, init_y_noop },
234 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
235 { 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } },
236 "(nop) depth test", scanline_noop, init_y_noop },
237 { { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } },
238 { 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } },
239 "(nop) logic_op", scanline_noop, init_y_noop },
240 { { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } },
241 { 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } },
242 "(nop) color mask", scanline_noop, init_y_noop },
243 { { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } },
244 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
245 "(set) logic_op", scanline_set, init_y_noop },
246 { { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } },
247 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
248 "(clear) logic_op", scanline_clear, init_y_noop },
249 { { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } },
250 { 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } },
251 "(clear) blending 0/0", scanline_clear, init_y_noop },
252 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
253 { 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } },
254 "(error) invalid color-buffer format", scanline_noop, init_y_error },
255};
256static const needs_filter_t noblend1to1 = {
257 // (disregard dithering, see below)
258 { 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } },
259 { 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } }
260};
261static const needs_filter_t fill16noblend = {
262 { 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } },
263 { 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } }
264};
265
266// ----------------------------------------------------------------------------
267
268#if ANDROID_ARM_CODEGEN
269static CodeCache gCodeCache(12 * 1024);
270
271class ScanlineAssembly : public Assembly {
272 AssemblyKey<needs_t> mKey;
273public:
274 ScanlineAssembly(needs_t needs, size_t size)
275 : Assembly(size), mKey(needs) { }
276 const AssemblyKey<needs_t>& key() const { return mKey; }
277};
278#endif
279
280// ----------------------------------------------------------------------------
281
282void ggl_init_scanline(context_t* c)
283{
284 c->init_y = init_y;
285 c->step_y = step_y__generic;
286 c->scanline = scanline;
287}
288
289void ggl_uninit_scanline(context_t* c)
290{
291 if (c->state.buffers.coverage)
292 free(c->state.buffers.coverage);
293#if ANDROID_ARM_CODEGEN
294 if (c->scanline_as)
295 c->scanline_as->decStrong(c);
296#endif
297}
298
299// ----------------------------------------------------------------------------
300
301static void pick_scanline(context_t* c)
302{
303#if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0))
304
305#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC
306 c->init_y = init_y;
307 c->step_y = step_y__generic;
308 c->scanline = scanline;
309 return;
310#endif
311
312 //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n",
313 // c->state.needs.n, c->state.needs.p,
314 // c->state.needs.t[0], c->state.needs.t[1]);
315
316 // first handle the special case that we cannot test with a filter
317 const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n);
318 if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) {
319 if (c->state.needs.match(noblend1to1)) {
320 // this will match regardless of dithering state, since both
321 // src and dest have the same format anyway, there is no dithering
322 // to be done.
323 const GGLFormat* f =
324 &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]);
325 if ((f->components == GGL_RGB) ||
326 (f->components == GGL_RGBA) ||
327 (f->components == GGL_LUMINANCE) ||
328 (f->components == GGL_LUMINANCE_ALPHA))
329 {
330 // format must have all of RGB components
331 // (so the current color doesn't show through)
332 c->scanline = scanline_memcpy;
333 c->init_y = init_y_noop;
334 return;
335 }
336 }
337 }
338
339 if (c->state.needs.match(fill16noblend)) {
340 c->init_y = init_y_packed;
341 switch (c->formats[cb_format].size) {
342 case 1: c->scanline = scanline_memset8; return;
343 case 2: c->scanline = scanline_memset16; return;
344 case 4: c->scanline = scanline_memset32; return;
345 }
346 }
347
348 const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t);
349 for (int i=0 ; i<numFilters ; i++) {
350 if (c->state.needs.match(shortcuts[i].filter)) {
351 c->scanline = shortcuts[i].scanline;
352 c->init_y = shortcuts[i].init_y;
353 return;
354 }
355 }
356
Vladimir Chtchetkinedccddee2011-08-29 10:02:24 -0700357#if DEBUG_NEEDS
Steve Block4163b452012-01-04 19:19:03 +0000358 ALOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x",
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200359 c->state.needs.n, c->state.needs.p,
360 c->state.needs.t[0], c->state.needs.t[1]);
361#endif
362
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800363#endif // DEBUG__CODEGEN_ONLY
364
365 c->init_y = init_y;
366 c->step_y = step_y__generic;
367
368#if ANDROID_ARM_CODEGEN
369 // we're going to have to generate some code...
370 // here, generate code for our pixel pipeline
371 const AssemblyKey<needs_t> key(c->state.needs);
372 sp<Assembly> assembly = gCodeCache.lookup(key);
373 if (assembly == 0) {
374 // create a new assembly region
375 sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs,
376 ASSEMBLY_SCRATCH_SIZE);
377 // initialize our assembler
378 GGLAssembler assembler( new ARMAssembler(a) );
379 //GGLAssembler assembler(
380 // new ARMAssemblerOptimizer(new ARMAssembler(a)) );
381 // generate the scanline code for the given needs
382 int err = assembler.scanline(c->state.needs, c);
383 if (ggl_likely(!err)) {
384 // finally, cache this assembly
385 err = gCodeCache.cache(a->key(), a);
386 }
387 if (ggl_unlikely(err)) {
Steve Block8aeb6e22012-01-06 14:13:42 +0000388 ALOGE("error generating or caching assembly. Reverting to NOP.");
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800389 c->scanline = scanline_noop;
390 c->init_y = init_y_noop;
391 c->step_y = step_y__nop;
392 return;
393 }
394 assembly = a;
395 }
396
397 // release the previous assembly
398 if (c->scanline_as) {
399 c->scanline_as->decStrong(c);
400 }
401
Steve Block4163b452012-01-04 19:19:03 +0000402 //ALOGI("using generated pixel-pipeline");
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800403 c->scanline_as = assembly.get();
404 c->scanline_as->incStrong(c); // hold on to assembly
405 c->scanline = (void(*)(context_t* c))assembly->base();
406#else
Steve Block4f07a1f2012-01-05 22:25:38 +0000407// ALOGW("using generic (slow) pixel-pipeline");
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800408 c->scanline = scanline;
409#endif
410}
411
412void ggl_pick_scanline(context_t* c)
413{
414 pick_scanline(c);
415 if ((c->state.enables & GGL_ENABLE_W) &&
416 (c->state.enables & GGL_ENABLE_TMUS))
417 {
418 c->span = c->scanline;
419 c->scanline = scanline_perspective;
420 if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) {
421 // only one TMU enabled
422 c->scanline = scanline_perspective_single;
423 }
424 }
425}
426
427// ----------------------------------------------------------------------------
428
429static void blending(context_t* c, pixel_t* fragment, pixel_t* fb);
430static void blend_factor(context_t* c, pixel_t* r, uint32_t factor,
431 const pixel_t* src, const pixel_t* dst);
432static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv);
433
434#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
435
436// no need to compile the generic-pipeline, it can't be reached
437void scanline(context_t*)
438{
439}
440
441#else
442
443void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv)
444{
445 if (su && sv) {
446 if (su > sv) {
447 v = ggl_expand(v, sv, su);
448 sv = su;
449 } else if (su < sv) {
450 u = ggl_expand(u, su, sv);
451 su = sv;
452 }
453 }
454}
455
456void blending(context_t* c, pixel_t* fragment, pixel_t* fb)
457{
458 rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]);
459 rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]);
460 rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]);
461 rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]);
462
463 pixel_t sf, df;
464 blend_factor(c, &sf, c->state.blend.src, fragment, fb);
465 blend_factor(c, &df, c->state.blend.dst, fragment, fb);
466
467 fragment->c[1] =
468 gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1]));
469 fragment->c[2] =
470 gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2]));
471 fragment->c[3] =
472 gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3]));
473
474 if (c->state.blend.alpha_separate) {
475 blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb);
476 blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb);
477 }
478
479 fragment->c[0] =
480 gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0]));
481
482 // clamp to 1.0
483 if (fragment->c[0] >= (1LU<<fragment->s[0]))
484 fragment->c[0] = (1<<fragment->s[0])-1;
485 if (fragment->c[1] >= (1LU<<fragment->s[1]))
486 fragment->c[1] = (1<<fragment->s[1])-1;
487 if (fragment->c[2] >= (1LU<<fragment->s[2]))
488 fragment->c[2] = (1<<fragment->s[2])-1;
489 if (fragment->c[3] >= (1LU<<fragment->s[3]))
490 fragment->c[3] = (1<<fragment->s[3])-1;
491}
492
493static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0)
494{
495 if (!size)
496 return def;
497
498 // scale to 16 bits
499 if (size > 16) {
500 x >>= (size - 16);
501 } else if (size < 16) {
502 x = ggl_expand(x, size, 16);
503 }
504 x += x >> 15;
505 return x;
506}
507
508void blend_factor(context_t* c, pixel_t* r,
509 uint32_t factor, const pixel_t* src, const pixel_t* dst)
510{
511 switch (factor) {
512 case GGL_ZERO:
513 r->c[1] =
514 r->c[2] =
515 r->c[3] =
516 r->c[0] = 0;
517 break;
518 case GGL_ONE:
519 r->c[1] =
520 r->c[2] =
521 r->c[3] =
522 r->c[0] = FIXED_ONE;
523 break;
524 case GGL_DST_COLOR:
525 r->c[1] = blendfactor(dst->c[1], dst->s[1]);
526 r->c[2] = blendfactor(dst->c[2], dst->s[2]);
527 r->c[3] = blendfactor(dst->c[3], dst->s[3]);
528 r->c[0] = blendfactor(dst->c[0], dst->s[0]);
529 break;
530 case GGL_SRC_COLOR:
531 r->c[1] = blendfactor(src->c[1], src->s[1]);
532 r->c[2] = blendfactor(src->c[2], src->s[2]);
533 r->c[3] = blendfactor(src->c[3], src->s[3]);
534 r->c[0] = blendfactor(src->c[0], src->s[0]);
535 break;
536 case GGL_ONE_MINUS_DST_COLOR:
537 r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]);
538 r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]);
539 r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]);
540 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]);
541 break;
542 case GGL_ONE_MINUS_SRC_COLOR:
543 r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]);
544 r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]);
545 r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]);
546 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]);
547 break;
548 case GGL_SRC_ALPHA:
549 r->c[1] =
550 r->c[2] =
551 r->c[3] =
552 r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE);
553 break;
554 case GGL_ONE_MINUS_SRC_ALPHA:
555 r->c[1] =
556 r->c[2] =
557 r->c[3] =
558 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE);
559 break;
560 case GGL_DST_ALPHA:
561 r->c[1] =
562 r->c[2] =
563 r->c[3] =
564 r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
565 break;
566 case GGL_ONE_MINUS_DST_ALPHA:
567 r->c[1] =
568 r->c[2] =
569 r->c[3] =
570 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
571 break;
572 case GGL_SRC_ALPHA_SATURATE:
573 // XXX: GGL_SRC_ALPHA_SATURATE
574 break;
575 }
576}
577
578static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap)
579{
580 GGLfixed d;
581 if (tx_wrap == GGL_REPEAT) {
582 d = (uint32_t(coord)>>16) * size;
583 } else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics
584 const GGLfixed clamp_min = FIXED_HALF;
585 const GGLfixed clamp_max = (size << 16) - FIXED_HALF;
586 if (coord < clamp_min) coord = clamp_min;
587 if (coord > clamp_max) coord = clamp_max;
588 d = coord;
589 } else { // 1:1
590 const GGLfixed clamp_min = 0;
591 const GGLfixed clamp_max = (size << 16);
592 if (coord < clamp_min) coord = clamp_min;
593 if (coord > clamp_max) coord = clamp_max;
594 d = coord;
595 }
596 return d;
597}
598
599static inline
600GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len)
601{
602 const int32_t end = dvdx * (len-1) + v;
603 if (end < 0)
604 v -= end;
605 v &= ~(v>>31);
606 return v;
607}
608
609void scanline(context_t* c)
610{
611 const uint32_t enables = c->state.enables;
612 const int xs = c->iterators.xl;
613 const int x1 = c->iterators.xr;
614 int xc = x1 - xs;
615 const int16_t* covPtr = c->state.buffers.coverage + xs;
616
617 // All iterated values are sampled at the pixel center
618
619 // reset iterators for that scanline...
620 GGLcolor r, g, b, a;
621 iterators_t& ci = c->iterators;
622 if (enables & GGL_ENABLE_SMOOTH) {
623 r = (xs * c->shade.drdx) + ci.ydrdy;
624 g = (xs * c->shade.dgdx) + ci.ydgdy;
625 b = (xs * c->shade.dbdx) + ci.ydbdy;
626 a = (xs * c->shade.dadx) + ci.ydady;
627 r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc);
628 g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc);
629 b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc);
630 a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc);
631 } else {
632 r = ci.ydrdy;
633 g = ci.ydgdy;
634 b = ci.ydbdy;
635 a = ci.ydady;
636 }
637
638 // z iterators are 1.31
639 GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy;
640 GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy;
641
642 struct {
643 GGLfixed s, t;
644 } tc[GGL_TEXTURE_UNIT_COUNT];
645 if (enables & GGL_ENABLE_TMUS) {
646 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
647 if (c->state.texture[i].enable) {
648 texture_iterators_t& ti = c->state.texture[i].iterators;
649 if (enables & GGL_ENABLE_W) {
650 tc[i].s = ti.ydsdy;
651 tc[i].t = ti.ydtdy;
652 } else {
653 tc[i].s = (xs * ti.dsdx) + ti.ydsdy;
654 tc[i].t = (xs * ti.dtdx) + ti.ydtdy;
655 }
656 }
657 }
658 }
659
660 pixel_t fragment;
661 pixel_t texel;
662 pixel_t fb;
663
664 uint32_t x = xs;
665 uint32_t y = c->iterators.y;
666
667 while (xc--) {
668
669 { // just a scope
670
671 // read color (convert to 8 bits by keeping only the integer part)
672 fragment.s[1] = fragment.s[2] =
673 fragment.s[3] = fragment.s[0] = 8;
674 fragment.c[1] = r >> (GGL_COLOR_BITS-8);
675 fragment.c[2] = g >> (GGL_COLOR_BITS-8);
676 fragment.c[3] = b >> (GGL_COLOR_BITS-8);
677 fragment.c[0] = a >> (GGL_COLOR_BITS-8);
678
679 // texturing
680 if (enables & GGL_ENABLE_TMUS) {
681 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
682 texture_t& tx = c->state.texture[i];
683 if (!tx.enable)
684 continue;
685 texture_iterators_t& ti = tx.iterators;
686 int32_t u, v;
687
688 // s-coordinate
689 if (tx.s_coord != GGL_ONE_TO_ONE) {
690 const int w = tx.surface.width;
691 u = wrapping(tc[i].s, w, tx.s_wrap);
692 tc[i].s += ti.dsdx;
693 } else {
694 u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF;
695 }
696
697 // t-coordinate
698 if (tx.t_coord != GGL_ONE_TO_ONE) {
699 const int h = tx.surface.height;
700 v = wrapping(tc[i].t, h, tx.t_wrap);
701 tc[i].t += ti.dtdx;
702 } else {
703 v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF;
704 }
705
706 // read texture
707 if (tx.mag_filter == GGL_NEAREST &&
708 tx.min_filter == GGL_NEAREST)
709 {
710 u >>= 16;
711 v >>= 16;
712 tx.surface.read(&tx.surface, c, u, v, &texel);
713 } else {
714 const int w = tx.surface.width;
715 const int h = tx.surface.height;
716 u -= FIXED_HALF;
717 v -= FIXED_HALF;
718 int u0 = u >> 16;
719 int v0 = v >> 16;
720 int u1 = u0 + 1;
721 int v1 = v0 + 1;
722 if (tx.s_wrap == GGL_REPEAT) {
723 if (u0<0) u0 += w;
724 if (u1<0) u1 += w;
725 if (u0>=w) u0 -= w;
726 if (u1>=w) u1 -= w;
727 } else {
728 if (u0<0) u0 = 0;
729 if (u1<0) u1 = 0;
730 if (u0>=w) u0 = w-1;
731 if (u1>=w) u1 = w-1;
732 }
733 if (tx.t_wrap == GGL_REPEAT) {
734 if (v0<0) v0 += h;
735 if (v1<0) v1 += h;
736 if (v0>=h) v0 -= h;
737 if (v1>=h) v1 -= h;
738 } else {
739 if (v0<0) v0 = 0;
740 if (v1<0) v1 = 0;
741 if (v0>=h) v0 = h-1;
742 if (v1>=h) v1 = h-1;
743 }
744 pixel_t texels[4];
745 uint32_t mm[4];
746 tx.surface.read(&tx.surface, c, u0, v0, &texels[0]);
747 tx.surface.read(&tx.surface, c, u0, v1, &texels[1]);
748 tx.surface.read(&tx.surface, c, u1, v0, &texels[2]);
749 tx.surface.read(&tx.surface, c, u1, v1, &texels[3]);
750 u = (u >> 12) & 0xF;
751 v = (v >> 12) & 0xF;
752 u += u>>3;
753 v += v>>3;
754 mm[0] = (0x10 - u) * (0x10 - v);
755 mm[1] = (0x10 - u) * v;
756 mm[2] = u * (0x10 - v);
757 mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]);
758 for (int j=0 ; j<4 ; j++) {
759 texel.s[j] = texels[0].s[j];
760 if (!texel.s[j]) continue;
761 texel.s[j] += 8;
762 texel.c[j] = texels[0].c[j]*mm[0] +
763 texels[1].c[j]*mm[1] +
764 texels[2].c[j]*mm[2] +
765 texels[3].c[j]*mm[3] ;
766 }
767 }
768
769 // Texture environnement...
770 for (int j=0 ; j<4 ; j++) {
771 uint32_t& Cf = fragment.c[j];
772 uint32_t& Ct = texel.c[j];
773 uint8_t& sf = fragment.s[j];
774 uint8_t& st = texel.s[j];
775 uint32_t At = texel.c[0];
776 uint8_t sat = texel.s[0];
777 switch (tx.env) {
778 case GGL_REPLACE:
779 if (st) {
780 Cf = Ct;
781 sf = st;
782 }
783 break;
784 case GGL_MODULATE:
785 if (st) {
786 uint32_t factor = Ct + (Ct>>(st-1));
787 Cf = (Cf * factor) >> st;
788 }
789 break;
790 case GGL_DECAL:
791 if (sat) {
792 rescale(Cf, sf, Ct, st);
793 Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat;
794 }
795 break;
796 case GGL_BLEND:
797 if (st) {
798 uint32_t Cc = tx.env_color[i];
799 if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8;
800 else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf);
801 uint32_t factor = Ct + (Ct>>(st-1));
802 Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st;
803 }
804 break;
805 case GGL_ADD:
806 if (st) {
807 rescale(Cf, sf, Ct, st);
808 Cf += Ct;
809 }
810 break;
811 }
812 }
813 }
814 }
815
816 // coverage application
817 if (enables & GGL_ENABLE_AA) {
818 int16_t cf = *covPtr++;
819 fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15;
820 }
821
822 // alpha-test
823 if (enables & GGL_ENABLE_ALPHA_TEST) {
824 GGLcolor ref = c->state.alpha_test.ref;
825 GGLcolor alpha = (uint64_t(fragment.c[0]) *
826 ((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1);
827 switch (c->state.alpha_test.func) {
828 case GGL_NEVER: goto discard;
829 case GGL_LESS: if (alpha<ref) break; goto discard;
830 case GGL_EQUAL: if (alpha==ref) break; goto discard;
831 case GGL_LEQUAL: if (alpha<=ref) break; goto discard;
832 case GGL_GREATER: if (alpha>ref) break; goto discard;
833 case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard;
834 case GGL_GEQUAL: if (alpha>=ref) break; goto discard;
835 }
836 }
837
838 // depth test
839 if (c->state.buffers.depth.format) {
840 if (enables & GGL_ENABLE_DEPTH_TEST) {
841 surface_t* cb = &(c->state.buffers.depth);
842 uint16_t* p = (uint16_t*)(cb->data)+(x+(cb->stride*y));
843 uint16_t zz = uint32_t(z)>>(16);
844 uint16_t depth = *p;
845 switch (c->state.depth_test.func) {
846 case GGL_NEVER: goto discard;
847 case GGL_LESS: if (zz<depth) break; goto discard;
848 case GGL_EQUAL: if (zz==depth) break; goto discard;
849 case GGL_LEQUAL: if (zz<=depth) break; goto discard;
850 case GGL_GREATER: if (zz>depth) break; goto discard;
851 case GGL_NOTEQUAL: if (zz!=depth) break; goto discard;
852 case GGL_GEQUAL: if (zz>=depth) break; goto discard;
853 }
854 // depth buffer is not enabled, if depth-test is not enabled
855/*
856 fragment.s[1] = fragment.s[2] =
857 fragment.s[3] = fragment.s[0] = 8;
858 fragment.c[1] =
859 fragment.c[2] =
860 fragment.c[3] =
861 fragment.c[0] = 255 - (zz>>8);
862*/
863 if (c->state.mask.depth) {
864 *p = zz;
865 }
866 }
867 }
868
869 // fog
870 if (enables & GGL_ENABLE_FOG) {
871 for (int i=1 ; i<=3 ; i++) {
872 GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF;
873 uint32_t& c = fragment.c[i];
874 uint8_t& s = fragment.s[i];
875 c = (c * 0x10000) / ((1<<s)-1);
876 c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f));
877 s = 16;
878 }
879 }
880
881 // blending
882 if (enables & GGL_ENABLE_BLENDING) {
883 fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind
884 fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0;
885 c->state.buffers.color.read(
886 &(c->state.buffers.color), c, x, y, &fb);
887 blending( c, &fragment, &fb );
888 }
889
890 // write
891 c->state.buffers.color.write(
892 &(c->state.buffers.color), c, x, y, &fragment);
893 }
894
895discard:
896 // iterate...
897 x += 1;
898 if (enables & GGL_ENABLE_SMOOTH) {
899 r += c->shade.drdx;
900 g += c->shade.dgdx;
901 b += c->shade.dbdx;
902 a += c->shade.dadx;
903 }
904 z += c->shade.dzdx;
905 f += c->shade.dfdx;
906 }
907}
908
909#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
910
911// ----------------------------------------------------------------------------
912#if 0
913#pragma mark -
914#pragma mark Scanline
915#endif
916
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200917/* Used to parse a 32-bit source texture linearly. Usage is:
918 *
919 * horz_iterator32 hi(context);
920 * while (...) {
921 * uint32_t src_pixel = hi.get_pixel32();
922 * ...
923 * }
924 *
925 * Use only for one-to-one texture mapping.
926 */
927struct horz_iterator32 {
928 horz_iterator32(context_t* c) {
929 const int x = c->iterators.xl;
930 const int y = c->iterators.y;
931 texture_t& tx = c->state.texture[0];
932 const int32_t u = (tx.shade.is0>>16) + x;
933 const int32_t v = (tx.shade.it0>>16) + y;
934 m_src = reinterpret_cast<uint32_t*>(tx.surface.data)+(u+(tx.surface.stride*v));
935 }
936 uint32_t get_pixel32() {
937 return *m_src++;
938 }
939protected:
940 uint32_t* m_src;
941};
942
943/* A variant for 16-bit source textures. */
944struct horz_iterator16 {
945 horz_iterator16(context_t* c) {
946 const int x = c->iterators.xl;
947 const int y = c->iterators.y;
948 texture_t& tx = c->state.texture[0];
949 const int32_t u = (tx.shade.is0>>16) + x;
950 const int32_t v = (tx.shade.it0>>16) + y;
951 m_src = reinterpret_cast<uint16_t*>(tx.surface.data)+(u+(tx.surface.stride*v));
952 }
953 uint16_t get_pixel16() {
954 return *m_src++;
955 }
956protected:
957 uint16_t* m_src;
958};
959
960/* A clamp iterator is used to iterate inside a texture with GGL_CLAMP.
961 * After initialization, call get_src16() or get_src32() to get the current
962 * texture pixel value.
963 */
964struct clamp_iterator {
965 clamp_iterator(context_t* c) {
966 const int xs = c->iterators.xl;
967 texture_t& tx = c->state.texture[0];
968 texture_iterators_t& ti = tx.iterators;
969 m_s = (xs * ti.dsdx) + ti.ydsdy;
970 m_t = (xs * ti.dtdx) + ti.ydtdy;
971 m_ds = ti.dsdx;
972 m_dt = ti.dtdx;
973 m_width_m1 = tx.surface.width - 1;
974 m_height_m1 = tx.surface.height - 1;
975 m_data = tx.surface.data;
976 m_stride = tx.surface.stride;
977 }
978 uint16_t get_pixel16() {
979 int u, v;
980 get_uv(u, v);
981 uint16_t* src = reinterpret_cast<uint16_t*>(m_data) + (u + (m_stride*v));
982 return src[0];
983 }
984 uint32_t get_pixel32() {
985 int u, v;
986 get_uv(u, v);
987 uint32_t* src = reinterpret_cast<uint32_t*>(m_data) + (u + (m_stride*v));
988 return src[0];
989 }
990private:
991 void get_uv(int& u, int& v) {
992 int uu = m_s >> 16;
993 int vv = m_t >> 16;
994 if (uu < 0)
995 uu = 0;
996 if (uu > m_width_m1)
997 uu = m_width_m1;
998 if (vv < 0)
999 vv = 0;
1000 if (vv > m_height_m1)
1001 vv = m_height_m1;
1002 u = uu;
1003 v = vv;
1004 m_s += m_ds;
1005 m_t += m_dt;
1006 }
1007
1008 GGLfixed m_s, m_t;
1009 GGLfixed m_ds, m_dt;
1010 int m_width_m1, m_height_m1;
1011 uint8_t* m_data;
1012 int m_stride;
1013};
1014
/*
 * The 'horizontal clamp iterator' variant corresponds to the case where
 * the 'v' coordinate doesn't change. This is useful to avoid one multiply
 * and extra adds / checks per pixel, if the blending/processing operation
 * after this is very fast.
 */
1021static int is_context_horizontal(const context_t* c) {
1022 return (c->state.texture[0].iterators.dtdx == 0);
1023}
1024
1025struct horz_clamp_iterator {
1026 uint16_t get_pixel16() {
1027 int u = m_s >> 16;
1028 m_s += m_ds;
1029 if (u < 0)
1030 u = 0;
1031 if (u > m_width_m1)
1032 u = m_width_m1;
1033 const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data);
1034 return src[u];
1035 }
1036 uint32_t get_pixel32() {
1037 int u = m_s >> 16;
1038 m_s += m_ds;
1039 if (u < 0)
1040 u = 0;
1041 if (u > m_width_m1)
1042 u = m_width_m1;
1043 const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data);
1044 return src[u];
1045 }
1046protected:
1047 void init(const context_t* c, int shift);
1048 GGLfixed m_s;
1049 GGLfixed m_ds;
1050 int m_width_m1;
1051 const uint8_t* m_data;
1052};
1053
1054void horz_clamp_iterator::init(const context_t* c, int shift)
1055{
1056 const int xs = c->iterators.xl;
1057 const texture_t& tx = c->state.texture[0];
1058 const texture_iterators_t& ti = tx.iterators;
1059 m_s = (xs * ti.dsdx) + ti.ydsdy;
1060 m_ds = ti.dsdx;
1061 m_width_m1 = tx.surface.width-1;
1062 m_data = tx.surface.data;
1063
1064 GGLfixed t = (xs * ti.dtdx) + ti.ydtdy;
1065 int v = t >> 16;
1066 if (v < 0)
1067 v = 0;
1068 else if (v >= (int)tx.surface.height)
1069 v = (int)tx.surface.height-1;
1070
1071 m_data += (tx.surface.stride*v) << shift;
1072}
1073
1074struct horz_clamp_iterator16 : horz_clamp_iterator {
1075 horz_clamp_iterator16(const context_t* c) {
1076 init(c,1);
1077 };
1078};
1079
1080struct horz_clamp_iterator32 : horz_clamp_iterator {
1081 horz_clamp_iterator32(context_t* c) {
1082 init(c,2);
1083 };
1084};
1085
1086/* This is used to perform dithering operations.
1087 */
1088struct ditherer {
1089 ditherer(const context_t* c) {
1090 const int x = c->iterators.xl;
1091 const int y = c->iterators.y;
1092 m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ];
1093 m_index = x & GGL_DITHER_MASK;
1094 }
1095 void step(void) {
1096 m_index++;
1097 }
1098 int get_value(void) {
1099 int ret = m_line[m_index & GGL_DITHER_MASK];
1100 m_index++;
1101 return ret;
1102 }
1103 uint16_t abgr8888ToRgb565(uint32_t s) {
1104 uint32_t r = s & 0xff;
1105 uint32_t g = (s >> 8) & 0xff;
1106 uint32_t b = (s >> 16) & 0xff;
1107 return rgb888ToRgb565(r,g,b);
1108 }
1109 /* The following assumes that r/g/b are in the 0..255 range each */
1110 uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) {
1111 int threshold = get_value();
1112 /* dither in on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */
1113 r += (threshold >> (GGL_DITHER_BITS-8 +5));
1114 g += (threshold >> (GGL_DITHER_BITS-8 +6));
1115 b += (threshold >> (GGL_DITHER_BITS-8 +5));
1116 if (r > 0xff)
1117 r = 0xff;
1118 if (g > 0xff)
1119 g = 0xff;
1120 if (b > 0xff)
1121 b = 0xff;
1122 return uint16_t(((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3));
1123 }
1124protected:
1125 const uint8_t* m_line;
1126 int m_index;
1127};
1128
1129/* This structure is used to blend (SRC_OVER) 32-bit source pixels
1130 * onto 16-bit destination ones. Usage is simply:
1131 *
1132 * blender.blend(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
1133 */
1134struct blender_32to16 {
1135 blender_32to16(context_t* c) { }
1136 void write(uint32_t s, uint16_t* dst) {
1137 if (s == 0)
1138 return;
1139 s = GGL_RGBA_TO_HOST(s);
1140 int sA = (s>>24);
1141 if (sA == 0xff) {
1142 *dst = convertAbgr8888ToRgb565(s);
1143 } else {
1144 int f = 0x100 - (sA + (sA>>7));
1145 int sR = (s >> ( 3))&0x1F;
1146 int sG = (s >> ( 8+2))&0x3F;
1147 int sB = (s >> (16+3))&0x1F;
1148 uint16_t d = *dst;
1149 int dR = (d>>11)&0x1f;
1150 int dG = (d>>5)&0x3f;
1151 int dB = (d)&0x1f;
1152 sR += (f*dR)>>8;
1153 sG += (f*dG)>>8;
1154 sB += (f*dB)>>8;
1155 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1156 }
1157 }
1158 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1159 if (s == 0) {
1160 di.step();
1161 return;
1162 }
1163 s = GGL_RGBA_TO_HOST(s);
1164 int sA = (s>>24);
1165 if (sA == 0xff) {
1166 *dst = di.abgr8888ToRgb565(s);
1167 } else {
1168 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1169 int f = 0x100 - (sA + (sA>>7));
1170 int sR = (s >> ( 3))&0x1F;
1171 int sG = (s >> ( 8+2))&0x3F;
1172 int sB = (s >> (16+3))&0x1F;
1173 uint16_t d = *dst;
1174 int dR = (d>>11)&0x1f;
1175 int dG = (d>>5)&0x3f;
1176 int dB = (d)&0x1f;
1177 sR = ((sR << 8) + f*dR + threshold)>>8;
1178 sG = ((sG << 8) + f*dG + threshold)>>8;
1179 sB = ((sB << 8) + f*dB + threshold)>>8;
1180 if (sR > 0x1f) sR = 0x1f;
1181 if (sG > 0x3f) sG = 0x3f;
1182 if (sB > 0x1f) sB = 0x1f;
1183 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1184 }
1185 }
1186};
1187
1188/* This blender does the same for the 'blend_srca' operation.
1189 * where dstFactor=srcA*(1-srcA) srcFactor=srcA
1190 */
1191struct blender_32to16_srcA {
1192 blender_32to16_srcA(const context_t* c) { }
1193 void write(uint32_t s, uint16_t* dst) {
1194 if (!s) {
1195 return;
1196 }
1197 uint16_t d = *dst;
1198 s = GGL_RGBA_TO_HOST(s);
1199 int sR = (s >> ( 3))&0x1F;
1200 int sG = (s >> ( 8+2))&0x3F;
1201 int sB = (s >> (16+3))&0x1F;
1202 int sA = (s>>24);
1203 int f1 = (sA + (sA>>7));
1204 int f2 = 0x100-f1;
1205 int dR = (d>>11)&0x1f;
1206 int dG = (d>>5)&0x3f;
1207 int dB = (d)&0x1f;
1208 sR = (f1*sR + f2*dR)>>8;
1209 sG = (f1*sG + f2*dG)>>8;
1210 sB = (f1*sB + f2*dB)>>8;
1211 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1212 }
1213};
1214
1215/* Common init code the modulating blenders */
1216struct blender_modulate {
1217 void init(const context_t* c) {
1218 const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8);
1219 const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8);
1220 const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8);
1221 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
1222 m_r = r + (r >> 7);
1223 m_g = g + (g >> 7);
1224 m_b = b + (b >> 7);
1225 m_a = a + (a >> 7);
1226 }
1227protected:
1228 int m_r, m_g, m_b, m_a;
1229};
1230
1231/* This blender does a normal blend after modulation.
1232 */
1233struct blender_32to16_modulate : blender_modulate {
1234 blender_32to16_modulate(const context_t* c) {
1235 init(c);
1236 }
1237 void write(uint32_t s, uint16_t* dst) {
1238 // blend source and destination
1239 if (!s) {
1240 return;
1241 }
1242 s = GGL_RGBA_TO_HOST(s);
1243
1244 /* We need to modulate s */
1245 uint32_t sA = (s >> 24);
1246 uint32_t sB = (s >> 16) & 0xff;
1247 uint32_t sG = (s >> 8) & 0xff;
1248 uint32_t sR = s & 0xff;
1249
1250 sA = (sA*m_a) >> 8;
1251 /* Keep R/G/B scaled to 5.8 or 6.8 fixed float format */
1252 sR = (sR*m_r) >> (8 - 5);
1253 sG = (sG*m_g) >> (8 - 6);
1254 sB = (sB*m_b) >> (8 - 5);
1255
1256 /* Now do a normal blend */
1257 int f = 0x100 - (sA + (sA>>7));
1258 uint16_t d = *dst;
1259 int dR = (d>>11)&0x1f;
1260 int dG = (d>>5)&0x3f;
1261 int dB = (d)&0x1f;
1262 sR = (sR + f*dR)>>8;
1263 sG = (sG + f*dG)>>8;
1264 sB = (sB + f*dB)>>8;
1265 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1266 }
1267 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1268 // blend source and destination
1269 if (!s) {
1270 di.step();
1271 return;
1272 }
1273 s = GGL_RGBA_TO_HOST(s);
1274
1275 /* We need to modulate s */
1276 uint32_t sA = (s >> 24);
1277 uint32_t sB = (s >> 16) & 0xff;
1278 uint32_t sG = (s >> 8) & 0xff;
1279 uint32_t sR = s & 0xff;
1280
1281 sA = (sA*m_a) >> 8;
1282 /* keep R/G/B scaled to 5.8 or 6.8 fixed float format */
1283 sR = (sR*m_r) >> (8 - 5);
1284 sG = (sG*m_g) >> (8 - 6);
1285 sB = (sB*m_b) >> (8 - 5);
1286
1287 /* Scale threshold to 0.8 fixed float format */
1288 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1289 int f = 0x100 - (sA + (sA>>7));
1290 uint16_t d = *dst;
1291 int dR = (d>>11)&0x1f;
1292 int dG = (d>>5)&0x3f;
1293 int dB = (d)&0x1f;
1294 sR = (sR + f*dR + threshold)>>8;
1295 sG = (sG + f*dG + threshold)>>8;
1296 sB = (sB + f*dB + threshold)>>8;
1297 if (sR > 0x1f) sR = 0x1f;
1298 if (sG > 0x3f) sG = 0x3f;
1299 if (sB > 0x1f) sB = 0x1f;
1300 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1301 }
1302};
1303
1304/* same as 32to16_modulate, except that the input is xRGB, instead of ARGB */
1305struct blender_x32to16_modulate : blender_modulate {
1306 blender_x32to16_modulate(const context_t* c) {
1307 init(c);
1308 }
1309 void write(uint32_t s, uint16_t* dst) {
1310 s = GGL_RGBA_TO_HOST(s);
1311
1312 uint32_t sB = (s >> 16) & 0xff;
1313 uint32_t sG = (s >> 8) & 0xff;
1314 uint32_t sR = s & 0xff;
1315
1316 /* Keep R/G/B in 5.8 or 6.8 format */
1317 sR = (sR*m_r) >> (8 - 5);
1318 sG = (sG*m_g) >> (8 - 6);
1319 sB = (sB*m_b) >> (8 - 5);
1320
1321 int f = 0x100 - m_a;
1322 uint16_t d = *dst;
1323 int dR = (d>>11)&0x1f;
1324 int dG = (d>>5)&0x3f;
1325 int dB = (d)&0x1f;
1326 sR = (sR + f*dR)>>8;
1327 sG = (sG + f*dG)>>8;
1328 sB = (sB + f*dB)>>8;
1329 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1330 }
1331 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1332 s = GGL_RGBA_TO_HOST(s);
1333
1334 uint32_t sB = (s >> 16) & 0xff;
1335 uint32_t sG = (s >> 8) & 0xff;
1336 uint32_t sR = s & 0xff;
1337
1338 sR = (sR*m_r) >> (8 - 5);
1339 sG = (sG*m_g) >> (8 - 6);
1340 sB = (sB*m_b) >> (8 - 5);
1341
1342 /* Now do a normal blend */
1343 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1344 int f = 0x100 - m_a;
1345 uint16_t d = *dst;
1346 int dR = (d>>11)&0x1f;
1347 int dG = (d>>5)&0x3f;
1348 int dB = (d)&0x1f;
1349 sR = (sR + f*dR + threshold)>>8;
1350 sG = (sG + f*dG + threshold)>>8;
1351 sB = (sB + f*dB + threshold)>>8;
1352 if (sR > 0x1f) sR = 0x1f;
1353 if (sG > 0x3f) sG = 0x3f;
1354 if (sB > 0x1f) sB = 0x1f;
1355 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1356 }
1357};
1358
1359/* Same as above, but source is 16bit rgb565 */
1360struct blender_16to16_modulate : blender_modulate {
1361 blender_16to16_modulate(const context_t* c) {
1362 init(c);
1363 }
1364 void write(uint16_t s16, uint16_t* dst) {
1365 uint32_t s = s16;
1366
1367 uint32_t sR = s >> 11;
1368 uint32_t sG = (s >> 5) & 0x3f;
1369 uint32_t sB = s & 0x1f;
1370
1371 sR = (sR*m_r);
1372 sG = (sG*m_g);
1373 sB = (sB*m_b);
1374
1375 int f = 0x100 - m_a;
1376 uint16_t d = *dst;
1377 int dR = (d>>11)&0x1f;
1378 int dG = (d>>5)&0x3f;
1379 int dB = (d)&0x1f;
1380 sR = (sR + f*dR)>>8;
1381 sG = (sG + f*dG)>>8;
1382 sB = (sB + f*dB)>>8;
1383 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1384 }
1385};
1386
1387/* This is used to iterate over a 16-bit destination color buffer.
1388 * Usage is:
1389 *
1390 * dst_iterator16 di(context);
1391 * while (di.count--) {
1392 * <do stuff with dest pixel at di.dst>
1393 * di.dst++;
1394 * }
1395 */
1396struct dst_iterator16 {
1397 dst_iterator16(const context_t* c) {
1398 const int x = c->iterators.xl;
1399 const int width = c->iterators.xr - x;
1400 const int32_t y = c->iterators.y;
1401 const surface_t* cb = &(c->state.buffers.color);
1402 count = width;
1403 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
1404 }
1405 int count;
1406 uint16_t* dst;
1407};
1408
1409
1410static void scanline_t32cb16_clamp(context_t* c)
1411{
1412 dst_iterator16 di(c);
1413
1414 if (is_context_horizontal(c)) {
1415 /* Special case for simple horizontal scaling */
1416 horz_clamp_iterator32 ci(c);
1417 while (di.count--) {
1418 uint32_t s = ci.get_pixel32();
1419 *di.dst++ = convertAbgr8888ToRgb565(s);
1420 }
1421 } else {
1422 /* General case */
1423 clamp_iterator ci(c);
1424 while (di.count--) {
1425 uint32_t s = ci.get_pixel32();
1426 *di.dst++ = convertAbgr8888ToRgb565(s);
1427 }
1428 }
1429}
1430
1431static void scanline_t32cb16_dither(context_t* c)
1432{
1433 horz_iterator32 si(c);
1434 dst_iterator16 di(c);
1435 ditherer dither(c);
1436
1437 while (di.count--) {
1438 uint32_t s = si.get_pixel32();
1439 *di.dst++ = dither.abgr8888ToRgb565(s);
1440 }
1441}
1442
1443static void scanline_t32cb16_clamp_dither(context_t* c)
1444{
1445 dst_iterator16 di(c);
1446 ditherer dither(c);
1447
1448 if (is_context_horizontal(c)) {
1449 /* Special case for simple horizontal scaling */
1450 horz_clamp_iterator32 ci(c);
1451 while (di.count--) {
1452 uint32_t s = ci.get_pixel32();
1453 *di.dst++ = dither.abgr8888ToRgb565(s);
1454 }
1455 } else {
1456 /* General case */
1457 clamp_iterator ci(c);
1458 while (di.count--) {
1459 uint32_t s = ci.get_pixel32();
1460 *di.dst++ = dither.abgr8888ToRgb565(s);
1461 }
1462 }
1463}
1464
1465static void scanline_t32cb16blend_dither(context_t* c)
1466{
1467 dst_iterator16 di(c);
1468 ditherer dither(c);
1469 blender_32to16 bl(c);
1470 horz_iterator32 hi(c);
1471 while (di.count--) {
1472 uint32_t s = hi.get_pixel32();
1473 bl.write(s, di.dst, dither);
1474 di.dst++;
1475 }
1476}
1477
1478static void scanline_t32cb16blend_clamp(context_t* c)
1479{
1480 dst_iterator16 di(c);
1481 blender_32to16 bl(c);
1482
1483 if (is_context_horizontal(c)) {
1484 horz_clamp_iterator32 ci(c);
1485 while (di.count--) {
1486 uint32_t s = ci.get_pixel32();
1487 bl.write(s, di.dst);
1488 di.dst++;
1489 }
1490 } else {
1491 clamp_iterator ci(c);
1492 while (di.count--) {
1493 uint32_t s = ci.get_pixel32();
1494 bl.write(s, di.dst);
1495 di.dst++;
1496 }
1497 }
1498}
1499
1500static void scanline_t32cb16blend_clamp_dither(context_t* c)
1501{
1502 dst_iterator16 di(c);
1503 ditherer dither(c);
1504 blender_32to16 bl(c);
1505
1506 clamp_iterator ci(c);
1507 while (di.count--) {
1508 uint32_t s = ci.get_pixel32();
1509 bl.write(s, di.dst, dither);
1510 di.dst++;
1511 }
1512}
1513
1514void scanline_t32cb16blend_clamp_mod(context_t* c)
1515{
1516 dst_iterator16 di(c);
1517 blender_32to16_modulate bl(c);
1518
1519 clamp_iterator ci(c);
1520 while (di.count--) {
1521 uint32_t s = ci.get_pixel32();
1522 bl.write(s, di.dst);
1523 di.dst++;
1524 }
1525}
1526
1527void scanline_t32cb16blend_clamp_mod_dither(context_t* c)
1528{
1529 dst_iterator16 di(c);
1530 blender_32to16_modulate bl(c);
1531 ditherer dither(c);
1532
1533 clamp_iterator ci(c);
1534 while (di.count--) {
1535 uint32_t s = ci.get_pixel32();
1536 bl.write(s, di.dst, dither);
1537 di.dst++;
1538 }
1539}
1540
1541/* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture */
1542void scanline_x32cb16blend_clamp_mod(context_t* c)
1543{
1544 dst_iterator16 di(c);
1545 blender_x32to16_modulate bl(c);
1546
1547 clamp_iterator ci(c);
1548 while (di.count--) {
1549 uint32_t s = ci.get_pixel32();
1550 bl.write(s, di.dst);
1551 di.dst++;
1552 }
1553}
1554
1555void scanline_x32cb16blend_clamp_mod_dither(context_t* c)
1556{
1557 dst_iterator16 di(c);
1558 blender_x32to16_modulate bl(c);
1559 ditherer dither(c);
1560
1561 clamp_iterator ci(c);
1562 while (di.count--) {
1563 uint32_t s = ci.get_pixel32();
1564 bl.write(s, di.dst, dither);
1565 di.dst++;
1566 }
1567}
1568
1569void scanline_t16cb16_clamp(context_t* c)
1570{
1571 dst_iterator16 di(c);
1572
1573 /* Special case for simple horizontal scaling */
1574 if (is_context_horizontal(c)) {
1575 horz_clamp_iterator16 ci(c);
1576 while (di.count--) {
1577 *di.dst++ = ci.get_pixel16();
1578 }
1579 } else {
1580 clamp_iterator ci(c);
1581 while (di.count--) {
1582 *di.dst++ = ci.get_pixel16();
1583 }
1584 }
1585}
1586
1587
1588
/* Evaluates a linearly interpolated value at the center of the first
 * pixel of row 'y':
 *
 *   v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx)
 */
template <typename T, typename U>
static inline __attribute__((const))
T interpolate(int y, T v0, U dvdx, U dvdy) {
    // the half-pixel offset in both directions is folded into the origin
    return (v0 + ((dvdx + dvdy) >> 1)) + (dvdy * y);
}
1596
1597// ----------------------------------------------------------------------------
1598#if 0
1599#pragma mark -
1600#endif
1601
1602void init_y(context_t* c, int32_t ys)
1603{
1604 const uint32_t enables = c->state.enables;
1605
1606 // compute iterators...
1607 iterators_t& ci = c->iterators;
1608
1609 // sample in the center
1610 ci.y = ys;
1611
1612 if (enables & (GGL_ENABLE_DEPTH_TEST|GGL_ENABLE_W|GGL_ENABLE_FOG)) {
1613 ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy);
1614 ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy);
1615 ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy);
1616 }
1617
1618 if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) {
1619 ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy);
1620 ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy);
1621 ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy);
1622 ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady);
1623 c->step_y = step_y__smooth;
1624 } else {
1625 ci.ydrdy = c->shade.r0;
1626 ci.ydgdy = c->shade.g0;
1627 ci.ydbdy = c->shade.b0;
1628 ci.ydady = c->shade.a0;
1629 // XXX: do only if needed, or make sure this is fast
1630 c->packed = ggl_pack_color(c, c->state.buffers.color.format,
1631 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
1632 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
1633 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
1634 }
1635
1636 // initialize the variables we need in the shader
1637 generated_vars_t& gen = c->generated_vars;
1638 gen.argb[GGLFormat::ALPHA].c = ci.ydady;
1639 gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx;
1640 gen.argb[GGLFormat::RED ].c = ci.ydrdy;
1641 gen.argb[GGLFormat::RED ].dx = c->shade.drdx;
1642 gen.argb[GGLFormat::GREEN].c = ci.ydgdy;
1643 gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx;
1644 gen.argb[GGLFormat::BLUE ].c = ci.ydbdy;
1645 gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx;
1646 gen.dzdx = c->shade.dzdx;
1647 gen.f = ci.ydfdy;
1648 gen.dfdx = c->shade.dfdx;
1649
1650 if (enables & GGL_ENABLE_TMUS) {
1651 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1652 texture_t& t = c->state.texture[i];
1653 if (!t.enable) continue;
1654
1655 texture_iterators_t& ti = t.iterators;
1656 if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) {
1657 // we need to set all of these to 0 because in some cases
1658 // step_y__generic() or step_y__tmu() will be used and
1659 // therefore will update dtdy, however, in 1:1 mode
1660 // this is always done by the scanline rasterizer.
1661 ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0;
1662 ti.ydsdy = t.shade.is0;
1663 ti.ydtdy = t.shade.it0;
1664 } else {
1665 const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16);
1666 const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16);
1667 ti.sscale = t.shade.sscale + adjustSWrap;
1668 ti.tscale = t.shade.tscale + adjustTWrap;
1669 if (!(enables & GGL_ENABLE_W)) {
1670 // S coordinate
1671 const int32_t sscale = ti.sscale;
1672 const int32_t sy = interpolate(ys,
1673 t.shade.is0, t.shade.idsdx, t.shade.idsdy);
1674 if (sscale>=0) {
1675 ti.ydsdy= sy << sscale;
1676 ti.dsdx = t.shade.idsdx << sscale;
1677 ti.dsdy = t.shade.idsdy << sscale;
1678 } else {
1679 ti.ydsdy= sy >> -sscale;
1680 ti.dsdx = t.shade.idsdx >> -sscale;
1681 ti.dsdy = t.shade.idsdy >> -sscale;
1682 }
1683 // T coordinate
1684 const int32_t tscale = ti.tscale;
1685 const int32_t ty = interpolate(ys,
1686 t.shade.it0, t.shade.idtdx, t.shade.idtdy);
1687 if (tscale>=0) {
1688 ti.ydtdy= ty << tscale;
1689 ti.dtdx = t.shade.idtdx << tscale;
1690 ti.dtdy = t.shade.idtdy << tscale;
1691 } else {
1692 ti.ydtdy= ty >> -tscale;
1693 ti.dtdx = t.shade.idtdx >> -tscale;
1694 ti.dtdy = t.shade.idtdy >> -tscale;
1695 }
1696 }
1697 }
1698 // mirror for generated code...
1699 generated_tex_vars_t& gen = c->generated_vars.texture[i];
1700 gen.width = t.surface.width;
1701 gen.height = t.surface.height;
1702 gen.stride = t.surface.stride;
1703 gen.data = int32_t(t.surface.data);
1704 gen.dsdx = ti.dsdx;
1705 gen.dtdx = ti.dtdx;
1706 }
1707 }
1708
1709 // choose the y-stepper
1710 c->step_y = step_y__nop;
1711 if (enables & GGL_ENABLE_FOG) {
1712 c->step_y = step_y__generic;
1713 } else if (enables & GGL_ENABLE_TMUS) {
1714 if (enables & GGL_ENABLE_SMOOTH) {
1715 c->step_y = step_y__generic;
1716 } else if (enables & GGL_ENABLE_W) {
1717 c->step_y = step_y__w;
1718 } else {
1719 c->step_y = step_y__tmu;
1720 }
1721 } else {
1722 if (enables & GGL_ENABLE_SMOOTH) {
1723 c->step_y = step_y__smooth;
1724 }
1725 }
1726
1727 // choose the rectangle blitter
1728 c->rect = rect_generic;
1729 if ((c->step_y == step_y__nop) &&
1730 (c->scanline == scanline_memcpy))
1731 {
1732 c->rect = rect_memcpy;
1733 }
1734}
1735
1736void init_y_packed(context_t* c, int32_t y0)
1737{
1738 uint8_t f = c->state.buffers.color.format;
1739 c->packed = ggl_pack_color(c, f,
1740 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
Martyn Capewellf9e8ab02009-12-07 15:00:19 +00001741 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
1742 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08001743 c->iterators.y = y0;
1744 c->step_y = step_y__nop;
1745 // choose the rectangle blitter
1746 c->rect = rect_generic;
1747 if (c->scanline == scanline_memcpy) {
1748 c->rect = rect_memcpy;
1749 }
1750}
1751
1752void init_y_noop(context_t* c, int32_t y0)
1753{
1754 c->iterators.y = y0;
1755 c->step_y = step_y__nop;
1756 // choose the rectangle blitter
1757 c->rect = rect_generic;
1758 if (c->scanline == scanline_memcpy) {
1759 c->rect = rect_memcpy;
1760 }
1761}
1762
1763void init_y_error(context_t* c, int32_t y0)
1764{
1765 // woooops, shoud never happen,
1766 // fail gracefully (don't display anything)
1767 init_y_noop(c, y0);
Steve Block8aeb6e22012-01-06 14:13:42 +00001768 ALOGE("color-buffer has an invalid format!");
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08001769}
1770
1771// ----------------------------------------------------------------------------
1772#if 0
1773#pragma mark -
1774#endif
1775
1776void step_y__generic(context_t* c)
1777{
1778 const uint32_t enables = c->state.enables;
1779
1780 // iterate...
1781 iterators_t& ci = c->iterators;
1782 ci.y += 1;
1783
1784 if (enables & GGL_ENABLE_SMOOTH) {
1785 ci.ydrdy += c->shade.drdy;
1786 ci.ydgdy += c->shade.dgdy;
1787 ci.ydbdy += c->shade.dbdy;
1788 ci.ydady += c->shade.dady;
1789 }
1790
1791 const uint32_t mask =
1792 GGL_ENABLE_DEPTH_TEST |
1793 GGL_ENABLE_W |
1794 GGL_ENABLE_FOG;
1795 if (enables & mask) {
1796 ci.ydzdy += c->shade.dzdy;
1797 ci.ydwdy += c->shade.dwdy;
1798 ci.ydfdy += c->shade.dfdy;
1799 }
1800
1801 if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
1802 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1803 if (c->state.texture[i].enable) {
1804 texture_iterators_t& ti = c->state.texture[i].iterators;
1805 ti.ydsdy += ti.dsdy;
1806 ti.ydtdy += ti.dtdy;
1807 }
1808 }
1809 }
1810}
1811
1812void step_y__nop(context_t* c)
1813{
1814 c->iterators.y += 1;
1815 c->iterators.ydzdy += c->shade.dzdy;
1816}
1817
1818void step_y__smooth(context_t* c)
1819{
1820 iterators_t& ci = c->iterators;
1821 ci.y += 1;
1822 ci.ydrdy += c->shade.drdy;
1823 ci.ydgdy += c->shade.dgdy;
1824 ci.ydbdy += c->shade.dbdy;
1825 ci.ydady += c->shade.dady;
1826 ci.ydzdy += c->shade.dzdy;
1827}
1828
1829void step_y__w(context_t* c)
1830{
1831 iterators_t& ci = c->iterators;
1832 ci.y += 1;
1833 ci.ydzdy += c->shade.dzdy;
1834 ci.ydwdy += c->shade.dwdy;
1835}
1836
1837void step_y__tmu(context_t* c)
1838{
1839 iterators_t& ci = c->iterators;
1840 ci.y += 1;
1841 ci.ydzdy += c->shade.dzdy;
1842 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1843 if (c->state.texture[i].enable) {
1844 texture_iterators_t& ti = c->state.texture[i].iterators;
1845 ti.ydsdy += ti.dsdy;
1846 ti.ydtdy += ti.dtdy;
1847 }
1848 }
1849}
1850
1851// ----------------------------------------------------------------------------
1852#if 0
1853#pragma mark -
1854#endif
1855
/* Draws the current scanline in spans of at most (1<<SPAN_BITS) pixels.
 * For each span, the texture coordinates at both ends are multiplied by
 * the value returned by gglRecipQ() for w at that end, a linear per-pixel
 * step is derived between the two, and c->span(c) draws the span with
 * c->iterators.xl/xr narrowed to it.
 *
 * All enabled texture units are processed.
 */
void scanline_perspective(context_t* c)
{
    // Per-unit state carried from one span to the next. The union allows
    // the same storage to be addressed by name (s/sq, t/tq) or by index:
    // st[0] aliases {s, sq} and st[1] aliases {t, tq}, where 'v' is the
    // raw coordinate and 'q' the coordinate scaled by the reciprocal of w.
    struct {
        union {
            struct {
                int32_t s, sq;
                int32_t t, tq;
            };
            struct {
                int32_t v, q;
            } st[2];
        };
    } tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));

    // XXX: we should have a special case when dwdx = 0

    // 32 pixels spans works okay. 16 is a lot better,
    // but hey, it's a software renderer...
    const uint32_t SPAN_BITS = 5;
    const uint32_t ys = c->iterators.y;
    const uint32_t xs = c->iterators.xl;
    const uint32_t x1 = c->iterators.xr;
    const uint32_t xc = x1 - xs;
    // pixels left over after the full spans, and number of full spans
    uint32_t remainder = xc & ((1<<SPAN_BITS)-1);
    uint32_t numSpans = xc >> SPAN_BITS;

    const iterators_t& ci = c->iterators;
    // w at the start of the scanline, and its reciprocal
    // (presumably in Q30, given the second argument -- confirm)
    int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
    int32_t q0 = gglRecipQ(w0, 30);
    // derived from the first reciprocal only, and reused for every span
    const int iwscale = 32 - gglClz(q0);

    // change of w over one full span
    const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
    int32_t xl = c->iterators.xl;

    // We process s & t with a loop to reduce the code size
    // (and i-cache pressure).

    // Coordinates at the center of the first pixel, for each enabled unit.
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
        const texture_t& tmu = c->state.texture[i];
        if (!tmu.enable) continue;
        int32_t s =   tmu.shade.is0 +
                     (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
                     ((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
        int32_t t =   tmu.shade.it0 +
                     (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
                     ((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
        tc[i].s  = s;
        tc[i].t  = t;
        tc[i].sq = gglMulx(s, q0, iwscale);
        tc[i].tq = gglMulx(t, q0, iwscale);
    }

    // 'span' stays 0 while full spans are drawn; it is set to the number
    // of remaining pixels for the final, shorter span.
    int32_t span = 0;
    do {
        int32_t w1;
        if (ggl_likely(numSpans)) {
            w1 = w0 + dwdx;
        } else {
            if (remainder) {
                // finish off the scanline...
                span = remainder;
                w1 = (c->shade.dwdx * span) + w0;
            } else {
                break;
            }
        }
        // reciprocal of w at the end of this span
        int32_t q1 = gglRecipQ(w1, 30);
        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
            texture_t& tmu = c->state.texture[i];
            if (!tmu.enable) continue;
            texture_iterators_t& ti = tmu.iterators;

            // j == 0 handles s, j == 1 handles t
            for (int j=0 ; j<2 ; j++) {
                // raw coordinate at the end of the span
                int32_t v = tc[i].st[j].v;
                if (span)   v += (tmu.shade.st[j].dx)*span;
                else        v += (tmu.shade.st[j].dx)<<SPAN_BITS;
                // scaled coordinate at the start (v0) and end (v1)
                const int32_t v0 = tc[i].st[j].q;
                const int32_t v1 = gglMulx(v, q1, iwscale);
                // per-pixel step of the scaled coordinate over the span
                int32_t dvdx = v1 - v0;
                if (span)   dvdx /= span;
                else        dvdx >>= SPAN_BITS;
                // the end of this span is the start of the next one
                tc[i].st[j].v = v;
                tc[i].st[j].q = v1;

                const int scale = ti.st[j].scale + (iwscale - 30);
                if (scale >= 0) {
                    ti.st[j].ydvdy = v0 << scale;
                    ti.st[j].dvdx  = dvdx << scale;
                } else {
                    ti.st[j].ydvdy = v0 >> -scale;
                    ti.st[j].dvdx  = dvdx >> -scale;
                }
            }
            // mirror the per-pixel steps for the generated code
            generated_tex_vars_t& gen = c->generated_vars.texture[i];
            gen.dsdx = ti.st[0].dvdx;
            gen.dtdx = ti.st[1].dvdx;
        }
        // restrict the iterators to this span and draw it
        c->iterators.xl = xl;
        c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
        w0 = w1;
        q0 = q1;
        c->span(c);
    } while(numSpans--);
}
1960
1961void scanline_perspective_single(context_t* c)
1962{
1963 // 32 pixels spans works okay. 16 is a lot better,
1964 // but hey, it's a software renderer...
1965 const uint32_t SPAN_BITS = 5;
1966 const uint32_t ys = c->iterators.y;
1967 const uint32_t xs = c->iterators.xl;
1968 const uint32_t x1 = c->iterators.xr;
1969 const uint32_t xc = x1 - xs;
1970
1971 const iterators_t& ci = c->iterators;
1972 int32_t w = (xs * c->shade.dwdx) + ci.ydwdy;
1973 int32_t iw = gglRecipQ(w, 30);
1974 const int iwscale = 32 - gglClz(iw);
1975
1976 const int i = 31 - gglClz(c->state.enabled_tmu);
1977 generated_tex_vars_t& gen = c->generated_vars.texture[i];
1978 texture_t& tmu = c->state.texture[i];
1979 texture_iterators_t& ti = tmu.iterators;
1980 const int sscale = ti.sscale + (iwscale - 30);
1981 const int tscale = ti.tscale + (iwscale - 30);
1982 int32_t s = tmu.shade.is0 +
1983 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
1984 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
1985 int32_t t = tmu.shade.it0 +
1986 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
1987 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
1988 int32_t s0 = gglMulx(s, iw, iwscale);
1989 int32_t t0 = gglMulx(t, iw, iwscale);
1990 int32_t xl = c->iterators.xl;
1991
1992 int32_t sq, tq, dsdx, dtdx;
1993 int32_t premainder = xc & ((1<<SPAN_BITS)-1);
1994 uint32_t numSpans = xc >> SPAN_BITS;
1995 if (c->shade.dwdx == 0) {
1996 // XXX: we could choose to do this if the error is small enough
1997 numSpans = 0;
1998 premainder = xc;
1999 goto no_perspective;
2000 }
2001
2002 if (premainder) {
2003 w += c->shade.dwdx * premainder;
2004 iw = gglRecipQ(w, 30);
2005no_perspective:
2006 s += tmu.shade.idsdx * premainder;
2007 t += tmu.shade.idtdx * premainder;
2008 sq = gglMulx(s, iw, iwscale);
2009 tq = gglMulx(t, iw, iwscale);
2010 dsdx = (sq - s0) / premainder;
2011 dtdx = (tq - t0) / premainder;
2012 c->iterators.xl = xl;
2013 c->iterators.xr = xl = xl + premainder;
2014 goto finish;
2015 }
2016
2017 while (numSpans--) {
2018 w += c->shade.dwdx << SPAN_BITS;
2019 s += tmu.shade.idsdx << SPAN_BITS;
2020 t += tmu.shade.idtdx << SPAN_BITS;
2021 iw = gglRecipQ(w, 30);
2022 sq = gglMulx(s, iw, iwscale);
2023 tq = gglMulx(t, iw, iwscale);
2024 dsdx = (sq - s0) >> SPAN_BITS;
2025 dtdx = (tq - t0) >> SPAN_BITS;
2026 c->iterators.xl = xl;
2027 c->iterators.xr = xl = xl + (1<<SPAN_BITS);
2028finish:
2029 if (sscale >= 0) {
2030 ti.ydsdy = s0 << sscale;
2031 ti.dsdx = dsdx << sscale;
2032 } else {
2033 ti.ydsdy = s0 >>-sscale;
2034 ti.dsdx = dsdx >>-sscale;
2035 }
2036 if (tscale >= 0) {
2037 ti.ydtdy = t0 << tscale;
2038 ti.dtdx = dtdx << tscale;
2039 } else {
2040 ti.ydtdy = t0 >>-tscale;
2041 ti.dtdx = dtdx >>-tscale;
2042 }
2043 s0 = sq;
2044 t0 = tq;
2045 gen.dsdx = ti.dsdx;
2046 gen.dtdx = ti.dtdx;
2047 c->span(c);
2048 }
2049}
2050
2051// ----------------------------------------------------------------------------
2052
Martyn Capewellf9e8ab02009-12-07 15:00:19 +00002053void scanline_col32cb16blend(context_t* c)
2054{
2055 int32_t x = c->iterators.xl;
2056 size_t ct = c->iterators.xr - x;
2057 int32_t y = c->iterators.y;
2058 surface_t* cb = &(c->state.buffers.color);
2059 union {
2060 uint16_t* dst;
2061 uint32_t* dst32;
2062 };
2063 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2064
2065#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
2066#if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
2067 scanline_col32cb16blend_neon(dst, &(c->packed8888), ct);
2068#else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
2069 scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
2070#endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
2071#else
2072 uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
2073 int sA = (s>>24);
2074 int f = 0x100 - (sA + (sA>>7));
2075 while (ct--) {
2076 uint16_t d = *dst;
2077 int dR = (d>>11)&0x1f;
2078 int dG = (d>>5)&0x3f;
2079 int dB = (d)&0x1f;
2080 int sR = (s >> ( 3))&0x1F;
2081 int sG = (s >> ( 8+2))&0x3F;
2082 int sB = (s >> (16+3))&0x1F;
2083 sR += (f*dR)>>8;
2084 sG += (f*dG)>>8;
2085 sB += (f*dB)>>8;
2086 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB);
2087 }
2088#endif
2089
2090}
2091
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002092void scanline_t32cb16(context_t* c)
2093{
2094 int32_t x = c->iterators.xl;
2095 size_t ct = c->iterators.xr - x;
2096 int32_t y = c->iterators.y;
2097 surface_t* cb = &(c->state.buffers.color);
2098 union {
2099 uint16_t* dst;
2100 uint32_t* dst32;
2101 };
2102 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2103
2104 surface_t* tex = &(c->state.texture[0].surface);
2105 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2106 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2107 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
2108 int sR, sG, sB;
2109 uint32_t s, d;
2110
2111 if (ct==1 || uint32_t(dst)&2) {
2112last_one:
2113 s = GGL_RGBA_TO_HOST( *src++ );
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002114 *dst++ = convertAbgr8888ToRgb565(s);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002115 ct--;
2116 }
2117
2118 while (ct >= 2) {
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002119#if BYTE_ORDER == BIG_ENDIAN
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002120 s = GGL_RGBA_TO_HOST( *src++ );
2121 d = convertAbgr8888ToRgb565_hi16(s);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002122
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002123 s = GGL_RGBA_TO_HOST( *src++ );
2124 d |= convertAbgr8888ToRgb565(s);
2125#else
2126 s = GGL_RGBA_TO_HOST( *src++ );
2127 d = convertAbgr8888ToRgb565(s);
2128
2129 s = GGL_RGBA_TO_HOST( *src++ );
2130 d |= convertAbgr8888ToRgb565(s) << 16;
2131#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002132 *dst32++ = d;
2133 ct -= 2;
2134 }
2135
2136 if (ct > 0) {
2137 goto last_one;
2138 }
2139}
2140
2141void scanline_t32cb16blend(context_t* c)
2142{
Duane Sand068f9f32012-05-24 22:09:24 -07002143#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__mips)))
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002144 int32_t x = c->iterators.xl;
2145 size_t ct = c->iterators.xr - x;
2146 int32_t y = c->iterators.y;
2147 surface_t* cb = &(c->state.buffers.color);
2148 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2149
2150 surface_t* tex = &(c->state.texture[0].surface);
2151 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2152 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2153 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
2154
Duane Sand068f9f32012-05-24 22:09:24 -07002155#ifdef __arm__
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002156 scanline_t32cb16blend_arm(dst, src, ct);
2157#else
Duane Sand068f9f32012-05-24 22:09:24 -07002158 scanline_t32cb16blend_mips(dst, src, ct);
2159#endif
2160#else
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002161 dst_iterator16 di(c);
2162 horz_iterator32 hi(c);
2163 blender_32to16 bl(c);
2164 while (di.count--) {
2165 uint32_t s = hi.get_pixel32();
2166 bl.write(s, di.dst);
2167 di.dst++;
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002168 }
2169#endif
2170}
2171
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002172void scanline_t32cb16blend_srca(context_t* c)
2173{
2174 dst_iterator16 di(c);
2175 horz_iterator32 hi(c);
2176 blender_32to16_srcA blender(c);
2177
2178 while (di.count--) {
2179 uint32_t s = hi.get_pixel32();
2180 blender.write(s,di.dst);
2181 di.dst++;
2182 }
2183}
2184
2185void scanline_t16cb16blend_clamp_mod(context_t* c)
2186{
2187 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
2188 if (a == 0) {
2189 return;
2190 }
2191
2192 if (a == 255) {
2193 scanline_t16cb16_clamp(c);
2194 return;
2195 }
2196
2197 dst_iterator16 di(c);
2198 blender_16to16_modulate blender(c);
2199 clamp_iterator ci(c);
2200
2201 while (di.count--) {
2202 uint16_t s = ci.get_pixel16();
2203 blender.write(s, di.dst);
2204 di.dst++;
2205 }
2206}
2207
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002208void scanline_memcpy(context_t* c)
2209{
2210 int32_t x = c->iterators.xl;
2211 size_t ct = c->iterators.xr - x;
2212 int32_t y = c->iterators.y;
2213 surface_t* cb = &(c->state.buffers.color);
2214 const GGLFormat* fp = &(c->formats[cb->format]);
2215 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2216 (x + (cb->stride * y)) * fp->size;
2217
2218 surface_t* tex = &(c->state.texture[0].surface);
2219 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2220 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2221 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) +
2222 (u + (tex->stride * v)) * fp->size;
2223
2224 const size_t size = ct * fp->size;
2225 memcpy(dst, src, size);
2226}
2227
2228void scanline_memset8(context_t* c)
2229{
2230 int32_t x = c->iterators.xl;
2231 size_t ct = c->iterators.xr - x;
2232 int32_t y = c->iterators.y;
2233 surface_t* cb = &(c->state.buffers.color);
2234 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + (x+(cb->stride*y));
2235 uint32_t packed = c->packed;
2236 memset(dst, packed, ct);
2237}
2238
2239void scanline_memset16(context_t* c)
2240{
2241 int32_t x = c->iterators.xl;
2242 size_t ct = c->iterators.xr - x;
2243 int32_t y = c->iterators.y;
2244 surface_t* cb = &(c->state.buffers.color);
2245 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2246 uint32_t packed = c->packed;
2247 android_memset16(dst, packed, ct*2);
2248}
2249
2250void scanline_memset32(context_t* c)
2251{
2252 int32_t x = c->iterators.xl;
2253 size_t ct = c->iterators.xr - x;
2254 int32_t y = c->iterators.y;
2255 surface_t* cb = &(c->state.buffers.color);
2256 uint32_t* dst = reinterpret_cast<uint32_t*>(cb->data) + (x+(cb->stride*y));
2257 uint32_t packed = GGL_HOST_TO_RGBA(c->packed);
2258 android_memset32(dst, packed, ct*4);
2259}
2260
2261void scanline_clear(context_t* c)
2262{
2263 int32_t x = c->iterators.xl;
2264 size_t ct = c->iterators.xr - x;
2265 int32_t y = c->iterators.y;
2266 surface_t* cb = &(c->state.buffers.color);
2267 const GGLFormat* fp = &(c->formats[cb->format]);
2268 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2269 (x + (cb->stride * y)) * fp->size;
2270 const size_t size = ct * fp->size;
2271 memset(dst, 0, size);
2272}
2273
2274void scanline_set(context_t* c)
2275{
2276 int32_t x = c->iterators.xl;
2277 size_t ct = c->iterators.xr - x;
2278 int32_t y = c->iterators.y;
2279 surface_t* cb = &(c->state.buffers.color);
2280 const GGLFormat* fp = &(c->formats[cb->format]);
2281 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2282 (x + (cb->stride * y)) * fp->size;
2283 const size_t size = ct * fp->size;
2284 memset(dst, 0xFF, size);
2285}
2286
2287void scanline_noop(context_t* c)
2288{
2289}
2290
2291void rect_generic(context_t* c, size_t yc)
2292{
2293 do {
2294 c->scanline(c);
2295 c->step_y(c);
2296 } while (--yc);
2297}
2298
2299void rect_memcpy(context_t* c, size_t yc)
2300{
2301 int32_t x = c->iterators.xl;
2302 size_t ct = c->iterators.xr - x;
2303 int32_t y = c->iterators.y;
2304 surface_t* cb = &(c->state.buffers.color);
2305 const GGLFormat* fp = &(c->formats[cb->format]);
2306 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2307 (x + (cb->stride * y)) * fp->size;
2308
2309 surface_t* tex = &(c->state.texture[0].surface);
2310 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2311 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2312 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) +
2313 (u + (tex->stride * v)) * fp->size;
2314
2315 if (cb->stride == tex->stride && ct == size_t(cb->stride)) {
2316 memcpy(dst, src, ct * fp->size * yc);
2317 } else {
2318 const size_t size = ct * fp->size;
2319 const size_t dbpr = cb->stride * fp->size;
2320 const size_t sbpr = tex->stride * fp->size;
2321 do {
2322 memcpy(dst, src, size);
2323 dst += dbpr;
2324 src += sbpr;
2325 } while (--yc);
2326 }
2327}
2328// ----------------------------------------------------------------------------
2329}; // namespace android
2330