/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
17
#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "codeflinger/GGLAssembler.h"

#ifdef __ARM_ARCH__
#include <machine/cpu-features.h>
#endif

The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080027namespace android {
28
29// ----------------------------------------------------------------------------
30
31void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
32{
33 const int bits = addr.size;
34 const int inc = (flags & WRITE_BACK)?1:0;
35 switch (bits) {
36 case 32:
37 if (inc) STR(AL, s.reg, addr.reg, immed12_post(4));
38 else STR(AL, s.reg, addr.reg);
39 break;
40 case 24:
41 // 24 bits formats are a little special and used only for RGB
42 // 0x00BBGGRR is unpacked as R,G,B
43 STRB(AL, s.reg, addr.reg, immed12_pre(0));
44 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
45 STRB(AL, s.reg, addr.reg, immed12_pre(1));
46 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
47 STRB(AL, s.reg, addr.reg, immed12_pre(2));
48 if (!(s.flags & CORRUPTIBLE)) {
49 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
50 }
51 if (inc)
52 ADD(AL, 0, addr.reg, addr.reg, imm(3));
53 break;
54 case 16:
55 if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2));
56 else STRH(AL, s.reg, addr.reg);
57 break;
58 case 8:
59 if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1));
60 else STRB(AL, s.reg, addr.reg);
61 break;
62 }
63}
64
65void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
66{
67 Scratch scratches(registerFile());
68 int s0;
69
70 const int bits = addr.size;
71 const int inc = (flags & WRITE_BACK)?1:0;
72 switch (bits) {
73 case 32:
74 if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4));
75 else LDR(AL, s.reg, addr.reg);
76 break;
77 case 24:
78 // 24 bits formats are a little special and used only for RGB
79 // R,G,B is packed as 0x00BBGGRR
80 s0 = scratches.obtain();
81 if (s.reg != addr.reg) {
82 LDRB(AL, s.reg, addr.reg, immed12_pre(0)); // R
83 LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
84 ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
85 LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
86 ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
87 } else {
88 int s1 = scratches.obtain();
89 LDRB(AL, s1, addr.reg, immed12_pre(0)); // R
90 LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
91 ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
92 LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
93 ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
94 }
95 if (inc)
96 ADD(AL, 0, addr.reg, addr.reg, imm(3));
97 break;
98 case 16:
99 if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2));
100 else LDRH(AL, s.reg, addr.reg);
101 break;
102 case 8:
103 if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1));
104 else LDRB(AL, s.reg, addr.reg);
105 break;
106 }
107}
108
// Emit code that extracts the bit-field [h-1 .. l] of register 's' into
// d.reg, right-justified.  'bits' is the total width of the packed pixel
// 's' comes from; knowing whether the field touches the top of the pixel
// (h == bits) lets us skip masking the high side.  On exit d.s records
// the width of the extracted component.
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    // component width; assumed to fit in 8 bits
    // (appears true for all GGL pixel formats — TODO confirm)
    assert(maskLen<=8);
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        // field is the whole (top-justified, l==0) value: plain move
        MOV(AL, 0, d.reg, s);                       // component = packed;
    } else if ((h == bits) && l) {
        // field reaches the top bit: a shift right is enough
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));      // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));            // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));           // component = packed & mask;
    } else {
        // general case: ARMv7 unsigned bit-field extract
        UBFX(AL, d.reg, s, l, maskLen);             // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        // field does not reach the top of the word: clear the high bits,
        // either with an immediate mask, or — when no immediate encoding
        // fits — by shifting the field up against bit 31 so the LSR
        // below clears them.
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    // right-justify the field
    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    // if no instruction was emitted above, copy into the destination
    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}
156
157void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
158{
159 extract(d, s.reg,
160 s.format.c[component].h,
161 s.format.c[component].l,
162 s.size());
163}
164
165void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
166{
167 integer_t r(d.reg, 32, d.flags);
168 extract(r, s.reg,
169 s.format.c[component].h,
170 s.format.c[component].l,
171 s.size());
172 d = component_t(r);
173}
174
175
176void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
177{
178 if (s.l || (s.flags & CLEAR_HI)) {
179 extract(d, s.reg, s.h, s.l, 32);
180 expand(d, d, dbits);
181 } else {
182 expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
183 }
184}
185
186void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
187{
188 integer_t r(d.reg, 32, d.flags);
189 expand(r, s, dbits);
190 d = component_t(r);
191}
192
193void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
194{
195 assert(src.size());
196
197 int sbits = src.size();
198 int s = src.reg;
199 int d = dst.reg;
200
201 // be sure to set 'dst' after we read 'src' as they may be identical
202 dst.s = dbits;
203 dst.flags = 0;
204
205 if (dbits<=sbits) {
206 if (s != d) {
207 MOV(AL, 0, d, s);
208 }
209 return;
210 }
211
212 if (sbits == 1) {
213 RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
214 // d = (s<<dbits) - s;
215 return;
216 }
217
218 if (dbits % sbits) {
219 MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
220 // d = s << (dbits-sbits);
221 dbits -= sbits;
222 do {
223 ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
224 // d |= d >> sbits;
225 dbits -= sbits;
226 sbits *= 2;
227 } while(dbits>0);
228 return;
229 }
230
231 dbits -= sbits;
232 do {
233 ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
234 // d |= d<<sbits;
235 s = d;
236 dbits -= sbits;
237 if (sbits*2 < dbits) {
238 sbits *= 2;
239 }
240 } while(dbits>0);
241}
242
243void GGLAssembler::downshift(
244 pixel_t& d, int component, component_t s, const reg_t& dither)
245{
246 const needs_t& needs = mBuilderContext.needs;
247 Scratch scratches(registerFile());
248
249 int sh = s.h;
250 int sl = s.l;
251 int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
252 int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0;
253 int sbits = sh - sl;
254
255 int dh = d.format.c[component].h;
256 int dl = d.format.c[component].l;
257 int dbits = dh - dl;
258 int dithering = 0;
259
260 LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
261
262 if (sbits>dbits) {
263 // see if we need to dither
264 dithering = mDithering;
265 }
266
267 int ireg = d.reg;
268 if (!(d.flags & FIRST)) {
269 if (s.flags & CORRUPTIBLE) {
270 ireg = s.reg;
271 } else {
272 ireg = scratches.obtain();
273 }
274 }
275 d.flags &= ~FIRST;
276
277 if (maskHiBits) {
278 // we need to mask the high bits (and possibly the lowbits too)
279 // and we might be able to use immediate mask.
280 if (!dithering) {
281 // we don't do this if we only have maskLoBits because we can
282 // do it more efficiently below (in the case where dl=0)
283 const int offset = sh - dbits;
284 if (dbits<=8 && offset >= 0) {
285 const uint32_t mask = ((1<<dbits)-1) << offset;
286 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
287 build_and_immediate(ireg, s.reg, mask, 32);
288 sl = offset;
289 s.reg = ireg;
290 sbits = dbits;
291 maskLoBits = maskHiBits = 0;
292 }
293 }
294 } else {
295 // in the dithering case though, we need to preserve the lower bits
296 const uint32_t mask = ((1<<sbits)-1) << sl;
297 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
298 build_and_immediate(ireg, s.reg, mask, 32);
299 s.reg = ireg;
300 maskLoBits = maskHiBits = 0;
301 }
302 }
303 }
304
305 // XXX: we could special case (maskHiBits & !maskLoBits)
306 // like we do for maskLoBits below, but it happens very rarely
307 // that we have maskHiBits only and the conditions necessary to lead
308 // to better code (like doing d |= s << 24)
309
310 if (maskHiBits) {
311 MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
312 sl += 32-sh;
313 sh = 32;
314 s.reg = ireg;
315 maskHiBits = 0;
316 }
317
318 // Downsampling should be performed as follows:
319 // V * ((1<<dbits)-1) / ((1<<sbits)-1)
320 // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
321 // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
322 // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
323 // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
324 //
325 // By approximating (1>>dbits) and (1>>sbits) to 0:
326 //
327 // V>>(sbits-dbits) - V>>sbits
328 //
329 // A good approximation is V>>(sbits-dbits),
330 // but better one (needed for dithering) is:
331 //
332 // (V>>(sbits-dbits)<<sbits - V)>>sbits
333 // (V<<dbits - V)>>sbits
334 // (V - V>>dbits)>>(sbits-dbits)
335
336 // Dithering is done here
337 if (dithering) {
338 comment("dithering");
339 if (sl) {
340 MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
341 sh -= sl;
342 sl = 0;
343 s.reg = ireg;
344 }
345 // scaling (V-V>>dbits)
346 SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
347 const int shift = (GGL_DITHER_BITS - (sbits-dbits));
348 if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
349 else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
350 else ADD(AL, 0, ireg, ireg, dither.reg);
351 s.reg = ireg;
352 }
353
354 if ((maskLoBits|dithering) && (sh > dbits)) {
355 int shift = sh-dbits;
356 if (dl) {
357 MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
358 if (ireg == d.reg) {
359 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
360 } else {
361 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
362 }
363 } else {
364 if (ireg == d.reg) {
365 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
366 } else {
367 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
368 }
369 }
370 } else {
371 int shift = sh-dh;
372 if (shift>0) {
373 if (ireg == d.reg) {
374 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
375 } else {
376 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
377 }
378 } else if (shift<0) {
379 if (ireg == d.reg) {
380 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
381 } else {
382 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
383 }
384 } else {
385 if (ireg == d.reg) {
386 if (s.reg != d.reg) {
387 MOV(AL, 0, d.reg, s.reg);
388 }
389 } else {
390 ORR(AL, 0, d.reg, d.reg, s.reg);
391 }
392 }
393 }
394}
395
396}; // namespace android