blob: 0a46eaa3cc40f2037a377609ede94eade976b3fd [file] [log] [blame]
/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
17
18#include <assert.h>
19#include <stdio.h>
20#include <cutils/log.h>
Mathias Agopian9857d992013-04-01 15:17:55 -070021#include "GGLAssembler.h"
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080022
Jean-Baptiste Queru62f4d862010-06-15 08:19:56 -070023#ifdef __ARM_ARCH__
Martyn Capewell4dc1fa82009-12-04 16:44:58 +000024#include <machine/cpu-features.h>
Jean-Baptiste Queru62f4d862010-06-15 08:19:56 -070025#endif
Martyn Capewell4dc1fa82009-12-04 16:44:58 +000026
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080027namespace android {
28
29// ----------------------------------------------------------------------------
30
31void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
32{
33 const int bits = addr.size;
34 const int inc = (flags & WRITE_BACK)?1:0;
35 switch (bits) {
36 case 32:
37 if (inc) STR(AL, s.reg, addr.reg, immed12_post(4));
38 else STR(AL, s.reg, addr.reg);
39 break;
40 case 24:
41 // 24 bits formats are a little special and used only for RGB
42 // 0x00BBGGRR is unpacked as R,G,B
43 STRB(AL, s.reg, addr.reg, immed12_pre(0));
44 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
45 STRB(AL, s.reg, addr.reg, immed12_pre(1));
46 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
47 STRB(AL, s.reg, addr.reg, immed12_pre(2));
48 if (!(s.flags & CORRUPTIBLE)) {
49 MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
50 }
51 if (inc)
52 ADD(AL, 0, addr.reg, addr.reg, imm(3));
53 break;
54 case 16:
55 if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2));
56 else STRH(AL, s.reg, addr.reg);
57 break;
58 case 8:
59 if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1));
60 else STRB(AL, s.reg, addr.reg);
61 break;
62 }
63}
64
65void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
66{
67 Scratch scratches(registerFile());
68 int s0;
69
70 const int bits = addr.size;
71 const int inc = (flags & WRITE_BACK)?1:0;
72 switch (bits) {
73 case 32:
74 if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4));
75 else LDR(AL, s.reg, addr.reg);
76 break;
77 case 24:
78 // 24 bits formats are a little special and used only for RGB
79 // R,G,B is packed as 0x00BBGGRR
80 s0 = scratches.obtain();
81 if (s.reg != addr.reg) {
82 LDRB(AL, s.reg, addr.reg, immed12_pre(0)); // R
83 LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
84 ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
85 LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
86 ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
87 } else {
88 int s1 = scratches.obtain();
89 LDRB(AL, s1, addr.reg, immed12_pre(0)); // R
90 LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
91 ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
92 LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
93 ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
94 }
95 if (inc)
96 ADD(AL, 0, addr.reg, addr.reg, imm(3));
97 break;
98 case 16:
99 if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2));
100 else LDRH(AL, s.reg, addr.reg);
101 break;
102 case 8:
103 if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1));
104 else LDRB(AL, s.reg, addr.reg);
105 break;
106 }
107}
108
109void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
110{
111 const int maskLen = h-l;
112
Paul Lind2bc2b792012-02-01 10:54:19 -0800113#ifdef __mips__
114 assert(maskLen<=11);
115#else
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800116 assert(maskLen<=8);
Paul Lind2bc2b792012-02-01 10:54:19 -0800117#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800118 assert(h);
119
Martyn Capewell4dc1fa82009-12-04 16:44:58 +0000120#if __ARM_ARCH__ >= 7
121 const int mask = (1<<maskLen)-1;
122 if ((h == bits) && !l && (s != d.reg)) {
123 MOV(AL, 0, d.reg, s); // component = packed;
124 } else if ((h == bits) && l) {
125 MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l;
126 } else if (!l && isValidImmediate(mask)) {
127 AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask;
128 } else if (!l && isValidImmediate(~mask)) {
129 BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask;
130 } else {
131 UBFX(AL, d.reg, s, l, maskLen); // component = (packed & mask) >> l;
132 }
133#else
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800134 if (h != bits) {
135 const int mask = ((1<<maskLen)-1) << l;
136 if (isValidImmediate(mask)) {
137 AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask;
138 } else if (isValidImmediate(~mask)) {
139 BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask;
140 } else {
141 MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
142 l += 32-h;
143 h = 32;
144 }
145 s = d.reg;
146 }
147
148 if (l) {
149 MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l;
150 s = d.reg;
151 }
152
153 if (s != d.reg) {
154 MOV(AL, 0, d.reg, s);
155 }
Martyn Capewell4dc1fa82009-12-04 16:44:58 +0000156#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800157
158 d.s = maskLen;
159}
160
161void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
162{
163 extract(d, s.reg,
164 s.format.c[component].h,
165 s.format.c[component].l,
166 s.size());
167}
168
169void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
170{
171 integer_t r(d.reg, 32, d.flags);
172 extract(r, s.reg,
173 s.format.c[component].h,
174 s.format.c[component].l,
175 s.size());
176 d = component_t(r);
177}
178
179
180void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
181{
182 if (s.l || (s.flags & CLEAR_HI)) {
183 extract(d, s.reg, s.h, s.l, 32);
184 expand(d, d, dbits);
185 } else {
186 expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
187 }
188}
189
190void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
191{
192 integer_t r(d.reg, 32, d.flags);
193 expand(r, s, dbits);
194 d = component_t(r);
195}
196
197void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
198{
199 assert(src.size());
200
201 int sbits = src.size();
202 int s = src.reg;
203 int d = dst.reg;
204
205 // be sure to set 'dst' after we read 'src' as they may be identical
206 dst.s = dbits;
207 dst.flags = 0;
208
209 if (dbits<=sbits) {
210 if (s != d) {
211 MOV(AL, 0, d, s);
212 }
213 return;
214 }
215
216 if (sbits == 1) {
217 RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
218 // d = (s<<dbits) - s;
219 return;
220 }
221
222 if (dbits % sbits) {
223 MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
224 // d = s << (dbits-sbits);
225 dbits -= sbits;
226 do {
227 ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
228 // d |= d >> sbits;
229 dbits -= sbits;
230 sbits *= 2;
231 } while(dbits>0);
232 return;
233 }
234
235 dbits -= sbits;
236 do {
237 ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
238 // d |= d<<sbits;
239 s = d;
240 dbits -= sbits;
241 if (sbits*2 < dbits) {
242 sbits *= 2;
243 }
244 } while(dbits>0);
245}
246
247void GGLAssembler::downshift(
248 pixel_t& d, int component, component_t s, const reg_t& dither)
249{
250 const needs_t& needs = mBuilderContext.needs;
251 Scratch scratches(registerFile());
252
253 int sh = s.h;
254 int sl = s.l;
255 int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
256 int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0;
257 int sbits = sh - sl;
258
259 int dh = d.format.c[component].h;
260 int dl = d.format.c[component].l;
261 int dbits = dh - dl;
262 int dithering = 0;
263
Steve Block01dda202012-01-06 14:13:42 +0000264 ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800265
266 if (sbits>dbits) {
267 // see if we need to dither
268 dithering = mDithering;
269 }
270
271 int ireg = d.reg;
272 if (!(d.flags & FIRST)) {
273 if (s.flags & CORRUPTIBLE) {
274 ireg = s.reg;
275 } else {
276 ireg = scratches.obtain();
277 }
278 }
279 d.flags &= ~FIRST;
280
281 if (maskHiBits) {
282 // we need to mask the high bits (and possibly the lowbits too)
283 // and we might be able to use immediate mask.
284 if (!dithering) {
285 // we don't do this if we only have maskLoBits because we can
286 // do it more efficiently below (in the case where dl=0)
287 const int offset = sh - dbits;
288 if (dbits<=8 && offset >= 0) {
289 const uint32_t mask = ((1<<dbits)-1) << offset;
290 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
291 build_and_immediate(ireg, s.reg, mask, 32);
292 sl = offset;
293 s.reg = ireg;
294 sbits = dbits;
295 maskLoBits = maskHiBits = 0;
296 }
297 }
298 } else {
299 // in the dithering case though, we need to preserve the lower bits
300 const uint32_t mask = ((1<<sbits)-1) << sl;
301 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
302 build_and_immediate(ireg, s.reg, mask, 32);
303 s.reg = ireg;
304 maskLoBits = maskHiBits = 0;
305 }
306 }
307 }
308
309 // XXX: we could special case (maskHiBits & !maskLoBits)
310 // like we do for maskLoBits below, but it happens very rarely
311 // that we have maskHiBits only and the conditions necessary to lead
312 // to better code (like doing d |= s << 24)
313
314 if (maskHiBits) {
315 MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
316 sl += 32-sh;
317 sh = 32;
318 s.reg = ireg;
319 maskHiBits = 0;
320 }
321
322 // Downsampling should be performed as follows:
323 // V * ((1<<dbits)-1) / ((1<<sbits)-1)
324 // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
325 // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
326 // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
327 // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
328 //
329 // By approximating (1>>dbits) and (1>>sbits) to 0:
330 //
331 // V>>(sbits-dbits) - V>>sbits
332 //
333 // A good approximation is V>>(sbits-dbits),
334 // but better one (needed for dithering) is:
335 //
336 // (V>>(sbits-dbits)<<sbits - V)>>sbits
337 // (V<<dbits - V)>>sbits
338 // (V - V>>dbits)>>(sbits-dbits)
339
340 // Dithering is done here
341 if (dithering) {
342 comment("dithering");
343 if (sl) {
344 MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
345 sh -= sl;
346 sl = 0;
347 s.reg = ireg;
348 }
349 // scaling (V-V>>dbits)
350 SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
351 const int shift = (GGL_DITHER_BITS - (sbits-dbits));
352 if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
353 else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
354 else ADD(AL, 0, ireg, ireg, dither.reg);
355 s.reg = ireg;
356 }
357
358 if ((maskLoBits|dithering) && (sh > dbits)) {
359 int shift = sh-dbits;
360 if (dl) {
361 MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
362 if (ireg == d.reg) {
363 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
364 } else {
365 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
366 }
367 } else {
368 if (ireg == d.reg) {
369 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
370 } else {
371 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
372 }
373 }
374 } else {
375 int shift = sh-dh;
376 if (shift>0) {
377 if (ireg == d.reg) {
378 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
379 } else {
380 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
381 }
382 } else if (shift<0) {
383 if (ireg == d.reg) {
384 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
385 } else {
386 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
387 }
388 } else {
389 if (ireg == d.reg) {
390 if (s.reg != d.reg) {
391 MOV(AL, 0, d.reg, s.reg);
392 }
393 } else {
394 ORR(AL, 0, d.reg, d.reg, s.reg);
395 }
396 }
397 }
398}
399
400}; // namespace android