/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>

#include "codeflinger/GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
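    // WRITE_BACK requests that addr.reg be post-incremented by the size of
    // one pixel after the access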
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
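        // STRB stores the least significant byte of the register, so each
        // ROR #8 below rotates the next component (G, then B) into the low byte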
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
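        // after two rotations the register holds the pixel rotated by 16 bits;
        // if the caller may still need it (not CORRUPTIBLE), rotate it back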
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
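        // if s.reg aliases addr.reg, the address must survive all three byte
        // loads, so the pixel is assembled in scratch registers first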
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
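    // isolates the bit field [l, h) of register 's' and right-justifies it in
    // d.reg; e.g. for the green component of an RGB565 pixel (h=11, l=5) this
    // ANDs with 0x7E0 and shifts right by 5, leaving a 6-bit value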
    const int maskLen = h-l;

    assert(maskLen<=8);
    assert(h);

    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
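            // the mask cannot be encoded as an ARM immediate: shift the field
            // up to the top of the register instead, so the final LSR below
            // both isolates and right-justifies it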
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
    d = component_t(r);
}


void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
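    // if the component is not already right-justified with clean high bits,
    // extract it first, then expand the result in place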
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
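    // expands an sbits-wide value to dbits bits by replicating its bit pattern,
    // e.g. a 5-bit component expands to 8 bits as (v<<3)|(v>>2), mapping
    // 0x00 -> 0x00 and 0x1F -> 0xFF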
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
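            // (a 1-bit source expands to either 0 or 2^dbits - 1)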
        return;
    }

    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
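    // converts component 's', which holds sbits of significance in bits
    // [sl, sh), into the dbits-wide field [dl, dh) of the pixel being
    // assembled in d.reg, optionally dithering when precision is lost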
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the lowbits too)
        // and we might be able to use immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    //  Downsampling should be performed as follows:
    //      V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //      V * [(1<<dbits)/((1<<sbits)-1)  -  1/((1<<sbits)-1)]
    //      V * [1/((1<<sbits)-1)>>dbits  -  1/((1<<sbits)-1)]
    //      V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //      V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    //  By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //      V>>(sbits-dbits) - V>>sbits
    //
    //  A good approximation is V>>(sbits-dbits),
    //  but a better one (needed for dithering) is:
    //
    //      (V>>(sbits-dbits)<<sbits - V)>>sbits
    //      (V<<dbits - V)>>sbits
    //      (V - V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
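        // the dither value has GGL_DITHER_BITS of precision; align it with the
        // (sbits-dbits) fraction bits that the final right shift will discard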
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

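    // finally, shift the component into place and merge it into the pixel:
    // when ireg == d.reg this is the first component and a plain MOV suffices,
    // otherwise the bits are ORed into the partially assembled pixel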
    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android