/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
| 17 | |
| 18 | #include <assert.h> |
| 19 | #include <stdio.h> |
| 20 | #include <cutils/log.h> |
Mathias Agopian | 9857d99 | 2013-04-01 15:17:55 -0700 | [diff] [blame] | 21 | #include "GGLAssembler.h" |
The Android Open Source Project | dd7bc33 | 2009-03-03 19:32:55 -0800 | [diff] [blame] | 22 | |
Jean-Baptiste Queru | 62f4d86 | 2010-06-15 08:19:56 -0700 | [diff] [blame] | 23 | #ifdef __ARM_ARCH__ |
Martyn Capewell | 4dc1fa8 | 2009-12-04 16:44:58 +0000 | [diff] [blame] | 24 | #include <machine/cpu-features.h> |
Jean-Baptiste Queru | 62f4d86 | 2010-06-15 08:19:56 -0700 | [diff] [blame] | 25 | #endif |
Martyn Capewell | 4dc1fa8 | 2009-12-04 16:44:58 +0000 | [diff] [blame] | 26 | |
The Android Open Source Project | dd7bc33 | 2009-03-03 19:32:55 -0800 | [diff] [blame] | 27 | namespace android { |
| 28 | |
| 29 | // ---------------------------------------------------------------------------- |
| 30 | |
| 31 | void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags) |
| 32 | { |
| 33 | const int bits = addr.size; |
| 34 | const int inc = (flags & WRITE_BACK)?1:0; |
| 35 | switch (bits) { |
| 36 | case 32: |
| 37 | if (inc) STR(AL, s.reg, addr.reg, immed12_post(4)); |
| 38 | else STR(AL, s.reg, addr.reg); |
| 39 | break; |
| 40 | case 24: |
| 41 | // 24 bits formats are a little special and used only for RGB |
| 42 | // 0x00BBGGRR is unpacked as R,G,B |
| 43 | STRB(AL, s.reg, addr.reg, immed12_pre(0)); |
| 44 | MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8)); |
| 45 | STRB(AL, s.reg, addr.reg, immed12_pre(1)); |
| 46 | MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8)); |
| 47 | STRB(AL, s.reg, addr.reg, immed12_pre(2)); |
| 48 | if (!(s.flags & CORRUPTIBLE)) { |
| 49 | MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16)); |
| 50 | } |
| 51 | if (inc) |
| 52 | ADD(AL, 0, addr.reg, addr.reg, imm(3)); |
| 53 | break; |
| 54 | case 16: |
| 55 | if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2)); |
| 56 | else STRH(AL, s.reg, addr.reg); |
| 57 | break; |
| 58 | case 8: |
| 59 | if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1)); |
| 60 | else STRB(AL, s.reg, addr.reg); |
| 61 | break; |
| 62 | } |
| 63 | } |
| 64 | |
| 65 | void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags) |
| 66 | { |
| 67 | Scratch scratches(registerFile()); |
| 68 | int s0; |
| 69 | |
| 70 | const int bits = addr.size; |
| 71 | const int inc = (flags & WRITE_BACK)?1:0; |
| 72 | switch (bits) { |
| 73 | case 32: |
| 74 | if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4)); |
| 75 | else LDR(AL, s.reg, addr.reg); |
| 76 | break; |
| 77 | case 24: |
| 78 | // 24 bits formats are a little special and used only for RGB |
| 79 | // R,G,B is packed as 0x00BBGGRR |
| 80 | s0 = scratches.obtain(); |
| 81 | if (s.reg != addr.reg) { |
| 82 | LDRB(AL, s.reg, addr.reg, immed12_pre(0)); // R |
| 83 | LDRB(AL, s0, addr.reg, immed12_pre(1)); // G |
| 84 | ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8)); |
| 85 | LDRB(AL, s0, addr.reg, immed12_pre(2)); // B |
| 86 | ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16)); |
| 87 | } else { |
| 88 | int s1 = scratches.obtain(); |
| 89 | LDRB(AL, s1, addr.reg, immed12_pre(0)); // R |
| 90 | LDRB(AL, s0, addr.reg, immed12_pre(1)); // G |
| 91 | ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8)); |
| 92 | LDRB(AL, s0, addr.reg, immed12_pre(2)); // B |
| 93 | ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16)); |
| 94 | } |
| 95 | if (inc) |
| 96 | ADD(AL, 0, addr.reg, addr.reg, imm(3)); |
| 97 | break; |
| 98 | case 16: |
| 99 | if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2)); |
| 100 | else LDRH(AL, s.reg, addr.reg); |
| 101 | break; |
| 102 | case 8: |
| 103 | if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1)); |
| 104 | else LDRB(AL, s.reg, addr.reg); |
| 105 | break; |
| 106 | } |
| 107 | } |
| 108 | |
| 109 | void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits) |
| 110 | { |
| 111 | const int maskLen = h-l; |
| 112 | |
Paul Lind | 2bc2b79 | 2012-02-01 10:54:19 -0800 | [diff] [blame] | 113 | #ifdef __mips__ |
| 114 | assert(maskLen<=11); |
| 115 | #else |
The Android Open Source Project | dd7bc33 | 2009-03-03 19:32:55 -0800 | [diff] [blame] | 116 | assert(maskLen<=8); |
Paul Lind | 2bc2b79 | 2012-02-01 10:54:19 -0800 | [diff] [blame] | 117 | #endif |
The Android Open Source Project | dd7bc33 | 2009-03-03 19:32:55 -0800 | [diff] [blame] | 118 | assert(h); |
| 119 | |
Martyn Capewell | 4dc1fa8 | 2009-12-04 16:44:58 +0000 | [diff] [blame] | 120 | #if __ARM_ARCH__ >= 7 |
| 121 | const int mask = (1<<maskLen)-1; |
| 122 | if ((h == bits) && !l && (s != d.reg)) { |
| 123 | MOV(AL, 0, d.reg, s); // component = packed; |
| 124 | } else if ((h == bits) && l) { |
| 125 | MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l; |
| 126 | } else if (!l && isValidImmediate(mask)) { |
| 127 | AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask; |
| 128 | } else if (!l && isValidImmediate(~mask)) { |
| 129 | BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask; |
| 130 | } else { |
| 131 | UBFX(AL, d.reg, s, l, maskLen); // component = (packed & mask) >> l; |
| 132 | } |
| 133 | #else |
The Android Open Source Project | dd7bc33 | 2009-03-03 19:32:55 -0800 | [diff] [blame] | 134 | if (h != bits) { |
| 135 | const int mask = ((1<<maskLen)-1) << l; |
| 136 | if (isValidImmediate(mask)) { |
| 137 | AND(AL, 0, d.reg, s, imm(mask)); // component = packed & mask; |
| 138 | } else if (isValidImmediate(~mask)) { |
| 139 | BIC(AL, 0, d.reg, s, imm(~mask)); // component = packed & mask; |
| 140 | } else { |
| 141 | MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h)); |
| 142 | l += 32-h; |
| 143 | h = 32; |
| 144 | } |
| 145 | s = d.reg; |
| 146 | } |
| 147 | |
| 148 | if (l) { |
| 149 | MOV(AL, 0, d.reg, reg_imm(s, LSR, l)); // component = packed >> l; |
| 150 | s = d.reg; |
| 151 | } |
| 152 | |
| 153 | if (s != d.reg) { |
| 154 | MOV(AL, 0, d.reg, s); |
| 155 | } |
Martyn Capewell | 4dc1fa8 | 2009-12-04 16:44:58 +0000 | [diff] [blame] | 156 | #endif |
The Android Open Source Project | dd7bc33 | 2009-03-03 19:32:55 -0800 | [diff] [blame] | 157 | |
| 158 | d.s = maskLen; |
| 159 | } |
| 160 | |
| 161 | void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component) |
| 162 | { |
| 163 | extract(d, s.reg, |
| 164 | s.format.c[component].h, |
| 165 | s.format.c[component].l, |
| 166 | s.size()); |
| 167 | } |
| 168 | |
| 169 | void GGLAssembler::extract(component_t& d, const pixel_t& s, int component) |
| 170 | { |
| 171 | integer_t r(d.reg, 32, d.flags); |
| 172 | extract(r, s.reg, |
| 173 | s.format.c[component].h, |
| 174 | s.format.c[component].l, |
| 175 | s.size()); |
| 176 | d = component_t(r); |
| 177 | } |
| 178 | |
| 179 | |
| 180 | void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits) |
| 181 | { |
| 182 | if (s.l || (s.flags & CLEAR_HI)) { |
| 183 | extract(d, s.reg, s.h, s.l, 32); |
| 184 | expand(d, d, dbits); |
| 185 | } else { |
| 186 | expand(d, integer_t(s.reg, s.size(), s.flags), dbits); |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | void GGLAssembler::expand(component_t& d, const component_t& s, int dbits) |
| 191 | { |
| 192 | integer_t r(d.reg, 32, d.flags); |
| 193 | expand(r, s, dbits); |
| 194 | d = component_t(r); |
| 195 | } |
| 196 | |
| 197 | void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits) |
| 198 | { |
| 199 | assert(src.size()); |
| 200 | |
| 201 | int sbits = src.size(); |
| 202 | int s = src.reg; |
| 203 | int d = dst.reg; |
| 204 | |
| 205 | // be sure to set 'dst' after we read 'src' as they may be identical |
| 206 | dst.s = dbits; |
| 207 | dst.flags = 0; |
| 208 | |
| 209 | if (dbits<=sbits) { |
| 210 | if (s != d) { |
| 211 | MOV(AL, 0, d, s); |
| 212 | } |
| 213 | return; |
| 214 | } |
| 215 | |
| 216 | if (sbits == 1) { |
| 217 | RSB(AL, 0, d, s, reg_imm(s, LSL, dbits)); |
| 218 | // d = (s<<dbits) - s; |
| 219 | return; |
| 220 | } |
| 221 | |
| 222 | if (dbits % sbits) { |
| 223 | MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits)); |
| 224 | // d = s << (dbits-sbits); |
| 225 | dbits -= sbits; |
| 226 | do { |
| 227 | ORR(AL, 0, d, d, reg_imm(d, LSR, sbits)); |
| 228 | // d |= d >> sbits; |
| 229 | dbits -= sbits; |
| 230 | sbits *= 2; |
| 231 | } while(dbits>0); |
| 232 | return; |
| 233 | } |
| 234 | |
| 235 | dbits -= sbits; |
| 236 | do { |
| 237 | ORR(AL, 0, d, s, reg_imm(s, LSL, sbits)); |
| 238 | // d |= d<<sbits; |
| 239 | s = d; |
| 240 | dbits -= sbits; |
| 241 | if (sbits*2 < dbits) { |
| 242 | sbits *= 2; |
| 243 | } |
| 244 | } while(dbits>0); |
| 245 | } |
| 246 | |
| 247 | void GGLAssembler::downshift( |
| 248 | pixel_t& d, int component, component_t s, const reg_t& dither) |
| 249 | { |
| 250 | const needs_t& needs = mBuilderContext.needs; |
| 251 | Scratch scratches(registerFile()); |
| 252 | |
| 253 | int sh = s.h; |
| 254 | int sl = s.l; |
| 255 | int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0; |
| 256 | int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0; |
| 257 | int sbits = sh - sl; |
| 258 | |
| 259 | int dh = d.format.c[component].h; |
| 260 | int dl = d.format.c[component].l; |
| 261 | int dbits = dh - dl; |
| 262 | int dithering = 0; |
| 263 | |
Steve Block | 01dda20 | 2012-01-06 14:13:42 +0000 | [diff] [blame] | 264 | ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits); |
The Android Open Source Project | dd7bc33 | 2009-03-03 19:32:55 -0800 | [diff] [blame] | 265 | |
| 266 | if (sbits>dbits) { |
| 267 | // see if we need to dither |
| 268 | dithering = mDithering; |
| 269 | } |
| 270 | |
| 271 | int ireg = d.reg; |
| 272 | if (!(d.flags & FIRST)) { |
| 273 | if (s.flags & CORRUPTIBLE) { |
| 274 | ireg = s.reg; |
| 275 | } else { |
| 276 | ireg = scratches.obtain(); |
| 277 | } |
| 278 | } |
| 279 | d.flags &= ~FIRST; |
| 280 | |
| 281 | if (maskHiBits) { |
| 282 | // we need to mask the high bits (and possibly the lowbits too) |
| 283 | // and we might be able to use immediate mask. |
| 284 | if (!dithering) { |
| 285 | // we don't do this if we only have maskLoBits because we can |
| 286 | // do it more efficiently below (in the case where dl=0) |
| 287 | const int offset = sh - dbits; |
| 288 | if (dbits<=8 && offset >= 0) { |
| 289 | const uint32_t mask = ((1<<dbits)-1) << offset; |
| 290 | if (isValidImmediate(mask) || isValidImmediate(~mask)) { |
| 291 | build_and_immediate(ireg, s.reg, mask, 32); |
| 292 | sl = offset; |
| 293 | s.reg = ireg; |
| 294 | sbits = dbits; |
| 295 | maskLoBits = maskHiBits = 0; |
| 296 | } |
| 297 | } |
| 298 | } else { |
| 299 | // in the dithering case though, we need to preserve the lower bits |
| 300 | const uint32_t mask = ((1<<sbits)-1) << sl; |
| 301 | if (isValidImmediate(mask) || isValidImmediate(~mask)) { |
| 302 | build_and_immediate(ireg, s.reg, mask, 32); |
| 303 | s.reg = ireg; |
| 304 | maskLoBits = maskHiBits = 0; |
| 305 | } |
| 306 | } |
| 307 | } |
| 308 | |
| 309 | // XXX: we could special case (maskHiBits & !maskLoBits) |
| 310 | // like we do for maskLoBits below, but it happens very rarely |
| 311 | // that we have maskHiBits only and the conditions necessary to lead |
| 312 | // to better code (like doing d |= s << 24) |
| 313 | |
| 314 | if (maskHiBits) { |
| 315 | MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh)); |
| 316 | sl += 32-sh; |
| 317 | sh = 32; |
| 318 | s.reg = ireg; |
| 319 | maskHiBits = 0; |
| 320 | } |
| 321 | |
| 322 | // Downsampling should be performed as follows: |
| 323 | // V * ((1<<dbits)-1) / ((1<<sbits)-1) |
| 324 | // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)] |
| 325 | // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)] |
| 326 | // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits |
| 327 | // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits)) |
| 328 | // |
| 329 | // By approximating (1>>dbits) and (1>>sbits) to 0: |
| 330 | // |
| 331 | // V>>(sbits-dbits) - V>>sbits |
| 332 | // |
| 333 | // A good approximation is V>>(sbits-dbits), |
| 334 | // but better one (needed for dithering) is: |
| 335 | // |
| 336 | // (V>>(sbits-dbits)<<sbits - V)>>sbits |
| 337 | // (V<<dbits - V)>>sbits |
| 338 | // (V - V>>dbits)>>(sbits-dbits) |
| 339 | |
| 340 | // Dithering is done here |
| 341 | if (dithering) { |
| 342 | comment("dithering"); |
| 343 | if (sl) { |
| 344 | MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl)); |
| 345 | sh -= sl; |
| 346 | sl = 0; |
| 347 | s.reg = ireg; |
| 348 | } |
| 349 | // scaling (V-V>>dbits) |
| 350 | SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits)); |
| 351 | const int shift = (GGL_DITHER_BITS - (sbits-dbits)); |
| 352 | if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift)); |
| 353 | else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift)); |
| 354 | else ADD(AL, 0, ireg, ireg, dither.reg); |
| 355 | s.reg = ireg; |
| 356 | } |
| 357 | |
| 358 | if ((maskLoBits|dithering) && (sh > dbits)) { |
| 359 | int shift = sh-dbits; |
| 360 | if (dl) { |
| 361 | MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift)); |
| 362 | if (ireg == d.reg) { |
| 363 | MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl)); |
| 364 | } else { |
| 365 | ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl)); |
| 366 | } |
| 367 | } else { |
| 368 | if (ireg == d.reg) { |
| 369 | MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift)); |
| 370 | } else { |
| 371 | ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift)); |
| 372 | } |
| 373 | } |
| 374 | } else { |
| 375 | int shift = sh-dh; |
| 376 | if (shift>0) { |
| 377 | if (ireg == d.reg) { |
| 378 | MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift)); |
| 379 | } else { |
| 380 | ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift)); |
| 381 | } |
| 382 | } else if (shift<0) { |
| 383 | if (ireg == d.reg) { |
| 384 | MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift)); |
| 385 | } else { |
| 386 | ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift)); |
| 387 | } |
| 388 | } else { |
| 389 | if (ireg == d.reg) { |
| 390 | if (s.reg != d.reg) { |
| 391 | MOV(AL, 0, d.reg, s.reg); |
| 392 | } |
| 393 | } else { |
| 394 | ORR(AL, 0, d.reg, d.reg, s.reg); |
| 395 | } |
| 396 | } |
| 397 | } |
| 398 | } |
| 399 | |
| 400 | }; // namespace android |