Faster sprite_blit16 routine, more sprites in sprite_bounce

pull/41/head
Luke Wren 2021-11-22 02:44:39 +00:00
parent bca9aa7906
commit 1c42799d43
2 changed files with 58 additions and 40 deletions

View file

@ -64,7 +64,7 @@
#error "Select a video mode!" #error "Select a video mode!"
#endif #endif
#define N_BERRIES 50 #define N_BERRIES 65
#define LED_PIN 21 #define LED_PIN 21
struct dvi_inst dvi0; struct dvi_inst dvi0;

View file

@ -197,15 +197,13 @@ decl_func sprite_fill16
// r2: pixel count // r2: pixel count
// //
// Unrolled loop body with an initial computed branch. Note we can go much // Unrolled loop body with an initial computed branch.
// faster if r0 and r1 are co-aligned, but it's not all that helpful to have a
// 1 in 4 chance of being really fast when minimising worst-case scanline time
decl_func sprite_blit8 decl_func sprite_blit8
mov ip, r0 mov ip, r0
lsrs r3, r2, #3 lsrs r3, r2, #3
lsls r3, #3 lsls r3, #3
eors r2, r3 // r2 = pixels % 8, r3 = pixels = pixels % 8 eors r2, r3 // r2 = pixels % 8, r3 = pixels - pixels % 8
add r0, r3 add r0, r3
add r1, r3 add r1, r3
@ -282,45 +280,65 @@ decl_func sprite_blit8_alpha
bx lr bx lr
.macro storew_alignh rd ra offs
strh \rd, [\ra, #\offs]
lsrs \rd, #16
strh \rd, [\ra, #\offs + 2]
.endm
decl_func sprite_blit16 decl_func sprite_blit16
mov ip, r0 // Force source pointer to be word-aligned
lsrs r3, r2, #3 lsrs r3, r1, #2
lsls r3, #3 bcc 1f
eors r2, r3 // r2 = pixels % 8, r3 = pixels = pixels % 8 ldrh r3, [r1]
strh r3, [r0]
lsls r3, #1 adds r0, #2
add r0, r3 adds r1, #2
add r1, r3 subs r2, #1
adr r3, 2f
lsls r2, #2
subs r3, r2
adds r3, #1 // thumb bit >:(
bx r3
.align 2
1: 1:
subs r0, #16 // Each loop is 8 pixels. Place limit pointer at 16 bytes before
subs r1, #16 // end, loop until past it. There will be 0 to 7 pixels remaining.
ldrh r3, [r1, #14] lsls r2, #1
strh r3, [r0, #14] adds r2, r0
ldrh r3, [r1, #12] subs r2, #16
strh r3, [r0, #12] mov ip, r2
ldrh r3, [r1, #10] b 2f
strh r3, [r0, #10] 1:
ldrh r3, [r1, #8] ldmia r1!, {r2, r3}
strh r3, [r0, #8] storew_alignh r2, r0, 0
ldrh r3, [r1, #6] storew_alignh r3, r0, 4
strh r3, [r0, #6] ldmia r1!, {r2, r3}
ldrh r3, [r1, #4] storew_alignh r2, r0, 8
strh r3, [r0, #4] storew_alignh r3, r0, 12
ldrh r3, [r1, #2] adds r0, #16
strh r3, [r0, #2]
ldrh r3, [r1, #0]
strh r3, [r0, #0]
2: 2:
cmp r0, ip cmp r0, ip
bhi 1b bls 1b
mov r2, ip
subs r2, r0
// At least 4 pixels?
lsls r2, #29
bcc 1f
ldmia r1!, {r3}
storew_alignh r3, r0, 0
ldmia r1!, {r3}
storew_alignh r3, r0, 4
adds r0, #8
1:
// At least 2 pixels?
lsls r2, #1
bcc 1f
ldmia r1!, {r3}
storew_alignh r3, r0, 0
adds r0, #4
1:
// One more pixel?
lsls r2, #1
bcc 1f
ldrh r3, [r1]
strh r3, [r0]
1:
bx lr bx lr
.macro sprite_blit16_alpha_body n .macro sprite_blit16_alpha_body n