Sketching tiled background routines

pull/41/head
Luke Wren 2021-11-20 16:28:17 +00:00
rodzic 62a41787f8
commit a16ab9744c
4 zmienionych plików z 182 dodań i 21 usunięć

Wyświetl plik

@ -2,9 +2,11 @@ add_library(libsprite INTERFACE)
# Attach the libsprite headers, C source and assembly sources to the
# libsprite target. They are listed with the INTERFACE keyword, and every path
# is given relative to the directory containing this list file.
target_sources(libsprite INTERFACE
${CMAKE_CURRENT_LIST_DIR}/affine_transform.h
${CMAKE_CURRENT_LIST_DIR}/sprite_asm_const.h
${CMAKE_CURRENT_LIST_DIR}/sprite.S
${CMAKE_CURRENT_LIST_DIR}/sprite.c
${CMAKE_CURRENT_LIST_DIR}/sprite.h
${CMAKE_CURRENT_LIST_DIR}/tile.S
)
# Add this directory as an INTERFACE include directory of libsprite, so the
# headers listed above can be found by name.
target_include_directories(libsprite INTERFACE ${CMAKE_CURRENT_LIST_DIR})

Wyświetl plik

@ -3,6 +3,8 @@
#include "hardware/regs/addressmap.h"
#include "hardware/regs/sio.h"
#include "sprite_asm_const.h"
#define POP2_OFFS (SIO_INTERP0_POP_FULL_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define CTRL0_OFFS (SIO_INTERP0_CTRL_LANE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
#define INTERP1 (SIO_INTERP1_ACCUM0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
@ -11,15 +13,6 @@
.cpu cortex-m0plus
.thumb
// Put every function in its own ELF section, to permit linker GC
//
// decl_func <name>: use in place of a bare "<name>:" label to open a new
// global Thumb function. Everything up to the next section change is emitted
// into the section ".time_critical.<name>".
// NOTE(review): the ".time_critical" prefix is presumably what the SDK linker
// script uses to place code in RAM -- confirm against the linker script.
.macro decl_func name
// Switch to a per-function section; "ax" = allocatable + executable
.section .time_critical.\name, "ax"
// Export the symbol so other objects can link against it
.global \name
// Mark the symbol as a function in the ELF symbol table
.type \name,%function
// Flag the next label as a Thumb entry point (must precede the label)
.thumb_func
\name:
.endm
// ----------------------------------------------------------------------------
// Colour fill
@ -248,10 +241,6 @@ decl_func sprite_blit8
bhi 1b
bx lr
// Assume RAGB2132 (so alpha is bit 5)
#define ALPHA_SHIFT_8BPP 6
.macro sprite_blit8_alpha_body n
ldrb r3, [r1, #\n]
lsrs r2, r3, #ALPHA_SHIFT_8BPP
@ -334,14 +323,6 @@ decl_func sprite_blit16
bhi 1b
bx lr
// Assume RGAB5515 (so alpha is bit 5)
// Note the alpha bit being in the same position as RAGB2132 is a coincidence.
// We are just stealing an LSB such that we can treat our alpha pixels in the
// same way as non-alpha pixels when encoding (and the co-opted channel LSB
// always ends up being set on alpha pixels, which is pretty harmless)
#define ALPHA_SHIFT_16BPP 6
.macro sprite_blit16_alpha_body n
ldrh r3, [r1, #2*\n]
lsrs r2, r3, #ALPHA_SHIFT_16BPP

Wyświetl plik

@ -0,0 +1,26 @@
#ifndef _SPRITE_ASM_CONST
#define _SPRITE_ASM_CONST
// Put every function in its own ELF section, to permit linker GC
//
// decl_func <name>: use in place of a bare "<name>:" label to open a new
// global Thumb function. Everything up to the next section change is emitted
// into the section ".time_critical.<name>".
// NOTE(review): the ".time_critical" prefix is presumably what the SDK linker
// script uses to place code in RAM -- confirm against the linker script.
.macro decl_func name
// Switch to a per-function section; "ax" = allocatable + executable
.section .time_critical.\name, "ax"
// Export the symbol so other objects can link against it
.global \name
// Mark the symbol as a function in the ELF symbol table
.type \name,%function
// Flag the next label as a Thumb entry point (must precede the label)
.thumb_func
\name:
.endm
// Assume RGAB5515 (so alpha is bit 5)
// Note the alpha bit being in the same position as RAGB2132 is a coincidence.
// We are just stealing an LSB such that we can treat our alpha pixels in the
// same way as non-alpha pixels when encoding (and the co-opted channel LSB
// always ends up being set on alpha pixels, which is pretty harmless)
#define ALPHA_SHIFT_16BPP 6
// Assume RAGB2132 (so alpha is bit 5)
#define ALPHA_SHIFT_8BPP 6
#endif

Wyświetl plik

@ -0,0 +1,152 @@
#include "hardware/regs/addressmap.h"
#include "hardware/regs/sio.h"
#include "sprite_asm_const.h"
#define POP2_OFFS (SIO_INTERP0_POP_FULL_OFFSET - SIO_INTERP0_ACCUM0_OFFSET)
.syntax unified
.cpu cortex-m0plus
.thumb
// ----------------------------------------------------------------------------
// Tile layout
//
// Some terms:
// Tileset: 1D array of tile images, concatenated image-after-image
// Tilemap: 2D array of tileset indices
//
// Each tile image in a tileset is the same size. Tiles are square, either 8 x
// 8 px or 16 x 16 px. This makes it easy to find the start of a tile image
// given the tileset base pointer and a tile index (add + shift).
//
// Tilemaps are 8 bits per tile, always.
//
// One advantage of this layout is that y coordinates can be handled outside
// of the loops in this file, which are all scanline-oriented, by offsetting
// the tileset and tilemap pointers passed in. These routines only care about
// x. The tileset pointer is offset by y modulo tile height (selecting the row
// within each tile image), and the tilemap pointer is offset by y divided by
// tile height, modulo tilemap height in tiles (selecting the tilemap row).
// Tileset: 16px tiles, 16bpp, with 1-bit alpha.
// Tilemap: 8 bit indices.
// Conditionally store the two 16bpp pixels packed into one 32-bit register.
//   rd:      destination base pointer (left unmodified)
//   rs:      source word; the low halfword is written at rd + dstoffs and the
//            high halfword at rd + dstoffs + 2. If the high pixel is written,
//            rs is left shifted right by 16.
//   rx:      scratch register, clobbered
//   dstoffs: constant byte offset from rd for the first pixel
// A pixel is only written when its alpha bit (bit ALPHA_SHIFT_16BPP - 1 of
// the halfword) is set. Flags are clobbered.
.macro do_2px_16bpp_alpha rd rs rx dstoffs
    // First pixel: shift its alpha bit out into the carry flag
    lsrs \rx, \rs, #(ALPHA_SHIFT_16BPP)
    bcc 1f
    strh \rs, [\rd, #(\dstoffs)]
1:
    // Second pixel: same test, 16 bits further up the word
    lsrs \rx, \rs, #(16 + ALPHA_SHIFT_16BPP)
    bcc 1f
    // Bring the high halfword down so strh can write it
    lsrs \rs, \rs, #16
    strh \rs, [\rd, #(\dstoffs + 2)]
1:
.endm
// tile_16bpp_16px_alpha_loop
//
// Render the pixels x0 <= x < x1 of one scanline of a tiled background
// (16 px wide tiles, 16bpp with 1-bit alpha, 8-bit tilemap indices) to dst.
// Pixels whose alpha bit is clear are skipped, leaving dst untouched.
//
// interp0 has been set up to give the next x-ward pointer into the tilemap
// with each pop. This saves us having to remember the tilemap pointer and
// tilemap x size mask in core registers.
//
// r0: dst (16bpp pixels; written for x0 onward)
// r1: tileset (already offset to the wanted row within each tile image;
//     must be word-aligned, as the main loop reads it with ldmia)
// r2: x0 (start pos in tile space, in pixels)
// r3: x1 (end pos in tile space, in pixels, exclusive)
//
// Clobbers r0-r3, ip and flags. r4-r9 are preserved.
//
// Three phases:
//   head: single pixels until x reaches a tile boundary (skipped if aligned)
//   body: whole 16 px tiles, two 8-pixel ldmia bursts per tile
//   tail: single pixels of the final partial ("runt") tile
//
// NOTE(review): the head loop always runs up to the next tile boundary without
// checking x1, so it assumes the span reaches at least that boundary (and
// that x1 >= x0). Confirm callers guarantee this.
decl_func tile_16bpp_16px_alpha_loop
    push {r4-r7, lr}
    // r8/r9 are callee-saved but cannot be pushed directly in Thumb-1
    mov r4, r8
    mov r5, r9
    push {r4, r5}

    ldr r7, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)

    // The main loop only handles whole tiles, so we may need to first copy
    // individual pixels to get tile-aligned. Skip this entirely if we are
    // already aligned, to avoid the extra interp pop.
    // r6 = (x0 & 15) << 28; zero means x0 is on a tile boundary
    lsls r6, r2, #28
    beq 3f

    // Get pointer to tileset image: pop tilemap pointer, load tile index,
    // scale by 512 bytes per tile image (16 x 16 px x 2 bytes)
    ldr r4, [r7, #POP2_OFFS]
    ldrb r4, [r4]
    lsls r4, #9
    add r4, r1
    // Offset tile image pointer to align with x0: (x0 & 15) * 2 bytes.
    // r6 still holds (x0 & 15) << 28 from the alignment test above.
    lsrs r5, r6, #27
    add r4, r5

    // Fall through into copy loop
1:
    ldrh r5, [r4]
    // Alpha bit -> carry; skip the store for transparent pixels
    lsrs r6, r5, #ALPHA_SHIFT_16BPP
    bcc 2f
    strh r5, [r0]
2:
    adds r4, #2
    adds r0, #2
    adds r2, #1
    // Keep going until x is a multiple of 16
    lsls r6, r2, #28
    bne 1b

3:
    // The next output pixel is aligned to the start of a tile. Set up main loop.

    // Tileset pointer is only needed for an add operand:
    mov r8, r1
    // r3 = number of pixels still to draw (x1 - x)
    subs r3, r2
    // dst limit pointer at end of all pixels: dst + 2 bytes per remaining pixel
    lsls r4, r3, #1
    add r4, r0
    mov r9, r4
    // dst limit pointer at end of whole tiles: dst + 32 bytes per whole tile
    lsrs r4, r3, #4
    lsls r4, #5
    add r4, r0
    mov ip, r4

    // r0 is dst, r7 is interp base, r1-r6 are free for loop.
    b 3f
2:
    // Get next tilemap pointer
    ldr r1, [r7, #POP2_OFFS]
    // Get tile image pointer
    ldrb r1, [r1]
    lsls r1, #9
    add r1, r8

    // First 8 pixels of the tile row (4 words, 2 pixels each)
    ldmia r1!, {r3-r6}
    do_2px_16bpp_alpha r0 r3 r2 0
    do_2px_16bpp_alpha r0 r4 r2 4
    do_2px_16bpp_alpha r0 r5 r2 8
    do_2px_16bpp_alpha r0 r6 r2 12
    // Last 8 pixels of the tile row
    ldmia r1!, {r3-r6}
    do_2px_16bpp_alpha r0 r3 r2 16
    do_2px_16bpp_alpha r0 r4 r2 20
    do_2px_16bpp_alpha r0 r5 r2 24
    do_2px_16bpp_alpha r0 r6 r2 28
    adds r0, #32
3:
    cmp r0, ip
    blo 2b

    // Tidy up runt tile at end. Don't worry about extra interp pop.
    // (The pop and tilemap read happen even when no runt pixels remain.)
    ldr r4, [r7, #POP2_OFFS]
    ldrb r4, [r4]
    lsls r4, #9
    add r4, r8
    b 3f
1:
    ldrh r5, [r4]
    lsrs r6, r5, #ALPHA_SHIFT_16BPP
    bcc 2f
    strh r5, [r0]
2:
    adds r4, #2
    adds r0, #2
3:
    cmp r0, r9
    blo 1b

    // Restore r8/r9 via low registers, then return by popping pc
    pop {r4, r5}
    mov r8, r4
    mov r9, r5
    pop {r4-r7, pc}