From a16ab9744ca4f7f7375d3c74f1397311ef372785 Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Sat, 20 Nov 2021 16:28:17 +0000 Subject: [PATCH] Sketching tiled background routines --- software/libsprite/CMakeLists.txt | 2 + software/libsprite/sprite.S | 23 +--- software/libsprite/sprite_asm_const.h | 26 +++++ software/libsprite/tile.S | 152 ++++++++++++++++++++++++++ 4 files changed, 182 insertions(+), 21 deletions(-) create mode 100644 software/libsprite/sprite_asm_const.h create mode 100644 software/libsprite/tile.S diff --git a/software/libsprite/CMakeLists.txt b/software/libsprite/CMakeLists.txt index a7341fd..06b882d 100644 --- a/software/libsprite/CMakeLists.txt +++ b/software/libsprite/CMakeLists.txt @@ -2,9 +2,11 @@ add_library(libsprite INTERFACE) target_sources(libsprite INTERFACE ${CMAKE_CURRENT_LIST_DIR}/affine_transform.h + ${CMAKE_CURRENT_LIST_DIR}/sprite_asm_const.h ${CMAKE_CURRENT_LIST_DIR}/sprite.S ${CMAKE_CURRENT_LIST_DIR}/sprite.c ${CMAKE_CURRENT_LIST_DIR}/sprite.h + ${CMAKE_CURRENT_LIST_DIR}/tile.S ) target_include_directories(libsprite INTERFACE ${CMAKE_CURRENT_LIST_DIR}) diff --git a/software/libsprite/sprite.S b/software/libsprite/sprite.S index c64066a..c1d4807 100644 --- a/software/libsprite/sprite.S +++ b/software/libsprite/sprite.S @@ -3,6 +3,8 @@ #include "hardware/regs/addressmap.h" #include "hardware/regs/sio.h" +#include "sprite_asm_const.h" + #define POP2_OFFS (SIO_INTERP0_POP_FULL_OFFSET - SIO_INTERP0_ACCUM0_OFFSET) #define CTRL0_OFFS (SIO_INTERP0_CTRL_LANE0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET) #define INTERP1 (SIO_INTERP1_ACCUM0_OFFSET - SIO_INTERP0_ACCUM0_OFFSET) @@ -11,15 +13,6 @@ .cpu cortex-m0plus .thumb -// Put every function in its own ELF section, to permit linker GC -.macro decl_func name -.section .time_critical.\name, "ax" -.global \name -.type \name,%function -.thumb_func -\name: -.endm - // ---------------------------------------------------------------------------- // Colour fill @@ -248,10 +241,6 @@ decl_func 
sprite_blit8 bhi 1b bx lr -// Assume RAGB2132 (so alpha is bit 5) - -#define ALPHA_SHIFT_8BPP 6 - .macro sprite_blit8_alpha_body n ldrb r3, [r1, #\n] lsrs r2, r3, #ALPHA_SHIFT_8BPP @@ -334,14 +323,6 @@ decl_func sprite_blit16 bhi 1b bx lr -// Assume RGAB5515 (so alpha is bit 5) -// Note the alpha bit being in the same position as RAGB2132 is a coincidence. -// We are just stealing an LSB such that we can treat our alpha pixels in the -// same way as non-alpha pixels when encoding (and the co-opted channel LSB -// always ends up being set on alpha pixels, which is pretty harmless) - -#define ALPHA_SHIFT_16BPP 6 - .macro sprite_blit16_alpha_body n ldrh r3, [r1, #2*\n] lsrs r2, r3, #ALPHA_SHIFT_16BPP diff --git a/software/libsprite/sprite_asm_const.h b/software/libsprite/sprite_asm_const.h new file mode 100644 index 0000000..636f4d6 --- /dev/null +++ b/software/libsprite/sprite_asm_const.h @@ -0,0 +1,26 @@ +#ifndef _SPRITE_ASM_CONST +#define _SPRITE_ASM_CONST + +// Put every function in its own ELF section, to permit linker GC +.macro decl_func name +.section .time_critical.\name, "ax" +.global \name +.type \name,%function +.thumb_func +\name: +.endm + +// Assume RGAB5515 (so alpha is bit 5) +// Note the alpha bit being in the same position as RAGB2132 is a coincidence. 
+// We are just stealing an LSB such that we can treat our alpha pixels in the +// same way as non-alpha pixels when encoding (and the co-opted channel LSB +// always ends up being set on alpha pixels, which is pretty harmless) + +#define ALPHA_SHIFT_16BPP 6 + +// Assume RAGB2132 (so alpha is bit 5) + +#define ALPHA_SHIFT_8BPP 6 + + +#endif diff --git a/software/libsprite/tile.S b/software/libsprite/tile.S new file mode 100644 index 0000000..69271df --- /dev/null +++ b/software/libsprite/tile.S @@ -0,0 +1,152 @@ +#include "hardware/regs/addressmap.h" +#include "hardware/regs/sio.h" + +#include "sprite_asm_const.h" + +#define POP2_OFFS (SIO_INTERP0_POP_FULL_OFFSET - SIO_INTERP0_ACCUM0_OFFSET) + +.syntax unified +.cpu cortex-m0plus +.thumb + +// ---------------------------------------------------------------------------- +// Tile layout +// +// Some terms: +// Tileset: 1D array of tile images, concatenated image-after-image +// Tilemap: 2D array of tileset indices +// +// Each tile image in a tileset is the same size. Tiles are square, either 8 x +// 8 px or 16 x 16 px. This makes it easy to find the start of a tile image +// given the tileset base pointer and a tile index (add + shift). +// +// Tilemaps are 8 bits per tile, always. +// +// One advantage of this layout is that y coordinates can be handled outside +// of the loops in this file, which are all scanline-oriented, by offsetting +// the tileset and tilemap pointers passed in. These routines only care about +// x. The tileset pointer is offset by y modulo tile height, and the tilemap +// pointer is offset by y divided by tile height, modulo tilemap height in +// tiles. + +// Tileset: 16px tiles, 16bpp, with 1-bit alpha. +// Tilemap: 8 bit indices. 
+
+.macro do_2px_16bpp_alpha rd rs rx dstoffs // rd: dst base, rs: 2 packed pixels (clobbered), rx: scratch
+	lsrs \rx, \rs, #ALPHA_SHIFT_16BPP // carry = alpha bit of the low pixel
+	bcc 1f // transparent: leave dst untouched
+	strh \rs, [\rd, #\dstoffs] // strh stores only the low halfword
+1:
+	lsrs \rx, \rs, #ALPHA_SHIFT_16BPP + 16 // carry = alpha bit of the high pixel
+	bcc 1f
+	lsrs \rs, #16 // move the high pixel down so strh can store it
+	strh \rs, [\rd, #\dstoffs + 2]
+1:
+.endm
+
+// interp0 has been set up to give the next x-ward pointer into the tilemap
+// with each pop. This saves us having to remember the tilemap pointer and
+// tilemap x size mask in core registers.
+
+// r0: dst (pointer to the output pixel for x0, 2 bytes per pixel)
+// r1: tileset (tile image n starts at tileset + n * 512 bytes)
+// r2: x0 (start pos in tile space)
+// r3: x1 (end pos in tile space, exclusive)
+
+decl_func tile_16bpp_16px_alpha_loop
+	push {r4-r7, lr}
+	mov r4, r8 // r8/r9 are saved via r4/r5 and restored before returning
+	mov r5, r9
+	push {r4, r5}
+	ldr r7, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
+
+	// The main loop only handles whole tiles, so we may need to first copy
+	// individual pixels to get tile-aligned. Skip this entirely if we are
+	// already aligned, to avoid the extra interp pop.
+	lsls r6, r2, #28 // Z set iff the low 4 bits of x0 are zero
+	beq 3f
+
+	// Get pointer to tileset image
+	ldr r4, [r7, #POP2_OFFS]
+	ldrb r4, [r4]
+	lsls r4, #9 // tile index * 512 bytes
+	add r4, r1
+	// Offset tile image pointer to align with x0
+	lsls r5, r2, #28
+	lsrs r5, #27 // (x0 % 16) * 2 bytes
+	add r4, r5
+	// Fall through into copy loop (NOTE(review): stops at tile boundary, never checks x1)
+1:
+	ldrh r5, [r4]
+	lsrs r6, r5, #ALPHA_SHIFT_16BPP // carry = alpha bit
+	bcc 2f
+	strh r5, [r0]
+2:
+	adds r4, #2
+	adds r0, #2
+	adds r2, #1
+	lsls r6, r2, #28 // loop until x0 reaches a multiple of 16
+	bne 1b
+3:
+	// The next output pixel is aligned to the start of a tile. Set up main loop.
+
+	// Tileset pointer is only needed for an add operand:
+	mov r8, r1
+	// dst limit pointer at end of all pixels:
+	subs r3, r2 // r3 = x1 - x0 = pixels still to draw
+	lsls r4, r3, #1 // remaining pixels * 2 bytes (was r2: used position, not count)
+	add r4, r0
+	mov r9, r4
+	// dst limit pointer at end of whole tiles:
+	lsrs r4, r3, #4 // whole tiles remaining (was r2: used position, not count)
+	lsls r4, #5 // 32 dst bytes per whole tile
+	add r4, r0
+	mov ip, r4
+
+	// r0 is dst, r7 is interp base, r1-r6 are free for loop.
+	b 3f
+2:
+	// Get next tilemap pointer
+	ldr r1, [r7, #POP2_OFFS]
+	// Get tile image pointer
+	ldrb r1, [r1]
+	lsls r1, #9
+	add r1, r8
+
+	ldmia r1!, {r3-r6}
+	do_2px_16bpp_alpha r0 r3 r2 0
+	do_2px_16bpp_alpha r0 r4 r2 4
+	do_2px_16bpp_alpha r0 r5 r2 8
+	do_2px_16bpp_alpha r0 r6 r2 12
+	ldmia r1!, {r3-r6}
+	do_2px_16bpp_alpha r0 r3 r2 16
+	do_2px_16bpp_alpha r0 r4 r2 20
+	do_2px_16bpp_alpha r0 r5 r2 24
+	do_2px_16bpp_alpha r0 r6 r2 28
+	adds r0, #32
+3:
+	cmp r0, ip
+	blo 2b
+
+	// Tidy up runt tile at end. Don't worry about extra interp pop.
+	ldr r4, [r7, #POP2_OFFS]
+	ldrb r4, [r4]
+	lsls r4, #9
+	add r4, r8
+	b 3f
+1:
+	ldrh r5, [r4]
+	lsrs r6, r5, #ALPHA_SHIFT_16BPP
+	bcc 2f
+	strh r5, [r0]
+2:
+	adds r4, #2
+	adds r0, #2
+3:
+	cmp r0, r9 // r9 = dst limit for the whole span
+	blo 1b
+
+	pop {r4, r5}
+	mov r8, r4
+	mov r9, r5
+	pop {r4-r7, pc}