mirror of https://github.com/Wren6991/PicoDVI
Balance rendering/encode across both cores in tiles_and_sprites
parent
df00a0f3aa
commit
bca9aa7906
|
@ -6,6 +6,7 @@ target_compile_options(tiles_and_sprites PRIVATE -Wall)
|
||||||
|
|
||||||
target_compile_definitions(tiles_and_sprites PRIVATE
|
target_compile_definitions(tiles_and_sprites PRIVATE
|
||||||
DVI_DEFAULT_SERIAL_CONFIG=${DVI_DEFAULT_SERIAL_CONFIG}
|
DVI_DEFAULT_SERIAL_CONFIG=${DVI_DEFAULT_SERIAL_CONFIG}
|
||||||
|
DVI_N_TMDS_BUFFERS=5
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(tiles_and_sprites
|
target_link_libraries(tiles_and_sprites
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
#include "dvi.h"
|
#include "dvi.h"
|
||||||
#include "dvi_serialiser.h"
|
#include "dvi_serialiser.h"
|
||||||
|
#include "tmds_encode.h"
|
||||||
#include "common_dvi_pin_configs.h"
|
#include "common_dvi_pin_configs.h"
|
||||||
#include "sprite.h"
|
#include "sprite.h"
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
|
@ -73,7 +74,7 @@
|
||||||
#define MAP_WIDTH 512
|
#define MAP_WIDTH 512
|
||||||
#define MAP_HEIGHT 256
|
#define MAP_HEIGHT 256
|
||||||
|
|
||||||
#define N_CHARACTERS 75
|
#define N_CHARACTERS 100
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int16_t pos_x;
|
int16_t pos_x;
|
||||||
|
@ -159,9 +160,8 @@ void update(game_state_t *state) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct dvi_inst dvi0;
|
|
||||||
|
|
||||||
void render(const game_state_t *gstate) {
|
void render_scanline(uint16_t *pixbuf, uint y, const game_state_t *gstate) {
|
||||||
tilebg_t bg = {
|
tilebg_t bg = {
|
||||||
.xscroll = gstate->cam_x,
|
.xscroll = gstate->cam_x,
|
||||||
.yscroll = gstate->cam_y,
|
.yscroll = gstate->cam_y,
|
||||||
|
@ -178,41 +178,56 @@ void render(const game_state_t *gstate) {
|
||||||
.has_opacity_metadata = false,
|
.has_opacity_metadata = false,
|
||||||
};
|
};
|
||||||
|
|
||||||
for (uint y = 0; y < FRAME_HEIGHT; ++y) {
|
tile16(pixbuf, &bg, y, FRAME_WIDTH);
|
||||||
uint16_t *pixbuf;
|
|
||||||
queue_remove_blocking(&dvi0.q_colour_free, &pixbuf);
|
|
||||||
|
|
||||||
tile16(pixbuf, &bg, y, FRAME_WIDTH);
|
for (int i = 0; i < N_CHARACTERS; ++i) {
|
||||||
|
const character_t *ch = &gstate->chars[i];
|
||||||
for (int i = 0; i < N_CHARACTERS; ++i) {
|
sp.x = ch->pos_x - gstate->cam_x;
|
||||||
const character_t *ch = &gstate->chars[i];
|
const uint16_t *basetile = (const uint16_t*)zelda_mini_plus_walk +
|
||||||
sp.x = ch->pos_x - gstate->cam_x;
|
16 * 16 * (102 + (ch->dir << 2) + ch->anim_frame);
|
||||||
const uint16_t *basetile = (const uint16_t*)zelda_mini_plus_walk +
|
for (int tile = 0; tile < ch->ntiles; ++tile) {
|
||||||
16 * 16 * (102 + (ch->dir << 2) + ch->anim_frame);
|
sp.y = ch->pos_y - gstate->cam_y + tile * 16;
|
||||||
for (int tile = 0; tile < ch->ntiles; ++tile) {
|
sp.img = basetile + tile * ch->tilestride * 16 * 16;
|
||||||
sp.y = ch->pos_y - gstate->cam_y + tile * 16;
|
sprite_sprite16(pixbuf, &sp, y, FRAME_WIDTH);
|
||||||
sp.img = basetile + tile * ch->tilestride * 16 * 16;
|
|
||||||
sprite_sprite16(pixbuf, &sp, y, FRAME_WIDTH);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
queue_add_blocking(&dvi0.q_colour_valid, &pixbuf);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// DVI setup & launch
|
// DVI setup & launch
|
||||||
|
|
||||||
#define N_SCANLINE_BUFFERS 4
|
struct dvi_inst dvi0;
|
||||||
uint16_t static_scanbuf[N_SCANLINE_BUFFERS][FRAME_WIDTH];
|
game_state_t state;
|
||||||
|
|
||||||
|
uint16_t __scratch_y("render") __attribute__((aligned(4))) core0_scanbuf[FRAME_WIDTH];
|
||||||
|
uint16_t __scratch_x("render") __attribute__((aligned(4))) core1_scanbuf[FRAME_WIDTH];
|
||||||
|
|
||||||
|
// - Core 0 pops two TMDS buffers
|
||||||
|
// - Passes one to core 1
|
||||||
|
// - Renders own buffer and pushes to DVI queue <- core 1 waits here before starting DVI
|
||||||
|
// - Retrieves core 1's TMDS buffer and pushes that to DVI queue as well
|
||||||
|
|
||||||
|
void encode_scanline(uint16_t *pixbuf, uint32_t *tmdsbuf) {
|
||||||
|
uint pixwidth = dvi0.timing->h_active_pixels;
|
||||||
|
uint words_per_channel = pixwidth / DVI_SYMBOLS_PER_WORD;
|
||||||
|
tmds_encode_data_channel_16bpp((uint32_t*)pixbuf, tmdsbuf + 0 * words_per_channel, pixwidth / 2, DVI_16BPP_BLUE_MSB, DVI_16BPP_BLUE_LSB );
|
||||||
|
tmds_encode_data_channel_16bpp((uint32_t*)pixbuf, tmdsbuf + 1 * words_per_channel, pixwidth / 2, DVI_16BPP_GREEN_MSB, DVI_16BPP_GREEN_LSB);
|
||||||
|
tmds_encode_data_channel_16bpp((uint32_t*)pixbuf, tmdsbuf + 2 * words_per_channel, pixwidth / 2, DVI_16BPP_RED_MSB, DVI_16BPP_RED_LSB );
|
||||||
|
}
|
||||||
|
|
||||||
void core1_main() {
|
void core1_main() {
|
||||||
dvi_register_irqs_this_core(&dvi0, DMA_IRQ_0);
|
dvi_register_irqs_this_core(&dvi0, DMA_IRQ_0);
|
||||||
while (queue_is_empty(&dvi0.q_colour_valid))
|
while (queue_is_empty(&dvi0.q_tmds_valid))
|
||||||
__wfe();
|
__wfe();
|
||||||
dvi_start(&dvi0);
|
dvi_start(&dvi0);
|
||||||
dvi_scanbuf_main_16bpp(&dvi0);
|
while (1) {
|
||||||
__builtin_unreachable();
|
for (uint y = 1; y < FRAME_HEIGHT; y += 2) {
|
||||||
|
render_scanline(core1_scanbuf, y, &state);
|
||||||
|
uint32_t *tmdsbuf = (uint32_t*)multicore_fifo_pop_blocking();
|
||||||
|
encode_scanline(core1_scanbuf, tmdsbuf);
|
||||||
|
multicore_fifo_push_blocking((uintptr_t)tmdsbuf);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
@ -234,17 +249,20 @@ int main() {
|
||||||
printf("Core 1 start\n");
|
printf("Core 1 start\n");
|
||||||
multicore_launch_core1(core1_main);
|
multicore_launch_core1(core1_main);
|
||||||
|
|
||||||
printf("Allocating scanline buffers\n");
|
|
||||||
for (int i = 0; i < N_SCANLINE_BUFFERS; ++i) {
|
|
||||||
void *bufptr = &static_scanbuf[i];
|
|
||||||
queue_add_blocking((void*)&dvi0.q_colour_free, &bufptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
printf("Start rendering\n");
|
printf("Start rendering\n");
|
||||||
game_state_t state;
|
|
||||||
game_init(&state);
|
game_init(&state);
|
||||||
while (1) {
|
while (1) {
|
||||||
render(&state);
|
for (uint y = 0; y < FRAME_HEIGHT; y += 2) {
|
||||||
|
uint32_t *tmds0, *tmds1;
|
||||||
|
queue_remove_blocking_u32(&dvi0.q_tmds_free, &tmds0);
|
||||||
|
queue_remove_blocking_u32(&dvi0.q_tmds_free, &tmds1);
|
||||||
|
multicore_fifo_push_blocking((uintptr_t)tmds1);
|
||||||
|
render_scanline(core0_scanbuf, y, &state);
|
||||||
|
encode_scanline(core0_scanbuf, tmds0);
|
||||||
|
queue_add_blocking_u32(&dvi0.q_tmds_valid, &tmds0);
|
||||||
|
tmds1 = (uint32_t*)multicore_fifo_pop_blocking();
|
||||||
|
queue_add_blocking_u32(&dvi0.q_tmds_valid, &tmds1);
|
||||||
|
}
|
||||||
update(&state);
|
update(&state);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@
|
||||||
strh \rs, [\rd, #\dstoffs + 2]
|
strh \rs, [\rd, #\dstoffs + 2]
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
// interp0 has been set up to give the next x-ward pointer into the tilemap
|
// interp1 has been set up to give the next x-ward pointer into the tilemap
|
||||||
// with each pop. This saves us having to remember the tilemap pointer and
|
// with each pop. This saves us having to remember the tilemap pointer and
|
||||||
// tilemap x size mask in core registers.
|
// tilemap x size mask in core registers.
|
||||||
|
|
||||||
|
@ -61,12 +61,13 @@
|
||||||
|
|
||||||
// Instantiated with alpha=1 and alpha=0 to get both variants of the loop.
|
// Instantiated with alpha=1 and alpha=0 to get both variants of the loop.
|
||||||
// Linker garbage collection ensures we only keep the versions we use.
|
// Linker garbage collection ensures we only keep the versions we use.
|
||||||
|
|
||||||
.macro tile16_16px_loop_alpha_or_nonalpha alpha
|
.macro tile16_16px_loop_alpha_or_nonalpha alpha
|
||||||
push {r4-r7, lr}
|
push {r4-r7, lr}
|
||||||
mov r4, r8
|
mov r4, r8
|
||||||
mov r5, r9
|
mov r5, r9
|
||||||
push {r4, r5}
|
push {r4, r5}
|
||||||
ldr r7, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
|
ldr r7, =(SIO_BASE + SIO_INTERP1_ACCUM0_OFFSET)
|
||||||
|
|
||||||
// The main loop only handles whole tiles, so we may need to first copy
|
// The main loop only handles whole tiles, so we may need to first copy
|
||||||
// individual pixels to get tile-aligned. Skip this entirely if we are
|
// individual pixels to get tile-aligned. Skip this entirely if we are
|
||||||
|
|
|
@ -19,8 +19,6 @@ static inline void setup_interp_tilemap_ptrs(interp_hw_t *interp, const uint8_t
|
||||||
interp_set_config(interp, 0, &c);
|
interp_set_config(interp, 0, &c);
|
||||||
interp->accum[0] = x0;
|
interp->accum[0] = x0;
|
||||||
interp->base[0] = 1;
|
interp->base[0] = 1;
|
||||||
interp->accum[1] = 0; // necessary if ctrl is 0?
|
|
||||||
interp->base[1] = 0;
|
|
||||||
interp->ctrl[1] = 0;
|
interp->ctrl[1] = 0;
|
||||||
interp->base[2] = (uintptr_t)row;
|
interp->base[2] = (uintptr_t)row;
|
||||||
}
|
}
|
||||||
|
@ -39,10 +37,10 @@ void __ram_func(tile16)(uint16_t *scanbuf, const tilebg_t *bg, uint raster_y, ui
|
||||||
uint tile_x_at_tx0 = tx0 >> tile_log_size(bg->tilesize);
|
uint tile_x_at_tx0 = tx0 >> tile_log_size(bg->tilesize);
|
||||||
uint tile_x_msb = bg->log_size_x - tile_log_size(bg->tilesize) - 1;
|
uint tile_x_msb = bg->log_size_x - tile_log_size(bg->tilesize) - 1;
|
||||||
|
|
||||||
// NOTE this clobbers interp0, currently this will cause issues if you try
|
// NOTE this clobbers interp1, currently this will cause issues if you try
|
||||||
// to run tile code and TMDS encode on the same core. Could be fixed by
|
// to run tile code and certain TMDS encode loops on the same core. Could
|
||||||
// save/restore, using interp1 by default, etc but fine for now
|
// be fixed by save/restore, at the cost of some performance.
|
||||||
setup_interp_tilemap_ptrs(interp0_hw, tilemap_row_ty, tile_x_at_tx0, tile_x_msb);
|
setup_interp_tilemap_ptrs(interp1_hw, tilemap_row_ty, tile_x_at_tx0, tile_x_msb);
|
||||||
|
|
||||||
// Apply intra-tile y offset in advance, since this will be the same for
|
// Apply intra-tile y offset in advance, since this will be the same for
|
||||||
// all pixels of all tiles we render in this call.
|
// all pixels of all tiles we render in this call.
|
||||||
|
|
Loading…
Reference in New Issue