HUB75 Less RAM, DMA flip

Switch from 12-bit to 10-bit gamma to fit RGB into a uint32_t. Simplifies PIO and halves the RAM usage for F/B buffer.

Switch "flip" to *literally* swap the front and back buffers, and then asynchronously DMA the new back buffer into the front ready for the next draw.
pull/193/head
Phil Howard 2021-11-24 19:31:00 +00:00
rodzic 069ca38ae7
commit 30a455a9d5
3 zmienionych plików z 127 dodań i 109 usunięć

Wyświetl plik

@ -37,7 +37,7 @@ Pixel hsv_to_rgb(float h, float s, float v) {
}
Hub75::Hub75(uint8_t width, uint8_t height, Pixel *buffer, PanelType panel_type)
Hub75::Hub75(uint8_t width, uint8_t height, Pixel *buffer, PanelType panel_type, bool inverted_stb)
: width(width), height(height), panel_type(panel_type)
{
// Set up allllll the GPIO
@ -167,6 +167,9 @@ void Hub75::start(irq_handler_t handler) {
hub75_data_rgb888_program_init(pio, sm_data, data_prog_offs, DATA_BASE_PIN, pin_clk);
hub75_row_program_init(pio, sm_row, row_prog_offs, ROWSEL_BASE_PIN, ROWSEL_N_PINS, pin_stb);
// Prevent flicker in Python caused by the smaller dataset just blasting through the PIO too quickly
pio_sm_set_clkdiv(pio, sm_data, 2.0f);
if(dma_channel_is_claimed(dma_channel)) {
irq_set_enabled(DMA_IRQ_0, false);
dma_channel_set_irq0_enabled(dma_channel, false);
@ -176,6 +179,14 @@ void Hub75::start(irq_handler_t handler) {
dma_channel_unclaim(dma_channel);
}
if(dma_channel_is_claimed(dma_flip_channel)){
irq_set_enabled(DMA_IRQ_1, false);
dma_channel_set_irq1_enabled(dma_flip_channel, false);
irq_remove_handler(DMA_IRQ_1, handler);
dma_channel_wait_for_finish_blocking(dma_flip_channel);
dma_channel_unclaim(dma_flip_channel);
}
dma_channel_claim(dma_channel);
dma_channel_config config = dma_channel_get_default_config(dma_channel);
channel_config_set_transfer_data_size(&config, DMA_SIZE_32);
@ -184,33 +195,53 @@ void Hub75::start(irq_handler_t handler) {
dma_channel_configure(dma_channel, &config, &pio->txf[sm_data], NULL, 0, false);
dma_channel_set_irq0_enabled(dma_channel, true);
irq_set_enabled(pio_get_dreq(pio, sm_data, true), true);
dma_channel_claim(dma_flip_channel);
dma_channel_config flip_config = dma_channel_get_default_config(dma_flip_channel);
channel_config_set_transfer_data_size(&flip_config, DMA_SIZE_32);
channel_config_set_read_increment(&flip_config, true);
channel_config_set_write_increment(&flip_config, true);
channel_config_set_bswap(&flip_config, false);
dma_channel_configure(dma_flip_channel, &flip_config, nullptr, nullptr, 0, false);
dma_channel_set_irq1_enabled(dma_flip_channel, true);
// Same handler for both DMA channels
irq_set_exclusive_handler(DMA_IRQ_0, handler);
irq_set_enabled(DMA_IRQ_0, true);
irq_set_exclusive_handler(DMA_IRQ_1, handler);
irq_set_enabled(DMA_IRQ_1, true);
row = 0;
bit = 0;
dma_channel_set_trans_count(dma_channel, width * 4, false);
dma_channel_set_trans_count(dma_channel, width * 2, false);
dma_channel_set_read_addr(dma_channel, &back_buffer, true);
} else {
while (running) {
display_update();
}
}
}
void Hub75::stop(irq_handler_t handler) {
running = false;
if(dma_channel_is_claimed(dma_channel)) {
// stop and release the dma channel
irq_set_enabled(DMA_IRQ_0, false);
// stop and release the dma channel
dma_channel_set_irq0_enabled(dma_channel, false);
irq_set_enabled(pio_get_dreq(pio, sm_data, true), false);
irq_remove_handler(DMA_IRQ_0, handler);
dma_channel_wait_for_finish_blocking(dma_channel);
dma_channel_unclaim(dma_channel);
irq_remove_handler(DMA_IRQ_0, handler);
}
if(dma_channel_is_claimed(dma_flip_channel)) {
dma_channel_wait_for_finish_blocking(dma_flip_channel);
irq_set_enabled(DMA_IRQ_1, false);
dma_channel_set_irq1_enabled(dma_flip_channel, false);
dma_channel_unclaim(dma_flip_channel);
irq_remove_handler(DMA_IRQ_1, handler);
}
hub75_wait_tx_stall(pio, sm_row);
@ -244,60 +275,33 @@ void Hub75::clear() {
}
void Hub75::flip() {
// Flip and block until the front buffer has been prepared
do_flip = true;
while(do_flip) {};
}
void Hub75::display_update() {
if (do_flip) {
memcpy(back_buffer, front_buffer, width * height * sizeof(Pixel));
do_flip = false;
}
for(auto bit = 1u; bit < 1 << 11; bit <<= 1) {
for(auto y = 0u; y < height / 2; y++) {
auto row_top = y * width;
auto row_bottom = (y + height / 2) * width;
for(auto x = 0u; x < width; x++) {
Pixel pixel_top = back_buffer[row_top + x];
Pixel pixel_bottom = back_buffer[row_bottom + x];
gpio_put(pin_clk, !clk_polarity);
gpio_put(pin_r0, (bool)(pixel_top.r & bit));
gpio_put(pin_g0, (bool)(pixel_top.g & bit));
gpio_put(pin_b0, (bool)(pixel_top.b & bit));
gpio_put(pin_r1, (bool)(pixel_bottom.r & bit));
gpio_put(pin_g1, (bool)(pixel_bottom.g & bit));
gpio_put(pin_b1, (bool)(pixel_bottom.b & bit));
gpio_put(pin_clk, clk_polarity);
}
gpio_put_masked(0b11111 << pin_row_a, y << pin_row_a);
gpio_put(pin_stb, stb_polarity);
gpio_put(pin_oe, oe_polarity);
for(auto s = 0u; s < bit; ++s) {
asm volatile("nop \nnop");
}
gpio_put(pin_stb, !stb_polarity);
gpio_put(pin_oe, !oe_polarity);
}
}
while(do_flip) {
best_effort_wfe_or_timeout(make_timeout_time_us(10));
};
}
void Hub75::dma_complete() {
if (do_flip && bit == 0 && row == 0) {
memcpy(back_buffer, front_buffer, width * height * sizeof(Pixel));
if(dma_channel_get_irq1_status(dma_flip_channel)) {
dma_channel_acknowledge_irq1(dma_flip_channel);
do_flip = false;
}
if(dma_channel_get_irq0_status(dma_channel)) {
dma_channel_acknowledge_irq0(dma_channel);
if (do_flip && bit == 0 && row == 0) {
// Literally flip the front and back buffers by swapping their addresses
Pixel *tmp = back_buffer;
back_buffer = front_buffer;
front_buffer = tmp;
// Then, read the contents of the back buffer into the front buffer
dma_channel_set_read_addr(dma_flip_channel, back_buffer, false);
dma_channel_set_write_addr(dma_flip_channel, front_buffer, false);
dma_channel_set_trans_count(dma_flip_channel, width * height, true);
}
// SM is finished when it stalls on empty TX FIFO (or, y'know, DMA callback)
hub75_wait_tx_stall(pio, sm_data);
@ -312,13 +316,13 @@ void Hub75::dma_complete() {
if(row == height / 2) {
row = 0;
bit++;
if (bit == 12) {
if (bit == BIT_DEPTH) {
bit = 0;
}
hub75_data_rgb888_set_shift(pio, sm_data, data_prog_offs, bit);
}
}
dma_channel_set_trans_count(dma_channel, width * 4, false); // This count is in uint32_t which is 1/2th the size of Pixel
dma_channel_set_read_addr(dma_channel, &back_buffer[row * width * 2], true);
dma_channel_set_trans_count(dma_channel, width * 2, false);
dma_channel_set_read_addr(dma_channel, &back_buffer[row * width * 2], true);
}
}

Wyświetl plik

@ -11,39 +11,35 @@ const uint DATA_BASE_PIN = 0;
const uint DATA_N_PINS = 6;
const uint ROWSEL_BASE_PIN = 6;
const uint ROWSEL_N_PINS = 5;
const uint BIT_DEPTH = 10;
// This gamma table is used to correct our 8-bit (0-255) colours up to 11-bit,
// allowing us to gamma correct without losing dynamic range.
constexpr uint16_t GAMMA_12BIT[256] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 50,
52, 54, 57, 59, 62, 65, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94,
98, 101, 105, 108, 112, 115, 119, 123, 127, 131, 135, 139, 143, 147, 151, 155,
160, 164, 169, 173, 178, 183, 187, 192, 197, 202, 207, 212, 217, 223, 228, 233,
239, 244, 250, 255, 261, 267, 273, 279, 285, 291, 297, 303, 309, 316, 322, 328,
335, 342, 348, 355, 362, 369, 376, 383, 390, 397, 404, 412, 419, 427, 434, 442,
449, 457, 465, 473, 481, 489, 497, 505, 513, 522, 530, 539, 547, 556, 565, 573,
582, 591, 600, 609, 618, 628, 637, 646, 656, 665, 675, 685, 694, 704, 714, 724,
734, 744, 755, 765, 775, 786, 796, 807, 817, 828, 839, 850, 861, 872, 883, 894,
905, 917, 928, 940, 951, 963, 975, 987, 998, 1010, 1022, 1035, 1047, 1059, 1071, 1084,
1096, 1109, 1122, 1135, 1147, 1160, 1173, 1186, 1199, 1213, 1226, 1239, 1253, 1266, 1280, 1294,
1308, 1321, 1335, 1349, 1364, 1378, 1392, 1406, 1421, 1435, 1450, 1465, 1479, 1494, 1509, 1524,
1539, 1554, 1570, 1585, 1600, 1616, 1631, 1647, 1663, 1678, 1694, 1710, 1726, 1743, 1759, 1775,
1791, 1808, 1824, 1841, 1858, 1875, 1891, 1908, 1925, 1943, 1960, 1977, 1994, 2012, 2029, 2047};
// We don't *need* to make Pixel a fancy struct with RGB values, but it helps.
#pragma pack(push, 1)
struct alignas(4) Pixel {
uint16_t _;
uint16_t r;
uint16_t g;
uint16_t b;
constexpr Pixel() : _(0), r(0), g(0), b(0) {};
constexpr Pixel(uint8_t r, uint8_t g, uint8_t b) : _(0), r(GAMMA_12BIT[r]), g(GAMMA_12BIT[g]), b(GAMMA_12BIT[b]) {};
constexpr uint16_t GAMMA_10BIT[256] = {
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16,
16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 24, 25,
26, 27, 29, 30, 31, 33, 34, 35, 37, 38, 40, 41, 43, 44, 46, 47,
49, 51, 53, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78,
80, 82, 85, 87, 89, 92, 94, 96, 99, 101, 104, 106, 109, 112, 114, 117,
120, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, 155, 158, 161, 164,
168, 171, 174, 178, 181, 185, 188, 192, 195, 199, 202, 206, 210, 214, 217, 221,
225, 229, 233, 237, 241, 245, 249, 253, 257, 261, 265, 270, 274, 278, 283, 287,
291, 296, 300, 305, 309, 314, 319, 323, 328, 333, 338, 343, 347, 352, 357, 362,
367, 372, 378, 383, 388, 393, 398, 404, 409, 414, 420, 425, 431, 436, 442, 447,
453, 459, 464, 470, 476, 482, 488, 494, 499, 505, 511, 518, 524, 530, 536, 542,
548, 555, 561, 568, 574, 580, 587, 593, 600, 607, 613, 620, 627, 633, 640, 647,
654, 661, 668, 675, 682, 689, 696, 703, 711, 718, 725, 733, 740, 747, 755, 762,
770, 777, 785, 793, 800, 808, 816, 824, 832, 839, 847, 855, 863, 872, 880, 888,
896, 904, 912, 921, 929, 938, 946, 954, 963, 972, 980, 989, 997, 1006, 1015, 1023
};
// A single display pixel packed into one 32-bit word.
// Each 8-bit input channel is mapped through GAMMA_10BIT (values 0-1023) and
// the three results are packed as: red in bits 0-9, green in bits 10-19,
// blue in bits 20-29. Bits 30-31 are never set by these constructors.
struct Pixel {
uint32_t color;
// Default pixel: all channels zero.
constexpr Pixel() : color(0) {};
// Build a pixel from 8-bit r/g/b, gamma-correcting each channel before packing.
constexpr Pixel(uint8_t r, uint8_t g, uint8_t b) : color((GAMMA_10BIT[b] << 20) | (GAMMA_10BIT[g] << 10) | GAMMA_10BIT[r]) {};
};
#pragma pack(pop)
enum PanelType {
PANEL_GENERIC = 0,
@ -62,6 +58,7 @@ class Hub75 {
// DMA & PIO
uint dma_channel = 0;
uint dma_flip_channel = 1;
volatile bool do_flip = false;
uint bit = 0;
uint row = 0;
@ -108,8 +105,9 @@ class Hub75 {
unsigned int pin_led_g = 17;
unsigned int pin_led_b = 18;
Hub75(uint8_t width, uint8_t height, Pixel *buffer) : Hub75(width, height, buffer, PANEL_GENERIC) {};
Hub75(uint8_t width, uint8_t height, Pixel *buffer, PanelType panel_type);
Hub75(uint8_t width, uint8_t height, Pixel *buffer) : Hub75(width, height, buffer, PANEL_GENERIC, false) {};
Hub75(uint8_t width, uint8_t height, Pixel *buffer, PanelType panel_type) : Hub75(width, height, buffer, panel_type, false) {};
Hub75(uint8_t width, uint8_t height, Pixel *buffer, PanelType panel_type, bool inverted_stb);
~Hub75();
void FM6126A_write_register(uint16_t value, uint8_t position);

Wyświetl plik

@ -26,6 +26,28 @@ pulse_loop:
jmp x-- pulse_loop side 0x0 ; Assert OEn for x+1 cycles
.wrap
.program hub75_row_inverted
; side-set pin 0 is LATCH
; side-set pin 1 is OEn
; OUT pins are row select A-E
;
; Each FIFO record consists of:
; - 5-bit row select (LSBs)
; - Pulse width - 1 (27 MSBs)
;
; Repeatedly select a row, pulse LATCH, and generate a pulse of a certain
; width on OEn.
;
; In this variant the LATCH side-set bit (bit 0) is inverted: it is held
; high (side 0x3 / 0x1) and driven low (side 0x2) only for the latch pulse.
.side_set 2
.wrap_target
out pins, 5 [7] side 0x3 ; Deassert OEn, output row select
out x, 27 [7] side 0x2 ; Pulse LATCH, get OEn pulse width
pulse_loop:
jmp x-- pulse_loop side 0x1 ; Assert OEn for x+1 cycles
.wrap
% c-sdk {
static inline void hub75_row_program_init(PIO pio, uint sm, uint offset, uint row_base_pin, uint n_row_pins, uint latch_base_pin) {
pio_sm_set_consecutive_pindirs(pio, sm, row_base_pin, n_row_pins, true);
@ -74,29 +96,25 @@ public entry_point:
public shift0: ; R0 G0 B0 (Top half of 64x64 displays)
pull side 0 ; gets patched to `out null, n` if n nonzero (otherwise the PULL is required for fencing)
out null, 16 side 0 ; discard _
in osr, 1 side 0 ; Red0
out null, 32 side 0 ; discard remaining bits
in osr, 1 side 0 ; Red0 N
out null, 10 side 0 ; Red0 discard
public shift1:
pull side 0 ; gets patched to out null, n if n is nonzero (otherwise PULL required)
in osr, 1 side 0 ; Green0
out null, 16 side 0 ; << next uint16
in osr, 1 side 0 ; Blue0
out null, 32 side 0 ; discard remaining bits
in osr, 1 side 0 ; Green0 N
out null, 10 side 0 ; Green0 discard
public shift2:
pull side 0 ; gets patched to out null, n if n is nonzero (otherwise PULL required)
out null, 16 side 0 ; discard _
in osr, 1 side 0 ; Red1
out null, 32 side 0 ; discard remaining bits
in osr, 1 side 0 ; Blue0 N
out null, 32 side 0 ; Remainder discard
public shift3: ; R1 G1 B1 (Bottom half of 64x64 displays)
pull side 0 ; gets patched to out null, n if n is nonzero (otherwise PULL required)
in osr, 1 side 0 ; Green0, CLK rising edge
out null, 16 side 0 ; << next uint16
in osr, 1 side 0 ; Blue0
out null, 32 side 0 ; discard remaining bits
public shift1: ; R1 G1 B1 (Bottom half of 64x64 displays)
pull side 0 ; gets patched to `out null, n` if n nonzero (otherwise the PULL is required for fencing)
in osr, 1 side 0 ; Red1 N
out null, 10 side 0 ; Red1 discard
in osr, 1 side 0 ; Green1 N
out null, 10 side 0 ; Green1 discard
in osr, 1 side 0 ; Blue1 N
out null, 32 side 0 ; Remainder discard
in null, 26 side 0 ; Note we are just doing this little manoeuvre here to get GPIOs in the order
mov pins, ::isr [7] side 1 ; R0, G0, B0, R1, G1, B1. Can go 1 cycle faster if reversed
@ -135,7 +153,5 @@ static inline void hub75_data_rgb888_set_shift(PIO pio, uint sm, uint offset, ui
instr = pio_encode_out(pio_null, shamt);
pio->instr_mem[offset + hub75_data_rgb888_offset_shift0] = instr;
pio->instr_mem[offset + hub75_data_rgb888_offset_shift1] = instr;
pio->instr_mem[offset + hub75_data_rgb888_offset_shift2] = instr;
pio->instr_mem[offset + hub75_data_rgb888_offset_shift3] = instr;
}
%}