diff --git a/software/.gitignore b/software/.gitignore index 378eac2..89efb0f 100644 --- a/software/.gitignore +++ b/software/.gitignore @@ -1 +1,2 @@ build +*.swp diff --git a/software/apps/CMakeLists.txt b/software/apps/CMakeLists.txt index 02537df..78ae855 100644 --- a/software/apps/CMakeLists.txt +++ b/software/apps/CMakeLists.txt @@ -7,3 +7,4 @@ add_subdirectory(moon) add_subdirectory(sprite_bounce) add_subdirectory(terminal) add_subdirectory(vista) +add_subdirectory(mandel-full) diff --git a/software/apps/mandel-full/CMakeLists.txt b/software/apps/mandel-full/CMakeLists.txt new file mode 100644 index 0000000..c9b1b3f --- /dev/null +++ b/software/apps/mandel-full/CMakeLists.txt @@ -0,0 +1,26 @@ +# Replace TMDS with 10 bit UART (same baud rate): +# add_definitions(-DDVI_SERIAL_DEBUG=1) +# add_definitions(-DRUN_FROM_CRYSTAL) + +add_executable(mandel-full + main.c + mandelbrot.c +) + +target_compile_definitions(mandel-full PRIVATE + DVI_VERTICAL_REPEAT=1 + DVI_N_TMDS_BUFFERS=8 + DVI_SYMBOLS_PER_WORD=2 + ) + +target_compile_definitions(mandel-full PRIVATE PICO_STACK_SIZE=0x400) + + +target_link_libraries(mandel-full + pico_stdlib + pico_multicore + libdvi +) + +# create map/bin/hex file etc. 
+pico_add_extra_outputs(mandel-full)
diff --git a/software/apps/mandel-full/main.c b/software/apps/mandel-full/main.c
new file mode 100644
index 0000000..83d137d
--- /dev/null
+++ b/software/apps/mandel-full/main.c
@@ -0,0 +1,234 @@
+// Standard headers: printf, fixed-width integer types and bool.
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "hardware/clocks.h"
+#include "hardware/dma.h"
+#include "hardware/gpio.h"
+#include "hardware/irq.h"
+#include "hardware/pll.h"
+#include "hardware/sync.h"
+#include "hardware/structs/bus_ctrl.h"
+#include "hardware/structs/ssi.h"
+#include "hardware/vreg.h"
+#include "pico/multicore.h"
+#include "pico/sem.h"
+#include "pico/stdlib.h"
+
+#include "tmds_encode.h"
+
+#include "dvi.h"
+#include "dvi_serialiser.h"
+#include "common_dvi_pin_configs.h"
+
+#include "mandelbrot.h"
+
+// TMDS bit clock 252 MHz
+// DVDD 1.2V (1.1V seems ok too)
+#define FRAME_WIDTH 640
+#define FRAME_HEIGHT 480
+#define VREG_VSEL VREG_VOLTAGE_1_10
+#define DVI_TIMING dvi_timing_640x480p_60hz
+
+#define N_IMAGES 3
+#define FRAMES_PER_IMAGE 300
+
+// Palette-index image for the top half of the frame; main() scans it out
+// forwards and then backwards to mirror it into the bottom half.
+// tmds_encode_palette_data() reads rows through a uint32_t pointer, so the
+// buffer must be word aligned (each row is FRAME_WIDTH bytes, a multiple of 4).
+uint8_t mandel[FRAME_WIDTH * (FRAME_HEIGHT / 2)] __attribute__((aligned(4)));
+
+#define PALETTE_BITS 8
+#define PALETTE_SIZE (1 << PALETTE_BITS)
+// RGB565 colour for each palette index.
+uint16_t palette[PALETTE_SIZE];
+
+// TMDS symbol tables built from palette by tmds_setup_palette_symbols().
+uint32_t tmds_palette[PALETTE_SIZE * 6];
+
+struct dvi_inst dvi0;
+struct semaphore dvi_start_sem;
+
+// Shared between the cores: core 0 fills it forwards, core 1 from the end.
+FractalBuffer fractal;
+
+static uint8_t palette_offset = 0;
+
+// Fill palette[] with a sequence of 32-entry colour ramps, rotated by
+// palette_offset, and rebuild the TMDS symbol tables from it. Index 0 (used
+// for points inside the set) is always black. Each call advances
+// palette_offset by one, so calling it repeatedly cycles the colours.
+void init_palette() {
+    palette[0] = 0;
+    for (int i = 1; i < PALETTE_SIZE; ++i) {
+        uint8_t c = i + palette_offset;
+        if (c < 0x20) palette[i] = c;
+        else if (c < 0x40) palette[i] = (c - 0x20) << 6;
+        else if (c < 0x60) palette[i] = (c - 0x40) << 11;
+        else if (c < 0x80) palette[i] = ((c - 0x60) & 0x1f) * 0x0840;
+        else if (c < 0xa0) palette[i] = ((c - 0x80) & 0x1f) * 0x0041;
+        else if (c < 0xc0) palette[i] = ((c - 0xa0) & 0x1f) * 0x0801;
+        else if (c < 0xe0) palette[i] = ((c - 0xc0) & 0x1f) * 0x0841;
+        else palette[i] = 0;
+    }
+    ++palette_offset;
+
+    tmds_setup_palette_symbols(palette, tmds_palette, PALETTE_SIZE);
+}
+
+// Fill the image with a diagonal test pattern, then start a fresh render of
+// the initial view. Only negative y is rendered (maxy stops half a row short
+// of 0); main() mirrors the rows to draw the other half of the frame.
+void init_mandel() {
+    for (int y = 0; y < (FRAME_HEIGHT / 2); ++y) {
+        uint8_t* buf = &mandel[y * FRAME_WIDTH];
+        for (int i = 0; i < FRAME_WIDTH; ++i) {
+            buf[i] = ((i + y) & 0x3f);
+        }
+    }
+
+    fractal.buff = mandel;
+    fractal.rows = FRAME_HEIGHT / 2;
+    fractal.cols = FRAME_WIDTH;
+    fractal.max_iter = PALETTE_SIZE;
+    fractal.iter_offset = 0;
+    fractal.minx = -2.25f;
+    fractal.maxx = 0.75f;
+    fractal.miny = -1.6f;
+    fractal.maxy = 0.f - (1.6f / FRAME_HEIGHT); // Half a row
+    fractal.use_cycle_check = true;
+    init_fractal(&fractal);
+}
+
+#define NUM_ZOOMS 64
+static uint32_t zoom_count = 0;
+
+// Start rendering the next zoom step: shrink the view to 96% of its size
+// and slide its centre from x = -0.75 towards x = -1.45. On the
+// NUM_ZOOMS-th call the sequence restarts from the initial view instead.
+void zoom_mandel() {
+    if (++zoom_count == NUM_ZOOMS)
+    {
+        init_mandel();
+        zoom_count = 0;
+        return;
+    }
+
+    // zoom_count is a uint32_t; print it through a matching unsigned type.
+    printf("Zoom: %lu\n", (unsigned long)zoom_count);
+
+    float zoomx = -.75f - .7f * ((float)zoom_count / (float)NUM_ZOOMS);
+    float sizex = fractal.maxx - fractal.minx;
+    float sizey = fractal.miny * -2.f;
+    float zoomr = 0.96f * 0.5f;
+    fractal.minx = zoomx - zoomr * sizex;
+    fractal.maxx = zoomx + zoomr * sizex;
+    fractal.miny = -zoomr * sizey;
+    fractal.maxy = 0.f + fractal.miny / FRAME_HEIGHT;
+    init_fractal(&fractal);
+}
+
+// Core 1 handles DMA IRQs and runs TMDS encode on scanline buffers it
+// receives through the mailbox FIFO
+void __not_in_flash("core1_main") core1_main() {
+    dvi_register_irqs_this_core(&dvi0, DMA_IRQ_0);
+    sem_acquire_blocking(&dvi_start_sem);
+    dvi_start(&dvi0);
+
+    while (1) {
+        const uint32_t *colourbuf = (const uint32_t*)multicore_fifo_pop_blocking();
+        uint32_t *tmdsbuf = (uint32_t*)multicore_fifo_pop_blocking();
+        tmds_encode_palette_data((const uint32_t*)colourbuf, tmds_palette, tmdsbuf, FRAME_WIDTH, PALETTE_BITS);
+        multicore_fifo_push_blocking(0);
+        // Spend spare time on the fractal, working backwards from the end.
+        while (!fractal.done && queue_get_level(&dvi0.q_tmds_valid) >= 5) generate_steal_one(&fractal);
+    }
+    __builtin_unreachable();
+}
+
+// Core 0: brings up clocks and DVI, then loops forever encoding scanlines
+// (one per pair itself, the other handed to core 1) and rendering fractal
+// pixels whenever enough encoded lines are queued.
+int __not_in_flash("main") main() {
+    vreg_set_voltage(VREG_VSEL);
+    sleep_ms(10);
+    set_sys_clock_khz(DVI_TIMING.bit_clk_khz, true);
+
+    setup_default_uart();
+
+    gpio_init(PICO_DEFAULT_LED_PIN);
+    gpio_set_dir(PICO_DEFAULT_LED_PIN, GPIO_OUT);
+
+    init_palette();
+    init_mandel();
+
+    printf("Configuring DVI\n");
+
+    dvi0.timing = &DVI_TIMING;
+    dvi0.ser_cfg = DEFAULT_DVI_SERIAL_CONFIG;
+    dvi_init(&dvi0, next_striped_spin_lock_num(), next_striped_spin_lock_num());
+
+    printf("Core 1 start\n");
+    sem_init(&dvi_start_sem, 0, 1);
+    hw_set_bits(&bus_ctrl_hw->priority, BUSCTRL_BUS_PRIORITY_PROC1_BITS);
+    multicore_launch_core1(core1_main);
+
+    uint heartbeat = 0;
+    uint32_t encode_time = 0;
+
+    sem_release(&dvi_start_sem);
+    while (1) {
+        if (++heartbeat >= 30) {
+            heartbeat = 0;
+            gpio_xor_mask(1u << PICO_DEFAULT_LED_PIN);
+
+            // encode_time is a uint32_t; print it through a matching unsigned type.
+            printf("Encode total time: %luus\n", (unsigned long)encode_time);
+            encode_time = 0;
+        }
+        if (fractal.done) zoom_mandel();
+        //if (heartbeat & 1) init_palette();
+        // Top half of the frame: image rows in order, two per iteration.
+        for (int y = 0; y < FRAME_HEIGHT / 2; y += 2) {
+            uint32_t *our_tmds_buf, *their_tmds_buf;
+            queue_remove_blocking_u32(&dvi0.q_tmds_free, &their_tmds_buf);
+            multicore_fifo_push_blocking((uint32_t)(&mandel[y*FRAME_WIDTH]));
+            multicore_fifo_push_blocking((uint32_t)their_tmds_buf);
+
+            queue_remove_blocking_u32(&dvi0.q_tmds_free, &our_tmds_buf);
+            absolute_time_t start_time = get_absolute_time();
+            tmds_encode_palette_data((const uint32_t*)(&mandel[(y+1)*FRAME_WIDTH]), tmds_palette, our_tmds_buf, FRAME_WIDTH, PALETTE_BITS);
+            encode_time += absolute_time_diff_us(start_time, get_absolute_time());
+
+            // Wait for core 1 to finish encoding its line.
+            multicore_fifo_pop_blocking();
+
+            while (!fractal.done && queue_get_level(&dvi0.q_tmds_valid) >= 5) generate_one_forward(&fractal);
+
+            queue_add_blocking_u32(&dvi0.q_tmds_valid, &their_tmds_buf);
+            queue_add_blocking_u32(&dvi0.q_tmds_valid, &our_tmds_buf);
+        }
+        // Bottom half: the same rows in reverse order, mirroring the image.
+        for (int y = FRAME_HEIGHT / 2 - 2; y >= 0; y -= 2) {
+            uint32_t *our_tmds_buf, *their_tmds_buf;
+            queue_remove_blocking_u32(&dvi0.q_tmds_free, &their_tmds_buf);
+            multicore_fifo_push_blocking((uint32_t)(&mandel[(y+1)*FRAME_WIDTH]));
+            multicore_fifo_push_blocking((uint32_t)their_tmds_buf);
+
+            queue_remove_blocking_u32(&dvi0.q_tmds_free, &our_tmds_buf);
+            absolute_time_t start_time = get_absolute_time();
+            tmds_encode_palette_data((const uint32_t*)(&mandel[y*FRAME_WIDTH]), tmds_palette, our_tmds_buf, FRAME_WIDTH, PALETTE_BITS);
+            encode_time += absolute_time_diff_us(start_time, get_absolute_time());
+
+            multicore_fifo_pop_blocking();
+
+            while (!fractal.done && queue_get_level(&dvi0.q_tmds_valid) >= 5) generate_one_forward(&fractal);
+
+            queue_add_blocking_u32(&dvi0.q_tmds_valid, &their_tmds_buf);
+            queue_add_blocking_u32(&dvi0.q_tmds_valid, &our_tmds_buf);
+        }
+    }
+    __builtin_unreachable();
+}
+
diff --git a/software/apps/mandel-full/mandelbrot.c b/software/apps/mandel-full/mandelbrot.c
new file mode 100644
index 0000000..41be9e1
--- /dev/null
+++ b/software/apps/mandel-full/mandelbrot.c
@@ -0,0 +1,252 @@
+// Copyright (C) Michael Bell 2021
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "pico/stdlib.h"
+#include "hardware/interp.h"
+#include "hardware/dma.h"
+
+#include "mandelbrot.h"
+
+// Cycle checking parameters
+#define MAX_CYCLE_LEN 8 // Must be power of 2
+#define MIN_CYCLE_CHECK_ITER 32 // Must be multiple of max cycle len
+#define CYCLE_TOLERANCE (1<<18)
+
+// Fixed point with 6 bits to the left of the point.
+// Range [-32,32) with precision 2^-26
+typedef int32_t fixed_pt_t;
+
+#define ESCAPE_SQUARE (4<<26)
+
+// a * b, using 32-bit multiplies on operands split at bit 13.
+static inline fixed_pt_t mul(fixed_pt_t a, fixed_pt_t b)
+{
+  int32_t ah = a >> 13;
+  int32_t al = a & 0x1fff;
+  int32_t bh = b >> 13;
+  int32_t bl = b & 0x1fff;
+
+  // Ignore al * bl as contribution to final result is only the carry.
+  fixed_pt_t r = ((ah * bl) + (al * bh)) >> 13;
+  r += ah * bh;
+  return r;
+}
+
+// a * b * 2
+static inline fixed_pt_t mul2(fixed_pt_t a, fixed_pt_t b)
+{
+#if 0
+  int32_t ah = a >> 12;
+  int32_t al = a & 0xfff;
+  int32_t bh = b >> 13;
+  int32_t bl = b & 0x1fff;
+
+  interp0->accum[0] = ah * bl;
+  interp0->accum[1] = al * bh;
+  interp0->base[2] = ah * bh;
+  return interp0->peek[2];
+#else
+  int32_t ah = a >> 12;
+  int32_t al = (a & 0xfff) << 1;
+  int32_t bh = b >> 13;
+  int32_t bl = b & 0x1fff;
+
+  fixed_pt_t r = ((ah * bl) + (al * bh)) >> 13;
+  r += ah * bh;
+  return r;
+#endif
+}
+
+// a * a; the two cross terms are folded into one multiply (hence >> 12).
+static inline fixed_pt_t square(fixed_pt_t a) {
+  int32_t ah = a >> 13;
+  int32_t al = a & 0x1fff;
+
+  return ((ah * al) >> 12) + (ah * ah);
+}
+
+// Convert an integer to fixed point. The shift is done on the unsigned
+// representation because left-shifting a negative signed value is undefined.
+fixed_pt_t make_fixed(int32_t x) {
+  return (fixed_pt_t)((uint32_t)x << 26);
+}
+
+// Convert a float to fixed point (scale by 2^26, truncating).
+fixed_pt_t make_fixedf(float x) {
+  return (int32_t)(x * (67108864.f));
+}
+
+// Configure interp0 lanes 0 and 1; presumably for the disabled path in mul2.
+void mandel_init()
+{
+  // Not currently used
+  interp_config cfg = interp_default_config();
+  interp_config_set_add_raw(&cfg, false);
+  interp_config_set_shift(&cfg, 13);
+  interp_config_set_mask(&cfg, 0, 31 - 13);
+  interp_config_set_signed(&cfg, true);
+  interp_set_config(interp0, 0, &cfg);
+  interp_config_set_shift(&cfg, 12);
+  interp_config_set_mask(&cfg, 0, 31 - 12);
+  interp_set_config(interp0, 1, &cfg);
+}
+
+// Reset the render state for the view configured in f: convert the bounds
+// to fixed point, derive the per-pixel steps and rewind both cursors.
+void init_fractal(FractalBuffer* f)
+{
+  f->done = false;
+  f->min_iter = f->max_iter - 1;
+  f->iminx = make_fixedf(f->minx);
+  f->imaxx = make_fixedf(f->maxx);
+  f->iminy = make_fixedf(f->miny);
+  f->imaxy = make_fixedf(f->maxy);
+  // A single row or column has no step; avoid dividing by zero.
+  f->incx = (f->cols > 1) ? (f->imaxx - f->iminx) / (f->cols - 1) : 0;
+  f->incy = (f->rows > 1) ? (f->imaxy - f->iminy) / (f->rows - 1) : 0;
+  f->count_inside = 0;
+  f->ipos = 0;
+  f->jpos = 0;
+  f->iend = f->rows - 1;
+  f->jend = f->cols - 1;
+}
+
+// Iterate one point and store its palette index: 0 if it never escapes,
+// otherwise the escape iteration minus iter_offset (at least 1).
+static inline void generate_one(FractalBuffer* f, fixed_pt_t x0, fixed_pt_t y0, uint8_t* buffptr)
+{
+  fixed_pt_t x = x0;
+  fixed_pt_t y = y0;
+
+  uint16_t k = 1;
+  for (; k < f->max_iter; ++k) {
+    fixed_pt_t x_square = square(x);
+    fixed_pt_t y_square = square(y);
+    if (x_square + y_square > ESCAPE_SQUARE) break;
+
+    fixed_pt_t nextx = x_square - y_square + x0;
+    y = mul2(x,y) + y0;
+    x = nextx;
+  }
+  if (k == f->max_iter) {
+    *buffptr = 0;
+    f->count_inside++;
+  } else {
+    if (k > f->iter_offset) k -= f->iter_offset;
+    else k = 1;
+    *buffptr = k;
+    if (f->min_iter > k) f->min_iter = k;
+  }
+}
+
+// As generate_one, but also treats a point as inside the set as soon as the
+// orbit returns to within CYCLE_TOLERANCE of a periodically saved position.
+static inline void generate_one_cycle_check(FractalBuffer* f, fixed_pt_t x0, fixed_pt_t y0, uint8_t* buffptr)
+{
+  fixed_pt_t x = x0;
+  fixed_pt_t y = y0;
+  fixed_pt_t oldx = 0, oldy = 0;
+
+  uint16_t k = 1;
+  for (; k < f->max_iter; ++k) {
+    fixed_pt_t x_square = square(x);
+    fixed_pt_t y_square = square(y);
+    if (x_square + y_square > ESCAPE_SQUARE) break;
+
+    if (k >= MIN_CYCLE_CHECK_ITER) {
+      if ((k & (MAX_CYCLE_LEN - 1)) == 0) {
+        oldx = x - CYCLE_TOLERANCE;
+        oldy = y - CYCLE_TOLERANCE;
+      }
+      else
+      {
+        if ((uint32_t)(x - oldx) < (2*CYCLE_TOLERANCE) && (uint32_t)(y - oldy) < (2*CYCLE_TOLERANCE)) {
+          // Found a cycle
+          k = f->max_iter;
+          break;
+        }
+      }
+    }
+
+    fixed_pt_t nextx = x_square - y_square + x0;
+    y = mul2(x,y) + y0;
+    x = nextx;
+  }
+  if (k == f->max_iter) {
+    *buffptr = 0;
+    f->count_inside++;
+  } else {
+    if (k > f->iter_offset) k -= f->iter_offset;
+    else k = 1;
+    *buffptr = k;
+    if (f->min_iter > k) f->min_iter = k;
+  }
+}
+
+// Render on the calling core every pixel before (iend, jend), then mark the
+// buffer done. NOTE(review): the pixel at (iend, jend) itself is left to
+// generate_steal_one(); confirm that is intended when no stealing occurs.
+void generate_fractal(FractalBuffer* f)
+{
+  uint8_t* buffptr = f->buff;
+
+  fixed_pt_t y0 = f->iminy;
+  int16_t i = 0;
+  for (; i < f->iend; ++i, y0 += f->incy) {
+    fixed_pt_t x0 = f->iminx;
+    for (int16_t j = 0; j < f->cols; ++j, x0 += f->incx) {
+      if (f->use_cycle_check) generate_one_cycle_check(f, x0, y0, buffptr++);
+      else generate_one(f, x0, y0, buffptr++);
+    }
+  }
+
+  fixed_pt_t x0 = f->iminx;
+  for (int16_t j = 0; j < f->jend && i == f->iend; ++j, x0 += f->incx) {
+    if (f->use_cycle_check) generate_one_cycle_check(f, x0, y0, buffptr++);
+    else generate_one(f, x0, y0, buffptr++);
+  }
+
+  f->done = true;
+}
+
+// Render the pixel at the forward cursor (ipos, jpos) and advance it; sets
+// done once the cursor moves past row iend.
+void generate_one_forward(FractalBuffer* f)
+{
+  if (f->done) return;
+
+  uint8_t* buffptr = f->buff + f->ipos * f->cols + f->jpos;
+
+  fixed_pt_t y0 = f->iminy + f->ipos * f->incy;
+  fixed_pt_t x0 = f->iminx + f->jpos * f->incx;
+  if (f->use_cycle_check) generate_one_cycle_check(f, x0, y0, buffptr);
+  else generate_one(f, x0, y0, buffptr);
+
+  if (++f->jpos == f->cols)
+  {
+    f->jpos = 0;
+    if (++f->ipos > f->iend) f->done = true;
+  }
+}
+
+// Render the pixel at the backward cursor (iend, jend) and move it back one
+// pixel; sets done once iend drops below the forward cursor's row.
+void generate_steal_one(FractalBuffer* f)
+{
+  if (f->done) {
+    return;
+  }
+
+  uint8_t* buffptr = f->buff + f->iend * f->cols + f->jend;
+
+  fixed_pt_t y0 = f->iminy + f->iend * f->incy;
+  fixed_pt_t x0 = f->iminx + f->jend * f->incx;
+  if (f->use_cycle_check) generate_one_cycle_check(f, x0, y0, buffptr);
+  else generate_one(f, x0, y0, buffptr);
+
+  if (f->jend-- == 0) {
+    f->jend = f->cols - 1;
+    if (--f->iend < f->ipos) f->done = true;
+  }
+}
diff --git a/software/apps/mandel-full/mandelbrot.h b/software/apps/mandel-full/mandelbrot.h
new file mode 100644
index 0000000..fba2896
--- /dev/null
+++ b/software/apps/mandel-full/mandelbrot.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+// Init pico resources used for generation
+void mandel_init();
+
+// Fixed point with 6 bits to the left of the point.
+// Range [-32,32) with precision 2^-26
+typedef int32_t fixed_pt_t;
+
+// Configuration and progress of one fractal render.
+typedef struct {
+  // Configuration
+  uint8_t* buff;
+  int16_t rows;
+  int16_t cols;
+
+  uint16_t max_iter;
+  uint16_t iter_offset;
+  float minx, miny, maxx, maxy;
+  bool use_cycle_check;
+
+  // State
+  volatile bool done;
+  volatile uint16_t min_iter;
+  fixed_pt_t iminx, iminy, imaxx, imaxy;
+  fixed_pt_t incx, incy;
+  volatile uint32_t count_inside;
+
+  int16_t ipos, jpos;
+  // Tracks work stealing from the end of the buffer
+  volatile int16_t iend, jend;
+} FractalBuffer;
+
+// Make a fixed_pt_t from an int or float.
+fixed_pt_t make_fixed(int32_t x);
+fixed_pt_t make_fixedf(float x);
+
+// Generate a section of the fractal into buff
+// Result written to buff is 0 for inside Mandelbrot set
+// Otherwise iteration of escape minus iter_offset (clamped to 1)
+void init_fractal(FractalBuffer* fractal);
+void generate_fractal(FractalBuffer* fractal);
+void generate_one_forward(FractalBuffer* f);
+void generate_steal_one(FractalBuffer* f);
diff --git a/software/libdvi/tmds_encode.S b/software/libdvi/tmds_encode.S
index eecf62e..0bfdac9 100644
--- a/software/libdvi/tmds_encode.S
+++ b/software/libdvi/tmds_encode.S
@@ -395,6 +395,10 @@ tmds_2bpp_table:
 	// DC balance defined to be 0 at start of scanline:
 	movs r4, #0
 	str r4, [r2, #ACCUM1_OFFS]
+#if TMDS_FULLRES_NO_DC_BALANCE
+	// Alternate parity between odd/even symbols if there's no balance feedback
+	mvns r4, r4
+#endif
 	str r4, [r2, #ACCUM1_OFFS + INTERP1]
 
 	// Keep loop start pointer in r8 so we can get a longer backward branch
@@ -529,3 +533,118 @@ decl_func_x tmds_fullres_encode_loop_16bpp_leftshift_x
 	tmds_fullres_encode_loop_16bpp_leftshift
 decl_func_y tmds_fullres_encode_loop_16bpp_leftshift_y
 	tmds_fullres_encode_loop_16bpp_leftshift
+
+
+// Variant of tmds_fullres_encode_loop_16bpp that reads
+// 8-bit wide pixels packed 4 per word. The interpolator
+// base is set to a reordered list of TMDS symbols based
+// on a user colour palette.
+
+.macro tmds_palette_encode_loop
+	push {r4-r7, lr} // presumably r0 = pixbuf, r1 = symbuf, r2 = n_pix (see C prototype)
+	mov r4, r8
+	push {r4}
+
+
+	lsls r2, #1 // n_pix * 2 = size of this channel's output in bytes
+	add r2, r1
+	mov ip, r2 // ip = end of output, compared against r1 at label 2
+	ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET)
+	// DC balance defined to be 0 at start of scanline:
+	movs r4, #0
+	str r4, [r2, #ACCUM1_OFFS]
+#if TMDS_FULLRES_NO_DC_BALANCE
+	// Alternate parity between odd/even symbols if there's no balance feedback
+	mvns r4, r4
+#endif
+	str r4, [r2, #ACCUM1_OFFS + INTERP1]
+
+	// Keep loop start pointer in r8 so we can get a longer backward branch
+	adr r4, 1f
+	adds r4, #1 // set the Thumb bit so that bx r8 stays in Thumb state
+	mov r8, r4
+	b 2f
+	.align 2
+1:
+.rept 8
+	ldmia r0!, {r3, r5} // two words = 8 palette-index pixels
+	lsrs r4, r3, #14 // pixels 2 and 3 of r3, positioned like r3 after its shift
+	lsls r3, #2 // pixels 0 and 1, scaled by 4 (the lane 0 mask starts at bit 2)
+	lsrs r6, r5, #14
+	lsls r5, #2
+	str r3, [r2, #ACCUM0_OFFS + INTERP1]
+	str r3, [r2, #ACCUM0_OFFS]
+	ldr r3, [r2, #PEEK2_OFFS] // address of the table entry for the first pixel
+	ldr r3, [r3]
+#if !TMDS_FULLRES_NO_DC_BALANCE
+	str r3, [r2, #ACCUM1_ADD_OFFS]
+#endif
+	ldr r7, [r2, #PEEK2_OFFS + INTERP1] // same for the second pixel, via interp1
+	ldr r7, [r7]
+#if !TMDS_FULLRES_NO_DC_BALANCE
+	str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
+#endif
+	lsls r7, #10 // second pixel's entry goes 10 bits above the first
+	orrs r3, r7
+
+	str r4, [r2, #ACCUM0_OFFS + INTERP1]
+	str r4, [r2, #ACCUM0_OFFS]
+	ldr r4, [r2, #PEEK2_OFFS]
+	ldr r4, [r4]
+#if !TMDS_FULLRES_NO_DC_BALANCE
+	str r4, [r2, #ACCUM1_ADD_OFFS]
+#endif
+	ldr r7, [r2, #PEEK2_OFFS + INTERP1]
+	ldr r7, [r7]
+#if !TMDS_FULLRES_NO_DC_BALANCE
+	str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
+#endif
+	lsls r7, #10
+	orrs r4, r7
+
+	str r5, [r2, #ACCUM0_OFFS + INTERP1]
+	str r5, [r2, #ACCUM0_OFFS]
+	ldr r5, [r2, #PEEK2_OFFS]
+	ldr r5, [r5]
+#if !TMDS_FULLRES_NO_DC_BALANCE
+	str r5, [r2, #ACCUM1_ADD_OFFS]
+#endif
+	ldr r7, [r2, #PEEK2_OFFS + INTERP1]
+	ldr r7, [r7]
+#if !TMDS_FULLRES_NO_DC_BALANCE
+	str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
+#endif
+	lsls r7, #10
+	orrs r5, r7
+
+	str r6, [r2, #ACCUM0_OFFS + INTERP1]
+	str r6, [r2, #ACCUM0_OFFS]
+	ldr r6, [r2, #PEEK2_OFFS]
+	ldr r6, [r6]
+#if !TMDS_FULLRES_NO_DC_BALANCE
+	str r6, [r2, #ACCUM1_ADD_OFFS]
+#endif
+	ldr r7, [r2, #PEEK2_OFFS + INTERP1]
+	ldr r7, [r7]
+#if !TMDS_FULLRES_NO_DC_BALANCE
+	str r7, [r2, #ACCUM1_ADD_OFFS + INTERP1]
+#endif
+	lsls r7, #10
+	orrs r6, r7
+
+	stmia r1!, {r3, r4, r5, r6} // four output words for the 8 pixels read above
+.endr
+2:
+	cmp r1, ip
+	beq 1f
+	bx r8
+1:
+	pop {r4}
+	mov r8, r4
+	pop {r4-r7, pc}
+.endm
+
+decl_func_x tmds_palette_encode_loop_x
+	tmds_palette_encode_loop
+decl_func_y tmds_palette_encode_loop_y
+	tmds_palette_encode_loop
diff --git a/software/libdvi/tmds_encode.c b/software/libdvi/tmds_encode.c
index a031ddc..367fc0d 100644
--- a/software/libdvi/tmds_encode.c
+++ b/software/libdvi/tmds_encode.c
@@ -162,3 +162,85 @@ void __not_in_flash_func(tmds_encode_data_channel_fullres_16bpp)(const uint32_t
 	interp_restore(interp1_hw, &interp1_save);
 #endif
 }
+
+// This takes a 16-bit (RGB 565) colour palette and makes palettes of TMDS symbols suitable
+// for performing fullres encode. Channels are stored blue, green, red, 2 * n_palette words each.
+// The TMDS palette buffer should be 6 * n_palette words long.
+// n_palette must be a power of 2 <= 256.
+void tmds_setup_palette_symbols(const uint16_t *palette, uint32_t *tmds_palette, size_t n_palette) {
+	uint32_t* tmds_palette_blue = tmds_palette;
+	uint32_t* tmds_palette_green = tmds_palette + 2 * n_palette;
+	uint32_t* tmds_palette_red = tmds_palette + 4 * n_palette;
+	for (int i = 0; i < n_palette; ++i) {
+		uint16_t blue = (palette[i] << 1) & 0x3e; // 5-bit channel shifted up to a 6-bit index
+		uint16_t green = (palette[i] >> 5) & 0x3f;
+		uint16_t red = (palette[i] >> 10) & 0x3e; // 5-bit channel shifted up to a 6-bit index
+		tmds_palette_blue[i] = tmds_table_fullres_x[blue];
+		tmds_palette_blue[i + n_palette] = tmds_table_fullres_x[64 + blue]; // second half uses table entries 64 and up
+		tmds_palette_green[i] = tmds_table_fullres_x[green];
+		tmds_palette_green[i + n_palette] = tmds_table_fullres_x[64 + green];
+		tmds_palette_red[i] = tmds_table_fullres_x[red];
+		tmds_palette_red[i + n_palette] = tmds_table_fullres_x[64 + red];
+	}
+}
+
+// Encode palette data for all 3 channels.
+// pixbuf is an array of n_pix 8-bit wide pixels containing palette values (32-bit word aligned)
+// tmds_palette is a palette of TMDS symbols produced by tmds_setup_palette_symbols
+// symbuf is 3 * n_pix / 2 32-bit words (n_pix / 2 per channel), this function writes the symbol values for each of the channels to it.
+void __not_in_flash_func(tmds_encode_palette_data)(const uint32_t *pixbuf, const uint32_t *tmds_palette, uint32_t *symbuf, size_t n_pix, uint32_t palette_bits) {
+	uint core = get_core_num();
+#if !TMDS_FULLRES_NO_INTERP_SAVE
+	interp_hw_save_t interp0_save, interp1_save;
+	interp_save(interp0_hw, &interp0_save);
+	interp_save(interp1_hw, &interp1_save);
+#endif
+
+	interp0_hw->base[2] = (uint32_t)tmds_palette;
+	interp1_hw->base[2] = (uint32_t)tmds_palette;
+
+	// Lane 0 on both interpolators masks the palette bits, starting at bit 2,
+	// The second interpolator also shifts to read the 2nd or 4th byte of the word.
+	interp0_hw->ctrl[0] =
+		(2 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) |
+		((palette_bits + 1) << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB);
+	interp1_hw->ctrl[0] =
+		(8 << SIO_INTERP0_CTRL_LANE0_SHIFT_LSB) |
+		(2 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) |
+		((palette_bits + 1) << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB);
+
+	// Lane 1 shifts and masks the sign bit into the right position to add to the symbol
+	// table index to choose the negative disparity symbols if the sign is negative.
+	const uint32_t ctrl_lane_1 =
+		((31 - (palette_bits + 2)) << SIO_INTERP0_CTRL_LANE0_SHIFT_LSB) |
+		(palette_bits + 2) * ((1 << SIO_INTERP0_CTRL_LANE0_MASK_LSB_LSB) | (1 << SIO_INTERP0_CTRL_LANE0_MASK_MSB_LSB)); // mask LSB == MSB: a single bit
+	interp0_hw->ctrl[1] = ctrl_lane_1;
+	interp1_hw->ctrl[1] = ctrl_lane_1;
+
+	if (core) {
+		tmds_palette_encode_loop_x(pixbuf, symbuf, n_pix); // first channel table, at tmds_palette
+
+		interp0_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits)); // second channel table
+		interp1_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
+		tmds_palette_encode_loop_x(pixbuf, symbuf + (n_pix >> 1), n_pix);
+
+		interp0_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits)); // third channel table
+		interp1_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
+		tmds_palette_encode_loop_x(pixbuf, symbuf + n_pix, n_pix);
+	} else {
+		tmds_palette_encode_loop_y(pixbuf, symbuf, n_pix); // same sequence using the _y copy of the loop
+
+		interp0_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
+		interp1_hw->base[2] = (uint32_t)(tmds_palette + (2 << palette_bits));
+		tmds_palette_encode_loop_y(pixbuf, symbuf + (n_pix >> 1), n_pix);
+
+		interp0_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
+		interp1_hw->base[2] = (uint32_t)(tmds_palette + (4 << palette_bits));
+		tmds_palette_encode_loop_y(pixbuf, symbuf + n_pix, n_pix);
+	}
+
+#if !TMDS_FULLRES_NO_INTERP_SAVE
+	interp_restore(interp0_hw, &interp0_save);
+	interp_restore(interp1_hw, &interp1_save);
+#endif
+}
diff --git a/software/libdvi/tmds_encode.h b/software/libdvi/tmds_encode.h
index 524a7d6..18695a5 100644
--- a/software/libdvi/tmds_encode.h
+++ b/software/libdvi/tmds_encode.h
@@ -8,6 +8,8 @@
 void tmds_encode_data_channel_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
 void tmds_encode_data_channel_8bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
 void tmds_encode_data_channel_fullres_16bpp(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint channel_msb, uint channel_lsb);
+void tmds_setup_palette_symbols(const uint16_t *palette, uint32_t *symbuf, size_t n_palette);
+void tmds_encode_palette_data(const uint32_t *pixbuf, const uint32_t *tmds_palette, uint32_t *symbuf, size_t n_pix, uint32_t palette_bits);
 
 // Functions from tmds_encode.S
 
@@ -28,5 +30,7 @@ void tmds_fullres_encode_loop_16bpp_x(const uint32_t *pixbuf, uint32_t *symbuf,
 void tmds_fullres_encode_loop_16bpp_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
 void tmds_fullres_encode_loop_16bpp_leftshift_x(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint leftshift);
 void tmds_fullres_encode_loop_16bpp_leftshift_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix, uint leftshift);
+void tmds_palette_encode_loop_x(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
+void tmds_palette_encode_loop_y(const uint32_t *pixbuf, uint32_t *symbuf, size_t n_pix);
 
 #endif