From 58cc97e29fccd80d6803e1c763c762f03eb1a2e4 Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Sun, 28 Feb 2021 11:23:38 +0000 Subject: [PATCH] Hacking on 2 symbols per word (single->diff inside PIO), it compiles, and that is the best I can say about it --- software/include/common_dvi_pin_configs.h | 2 +- software/libdvi/CMakeLists.txt | 1 + software/libdvi/dvi.c | 20 ++-- software/libdvi/dvi_config_defs.h | 16 ++- software/libdvi/dvi_serialiser.c | 56 ++++----- software/libdvi/dvi_serialiser.h | 1 - software/libdvi/dvi_serialiser.pio | 82 +++++--------- software/libdvi/dvi_timing.c | 53 +++++---- software/libdvi/tmds_encode.S | 52 ++++----- software/libdvi/tmds_table.h | 132 +++++++++++----------- software/libdvi/tmds_table_gen.py | 38 +++++-- 11 files changed, 234 insertions(+), 219 deletions(-) diff --git a/software/include/common_dvi_pin_configs.h b/software/include/common_dvi_pin_configs.h index c562e50..d853283 100644 --- a/software/include/common_dvi_pin_configs.h +++ b/software/include/common_dvi_pin_configs.h @@ -8,7 +8,7 @@ #include "dvi_serialiser.h" #ifndef DEFAULT_DVI_SERIAL_CONFIG -#define DEFAULT_DVI_SERIAL_CONFIG picodvi_dvi_cfg +#define DEFAULT_DVI_SERIAL_CONFIG pico_sock_cfg #endif // ---------------------------------------------------------------------------- diff --git a/software/libdvi/CMakeLists.txt b/software/libdvi/CMakeLists.txt index d615140..7c52661 100644 --- a/software/libdvi/CMakeLists.txt +++ b/software/libdvi/CMakeLists.txt @@ -26,6 +26,7 @@ target_link_libraries(libdvi INTERFACE hardware_dma hardware_interp hardware_pio + hardware_pwm ) pico_generate_pio_header(libdvi ${CMAKE_CURRENT_LIST_DIR}/dvi_serialiser.pio) diff --git a/software/libdvi/dvi.c b/software/libdvi/dvi.c index 3efbe20..7b827b6 100644 --- a/software/libdvi/dvi.c +++ b/software/libdvi/dvi.c @@ -43,9 +43,9 @@ void dvi_init(struct dvi_inst *inst, uint spinlock_tmds_queue, uint spinlock_col for (int i = 0; i < DVI_N_TMDS_BUFFERS; ++i) { void *tmdsbuf; #if 
DVI_MONOCHROME_TMDS - tmdsbuf = malloc(inst->timing->h_active_pixels * sizeof(uint32_t)); + tmdsbuf = malloc(inst->timing->h_active_pixels / DVI_SYMBOLS_PER_WORD * sizeof(uint32_t)); #else - tmdsbuf = malloc(3 * inst->timing->h_active_pixels * sizeof(uint32_t)); + tmdsbuf = malloc(3 * inst->timing->h_active_pixels / DVI_SYMBOLS_PER_WORD * sizeof(uint32_t)); #endif if (!tmdsbuf) panic("TMDS buffer allocation failed"); @@ -118,6 +118,7 @@ static inline void __dvi_func_x(_dvi_prepare_scanline_8bpp)(struct dvi_inst *ins uint32_t *tmdsbuf; queue_remove_blocking_u32(&inst->q_tmds_free, &tmdsbuf); uint pixwidth = inst->timing->h_active_pixels; + uint words_per_channel = pixwidth / DVI_SYMBOLS_PER_WORD; // TODO maybe want to make this configurable one day // anyhoo we are abutting the buffers in TMDS channel order const uint red_msb = 7; @@ -126,10 +127,10 @@ static inline void __dvi_func_x(_dvi_prepare_scanline_8bpp)(struct dvi_inst *ins const uint green_lsb = 2; const uint blue_msb = 1; const uint blue_lsb = 0; - // NB the scanline buffers are half-resolution! - tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf, pixwidth / 2, blue_msb, blue_lsb); - tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf + pixwidth, pixwidth / 2, green_msb, green_lsb); - tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf + 2 * pixwidth, pixwidth / 2, red_msb, red_lsb); + // Scanline buffers are half-resolution; the functions take the number of *input* pixels as parameter. 
+ tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf + 0 * words_per_channel, pixwidth / 2, blue_msb, blue_lsb); + tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf + 1 * words_per_channel, pixwidth / 2, green_msb, green_lsb); + tmds_encode_data_channel_8bpp(scanbuf, tmdsbuf + 2 * words_per_channel, pixwidth / 2, red_msb, red_lsb); queue_add_blocking_u32(&inst->q_tmds_valid, &tmdsbuf); } @@ -137,15 +138,16 @@ static inline void __dvi_func_x(_dvi_prepare_scanline_16bpp)(struct dvi_inst *in uint32_t *tmdsbuf; queue_remove_blocking_u32(&inst->q_tmds_free, &tmdsbuf); uint pixwidth = inst->timing->h_active_pixels; + uint words_per_channel = pixwidth / DVI_SYMBOLS_PER_WORD; const uint red_msb = 15; const uint red_lsb = 11; const uint green_msb = 10; const uint green_lsb = 5; const uint blue_msb = 4; const uint blue_lsb = 0; - tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf, pixwidth / 2, blue_msb, blue_lsb); - tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf + pixwidth, pixwidth / 2, green_msb, green_lsb); - tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf + 2 * pixwidth, pixwidth / 2, red_msb, red_lsb); + tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf + 0 * words_per_channel, pixwidth / 2, blue_msb, blue_lsb); + tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf + 1 * words_per_channel, pixwidth / 2, green_msb, green_lsb); + tmds_encode_data_channel_16bpp(scanbuf, tmdsbuf + 2 * words_per_channel, pixwidth / 2, red_msb, red_lsb); queue_add_blocking_u32(&inst->q_tmds_valid, &tmdsbuf); } diff --git a/software/libdvi/dvi_config_defs.h b/software/libdvi/dvi_config_defs.h index 3245787..a8992f9 100644 --- a/software/libdvi/dvi_config_defs.h +++ b/software/libdvi/dvi_config_defs.h @@ -7,7 +7,7 @@ // target_compile_definitions()) // Pull in base headers to make sure board definitions override the -// definitions provided here. +// definitions provided here. Note this file is included in asm and C. 
#include "hardware/platform_defs.h" #include "pico/config.h" @@ -40,6 +40,20 @@ #define DVI_MONOCHROME_TMDS 0 #endif +// By default, we assume each 32-bit word written to a PIO FIFO contains 2x +// 10-bit TMDS symbols, concatenated into the lower 20 bits, least-significant +// first. This is convenient if you are generating two or more pixels at once, +// e.g. using the pixel-doubling TMDS encode. You can change this value to 1 +// (so each word contains 1 symbol) for e.g. full resolution RGB encode. Note +// that this value needs to divide the DVI horizontal timings, so is limited +// to 1 or 2. +#ifndef DVI_SYMBOLS_PER_WORD +#define DVI_SYMBOLS_PER_WORD 2 +#endif + +#if DVI_SYMBOLS_PER_WORD != 1 && DVI_SYMBOLS_PER_WORD !=2 +#error "Unsupported value for DVI_SYMBOLS_PER_WORD" +#endif // ---------------------------------------------------------------------------- // TMDS encode controls diff --git a/software/libdvi/dvi_serialiser.c b/software/libdvi/dvi_serialiser.c index 823bf30..f740680 100644 --- a/software/libdvi/dvi_serialiser.c +++ b/software/libdvi/dvi_serialiser.c @@ -1,16 +1,17 @@ #include "pico.h" #include "hardware/pio.h" #include "hardware/gpio.h" +#include "hardware/pwm.h" #include "hardware/structs/padsbank0.h" #include "dvi.h" #include "dvi_serialiser.h" #include "dvi_serialiser.pio.h" -static void dvi_init_gpio(uint gpio, bool invert) { +static void dvi_configure_pad(uint gpio, bool invert) { // 2 mA drive, enable slew rate limiting (this seems fine even at 720p30, and // the 3V3 LDO doesn't get warm like when turning all the GPIOs up to 11). - // Also disable digital reciever. + // Also disable digital receiver. 
hw_write_masked( &padsbank0_hw->io[gpio], (0 << PADS_BANK0_GPIO0_DRIVE_LSB), @@ -22,52 +23,53 @@ static void dvi_init_gpio(uint gpio, bool invert) { void dvi_serialiser_init(struct dvi_serialiser_cfg *cfg) { #if DVI_SERIAL_DEBUG uint offset = pio_add_program(cfg->pio, &dvi_serialiser_debug_program); - uint offset_clk = offset; #else uint offset = pio_add_program(cfg->pio, &dvi_serialiser_program); - uint offset_clk = pio_add_program(cfg->pio, &dvi_serialiser_clk_program); #endif cfg->prog_offs = offset; - cfg->prog_offs_clk = offset_clk; for (int i = 0; i < N_TMDS_LANES; ++i) { pio_sm_claim(cfg->pio, cfg->sm_tmds[i]); dvi_serialiser_program_init( cfg->pio, cfg->sm_tmds[i], - i == TMDS_SYNC_LANE ? offset_clk : offset, + offset, cfg->pins_tmds[i], - cfg->pins_clk, - i == TMDS_SYNC_LANE, DVI_SERIAL_DEBUG ); - dvi_init_gpio(cfg->pins_tmds[i], cfg->invert_diffpairs); - dvi_init_gpio(cfg->pins_tmds[i] + 1, cfg->invert_diffpairs); + dvi_configure_pad(cfg->pins_tmds[i], cfg->invert_diffpairs); + dvi_configure_pad(cfg->pins_tmds[i] + 1, cfg->invert_diffpairs); } - dvi_init_gpio(cfg->pins_clk, cfg->invert_diffpairs); - dvi_init_gpio(cfg->pins_clk + 1, cfg->invert_diffpairs); + + // Use a PWM slice to drive the pixel clock. Both GPIOs must be on the same + // slice (lower-numbered GPIO must be even). + assert(cfg->pins_clk % 2 == 0); + uint slice = pwm_gpio_to_slice_num(cfg->pins_clk); + // 5 cycles high, 5 low. Invert one channel so that we get complementary outputs. 
+ const uint pwm_wrap = 10 - 1; + const uint pwm_level = (pwm_wrap + 1) / 2; /* counter period is pwm_wrap + 1 = 10, so level 5 gives 5 high / 5 low */ + pwm_config pwm_cfg = pwm_get_default_config(); + pwm_config_set_output_polarity(&pwm_cfg, true, false); + pwm_config_set_wrap(&pwm_cfg, pwm_wrap); + pwm_init(slice, &pwm_cfg, false); + pwm_set_both_levels(slice, pwm_level, pwm_level); + + for (uint i = cfg->pins_clk; i <= cfg->pins_clk + 1; ++i) { + gpio_set_function(i, GPIO_FUNC_PWM); /* route the pin to the PWM slice; pio_gpio_init() no longer touches the clock pins */ + dvi_configure_pad(i, cfg->invert_diffpairs); + } } void dvi_serialiser_enable(struct dvi_serialiser_cfg *cfg, bool enable) { uint mask = 0; for (int i = 0; i < N_TMDS_LANES; ++i) mask |= 1u << (cfg->sm_tmds[i] + PIO_CTRL_SM_ENABLE_LSB); - if (enable) + if (enable) { hw_set_bits(&cfg->pio->ctrl, mask); - else - hw_clear_bits(&cfg->pio->ctrl, mask); -} - -uint32_t dvi_single_to_diff(uint32_t in) { - uint32_t accum = 0; - const uint TMDS_SIZE = 10; - for (int i = 0; i < TMDS_SIZE; ++i) { - accum <<= 2; - if (in & 1 << (TMDS_SIZE - 1)) - accum |= 0x1; - else - accum |= 0x2; - in <<= 1; + pwm_set_enabled(pwm_gpio_to_slice_num(cfg->pins_clk), true); + } + else { + hw_clear_bits(&cfg->pio->ctrl, mask); + pwm_set_enabled(pwm_gpio_to_slice_num(cfg->pins_clk), false); } - return accum; } diff --git a/software/libdvi/dvi_serialiser.h b/software/libdvi/dvi_serialiser.h index 9e0845b..d978f60 100644 --- a/software/libdvi/dvi_serialiser.h +++ b/software/libdvi/dvi_serialiser.h @@ -13,7 +13,6 @@ struct dvi_serialiser_cfg { uint pins_clk; bool invert_diffpairs; uint prog_offs; - uint prog_offs_clk; }; void dvi_serialiser_init(struct dvi_serialiser_cfg *cfg); diff --git a/software/libdvi/dvi_serialiser.pio b/software/libdvi/dvi_serialiser.pio index cb0720a..520c8e0 100644 --- a/software/libdvi/dvi_serialiser.pio +++ b/software/libdvi/dvi_serialiser.pio @@ -1,73 +1,51 @@ -.program dvi_serialiser_clk -.side_set 2 - -.wrap_target - out pins, 2 side 0b10 - out pins, 2 side 0b10 - out pins, 2 side 0b10 - out pins, 2 side 0b10 - out pins, 2 side 0b10 - out pins, 2 side 0b01 - out pins, 2 side 0b01 - out pins, 2 
side 0b01 - out pins, 2 side 0b01 - out pins, 2 side 0b01 -.wrap - - .program dvi_serialiser +.side_set 2 +.origin 0 -.wrap_target - out pins, 2 -.wrap +; Single-ended -> differential serial + out pc, 1 side 0b10 + out pc, 1 side 0b01 + +.program dvi_serialiser_debug +.side_set 1 opt ; The debug variant behaves as a UART with 1 start bit, 10 data bits, 1 stop ; bit, and 5/6ths the data throughput of the TMDS version. -.program dvi_serialiser_debug -.side_set 2 opt - -.wrap_target - pull side 0x1 ; FIFO stall extends stop bit - nop side 0x2 - out pins, 2 - out pins, 2 - out pins, 2 - out pins, 2 - out pins, 2 - out pins, 2 - out pins, 2 - out pins, 2 - out pins, 2 - out pins, 2 -.wrap - + pull ifempty side 1 ; Extend stop bit with FIFO stall + nop side 0 + out pins, 1 ; Unrolled because we require 1 bit / clk + out pins, 1 + out pins, 1 + out pins, 1 + out pins, 1 + out pins, 1 + out pins, 1 + out pins, 1 + out pins, 1 + out pins, 1 % c-sdk { -static inline void dvi_serialiser_program_init(PIO pio, uint sm, uint offset, uint data_pins, uint clk_pins, bool clk, bool debug) { - pio_sm_set_pins_with_mask(pio, sm, 1u << data_pins | 1u << clk_pins, 3u << data_pins | 3u << clk_pins); - pio_sm_set_pindirs_with_mask(pio, sm, ~0u, 3u << data_pins | 3u << clk_pins); - // Pseudo-differential pairs: +#include "dvi_config_defs.h" + +static inline void dvi_serialiser_program_init(PIO pio, uint sm, uint offset, uint data_pins, bool debug) { + pio_sm_set_pins_with_mask(pio, sm, 2u << data_pins, 3u << data_pins); + pio_sm_set_pindirs_with_mask(pio, sm, ~0u, 3u << data_pins); pio_gpio_init(pio, data_pins); pio_gpio_init(pio, data_pins + 1); - pio_gpio_init(pio, clk_pins); - pio_gpio_init(pio, clk_pins + 1); + pio_sm_config c; if (debug) { c = dvi_serialiser_debug_program_get_default_config(offset); - sm_config_set_sideset_pins(&c, data_pins); - } - else if (clk) { - c = dvi_serialiser_clk_program_get_default_config(offset); - sm_config_set_sideset_pins(&c, clk_pins); } else { c = 
dvi_serialiser_program_get_default_config(offset); } - sm_config_set_out_pins(&c, data_pins, 2); - // Each TMDS symbol is 10 pairs of pseudo-differential bits: - sm_config_set_out_shift(&c, true, !debug, 20); + sm_config_set_sideset_pins(&c, data_pins); + if (debug) + sm_config_set_out_pins(&c, data_pins, 1); + sm_config_set_out_shift(&c, true, !debug, 10 * DVI_SYMBOLS_PER_WORD); sm_config_set_fifo_join(&c, PIO_FIFO_JOIN_TX); pio_sm_init(pio, sm, offset, &c); pio_sm_set_enabled(pio, sm, false); diff --git a/software/libdvi/dvi_timing.c b/software/libdvi/dvi_timing.c index 958d0b2..aa8308f 100644 --- a/software/libdvi/dvi_timing.c +++ b/software/libdvi/dvi_timing.c @@ -190,21 +190,25 @@ const struct dvi_timing __dvi_const(dvi_timing_1600x900p_reduced_30hz) = { // four regular IRQs per scanline and return early from 3 of them, but this // breaks down when you have very short scanline sections like guard bands. -// Note we particularly want these to be in memory because these addresses get -// a LOT of DMA traffic! +// Each symbol appears twice, concatenated in one word. 
Note these must be in +// RAM because they see a lot of DMA traffic const uint32_t __dvi_const(dvi_ctrl_syms)[4] = { - 0x5999a, - 0xa6665, - 0x9999a, - 0x66665, + 0xd5354, + 0x2acab, + 0x55154, + 0xaaeab }; // Output solid red scanline if we are given NULL for tmdsbuff -static uint32_t __attribute__((aligned(8))) __dvi_const(empty_scanline_tmds)[6] = { - 0x9aaaa, 0x95555, // 0x00 - 0x9aaaa, 0x95555, // 0x00 - 0x6aaa9, 0x65556 // 0xfc +#if DVI_SYMBOLS_PER_WORD == 2 +static uint32_t __attribute__((aligned(8))) __dvi_const(empty_scanline_tmds)[3] = { + 0x7fd00u, // 0x00, 0x01 + 0x7fd00u, // 0x00, 0x01 + 0xbfa01u // 0xfc, 0xfd }; +#else +#error "Can't handle empty scanlines with pixel-per-word right now" +#endif void dvi_timing_state_init(struct dvi_timing_state *t) { t->v_ctr = 0; @@ -255,17 +259,17 @@ void dvi_setup_scanline_for_vblank(const struct dvi_timing *t, const struct dvi_ const uint32_t *sym_no_sync = get_ctrl_sym(false, false ); dma_cb_t *synclist = dvi_lane_from_list(l, TMDS_SYNC_LANE); - _set_data_cb(&synclist[0], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_front_porch, 2, false); - _set_data_cb(&synclist[1], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_on, t->h_sync_width, 2, false); - _set_data_cb(&synclist[2], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_back_porch, 2, true); - _set_data_cb(&synclist[3], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_active_pixels, 2, false); + _set_data_cb(&synclist[0], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_front_porch / DVI_SYMBOLS_PER_WORD, 2, false); + _set_data_cb(&synclist[1], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_on, t->h_sync_width / DVI_SYMBOLS_PER_WORD, 2, false); + _set_data_cb(&synclist[2], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_back_porch / DVI_SYMBOLS_PER_WORD, 2, true); + _set_data_cb(&synclist[3], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_active_pixels / DVI_SYMBOLS_PER_WORD, 2, false); for (int i = 0; i < N_TMDS_LANES; ++i) { if (i == TMDS_SYNC_LANE) continue; dma_cb_t *cblist = dvi_lane_from_list(l, i); -
_set_data_cb(&cblist[0], &dma_cfg[i], sym_no_sync, t->h_front_porch + t->h_sync_width + t->h_back_porch, 2, false); - _set_data_cb(&cblist[1], &dma_cfg[i], sym_no_sync, t->h_active_pixels, 2, false); + _set_data_cb(&cblist[0], &dma_cfg[i], sym_no_sync,(t->h_front_porch + t->h_sync_width + t->h_back_porch) / DVI_SYMBOLS_PER_WORD, 2, false); + _set_data_cb(&cblist[1], &dma_cfg[i], sym_no_sync, t->h_active_pixels / DVI_SYMBOLS_PER_WORD, 2, false); } } @@ -277,23 +281,26 @@ void dvi_setup_scanline_for_active(const struct dvi_timing *t, const struct dvi_ const uint32_t *sym_no_sync = get_ctrl_sym(false, false ); dma_cb_t *synclist = dvi_lane_from_list(l, TMDS_SYNC_LANE); - _set_data_cb(&synclist[0], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_front_porch, 2, false); - _set_data_cb(&synclist[1], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_on, t->h_sync_width, 2, false); - _set_data_cb(&synclist[2], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_back_porch, 2, true); + _set_data_cb(&synclist[0], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_front_porch / DVI_SYMBOLS_PER_WORD, 2, false); + _set_data_cb(&synclist[1], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_on, t->h_sync_width / DVI_SYMBOLS_PER_WORD, 2, false); + _set_data_cb(&synclist[2], &dma_cfg[TMDS_SYNC_LANE], sym_hsync_off, t->h_back_porch / DVI_SYMBOLS_PER_WORD, 2, true); for (int i = 0; i < N_TMDS_LANES; ++i) { dma_cb_t *cblist = dvi_lane_from_list(l, i); if (i != TMDS_SYNC_LANE) { - _set_data_cb(&cblist[0], &dma_cfg[i], sym_no_sync, t->h_front_porch + t->h_sync_width + t->h_back_porch, 2, false); + _set_data_cb(&cblist[0], &dma_cfg[i], sym_no_sync, + (t->h_front_porch + t->h_sync_width + t->h_back_porch) / DVI_SYMBOLS_PER_WORD, 2, false); } int target_block = i == TMDS_SYNC_LANE ? 
DVI_SYNC_LANE_CHUNKS - 1 : DVI_NOSYNC_LANE_CHUNKS - 1; if (tmdsbuf) { // Non-repeating DMA for the freshly-encoded TMDS buffer - _set_data_cb(&cblist[target_block], &dma_cfg[i], tmdsbuf + i * t->h_active_pixels, t->h_active_pixels, 0, false); + _set_data_cb(&cblist[target_block], &dma_cfg[i], tmdsbuf + i * (t->h_active_pixels / DVI_SYMBOLS_PER_WORD), + t->h_active_pixels / DVI_SYMBOLS_PER_WORD, 0, false); } else { // 8-byte read ring mode to repeat the correct DC-balanced symbol pair on blank scanlines - _set_data_cb(&cblist[target_block], &dma_cfg[i], &empty_scanline_tmds[2 * i], t->h_active_pixels, 3, false); + _set_data_cb(&cblist[target_block], &dma_cfg[i], &empty_scanline_tmds[2 * i / DVI_SYMBOLS_PER_WORD], + t->h_active_pixels / DVI_SYMBOLS_PER_WORD, DVI_SYMBOLS_PER_WORD == 2 ? 2 : 3, false); /* read-ring size is log2(bytes): each lane repeats one 4-byte word when a word holds 2 symbols, or an 8-byte pair otherwise */ } } } @@ -303,7 +310,7 @@ void __dvi_func(dvi_update_scanline_data_dma)(const struct dvi_timing *t, const #if DVI_MONOCHROME_TMDS const uint32_t *lane_tmdsbuf = tmdsbuf; #else - const uint32_t *lane_tmdsbuf = tmdsbuf + i * t->h_active_pixels; + const uint32_t *lane_tmdsbuf = tmdsbuf + i * t->h_active_pixels / DVI_SYMBOLS_PER_WORD; #endif if (i == TMDS_SYNC_LANE) dvi_lane_from_list(l, i)[3].read_addr = lane_tmdsbuf; diff --git a/software/libdvi/tmds_encode.S b/software/libdvi/tmds_encode.S index 3e21708..9a68fd1 100644 --- a/software/libdvi/tmds_encode.S +++ b/software/libdvi/tmds_encode.S @@ -44,7 +44,7 @@ decl_func tmds_encode_loop_16bpp push {r4, r5, r6, r7, lr} - lsls r2, #3 + lsls r2, #2 add r2, r1 mov ip, r2 ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET) @@ -55,10 +55,12 @@ decl_func tmds_encode_loop_16bpp ldmia r0!, {r4} str r4, [r2, #ACCUM0_OFFS] ldr r4, [r2, #PEEK0_OFFS] - ldmia r4, {r4, r5} + ldr r4, [r4] ldr r6, [r2, #PEEK1_OFFS] - ldmia r6, {r6, r7} - stmia r1!, {r4, r5, r6, r7} + ldr r6, [r6] + // TODO our pixels are now 2 per word instead of 1 per word, so this store is + // now 2 words instead of 4; reexpand it. 
+ stmia r1!, {r4, r6} .endr 2: cmp r1, ip @@ -75,7 +77,7 @@ decl_func tmds_encode_loop_16bpp decl_func tmds_encode_loop_16bpp_leftshift push {r4, r5, r6, r7, lr} - lsls r2, #3 + lsls r2, #2 add r2, r1 mov ip, r2 ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET) @@ -87,10 +89,10 @@ decl_func tmds_encode_loop_16bpp_leftshift lsls r4, r3 str r4, [r2, #ACCUM0_OFFS] ldr r4, [r2, #PEEK0_OFFS] - ldmia r4, {r4, r5} + ldr r4, [r4] ldr r6, [r2, #PEEK1_OFFS] - ldmia r6, {r6, r7} - stmia r1!, {r4, r5, r6, r7} + ldr r6, [r6] + stmia r1!, {r4, r6} .endr 2: cmp r1, ip @@ -103,7 +105,7 @@ decl_func tmds_encode_loop_16bpp_leftshift decl_func tmds_encode_loop_8bpp push {r4, r5, r6, r7, lr} - lsls r2, #3 + lsls r2, #2 add r2, r1 mov ip, r2 ldr r2, =(SIO_BASE + SIO_INTERP0_ACCUM0_OFFSET) @@ -112,17 +114,16 @@ decl_func tmds_encode_loop_8bpp 1: .rept TMDS_ENCODE_UNROLL ldmia r0!, {r4} - str r4, [r2, #ACCUM0_OFFS] str r4, [r2, #ACCUM0_OFFS + INTERP1] + str r4, [r2, #ACCUM0_OFFS] ldr r4, [r2, #PEEK0_OFFS] - ldmia r4, {r4, r5} - ldr r6, [r2, #PEEK1_OFFS] - ldmia r6, {r6, r7} - stmia r1!, {r4, r5, r6, r7} - ldr r4, [r2, #PEEK0_OFFS + INTERP1] - ldmia r4, {r4, r5} - ldr r6, [r2, #PEEK1_OFFS + INTERP1] - ldmia r6, {r6, r7} + ldr r4, [r4] + ldr r5, [r2, #PEEK1_OFFS] + ldr r5, [r5] + ldr r6, [r2, #PEEK0_OFFS + INTERP1] + ldr r6, [r6] + ldr r7, [r2, #PEEK1_OFFS + INTERP1] + ldr r7, [r7] stmia r1!, {r4, r5, r6, r7} .endr 2: @@ -155,14 +156,13 @@ decl_func tmds_encode_loop_8bpp_leftshift lsls r4, r3 str r4, [r2, #ACCUM0_OFFS] ldr r4, [r2, #PEEK0_OFFS] - ldmia r4, {r4, r5} - ldr r6, [r2, #PEEK1_OFFS] - ldmia r6, {r6, r7} - stmia r1!, {r4, r5, r6, r7} - ldr r4, [r2, #PEEK0_OFFS + INTERP1] - ldmia r4, {r4, r5} - ldr r6, [r2, #PEEK1_OFFS + INTERP1] - ldmia r6, {r6, r7} + ldr r4, [r4] + ldr r5, [r2, #PEEK1_OFFS] + ldr r5, [r5] + ldr r6, [r2, #PEEK0_OFFS + INTERP1] + ldr r6, [r6] + ldr r7, [r2, #PEEK1_OFFS + INTERP1] + ldr r7, [r7] stmia r1!, {r4, r5, r6, r7} .endr 2: diff --git 
a/software/libdvi/tmds_table.h b/software/libdvi/tmds_table.h index ce7c52d..216100d 100644 --- a/software/libdvi/tmds_table.h +++ b/software/libdvi/tmds_table.h @@ -4,73 +4,73 @@ // with data content *almost* equal (1 LSB off) to input value left shifted by // two. The pairs of symbols have a net DC balance of 0. // -// Each symbol is represented by a 20 bit value consisting of 10 differential -// bit pairs. +// The two symbols are concatenated in the 20 LSBs of a data word, with the +// first symbol in least-significant position. // // Note the declaration isn't included here, just the table body. This is in // case you want multiple copies of the table in different SRAMs (particularly // scratch X/Y). -0x9aaaa, 0x95555, -0x9555a, 0x9aaa5, -0x9556a, 0x9aa95, -0x9aa9a, 0x95565, -0x955aa, 0x9aa55, -0x9aa5a, 0x955a5, -0x9aa6a, 0x95595, -0x9559a, 0x9aa65, -0x956aa, 0x9a955, -0x9a95a, 0x956a5, -0x9a96a, 0x95695, -0x9569a, 0x9a965, -0x9a9aa, 0x95655, -0x9565a, 0x9a9a5, -0x9566a, 0x9a995, -0x69aa9, 0x66556, -0x95aaa, 0x9a555, -0x9a55a, 0x95aa5, -0x9a56a, 0x95a95, -0x95a9a, 0x9a565, -0x9a5aa, 0x95a55, -0x95a5a, 0x9a5a5, -0x95a6a, 0x9a595, -0x696a9, 0x66956, -0x9a6aa, 0x95955, -0x9595a, 0x9a6a5, -0x9596a, 0x9a695, -0x695a9, 0x66a56, -0x959aa, 0x9a655, -0x69569, 0x66a96, -0x69559, 0x66aa6, -0x66aa9, 0x69556, -0x96aaa, 0x99555, -0x9955a, 0x96aa5, -0x9956a, 0x96a95, -0x96a9a, 0x99565, -0x995aa, 0x96a55, -0x96a5a, 0x995a5, -0x96a6a, 0x99595, -0x6a6a9, 0x65956, -0x996aa, 0x96955, -0x9695a, 0x996a5, -0x9696a, 0x99695, -0x6a5a9, 0x65a56, -0x969aa, 0x99655, -0x6a569, 0x65a96, -0x6a559, 0x65aa6, -0x65aa9, 0x6a556, -0x99aaa, 0x96555, -0x9655a, 0x99aa5, -0x9656a, 0x99a95, -0x6a9a9, 0x65656, -0x965aa, 0x99a55, -0x6a969, 0x65696, -0x6a959, 0x656a6, -0x656a9, 0x6a956, -0x966aa, 0x99955, -0x6aa69, 0x65596, -0x6aa59, 0x655a6, -0x655a9, 0x6aa56, -0x6aa99, 0x65566, -0x65569, 0x6aa96, -0x65559, 0x6aaa6, -0x6aaa9, 0x65556, +0x7fd00u, +0x40dfcu, +0x41df8u, +0x7ed04u, +0x43df0u, 
+0x7cd0cu, +0x7dd08u, +0x42df4u, +0x47de0u, +0x78d1cu, +0x79d18u, +0x46de4u, +0x7bd10u, +0x44decu, +0x45de8u, +0xafa41u, +0x4fdc0u, +0x70d3cu, +0x71d38u, +0x4edc4u, +0x73d30u, +0x4cdccu, +0x4ddc8u, +0xa7a61u, +0x77d20u, +0x48ddcu, +0x49dd8u, +0xa3a71u, +0x4bdd0u, +0xa1a79u, +0xa0a7du, +0x9fa81u, +0x5fd80u, +0x60d7cu, +0x61d78u, +0x5ed84u, +0x63d70u, +0x5cd8cu, +0x5dd88u, +0xb7a21u, +0x67d60u, +0x58d9cu, +0x59d98u, +0xb3a31u, +0x5bd90u, +0xb1a39u, +0xb0a3du, +0x8fac1u, +0x6fd40u, +0x50dbcu, +0x51db8u, +0xbba11u, +0x53db0u, +0xb9a19u, +0xb8a1du, +0x87ae1u, +0x57da0u, +0xbda09u, +0xbca0du, +0x83af1u, +0xbea05u, +0x81af9u, +0x80afdu, +0xbfa01u, diff --git a/software/libdvi/tmds_table_gen.py b/software/libdvi/tmds_table_gen.py index 8b39b35..e66c0da 100755 --- a/software/libdvi/tmds_table_gen.py +++ b/software/libdvi/tmds_table_gen.py @@ -81,20 +81,34 @@ def differentialise(x, n): enc = TMDSEncode() -def disptable_format(sym): - return differentialise(sym, 10) | ((popcount(sym) * 2 - 10 & 0x3f) << 26) +### +# Pixel-doubled table: -print("// Non-negative running disparity:") -for i in range(0, 256, 4): - enc.imbalance = 1 - print("0x{:08x},".format(disptable_format(enc.encode(i, 0, 1)))) +# for i in range(0, 256, 4): +# sym0 = enc.encode(i, 0, 1) +# sym1 = enc.encode(i ^ 1, 0, 1) +# print(f"0x{sym0 | (sym1 << 10):05x}u,") -print("// Negative running disparity:") -for i in range(0, 256, 4): - enc.imbalance = -1 - print("0x{:08x},".format(disptable_format(enc.encode(i, 0, 1)))) +### +# Fullres table stuff: +# def disptable_format(sym): +# return differentialise(sym, 10) | ((popcount(sym) * 2 - 10 & 0x3f) << 26) -# for i in range(4): -# print("0x{:05x},".format(differentialise(enc.encode(0, i, 0), 10))) +# print("// Non-negative running disparity:") +# for i in range(0, 256, 4): +# enc.imbalance = 1 +# print("0x{:08x},".format(disptable_format(enc.encode(i, 0, 1)))) + +# print("// Negative running disparity:") +# for i 
in range(0, 256, 4): +# enc.imbalance = -1 +# print("0x{:08x},".format(disptable_format(enc.encode(i, 0, 1)))) + +### +# Control symbols: + +for i in range(4): + sym = enc.encode(0, i, 0) + print(f"0x{sym << 10 | sym:05x},")