kopia lustrzana https://github.com/hoglet67/RGBtoHDMI
Further speed optimisations for fastest capture loops
rodzic
68c49f461c
commit
503b904297
|
@ -50,7 +50,7 @@ loop:
|
|||
addeq r0, r0, r2
|
||||
stmeqia r0, {r7, r10}
|
||||
subeq r0, r0, r2
|
||||
add r0, r0, #8
|
||||
add r0, r0, #8
|
||||
subs r1, r1, #1
|
||||
bne loop
|
||||
pop {pc}
|
||||
|
@ -100,7 +100,7 @@ loop_8bpp:
|
|||
addeq r0, r0, r2
|
||||
stmeqia r0, {r5, r6, r7, r10}
|
||||
subeq r0, r0, r2
|
||||
add r0, r0, #16
|
||||
add r0, r0, #16
|
||||
subs r1, r1, #1
|
||||
bne loop_8bpp
|
||||
pop {pc}
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
//
|
||||
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
|
||||
|
||||
|
||||
b preload_capture_line_fast_sixbits
|
||||
capture_line_fast_sixbits_4bpp:
|
||||
push {lr}
|
||||
|
@ -30,26 +29,25 @@ capture_line_fast_sixbits_4bpp:
|
|||
eorne r11, r11, #0x50 //magenta in leftmost
|
||||
eorne r11, r11, #0x02000000 //green in rightmost
|
||||
mov r1, r1, lsr #1
|
||||
SKIP_PSYNC_FAST
|
||||
SKIP_PSYNC_VERY_FAST
|
||||
loop:
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_0_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_0_BITS_WIDE_R11 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_1_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_2_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r7, r10, r11
|
||||
MOV r7, r10
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_0_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_0_BITS_WIDE_R11 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_1_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_2_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r10, r10, r11
|
||||
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
stmia r0!, {r7, r10}
|
||||
subs r1, r1, #1
|
||||
bne loop
|
||||
|
@ -75,33 +73,32 @@ capture_line_fast_sixbits_8bpp:
|
|||
eorne r11, r11, #0x05 //magenta in leftmost
|
||||
eorne r12, r12, #0x02000000 //green in rightmost
|
||||
mov r1, r1, lsr #1
|
||||
SKIP_PSYNC_FAST
|
||||
SKIP_PSYNC_VERY_FAST
|
||||
loop_8bpp:
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE_R11 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r5, r10, r11
|
||||
mov r5, r10
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE_R12 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r6, r10, r12
|
||||
mov r6, r10
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE_R11 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r7, r10, r11
|
||||
mov r7, r10
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE_R12 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r10, r10, r12
|
||||
stmia r0!, {r5, r6, r7, r10}
|
||||
subs r1, r1, #1
|
||||
bne loop_8bpp
|
||||
pop {pc}
|
||||
|
||||
|
||||
preload_capture_line_fast_sixbits_8bpp:
|
||||
SETUP_DUMMY_PARAMETERS
|
||||
b capture_line_fast_sixbits_8bpp
|
||||
|
|
|
@ -11,15 +11,18 @@
|
|||
.global sentinel
|
||||
.global paletteFlags
|
||||
.global inBandPointer
|
||||
.global paletteHighNibble
|
||||
|
||||
// The capture line function is provided the following:
|
||||
// r0 = pointer to current line in frame buffer
|
||||
// r1 = number of 8-pixel blocks to capture (=param_chars_per_line)
|
||||
// r1 = number of complete psync cycles to capture (=param_chars_per_line)
|
||||
// r2 = frame buffer line pitch in bytes (=param_fb_pitch)
|
||||
// r3 = flags register
|
||||
// r4 = GPLEV0 constant
|
||||
// r5 = frame buffer height (=param_fb_height)
|
||||
// r5 = line number count down to 0
|
||||
// r6 = scan line count modulo 10
|
||||
// r7 = number of psyncs to skip
|
||||
// r8 = frame buffer height (=param_fb_height)
|
||||
//
|
||||
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
|
||||
|
||||
|
@ -27,6 +30,9 @@
|
|||
inBandData:
|
||||
.space 32768, 0
|
||||
|
||||
paletteHighNibble:
|
||||
.space 1024, 0
|
||||
|
||||
b preload_capture_line_inband_4bpp
|
||||
capture_line_inband_4bpp:
|
||||
push {lr}
|
||||
|
@ -179,15 +185,22 @@ inBandPointer:
|
|||
b preload_capture_line_inband_8bpp
|
||||
capture_line_inband_8bpp:
|
||||
push {lr}
|
||||
ldr r11, inBandPointer
|
||||
adrl r9, paletteHighNibble
|
||||
subs r5, r5, #VERTICAL_OFFSET //r5 = line number count down to 0
|
||||
movmi r5, #0
|
||||
cmp r5, #0x100
|
||||
movge r5, #0xff
|
||||
rsb r5, r5, #0xff
|
||||
ldrb r5, [r9, r5]
|
||||
ldr r8, paletteFlags
|
||||
mov r6, #0
|
||||
tst r8, #BIT_MULTI_PALETTE
|
||||
bic r3, #MASKDUP_PALETTE_HIGH_NIBBLE
|
||||
orrne r3, r3, r5, lsl #OFFSETDUP_PALETTE_HIGH_NIBBLE
|
||||
tst r8, #BIT_SET_MODE2_16COLOUR
|
||||
orrne r3, r3, #BITDUP_MODE2_16COLOUR
|
||||
biceq r3, r3, #BITDUP_MODE2_16COLOUR
|
||||
mov r8, r8, lsr #28
|
||||
bic r3, #MASKDUP_PALETTE_HIGH_NIBBLE
|
||||
orr r3, r3, r8, lsl #OFFSETDUP_PALETTE_HIGH_NIBBLE
|
||||
mov r6, #0
|
||||
ldr r11, inBandPointer
|
||||
ldr r12, sentinel // 32 bit sentinel
|
||||
SKIP_PSYNC
|
||||
mov r7, #0
|
||||
|
|
|
@ -9,12 +9,14 @@
|
|||
|
||||
// The capture line function is provided the following:
|
||||
// r0 = pointer to current line in frame buffer
|
||||
// r1 = number of 8-pixel blocks to capture (=param_chars_per_line)
|
||||
// r1 = number of complete psync cycles to capture (=param_chars_per_line)
|
||||
// r2 = frame buffer line pitch in bytes (=param_fb_pitch)
|
||||
// r3 = flags register
|
||||
// r4 = GPLEV0 constant
|
||||
// r5 = frame buffer height (=param_fb_height)
|
||||
// r5 = line number count down to 0
|
||||
// r6 = scan line count modulo 10
|
||||
// r7 = number of psyncs to skip
|
||||
// r8 = frame buffer height (=param_fb_height)
|
||||
//
|
||||
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
|
||||
|
||||
|
@ -29,16 +31,16 @@ capture_line_mode7_4bpp:
|
|||
tst r3, #BIT_CALIBRATE
|
||||
bne process_chars_7_none
|
||||
|
||||
ands r8, r3, #MASK_INTERLACE
|
||||
ands r5, r3, #MASK_INTERLACE
|
||||
beq process_chars_7_none // DEINTERLACE_NONE
|
||||
|
||||
mov r9, r8, lsr #OFFSET_INTERLACE // put interlace setting in R9 0-6
|
||||
mov r9, r5, lsr #OFFSET_INTERLACE // put interlace setting in R9 0-6
|
||||
|
||||
cmp r9, #1 //DEINTERLACE_BOB
|
||||
beq process_chars_7_bob
|
||||
|
||||
tst r3, #BIT_FIELD_TYPE // test odd or even field
|
||||
mla r11, r5, r2, r0 // offset to second buffer used for comparison not for display
|
||||
mla r11, r8, r2, r0 // offset to second buffer used for comparison not for display
|
||||
// now absolute address of pixel group in comparison buffer
|
||||
rsbeq r2, r2,#0 // negate R2 offset if odd field to write to line above (restored to original value on exit)
|
||||
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
//
|
||||
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
|
||||
|
||||
|
||||
b preload_capture_line_sixbits
|
||||
capture_line_sixbits_4bpp:
|
||||
push {lr}
|
||||
|
@ -33,23 +32,22 @@ capture_line_sixbits_4bpp:
|
|||
SKIP_PSYNC_FAST
|
||||
loop:
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_0_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_0_BITS_WIDE_R11 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_1_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_2_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r7, r10, r11
|
||||
MOV r7, r10
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_0_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_0_BITS_WIDE_R11 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_1_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_2_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r10, r10, r11
|
||||
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
|
||||
stmia r0, {r7, r10}
|
||||
tst r3, #BIT_SCANLINES
|
||||
movne r7, #0
|
||||
|
@ -58,7 +56,7 @@ loop:
|
|||
addeq r0, r0, r2
|
||||
stmeqia r0, {r7, r10}
|
||||
subeq r0, r0, r2
|
||||
add r0, r0, #8
|
||||
add r0, r0, #8
|
||||
subs r1, r1, #1
|
||||
bne loop
|
||||
pop {pc}
|
||||
|
@ -71,7 +69,6 @@ preload_capture_line_sixbits:
|
|||
|
||||
// *** 8 bit ***
|
||||
|
||||
|
||||
b preload_capture_line_sixbits_8bpp
|
||||
capture_line_sixbits_8bpp:
|
||||
push {lr}
|
||||
|
@ -87,25 +84,24 @@ capture_line_sixbits_8bpp:
|
|||
SKIP_PSYNC_FAST
|
||||
loop_8bpp:
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE_R11 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r5, r10, r11
|
||||
mov r5, r10
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE_R12 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r6, r10, r12
|
||||
mov r6, r10
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE_R11 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r7, r10, r11
|
||||
mov r7, r10
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
CAPTURE_LOW_BITS_8BPP_WIDE_R12 // input in r8, result in r10, corrupts r9
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
|
||||
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
|
||||
eor r10, r10, r12
|
||||
stmia r0, {r5, r6, r7, r10}
|
||||
tst r3, #BIT_SCANLINES
|
||||
movne r5, #0
|
||||
|
@ -116,11 +112,11 @@ loop_8bpp:
|
|||
addeq r0, r0, r2
|
||||
stmeqia r0, {r5, r6, r7, r10}
|
||||
subeq r0, r0, r2
|
||||
add r0, r0, #16
|
||||
add r0, r0, #16
|
||||
subs r1, r1, #1
|
||||
bne loop_8bpp
|
||||
pop {pc}
|
||||
|
||||
|
||||
preload_capture_line_sixbits_8bpp:
|
||||
SETUP_DUMMY_PARAMETERS
|
||||
b capture_line_sixbits_8bpp
|
||||
|
|
129
src/macros.S
129
src/macros.S
|
@ -1,3 +1,28 @@
|
|||
// WAIT_FOR_CSYNC_0: busy-wait until the CSYNC_MASK bit of the word at [r4] reads 0.
// In:    r4 = address to poll (GPLEV0, per the capture-line register conventions)
// Out:   r8 = last word read, with the CSYNC_MASK bit clear
// Clobb: r8, flags
// The bit must read 0 on two consecutive loads before the macro falls through;
// if the second load sees it set, the wait restarts from the top.
.macro WAIT_FOR_CSYNC_0
|
||||
waitlo\@:
|
||||
// Read the GPLEV0
|
||||
ldr r8, [r4]
|
||||
tst r8, #CSYNC_MASK
|
||||
bne waitlo\@
|
||||
// Check again in case of noise
|
||||
ldr r8, [r4]
|
||||
tst r8, #CSYNC_MASK
|
||||
bne waitlo\@
|
||||
.endm
|
||||
|
||||
// WAIT_FOR_CSYNC_1: busy-wait until the CSYNC_MASK bit of the word at [r4] reads 1.
// In:    r4 = address to poll (GPLEV0, per the capture-line register conventions)
// Out:   r8 = last word read, with the CSYNC_MASK bit set
// Clobb: r8, flags
// The bit must read 1 on two consecutive loads before the macro falls through;
// if the second load sees it clear, the wait restarts from the top.
.macro WAIT_FOR_CSYNC_1
|
||||
waithi\@:
|
||||
// Read the GPLEV0
|
||||
ldr r8, [r4]
|
||||
tst r8, #CSYNC_MASK
|
||||
beq waithi\@
|
||||
// Check again in case of noise
|
||||
ldr r8, [r4]
|
||||
tst r8, #CSYNC_MASK
|
||||
beq waithi\@
|
||||
.endm
|
||||
|
||||
|
||||
// Wait for the next edge on psync
|
||||
// if r3 bit 17 = 0 - wait for falling edge
|
||||
// if r3 bit 17 = 1 - wait for rising edge
|
||||
|
@ -51,23 +76,23 @@ waitPF\@:
|
|||
// - a "normal" hsync is 4.0us, increment h_offset by 1
|
||||
// - a "long" hsync is 4.5us, increment h_offset by 2
|
||||
// So test against two thresholds in between these values
|
||||
|
||||
|
||||
// old CPLD V1 & V2 code
|
||||
mov r8, r7
|
||||
cmp r10, #(4000 + 224)
|
||||
addgt r8, r8, #1
|
||||
cmp r10, #(4000 - 224)
|
||||
addgt r8, r8, #1
|
||||
|
||||
|
||||
// new CPLD V3 or later code
|
||||
cmp r10, #(4000 + 224)
|
||||
addlt r7, r7, #1
|
||||
cmp r10, #(4000 - 224)
|
||||
addlt r7, r7, #1
|
||||
|
||||
|
||||
tst r3, #BIT_OLD_CPLDV1V2
|
||||
movne r7, r8
|
||||
|
||||
|
||||
// Skip the configured number of psync edges (modes 0..6: edges every 250ns, mode 7: edges every 333ns)
|
||||
orr r3, r3, #PSYNC_MASK // first edge is a 0->1
|
||||
cmp r7, #0
|
||||
|
@ -82,36 +107,56 @@ skip_psync_loop_exit\@:
|
|||
// SKIP_PSYNC_FAST: measure the low hsync pulse, adjust the psync skip count in r7
// by 0, 1 or 2 depending on its length, then wait for that many psync edges.
// In:    r4 = address polled by the CSYNC/PSYNC wait macros (GPLEV0)
//        r7 = configured number of psync edges to skip
//        r3 = flags register
// Out:   r3 has PSYNC_MASK set; r7 = 0 on exit
// Clobb: r8, r9, r10, flags (plus anything WAIT_FOR_PSYNC_EDGE_FAST and
//        READ_CYCLE_COUNTER touch - those macros are defined elsewhere)
.macro SKIP_PSYNC_FAST
|
||||
// Wait for the start of hsync
|
||||
WAIT_FOR_CSYNC_0
|
||||
READ_CYCLE_COUNTER r10
|
||||
// Wait for the end of hsync
|
||||
WAIT_FOR_CSYNC_1
|
||||
READ_CYCLE_COUNTER r9
|
||||
// Calculate length of low hsync pulse (in ARM cycles = ns)
|
||||
subs r10, r9, r10
|
||||
rsbmi r10, r10, #0 // negate a negative difference so r10 holds the magnitude
|
||||
// Calculate length of low hsync pulse (in ARM cycles = ns)
|
||||
// Start with the configured horizontal offset
|
||||
// Implement half character horizontal scrolling:
|
||||
// - a "short" hsync is 3.5us, leave h_offset as-is
|
||||
// - a "normal" hsync is 4.0us, increment h_offset by 1
|
||||
// - a "long" hsync is 4.5us, increment h_offset by 2
|
||||
// So test against two thresholds in between these values
|
||||
|
||||
|
||||
// new CPLD code only in FAST version (not called from CPLD v1 & v2)
// NOTE(review): with addlt, a pulse below (4000 - 224) adds 2 to r7 and a pulse
// above (4000 + 224) adds 0, which is the opposite sense to the short/normal/long
// list above (that list matches the addgt form used for old CPLDs) - confirm intended.
|
||||
cmp r10, #(4000 + 224)
|
||||
addlt r7, r7, #1
|
||||
cmp r10, #(4000 - 224)
|
||||
addlt r7, r7, #1
|
||||
// Skip the configured number of psync edges (modes 0..6: edges every 250ns, mode 7: edges every 333ns)
|
||||
orr r3, r3, #PSYNC_MASK // first edge is a 0->1
|
||||
cmp r7, #0
|
||||
beq skip_psync_loop_fast_exit\@ // nothing to skip: do not wait for any edge
|
||||
skip_psync_loop_fast\@:
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // wait for next edge of psync
|
||||
subs r7, r7, #1
|
||||
bne skip_psync_loop_fast\@
|
||||
skip_psync_loop_fast_exit\@:
|
||||
.endm
|
||||
|
||||
.macro CAPTURE_0_BITS_WIDE
|
||||
// SKIP_PSYNC_VERY_FAST: wait for csync to be seen high and then low, then wait
// for r7 + 1 psync edges. Unlike SKIP_PSYNC_FAST it takes no cycle-counter
// measurement of the hsync pulse and has no zero-count shortcut: at least one
// psync edge is always waited for.
// In:    r4 = address polled by the CSYNC/PSYNC wait macros (GPLEV0)
//        r7 = configured number of psync edges to skip
//        r3 = flags register
// Out:   r3 has PSYNC_MASK set; r7 = 0 on exit
// Clobb: r8, flags (plus anything WAIT_FOR_PSYNC_EDGE_FAST touches - defined elsewhere)
.macro SKIP_PSYNC_VERY_FAST
|
||||
WAIT_FOR_CSYNC_1 //note test is reversed
|
||||
WAIT_FOR_CSYNC_0
|
||||
orr r3, r3, #PSYNC_MASK // first edge is a 0->1
|
||||
add r7, r7, #1 // loop below therefore runs r7 + 1 times, never zero
|
||||
skip_psync_loop_very_fast\@:
|
||||
WAIT_FOR_PSYNC_EDGE_FAST // wait for next edge of psync
|
||||
subs r7, r7, #1
|
||||
bne skip_psync_loop_very_fast\@
|
||||
.endm
|
||||
|
||||
.macro CAPTURE_0_BITS_WIDE_R11
|
||||
// Pixel 0 in GPIO 7.. 2 -> 7.. 4
|
||||
// Pixel 1 in GPIO 13.. 8 -> 3.. 0
|
||||
|
||||
and r10, r8, #(0x0f << PIXEL_BASE)
|
||||
and r9, r8, #(0x0f << (PIXEL_BASE + 6))
|
||||
mov r10, r10, lsl #(4 - PIXEL_BASE)
|
||||
orr r10, r10, r9, lsr #(6 + PIXEL_BASE)
|
||||
and r9, r8, #(0x0f << PIXEL_BASE)
|
||||
and r14, r8, #(0x0f << (PIXEL_BASE + 6))
|
||||
eor r10, r11, r9, lsl #(4 - PIXEL_BASE)
|
||||
eor r10, r10, r14, lsr #(6 + PIXEL_BASE)
|
||||
.endm
|
||||
|
||||
.macro CAPTURE_1_BITS_WIDE
|
||||
|
@ -119,9 +164,9 @@ skip_psync_loop_fast\@:
|
|||
// Pixel 1 in GPIO 13.. 8 -> 11.. 8
|
||||
|
||||
and r9, r8, #(0x0f << (PIXEL_BASE))
|
||||
and r8, r8, #(0x0f << (PIXEL_BASE + 6))
|
||||
orr r10, r10, r9, lsl #(12 - PIXEL_BASE)
|
||||
orr r10, r10, r8, lsr #(2 - PIXEL_BASE)
|
||||
and r14, r8, #(0x0f << (PIXEL_BASE + 6))
|
||||
eor r10, r10, r9, lsl #(12 - PIXEL_BASE)
|
||||
eor r10, r10, r14, lsr #(2 - PIXEL_BASE)
|
||||
.endm
|
||||
|
||||
.macro CAPTURE_2_BITS_WIDE
|
||||
|
@ -129,17 +174,17 @@ skip_psync_loop_fast\@:
|
|||
// Pixel 5 in GPIO 13.. 8 -> 19..16
|
||||
and r9, r8, #(0x0f << PIXEL_BASE)
|
||||
and r14, r8, #(0x0f << (PIXEL_BASE + 6))
|
||||
orr r10, r10, r9, lsl #(20 - PIXEL_BASE)
|
||||
orr r10, r10, r14, lsl #(10 - PIXEL_BASE)
|
||||
eor r10, r10, r9, lsl #(20 - PIXEL_BASE)
|
||||
eor r10, r10, r14, lsl #(10 - PIXEL_BASE)
|
||||
.endm
|
||||
|
||||
.macro CAPTURE_3_BITS_WIDE
|
||||
// Pixel 6 in GPIO 7.. 2 -> 31..28
|
||||
// Pixel 7 in GPIO 13..8 -> 27..24
|
||||
and r9, r8, #(0x0f << PIXEL_BASE)
|
||||
and r8, r8, #(0x0f << (PIXEL_BASE + 6))
|
||||
orr r10, r10, r9, lsl #(28 - PIXEL_BASE)
|
||||
orr r10, r10, r8, lsl #(18 - PIXEL_BASE)
|
||||
and r14, r8, #(0x0f << (PIXEL_BASE + 6))
|
||||
eor r10, r10, r9, lsl #(28 - PIXEL_BASE)
|
||||
eor r10, r10, r14, lsl #(18 - PIXEL_BASE)
|
||||
.endm
|
||||
|
||||
.macro CAPTURE_LOW_BITS
|
||||
|
@ -176,14 +221,24 @@ skip_psync_loop_fast\@:
|
|||
orr r10, r10, r8, lsl #(15 - PIXEL_BASE)
|
||||
.endm
|
||||
|
||||
.macro CAPTURE_LOW_BITS_8BPP_WIDE
|
||||
.macro CAPTURE_LOW_BITS_8BPP_WIDE_R11
|
||||
// Pixel 0 in GPIO 7.. 2 -> 7.. 0
|
||||
// Pixel 1 in GPIO 13.. 8 -> 15.. 8
|
||||
|
||||
and r10, r8, #(0x3f << PIXEL_BASE)
|
||||
and r9, r8, #(0x3f << (PIXEL_BASE + 6))
|
||||
mov r10, r10, lsr #(PIXEL_BASE)
|
||||
orr r10, r10, r9, lsl #(8 - (PIXEL_BASE + 6))
|
||||
and r9, r8, #(0x3f << PIXEL_BASE)
|
||||
and r14, r8, #(0x3f << (PIXEL_BASE + 6))
|
||||
eor r10, r11, r9, lsr #(PIXEL_BASE)
|
||||
eor r10, r10, r14, lsl #(8 - (PIXEL_BASE + 6))
|
||||
.endm
|
||||
|
||||
// CAPTURE_LOW_BITS_8BPP_WIDE_R12: start a new 32-bit output word in r10 from two
// 6-bit fields of r8, pre-combined with r12 by exclusive-or.
// In:    r8  = sampled input word (call sites pass the psync wait result here)
//        r12 = value XORed into the result; left unchanged
// Out:   r10 = r12 ^ (field0 >> PIXEL_BASE) ^ (field1 << (8 - (PIXEL_BASE + 6)))
// Clobb: r9, r14
// r14 is the link register, so the enclosing routine must already have saved lr.
.macro CAPTURE_LOW_BITS_8BPP_WIDE_R12
|
||||
// Pixel 0 in GPIO 7.. 2 -> 7.. 0
|
||||
// Pixel 1 in GPIO 13.. 8 -> 15.. 8
|
||||
|
||||
and r9, r8, #(0x3f << PIXEL_BASE) // r9 = first 6-bit field, still in place
|
||||
and r14, r8, #(0x3f << (PIXEL_BASE + 6)) // r14 = second 6-bit field, still in place
|
||||
eor r10, r12, r9, lsr #(PIXEL_BASE) // first field moved down to bit 0, XORed with r12
|
||||
eor r10, r10, r14, lsl #(8 - (PIXEL_BASE + 6)) // second field moved to start at bit 8
|
||||
.endm
|
||||
|
||||
.macro CAPTURE_HIGH_BITS_8BPP_WIDE
|
||||
|
@ -192,8 +247,8 @@ skip_psync_loop_fast\@:
|
|||
|
||||
and r9, r8, #(0x3f << PIXEL_BASE)
|
||||
and r14, r8, #(0x3f << (PIXEL_BASE + 6))
|
||||
orr r10, r10, r9, lsl #(16 - PIXEL_BASE)
|
||||
orr r10, r10, r14, lsl #(24 - (PIXEL_BASE + 6))
|
||||
eor r10, r10, r9, lsl #(16 - PIXEL_BASE)
|
||||
eor r10, r10, r14, lsl #(24 - (PIXEL_BASE + 6))
|
||||
.endm
|
||||
|
||||
|
||||
|
@ -584,30 +639,6 @@ noflip\@:
|
|||
.endm
|
||||
#endif
|
||||
|
||||
.macro WAIT_FOR_CSYNC_0
|
||||
waitlo\@:
|
||||
// Read the GPLEV0
|
||||
ldr r8, [r4]
|
||||
tst r8, #CSYNC_MASK
|
||||
bne waitlo\@
|
||||
// Check again in case of noise
|
||||
ldr r8, [r4]
|
||||
tst r8, #CSYNC_MASK
|
||||
bne waitlo\@
|
||||
.endm
|
||||
|
||||
.macro WAIT_FOR_CSYNC_1
|
||||
waithi\@:
|
||||
// Read the GPLEV0
|
||||
ldr r8, [r4]
|
||||
tst r8, #CSYNC_MASK
|
||||
beq waithi\@
|
||||
// Check again in case of noise
|
||||
ldr r8, [r4]
|
||||
tst r8, #CSYNC_MASK
|
||||
beq waithi\@
|
||||
.endm
|
||||
|
||||
.macro KEY_PRESS_DETECT mask, ret, counter
|
||||
ldr r5, \counter // Load the counter value
|
||||
tst r8, #\mask // Is the button pressed (active low)?
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
.global default_vsync_line
|
||||
.global lock_fail
|
||||
.global customPalette
|
||||
.global paletteHighNibble
|
||||
.global dummyscreen
|
||||
.global hsync_width
|
||||
|
||||
|
@ -142,7 +141,7 @@ skip_swap:
|
|||
ands r7, #1
|
||||
bicne r3, r3, #BIT_NO_LINE_DOUBLE
|
||||
orreq r3, r3, #BIT_NO_LINE_DOUBLE
|
||||
|
||||
|
||||
ldr r8, param_h_offset
|
||||
ldr r9, param_sample_width
|
||||
ands r9, r9, #1
|
||||
|
@ -152,24 +151,24 @@ skip_swap:
|
|||
|
||||
ldr r6, param_cpld_version
|
||||
mov r6, r6, lsr #4 //VERSION_MAJOR_BIT
|
||||
cmp r6, #3
|
||||
cmp r6, #3
|
||||
// If version < 3 do the second PSYNC read
|
||||
// (maintains backwards compatibility with CPLDv1 and CPLDv2)
|
||||
orrlt r3, r3, #BIT_OLD_CPLDV1V2
|
||||
eor r7, r7, #1
|
||||
mov r7, r7, lsl #1
|
||||
//r7 0 =double height (use default), 2 = single height (use fast)
|
||||
//r7 0 =double height (use default), 2 = single height (use fast)
|
||||
// force use of default with CPLDv1 & CPLDv2 so fast version can use WAIT_FOR_PSYNC_EDGE_FAST
|
||||
movlt r7, #0
|
||||
|
||||
ldr r8, param_palette_control
|
||||
cmp r8, #1 //in band control
|
||||
|
||||
ldr r8, param_palette_control
|
||||
cmp r8, #1 //in band control
|
||||
moveq r7, #4
|
||||
cmp r8, #2 //ntsc artifact
|
||||
cmp r8, #2 //ntsc artifact
|
||||
moveq r7, #6
|
||||
//r9 0= 3bpp 1=6bpp
|
||||
add r7, r7, r9
|
||||
ldr r9, param_capture_line
|
||||
add r7, r7, r9
|
||||
ldr r9, param_capture_line
|
||||
ldr r8, [r9, r7, lsl #2]
|
||||
str r8, capture_address
|
||||
|
||||
|
@ -179,11 +178,6 @@ skip_swap:
|
|||
|
||||
|
||||
frame:
|
||||
|
||||
bl wait_for_vsync
|
||||
ldr r0, default_vsync_line
|
||||
str r0, vsync_line // default for vsync line if vsync in blanking area
|
||||
|
||||
ldr r8, =inBandPointer
|
||||
ldr r9, =inBandData
|
||||
str r9, [r8]
|
||||
|
@ -193,6 +187,9 @@ frame:
|
|||
bic r9, r9, #BIT_IN_BAND_DETECTED //in band data detected
|
||||
str r9, [r8]
|
||||
|
||||
bl wait_for_vsync
|
||||
ldr r0, default_vsync_line
|
||||
str r0, vsync_line // default for vsync line if vsync in blanking area
|
||||
|
||||
// Working registers while frame is being captured
|
||||
//
|
||||
|
@ -370,32 +367,16 @@ process_line_loop:
|
|||
// Preserve the state used by the outer code
|
||||
push {r1-r5, r11}
|
||||
|
||||
ldr r5, param_fb_height
|
||||
adrl r0, paletteHighNibble
|
||||
ldr r6, param_nlines
|
||||
sub r6, r6, r5
|
||||
subs r6, r6, #VERTICAL_OFFSET
|
||||
movmi r6, #0
|
||||
cmp r6, #0x100
|
||||
movge r6, #0xff
|
||||
ldrb r8, [r0, r6]
|
||||
|
||||
ldr r6, =paletteFlags
|
||||
ldr r9, [r6]
|
||||
bic r9, r9, #0xf0000000
|
||||
tst r9, #BIT_MULTI_PALETTE
|
||||
orrne r9, r9, r8, lsl#28
|
||||
str r9, [r6]
|
||||
|
||||
// The capture line function is provided the following:
|
||||
// r0 = pointer to current line in frame buffer
|
||||
// r1 = number of complete psync cycles to capture (=param_chars_per_line)
|
||||
// r2 = frame buffer line pitch in bytes (=param_fb_pitch)
|
||||
// r3 = flags register
|
||||
// r4 = GPLEV0 constant
|
||||
// r5 = frame buffer height (=param_fb_height)
|
||||
// r5 = line number count down to 0
|
||||
// r6 = scan line count modulo 10
|
||||
// r7 = number of psyncs to skip
|
||||
// r8 = frame buffer height (=param_fb_height)
|
||||
//
|
||||
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
|
||||
|
||||
|
@ -403,6 +384,7 @@ process_line_loop:
|
|||
mov r0, r11
|
||||
ldr r6, linecountmod10
|
||||
ldr r7, param_h_offset
|
||||
ldr r8, param_fb_height
|
||||
// Load the address of the capture_line function into r12
|
||||
ldr r12, capture_address
|
||||
// Call capture line function
|
||||
|
@ -489,7 +471,7 @@ noInBandData:
|
|||
mov r1, r2 // bytes per line
|
||||
bl osd_update_fast
|
||||
pop {r1-r5, r11}
|
||||
bic r3, #BIT_FIELD_TYPE1_VALID // *** temp workaround disables interlace change detection after osd update in case it takes longer than frame
|
||||
// bic r3, #BIT_FIELD_TYPE1_VALID // *** temp workaround disables interlace change detection after osd update in case it takes longer than frame
|
||||
skip_osd_update:
|
||||
|
||||
|
||||
|
@ -574,7 +556,7 @@ analyse_loop:
|
|||
READ_CYCLE_COUNTER r11
|
||||
subs r12, r10, r11
|
||||
rsbmi r12, r12, #0
|
||||
cmp r12, #8388608<<3 // 32ms = over a frame / field @ 50Hz
|
||||
cmp r12, #8388608 // ~8ms
|
||||
blt analyse_loop
|
||||
cmp r6, r7 // is low time > high time
|
||||
movgt r0, #1 // inverted means positive going
|
||||
|
@ -815,10 +797,10 @@ param_palette_control:
|
|||
|
||||
param_sample_width:
|
||||
.word 0
|
||||
|
||||
|
||||
param_cpld_version:
|
||||
.word 0
|
||||
|
||||
|
||||
capture_address:
|
||||
.word 0
|
||||
|
||||
|
@ -836,15 +818,12 @@ lock_fail:
|
|||
|
||||
hsync_width:
|
||||
.word 8000
|
||||
|
||||
|
||||
.ltorg
|
||||
|
||||
customPalette:
|
||||
.space 1024, 0
|
||||
|
||||
paletteHighNibble:
|
||||
.space 1024, 0
|
||||
|
||||
dummyscreen: // used by capture preload
|
||||
.space 8192, 0
|
||||
|
||||
|
@ -858,7 +837,7 @@ capture_line_mode7_4bpp_table:
|
|||
.word capture_line_mode7_4bpp
|
||||
.word capture_line_mode7_4bpp
|
||||
.word capture_line_mode7_4bpp
|
||||
|
||||
|
||||
capture_line_normal_4bpp_table:
|
||||
.word capture_line_default_4bpp
|
||||
.word capture_line_sixbits_4bpp
|
||||
|
@ -933,7 +912,7 @@ capture_line_half_even_8bpp_table:
|
|||
.word capture_line_sixbits_8bpp // placeholder for in band six bits
|
||||
.word capture_line_half_8bpp // placeholder for ntsc artifacting
|
||||
.word capture_line_sixbits_8bpp // placeholder for ntsc artifacting
|
||||
|
||||
|
||||
capture_line_double_8bpp_table:
|
||||
.word capture_line_double_8bpp
|
||||
.word capture_line_sixbits_8bpp // placeholder for six bits
|
||||
|
@ -943,7 +922,7 @@ capture_line_double_8bpp_table:
|
|||
.word capture_line_sixbits_8bpp // placeholder for in band six bits
|
||||
.word capture_line_double_8bpp // placeholder for ntsc artifacting
|
||||
.word capture_line_sixbits_8bpp // placeholder for ntsc artifacting
|
||||
|
||||
|
||||
capture_line_atom_4bpp_table:
|
||||
.word capture_line_atom_4bpp;
|
||||
.word capture_line_atom_4bpp;
|
||||
|
@ -963,5 +942,4 @@ capture_line_atom_8bpp_table:
|
|||
.word capture_line_atom_8bpp;
|
||||
.word capture_line_atom_8bpp;
|
||||
.word capture_line_atom_8bpp;
|
||||
|
||||
|
||||
|
||||
|
|
Ładowanie…
Reference in New Issue