Further speed optimisations for fastest capture loops

pull/58/head
IanSB 2019-03-17 05:44:25 +00:00
rodzic 68c49f461c
commit 503b904297
7 zmienionych plików z 161 dodań i 144 usunięć

Wyświetl plik

@ -50,7 +50,7 @@ loop:
addeq r0, r0, r2
stmeqia r0, {r7, r10}
subeq r0, r0, r2
add r0, r0, #8
add r0, r0, #8
subs r1, r1, #1
bne loop
pop {pc}
@ -100,7 +100,7 @@ loop_8bpp:
addeq r0, r0, r2
stmeqia r0, {r5, r6, r7, r10}
subeq r0, r0, r2
add r0, r0, #16
add r0, r0, #16
subs r1, r1, #1
bne loop_8bpp
pop {pc}

Wyświetl plik

@ -19,7 +19,6 @@
//
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
b preload_capture_line_fast_sixbits
capture_line_fast_sixbits_4bpp:
push {lr}
@ -30,26 +29,25 @@ capture_line_fast_sixbits_4bpp:
eorne r11, r11, #0x50 //magenta in leftmost
eorne r11, r11, #0x02000000 //green in rightmost
mov r1, r1, lsr #1
SKIP_PSYNC_FAST
SKIP_PSYNC_VERY_FAST
loop:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_0_BITS_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_0_BITS_WIDE_R11 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_1_BITS_WIDE // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_2_BITS_WIDE // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
eor r7, r10, r11
MOV r7, r10
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_0_BITS_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_0_BITS_WIDE_R11 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_1_BITS_WIDE // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_2_BITS_WIDE // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
eor r10, r10, r11
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
stmia r0!, {r7, r10}
subs r1, r1, #1
bne loop
@ -75,33 +73,32 @@ capture_line_fast_sixbits_8bpp:
eorne r11, r11, #0x05 //magenta in leftmost
eorne r12, r12, #0x02000000 //green in rightmost
mov r1, r1, lsr #1
SKIP_PSYNC_FAST
SKIP_PSYNC_VERY_FAST
loop_8bpp:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_LOW_BITS_8BPP_WIDE_R11 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
eor r5, r10, r11
mov r5, r10
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_LOW_BITS_8BPP_WIDE_R12 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
eor r6, r10, r12
mov r6, r10
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_LOW_BITS_8BPP_WIDE_R11 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
eor r7, r10, r11
mov r7, r10
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_LOW_BITS_8BPP_WIDE_R12 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
eor r10, r10, r12
stmia r0!, {r5, r6, r7, r10}
subs r1, r1, #1
bne loop_8bpp
pop {pc}
preload_capture_line_fast_sixbits_8bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_fast_sixbits_8bpp

Wyświetl plik

@ -11,15 +11,18 @@
.global sentinel
.global paletteFlags
.global inBandPointer
.global paletteHighNibble
// The capture line function is provided the following:
// r0 = pointer to current line in frame buffer
// r1 = number of 8-pixel blocks to capture (=param_chars_per_line)
// r1 = number of complete psync cycles to capture (=param_chars_per_line)
// r2 = frame buffer line pitch in bytes (=param_fb_pitch)
// r3 = flags register
// r4 = GPLEV0 constant
// r5 = frame buffer height (=param_fb_height)
// r5 = line number count down to 0
// r6 = scan line count modulo 10
// r7 = number of psyncs to skip
// r8 = frame buffer height (=param_fb_height)
//
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
@ -27,6 +30,9 @@
inBandData:
.space 32768, 0
paletteHighNibble:
.space 1024, 0
b preload_capture_line_inband_4bpp
capture_line_inband_4bpp:
push {lr}
@ -179,15 +185,22 @@ inBandPointer:
b preload_capture_line_inband_8bpp
capture_line_inband_8bpp:
push {lr}
ldr r11, inBandPointer
adrl r9, paletteHighNibble
subs r5, r5, #VERTICAL_OFFSET //r5 = line number count down to 0
movmi r5, #0
cmp r5, #0x100
movge r5, #0xff
rsb r5, r5, #0xff
ldrb r5, [r9, r5]
ldr r8, paletteFlags
mov r6, #0
tst r8, #BIT_MULTI_PALETTE
bic r3, #MASKDUP_PALETTE_HIGH_NIBBLE
orrne r3, r3, r5, lsl #OFFSETDUP_PALETTE_HIGH_NIBBLE
tst r8, #BIT_SET_MODE2_16COLOUR
orrne r3, r3, #BITDUP_MODE2_16COLOUR
biceq r3, r3, #BITDUP_MODE2_16COLOUR
mov r8, r8, lsr #28
bic r3, #MASKDUP_PALETTE_HIGH_NIBBLE
orr r3, r3, r8, lsl #OFFSETDUP_PALETTE_HIGH_NIBBLE
mov r6, #0
ldr r11, inBandPointer
ldr r12, sentinel // 32 bit sentinel
SKIP_PSYNC
mov r7, #0

Wyświetl plik

@ -9,12 +9,14 @@
// The capture line function is provided the following:
// r0 = pointer to current line in frame buffer
// r1 = number of 8-pixel blocks to capture (=param_chars_per_line)
// r1 = number of complete psync cycles to capture (=param_chars_per_line)
// r2 = frame buffer line pitch in bytes (=param_fb_pitch)
// r3 = flags register
// r4 = GPLEV0 constant
// r5 = frame buffer height (=param_fb_height)
// r5 = line number count down to 0
// r6 = scan line count modulo 10
// r7 = number of psyncs to skip
// r8 = frame buffer height (=param_fb_height)
//
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
@ -29,16 +31,16 @@ capture_line_mode7_4bpp:
tst r3, #BIT_CALIBRATE
bne process_chars_7_none
ands r8, r3, #MASK_INTERLACE
ands r5, r3, #MASK_INTERLACE
beq process_chars_7_none // DEINTERLACE_NONE
mov r9, r8, lsr #OFFSET_INTERLACE // put interlace setting in R9 0-6
mov r9, r5, lsr #OFFSET_INTERLACE // put interlace setting in R9 0-6
cmp r9, #1 //DEINTERLACE_BOB
beq process_chars_7_bob
tst r3, #BIT_FIELD_TYPE // test odd or even field
mla r11, r5, r2, r0 // offset to second buffer used for comparison not for display
mla r11, r8, r2, r0 // offset to second buffer used for comparison not for display
// now absolute address of pixel group in comparison buffer
rsbeq r2, r2,#0 // negate R2 offset if odd field to write to line above (restored to original value on exit)

Wyświetl plik

@ -19,7 +19,6 @@
//
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
b preload_capture_line_sixbits
capture_line_sixbits_4bpp:
push {lr}
@ -33,23 +32,22 @@ capture_line_sixbits_4bpp:
SKIP_PSYNC_FAST
loop:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_0_BITS_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_0_BITS_WIDE_R11 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_1_BITS_WIDE // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_2_BITS_WIDE // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
eor r7, r10, r11
MOV r7, r10
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_0_BITS_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_0_BITS_WIDE_R11 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_1_BITS_WIDE // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_2_BITS_WIDE // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
eor r10, r10, r11
CAPTURE_3_BITS_WIDE // input in r8, result in r10, corrupts r9
stmia r0, {r7, r10}
tst r3, #BIT_SCANLINES
movne r7, #0
@ -58,7 +56,7 @@ loop:
addeq r0, r0, r2
stmeqia r0, {r7, r10}
subeq r0, r0, r2
add r0, r0, #8
add r0, r0, #8
subs r1, r1, #1
bne loop
pop {pc}
@ -71,7 +69,6 @@ preload_capture_line_sixbits:
// *** 8 bit ***
b preload_capture_line_sixbits_8bpp
capture_line_sixbits_8bpp:
push {lr}
@ -87,25 +84,24 @@ capture_line_sixbits_8bpp:
SKIP_PSYNC_FAST
loop_8bpp:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_LOW_BITS_8BPP_WIDE_R11 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
eor r5, r10, r11
mov r5, r10
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_LOW_BITS_8BPP_WIDE_R12 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
eor r6, r10, r12
mov r6, r10
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_LOW_BITS_8BPP_WIDE_R11 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
eor r7, r10, r11
mov r7, r10
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_LOW_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
CAPTURE_LOW_BITS_8BPP_WIDE_R12 // input in r8, result in r10, corrupts r9
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
CAPTURE_HIGH_BITS_8BPP_WIDE // input in r8, result in r10, corrupts r9
eor r10, r10, r12
stmia r0, {r5, r6, r7, r10}
tst r3, #BIT_SCANLINES
movne r5, #0
@ -116,11 +112,11 @@ loop_8bpp:
addeq r0, r0, r2
stmeqia r0, {r5, r6, r7, r10}
subeq r0, r0, r2
add r0, r0, #16
add r0, r0, #16
subs r1, r1, #1
bne loop_8bpp
pop {pc}
preload_capture_line_sixbits_8bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_sixbits_8bpp

Wyświetl plik

@ -1,3 +1,28 @@
// WAIT_FOR_CSYNC_0
// Busy-wait until the CSYNC_MASK bit reads as 0 on two consecutive reads.
//   In:       r4 = address to poll (GPLEV0, per the comments below)
//   Out:      r8 = last value read from [r4]
//   Clobbers: r8, condition flags
// If either read sees the bit set, the whole wait restarts from the top.
.macro WAIT_FOR_CSYNC_0
waitlo\@:
// Read the GPLEV0
ldr r8, [r4]
tst r8, #CSYNC_MASK
bne waitlo\@
// Check again in case of noise
ldr r8, [r4]
tst r8, #CSYNC_MASK
bne waitlo\@
.endm
// WAIT_FOR_CSYNC_1
// Busy-wait until the CSYNC_MASK bit reads as 1 on two consecutive reads.
//   In:       r4 = address to poll (GPLEV0, per the comments below)
//   Out:      r8 = last value read from [r4]
//   Clobbers: r8, condition flags
// If either read sees the bit clear, the whole wait restarts from the top.
.macro WAIT_FOR_CSYNC_1
waithi\@:
// Read the GPLEV0
ldr r8, [r4]
tst r8, #CSYNC_MASK
beq waithi\@
// Check again in case of noise
ldr r8, [r4]
tst r8, #CSYNC_MASK
beq waithi\@
.endm
// Wait for the next edge on psync
// if r3 bit 17 = 0 - wait for falling edge
// if r3 bit 17 = 1 - wait for rising edge
@ -51,23 +76,23 @@ waitPF\@:
// - a "normal" hsync is 4.0us, increment h_offset by 1
// - a "long" hsync is 4.5us, increment h_offset by 2
// So test against two thresholds in between these values
// old CPLD V1 & V2 code
mov r8, r7
cmp r10, #(4000 + 224)
addgt r8, r8, #1
cmp r10, #(4000 - 224)
addgt r8, r8, #1
// new CPLD V3 or later code
cmp r10, #(4000 + 224)
addlt r7, r7, #1
cmp r10, #(4000 - 224)
addlt r7, r7, #1
tst r3, #BIT_OLD_CPLDV1V2
movne r7, r8
// Skip the configured number of psync edges (modes 0..6: edges every 250ns, mode 7: edges every 333ns)
orr r3, r3, #PSYNC_MASK // first edge is a 0->1
cmp r7, #0
@ -82,36 +107,56 @@ skip_psync_loop_exit\@:
// SKIP_PSYNC_FAST
// Measure the low hsync pulse, adjust the psync skip count in r7 by 0, 1 or 2
// depending on the measured width, then wait for that many psync edges.
//   In:       r4 = GPLEV0 address (used by the WAIT_FOR_* macros)
//             r7 = configured number of psync edges to skip
//             r3 = flags register
//   Out:      r3 has PSYNC_MASK set; r7 = 0
//   Clobbers: r8, r9, r10, condition flags
//             (plus whatever READ_CYCLE_COUNTER / WAIT_FOR_PSYNC_EDGE_FAST
//             clobber - defined elsewhere, TODO confirm)
.macro SKIP_PSYNC_FAST
// Wait for the start of hsync
WAIT_FOR_CSYNC_0
READ_CYCLE_COUNTER r10
// Wait for the end of hsync
WAIT_FOR_CSYNC_1
READ_CYCLE_COUNTER r9
// Calculate length of low hsync pulse (in ARM cycles = ns)
subs r10, r9, r10
// Negate if the difference came out negative so r10 holds its magnitude
rsbmi r10, r10, #0
// Start with the configured horizontal offset (already in r7)
// Implement half character horizontal scrolling:
// - a "short" hsync is 3.5us, leave h_offset as-is
// - a "normal" hsync is 4.0us, increment h_offset by 1
// - a "long" hsync is 4.5us, increment h_offset by 2
// So test against two thresholds in between these values
// new CPLD code only in FAST version (not called from CPLD v1 & v2)
// NOTE(review): the addlt conditions below add 1 for each threshold the
// measured pulse is *below*, so a pulse shorter than (4000 - 224) adds 2,
// one between the thresholds adds 1, and one at or above (4000 + 224) adds 0.
// That is the reverse of the short/normal/long description above - confirm
// which is intended for the new CPLD.
cmp r10, #(4000 + 224)
addlt r7, r7, #1
cmp r10, #(4000 - 224)
addlt r7, r7, #1
// Skip the configured number of psync edges (modes 0..6: edges every 250ns, mode 7: edges every 333ns)
orr r3, r3, #PSYNC_MASK // first edge is a 0->1
// Nothing to skip when the count is zero
cmp r7, #0
beq skip_psync_loop_fast_exit\@
skip_psync_loop_fast\@:
WAIT_FOR_PSYNC_EDGE_FAST // wait for next edge of psync
subs r7, r7, #1
bne skip_psync_loop_fast\@
skip_psync_loop_fast_exit\@:
.endm
.macro CAPTURE_0_BITS_WIDE
// SKIP_PSYNC_VERY_FAST
// Wait for csync to read high and then low, then wait for (r7 + 1) psync edges.
// Unlike SKIP_PSYNC_FAST there is no hsync width measurement and no zero-count
// early exit: r7 is incremented first, so at least one psync edge is always
// waited for when r7 enters as 0.
//   In:       r4 = GPLEV0 address (used by the WAIT_FOR_* macros)
//             r7 = configured number of psync edges to skip
//             r3 = flags register
//   Out:      r3 has PSYNC_MASK set; r7 = 0
//   Clobbers: r8, condition flags
//             (plus whatever WAIT_FOR_PSYNC_EDGE_FAST clobbers - defined
//             elsewhere, TODO confirm)
.macro SKIP_PSYNC_VERY_FAST
WAIT_FOR_CSYNC_1 //note test is reversed: wait for 1 then 0 (SKIP_PSYNC_FAST waits for 0 then 1)
WAIT_FOR_CSYNC_0
orr r3, r3, #PSYNC_MASK // first edge is a 0->1
add r7, r7, #1
skip_psync_loop_very_fast\@:
WAIT_FOR_PSYNC_EDGE_FAST // wait for next edge of psync
subs r7, r7, #1
bne skip_psync_loop_very_fast\@
.endm
// CAPTURE_0_BITS_WIDE_R11
// Start a new packed output word: r10 = r11 EOR (pixel 0 and pixel 1 nibbles).
// Folding r11 in here means the caller does not need a separate eor with r11
// after the last CAPTURE_n_BITS_WIDE step.
//   In:       r8  = sampled GPIO word
//             r11 = value to EOR into the result
//   Out:      r10 = r11 ^ (pixel 0 -> bits 7..4) ^ (pixel 1 -> bits 3..0)
//   Clobbers: r9, r14 (lr); flags are not modified
// The earlier and/and/mov/orr sequence that used to precede these four
// instructions has been removed: its r9/r10 results were overwritten below
// before ever being read, so it only cost cycles in the capture loop.
.macro CAPTURE_0_BITS_WIDE_R11
// Pixel 0 in GPIO 7.. 2 -> 7.. 4
// Pixel 1 in GPIO 13.. 8 -> 3.. 0
and r9, r8, #(0x0f << PIXEL_BASE)
and r14, r8, #(0x0f << (PIXEL_BASE + 6))
eor r10, r11, r9, lsl #(4 - PIXEL_BASE)
eor r10, r10, r14, lsr #(6 + PIXEL_BASE)
.endm
.macro CAPTURE_1_BITS_WIDE
@ -119,9 +164,9 @@ skip_psync_loop_fast\@:
// Pixel 1 in GPIO 13.. 8 -> 11.. 8
and r9, r8, #(0x0f << (PIXEL_BASE))
and r8, r8, #(0x0f << (PIXEL_BASE + 6))
orr r10, r10, r9, lsl #(12 - PIXEL_BASE)
orr r10, r10, r8, lsr #(2 - PIXEL_BASE)
and r14, r8, #(0x0f << (PIXEL_BASE + 6))
eor r10, r10, r9, lsl #(12 - PIXEL_BASE)
eor r10, r10, r14, lsr #(2 - PIXEL_BASE)
.endm
.macro CAPTURE_2_BITS_WIDE
@ -129,17 +174,17 @@ skip_psync_loop_fast\@:
// Pixel 5 in GPIO 13.. 8 -> 19..16
and r9, r8, #(0x0f << PIXEL_BASE)
and r14, r8, #(0x0f << (PIXEL_BASE + 6))
orr r10, r10, r9, lsl #(20 - PIXEL_BASE)
orr r10, r10, r14, lsl #(10 - PIXEL_BASE)
eor r10, r10, r9, lsl #(20 - PIXEL_BASE)
eor r10, r10, r14, lsl #(10 - PIXEL_BASE)
.endm
.macro CAPTURE_3_BITS_WIDE
// Pixel 6 in GPIO 7.. 2 -> 31..28
// Pixel 7 in GPIO 13..8 -> 27..24
and r9, r8, #(0x0f << PIXEL_BASE)
and r8, r8, #(0x0f << (PIXEL_BASE + 6))
orr r10, r10, r9, lsl #(28 - PIXEL_BASE)
orr r10, r10, r8, lsl #(18 - PIXEL_BASE)
and r14, r8, #(0x0f << (PIXEL_BASE + 6))
eor r10, r10, r9, lsl #(28 - PIXEL_BASE)
eor r10, r10, r14, lsl #(18 - PIXEL_BASE)
.endm
.macro CAPTURE_LOW_BITS
@ -176,14 +221,24 @@ skip_psync_loop_fast\@:
orr r10, r10, r8, lsl #(15 - PIXEL_BASE)
.endm
.macro CAPTURE_LOW_BITS_8BPP_WIDE
// CAPTURE_LOW_BITS_8BPP_WIDE_R11
// Start a new packed 8bpp output word: r10 = r11 EOR (pixel 0 and pixel 1).
//   In:       r8  = sampled GPIO word
//             r11 = value to EOR into the result
//   Out:      r10 = r11 ^ (pixel 0 -> bits 7..0) ^ (pixel 1 -> bits 15..8)
//   Clobbers: r9, r14 (lr); flags are not modified
// The earlier and/and/mov/orr sequence that used to precede these four
// instructions has been removed: its r9/r10 results were overwritten below
// before ever being read, so it only cost cycles in the capture loop.
.macro CAPTURE_LOW_BITS_8BPP_WIDE_R11
// Pixel 0 in GPIO 7.. 2 -> 7.. 0
// Pixel 1 in GPIO 13.. 8 -> 15.. 8
and r9, r8, #(0x3f << PIXEL_BASE)
and r14, r8, #(0x3f << (PIXEL_BASE + 6))
eor r10, r11, r9, lsr #(PIXEL_BASE)
eor r10, r10, r14, lsl #(8 - (PIXEL_BASE + 6))
.endm
// CAPTURE_LOW_BITS_8BPP_WIDE_R12
// Same as CAPTURE_LOW_BITS_8BPP_WIDE_R11 but seeds the result from r12:
// r10 = r12 EOR (pixel 0 and pixel 1).
//   In:       r8  = sampled GPIO word
//             r12 = value to EOR into the result
//   Out:      r10 = r12 ^ (pixel 0 -> bits 7..0) ^ (pixel 1 -> bits 15..8)
//   Clobbers: r9, r14 (lr); flags are not modified
.macro CAPTURE_LOW_BITS_8BPP_WIDE_R12
// Pixel 0 in GPIO 7.. 2 -> 7.. 0
// Pixel 1 in GPIO 13.. 8 -> 15.. 8
// Isolate the two 6-bit pixel fields from the sample
and r9, r8, #(0x3f << PIXEL_BASE)
and r14, r8, #(0x3f << (PIXEL_BASE + 6))
// First eor overwrites r10 (new word), second merges pixel 1
eor r10, r12, r9, lsr #(PIXEL_BASE)
eor r10, r10, r14, lsl #(8 - (PIXEL_BASE + 6))
.endm
.macro CAPTURE_HIGH_BITS_8BPP_WIDE
@ -192,8 +247,8 @@ skip_psync_loop_fast\@:
and r9, r8, #(0x3f << PIXEL_BASE)
and r14, r8, #(0x3f << (PIXEL_BASE + 6))
orr r10, r10, r9, lsl #(16 - PIXEL_BASE)
orr r10, r10, r14, lsl #(24 - (PIXEL_BASE + 6))
eor r10, r10, r9, lsl #(16 - PIXEL_BASE)
eor r10, r10, r14, lsl #(24 - (PIXEL_BASE + 6))
.endm
@ -584,30 +639,6 @@ noflip\@:
.endm
#endif
// WAIT_FOR_CSYNC_0
// Busy-wait until the CSYNC_MASK bit reads as 0 on two consecutive reads.
//   In:       r4 = address to poll (GPLEV0, per the comments below)
//   Out:      r8 = last value read from [r4]
//   Clobbers: r8, condition flags
.macro WAIT_FOR_CSYNC_0
waitlo\@:
// Read the GPLEV0
ldr r8, [r4]
tst r8, #CSYNC_MASK
bne waitlo\@
// Check again in case of noise
ldr r8, [r4]
tst r8, #CSYNC_MASK
bne waitlo\@
.endm
// WAIT_FOR_CSYNC_1
// Busy-wait until the CSYNC_MASK bit reads as 1 on two consecutive reads.
//   In:       r4 = address to poll (GPLEV0, per the comments below)
//   Out:      r8 = last value read from [r4]
//   Clobbers: r8, condition flags
.macro WAIT_FOR_CSYNC_1
waithi\@:
// Read the GPLEV0
ldr r8, [r4]
tst r8, #CSYNC_MASK
beq waithi\@
// Check again in case of noise
ldr r8, [r4]
tst r8, #CSYNC_MASK
beq waithi\@
.endm
.macro KEY_PRESS_DETECT mask, ret, counter
ldr r5, \counter // Load the counter value
tst r8, #\mask // Is the button pressed (active low)?

Wyświetl plik

@ -17,7 +17,6 @@
.global default_vsync_line
.global lock_fail
.global customPalette
.global paletteHighNibble
.global dummyscreen
.global hsync_width
@ -142,7 +141,7 @@ skip_swap:
ands r7, #1
bicne r3, r3, #BIT_NO_LINE_DOUBLE
orreq r3, r3, #BIT_NO_LINE_DOUBLE
ldr r8, param_h_offset
ldr r9, param_sample_width
ands r9, r9, #1
@ -152,24 +151,24 @@ skip_swap:
ldr r6, param_cpld_version
mov r6, r6, lsr #4 //VERSION_MAJOR_BIT
cmp r6, #3
cmp r6, #3
// If version < 3 do the second PSYNC read
// (maintains backwards compatibility with CPLDv1 and CPLDv2)
orrlt r3, r3, #BIT_OLD_CPLDV1V2
eor r7, r7, #1
mov r7, r7, lsl #1
//r7 0 =double height (use default), 2 = single height (use fast)
//r7 0 =double height (use default), 2 = single height (use fast)
// force use of default with CPLDv1 & CPLDv2 so fast version can use WAIT_FOR_PSYNC_EDGE_FAST
movlt r7, #0
ldr r8, param_palette_control
cmp r8, #1 //in band control
ldr r8, param_palette_control
cmp r8, #1 //in band control
moveq r7, #4
cmp r8, #2 //ntsc artifact
cmp r8, #2 //ntsc artifact
moveq r7, #6
//r9 0= 3bpp 1=6bpp
add r7, r7, r9
ldr r9, param_capture_line
add r7, r7, r9
ldr r9, param_capture_line
ldr r8, [r9, r7, lsl #2]
str r8, capture_address
@ -179,11 +178,6 @@ skip_swap:
frame:
bl wait_for_vsync
ldr r0, default_vsync_line
str r0, vsync_line // default for vsync line if vsync in blanking area
ldr r8, =inBandPointer
ldr r9, =inBandData
str r9, [r8]
@ -193,6 +187,9 @@ frame:
bic r9, r9, #BIT_IN_BAND_DETECTED //in band data detected
str r9, [r8]
bl wait_for_vsync
ldr r0, default_vsync_line
str r0, vsync_line // default for vsync line if vsync in blanking area
// Working registers while frame is being captured
//
@ -370,32 +367,16 @@ process_line_loop:
// Preserve the state used by the outer code
push {r1-r5, r11}
ldr r5, param_fb_height
adrl r0, paletteHighNibble
ldr r6, param_nlines
sub r6, r6, r5
subs r6, r6, #VERTICAL_OFFSET
movmi r6, #0
cmp r6, #0x100
movge r6, #0xff
ldrb r8, [r0, r6]
ldr r6, =paletteFlags
ldr r9, [r6]
bic r9, r9, #0xf0000000
tst r9, #BIT_MULTI_PALETTE
orrne r9, r9, r8, lsl#28
str r9, [r6]
// The capture line function is provided the following:
// r0 = pointer to current line in frame buffer
// r1 = number of complete psync cycles to capture (=param_chars_per_line)
// r2 = frame buffer line pitch in bytes (=param_fb_pitch)
// r3 = flags register
// r4 = GPLEV0 constant
// r5 = frame buffer height (=param_fb_height)
// r5 = line number count down to 0
// r6 = scan line count modulo 10
// r7 = number of psyncs to skip
// r8 = frame buffer height (=param_fb_height)
//
// All registers are available as scratch registers (i.e. nothing needs to be preserved)
@ -403,6 +384,7 @@ process_line_loop:
mov r0, r11
ldr r6, linecountmod10
ldr r7, param_h_offset
ldr r8, param_fb_height
// Load the address of the capture_line function into r12
ldr r12, capture_address
// Call capture line function
@ -489,7 +471,7 @@ noInBandData:
mov r1, r2 // bytes per line
bl osd_update_fast
pop {r1-r5, r11}
bic r3, #BIT_FIELD_TYPE1_VALID // *** temp workaround disables interlace change detection after osd update in case it takes longer than frame
// bic r3, #BIT_FIELD_TYPE1_VALID // *** temp workaround disables interlace change detection after osd update in case it takes longer than frame
skip_osd_update:
@ -574,7 +556,7 @@ analyse_loop:
READ_CYCLE_COUNTER r11
subs r12, r10, r11
rsbmi r12, r12, #0
cmp r12, #8388608<<3 // 32ms = over a frame / field @ 50Hz
cmp r12, #8388608 // ~8ms
blt analyse_loop
cmp r6, r7 // is low time > high time
movgt r0, #1 // inverted means positive going
@ -815,10 +797,10 @@ param_palette_control:
param_sample_width:
.word 0
param_cpld_version:
.word 0
capture_address:
.word 0
@ -836,15 +818,12 @@ lock_fail:
hsync_width:
.word 8000
.ltorg
customPalette:
.space 1024, 0
paletteHighNibble:
.space 1024, 0
dummyscreen: // used by capture preload
.space 8192, 0
@ -858,7 +837,7 @@ capture_line_mode7_4bpp_table:
.word capture_line_mode7_4bpp
.word capture_line_mode7_4bpp
.word capture_line_mode7_4bpp
capture_line_normal_4bpp_table:
.word capture_line_default_4bpp
.word capture_line_sixbits_4bpp
@ -933,7 +912,7 @@ capture_line_half_even_8bpp_table:
.word capture_line_sixbits_8bpp // placeholder for in band six bits
.word capture_line_half_8bpp // placeholder for ntsc artifacting
.word capture_line_sixbits_8bpp // placeholder for ntsc artifacting
capture_line_double_8bpp_table:
.word capture_line_double_8bpp
.word capture_line_sixbits_8bpp // placeholder for six bits
@ -943,7 +922,7 @@ capture_line_double_8bpp_table:
.word capture_line_sixbits_8bpp // placeholder for in band six bits
.word capture_line_double_8bpp // placeholder for ntsc artifacting
.word capture_line_sixbits_8bpp // placeholder for ntsc artifacting
capture_line_atom_4bpp_table:
.word capture_line_atom_4bpp;
.word capture_line_atom_4bpp;
@ -963,5 +942,4 @@ capture_line_atom_8bpp_table:
.word capture_line_atom_8bpp;
.word capture_line_atom_8bpp;
.word capture_line_atom_8bpp;