First part of universal binary (Pi0,1,2,3) conversion

pull/256/head
IanSB 2021-11-21 17:49:31 +00:00
rodzic 05b649fe91
commit 60453dce62
9 zmienionych plików z 156 dodań i 135 usunięć

Wyświetl plik

@ -4,14 +4,15 @@
#include "rpi-interrupts.h"
//#include "tube-defs.h"
#include "startup.h"
#include "defs.h"
// From here: https://www.raspberrypi.org/forums/viewtopic.php?f=72&t=53862
void reboot_now(void)
{
const int PM_PASSWORD = 0x5a000000;
const int PM_RSTC_WRCFG_FULL_RESET = 0x00000020;
unsigned int *PM_WDOG = (unsigned int *) (_get_peripheral_base() + 0x00100024);
unsigned int *PM_RSTC = (unsigned int *) (_get_peripheral_base() + 0x0010001c);
//const int PM_PASSWORD = 0x5a000000;
//const int PM_RSTC_WRCFG_FULL_RESET = 0x00000020;
//unsigned int *PM_WDOG = (unsigned int *) (_get_peripheral_base() + 0x00100024);
//unsigned int *PM_RSTC = (unsigned int *) (_get_peripheral_base() + 0x0010001c);
// timeout = 1/16th of a second? (whatever)
*PM_WDOG = PM_PASSWORD | 1;
@ -75,10 +76,10 @@ void dump_info(unsigned int *context, int offset, char *type) {
// The stacked LR points one or two words after the exception address
addr = (unsigned int *)((reg[13] & ~3) - offset);
dump_hex((unsigned int)addr);
#ifdef HAS_MULTICORE
dump_string(" on core ");
dump_digit(_get_core());
#endif
if (_get_hardware_id() >= _RPI2) {
dump_string(" on core ");
dump_digit(_get_core());
}
dump_string("\r\n");
dump_string("Registers:\r\n");
for (i = 0; i <= 13; i++) {

Wyświetl plik

@ -28,6 +28,7 @@
// Relocate to just below 32MB
#include "defs.h"
#include "rpi-base.h"
.equ STACK_SIZE, 0x00100000
@ -38,14 +39,12 @@
.equ C0_ABORT_STACK, STACK_SIZE*5
.equ C0_UNDEFINED_STACK, STACK_SIZE*6
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
.equ C1_SVR_STACK, STACK_SIZE*7
.equ C1_IRQ_STACK, STACK_SIZE*8
.equ C1_FIQ_STACK, STACK_SIZE*9
.equ C1_USER_STACK, STACK_SIZE*10
.equ C1_ABORT_STACK, STACK_SIZE*11
.equ C1_UNDEFINED_STACK, STACK_SIZE*12
#endif
.equ SCTLR_ENABLE_DATA_CACHE, 0x4
.equ SCTLR_ENABLE_BRANCH_PREDICTION, 0x800
@ -79,11 +78,9 @@
.global _get_gpu_data_base_r4
.global _get_gpu_command_base_r10
#ifdef HAS_MULTICORE
.global _get_core
.global _init_core
.global _spin_core
#endif
// From the ARM ARM (Architecture Reference Manual). Make sure you get the
// ARMv5 documentation which includes the ARMv6 documentation which is the
@ -141,8 +138,9 @@ _reset_:
#endif
// BL _enable_l1_cache
#ifdef HAS_MULTICORE
bl _get_hardware_id
cmp r0, #_RPI2
blt rpi0_1_d
#ifdef KERNEL_OLD
// if kernel_old=1 all cores are running and we need to sleep 1-3
@ -192,8 +190,7 @@ _not_in_hyp_mode:
_reset_continue:
#endif
#endif
rpi0_1_d:
// We enter execution in supervisor mode. For more information on
// processor modes see ARM Section A2.2 (Processor Modes)
@ -238,10 +235,10 @@ _reset_continue:
// Enable VFP ------------------------------------------------------------
#ifdef HAS_MULTICORE
bl _get_hardware_id
cmp r0, #_RPI2
bge rpi2_4_a
#else
// r1 = Access Control Register
MRC p15, #0, r1, c1, c0, #2
// enable full access for p10,11
@ -256,7 +253,8 @@ _reset_continue:
MOV r0,#0x40000000
// FPEXC = r0
FMXR FPEXC, r0
#endif
rpi2_4_a:
// The c-startup function which we never return from. This function will
// initialise the ro data section (most things that have the const
@ -338,26 +336,34 @@ _get_cpsr:
.section ".text._init_cycle_counter"
_init_cycle_counter:
// Enable the cycle counter, and run at the ARM clock rate
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
mov r0, #7
mcr p15, 0, r0, c9, c12, 0
mov r0, #(1 << 31)
mcr p15, 0, r0, c9, c12, 1
#else
mov r0, #7
mcr p15, 0, r0, c15, c12, 0
#endif
mov pc, lr
// Enable the cycle counter, and run at the ARM clock rate
// The register set is chosen at run time: _get_hardware_id leaves the id in
// r0, and ids below _RPI2 branch to rpi0_1_a.
// r0 and lr are stacked because the call below overwrites both.
push {r0, lr}
bl _get_hardware_id
cmp r0, #_RPI2
blt rpi0_1_a
// id >= _RPI2: write 7 to CP15 c9,c12,0, then set bit 31 through c9,c12,1
// NOTE(review): presumably ARMv7 PMCR and PMCNTENSET - confirm against the ARM ARM
mov r0, #7
mcr p15, 0, r0, c9, c12, 0
mov r0, #(1 << 31)
mcr p15, 0, r0, c9, c12, 1
b donerpi0_1_a
rpi0_1_a:
// id < _RPI2: write 7 to CP15 c15,c12,0
// NOTE(review): presumably the ARM1176 performance monitor control register - confirm
mov r0, #7
mcr p15, 0, r0, c15, c12, 0
donerpi0_1_a:
// restore the caller's r0 and return by popping the saved lr into pc
pop {r0, pc}
.section ".text._get_cycle_counter"
_get_cycle_counter:
// Returns the current cycle count in r0. The counter register is chosen at
// run time: _get_hardware_id leaves the id in r0, and ids below _RPI2 branch
// to rpi0_1_b.
// r0 must NOT be saved/restored around the body: it carries the return value,
// and popping a stacked r0 would overwrite the count that was just read.
// r4 is stacked instead so that two registers are still pushed alongside lr.
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
push {r4, lr}
bl _get_hardware_id
cmp r0, #_RPI2
blt rpi0_1_b
// id >= _RPI2: read the count from CP15 c9,c13,0 into r0
mrc p15, 0, r0, c9, c13, 0
#else
b donerpi0_1_b
rpi0_1_b:
// id < _RPI2: read the count from CP15 c15,c12,1 into r0
mrc p15, 0, r0, c15, c12, 1
#endif
mov pc, lr
donerpi0_1_b:
// restore r4 and return; r0 still holds the cycle count
pop {r4, pc}
.section ".text._set_interrupts"
_set_interrupts:
@ -480,13 +486,17 @@ _invalidate_dtlb_mva:
.section ".text._data_memory_barrier"
_data_memory_barrier:
// Issue a data memory barrier. The instruction used is chosen at run time:
// _get_hardware_id leaves the id in r0, and ids below _RPI2 branch to rpi0_1_c.
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
// r0 and lr are stacked because the call (and the mov below) overwrite them
push {r0, lr}
bl _get_hardware_id
cmp r0, #_RPI2
blt rpi0_1_c
// id >= _RPI2: use the dmb instruction
dmb
#else
b donerpi0_1_c
rpi0_1_c:
// id < _RPI2: write 0 to CP15 c7,c10,5
// NOTE(review): presumably the ARMv6 Data Memory Barrier operation - confirm
mov r0, #0
mcr p15, 0, r0, c7, c10, 5
#endif
mov pc, lr
donerpi0_1_c:
// restore the caller's r0 and return by popping the saved lr into pc
pop {r0, pc}
#ifdef USE_MULTICORE
.section ".text._init_core"
@ -551,7 +561,10 @@ _init_continue:
bl run_core
#endif
#ifdef HAS_MULTICORE
bl _get_hardware_id
cmp r0, #_RPI2
blt rpi0_1_e
.section ".text._spin_core"
// If main does return for some reason, just catch it and stay here.
_spin_core:
@ -581,8 +594,8 @@ _get_core:
mrc p15, 0, r0, c0, c0, 5
and r0, #3
mov pc, lr
#endif
rpi0_1_e:
// Default handlers for FIQ/IRQ do nothing

Wyświetl plik

@ -22,8 +22,6 @@ const static int aa = 1;
const static int bb = 1;
const static int shareable = 1;
#if defined(RPI2) || defined (RPI3) || defined(RPI4)
#define SETWAY_LEVEL_SHIFT 1
// 4 ways x 128 sets x 64 bytes per line 32KB
@ -134,7 +132,6 @@ void CleanDataCache (void)
}
}
#endif
// TLB 4KB Section Descriptor format
// 31..12 Section Base Address
@ -157,11 +154,11 @@ void map_4k_page(int logical, int physical) {
// XP (bit 23) in SCTLR no longer exists, and we seem to be using ARMv6 table formats
// this means bit 0 of the page table is actually XN and must be clear to allow native ARM code to execute
// (this was the cause of issue #27)
#if defined(RPI2) || defined (RPI3) || defined(RPI4)
PageTable2[logical] = (physical<<12) | 0x132 | (bb << 6) | (aa << 2);
#else
PageTable2[logical] = (physical<<12) | 0x133 | (bb << 6) | (aa << 2);
#endif
if (_get_hardware_id() >= _RPI2) {
PageTable2[logical] = (physical<<12) | 0x132 | (bb << 6) | (aa << 2);
} else {
PageTable2[logical] = (physical<<12) | 0x133 | (bb << 6) | (aa << 2);
}
}
void enable_MMU_and_IDCaches(int cached_screen_area, int cached_screen_size)
@ -296,34 +293,33 @@ void enable_MMU_and_IDCaches(int cached_screen_area, int cached_screen_size)
asm volatile ("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr));
//log_debug("ttbcr = %08x", ttbcr);
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
// set TTBR0 - page table walk memory cacheability/shareable
// [Bit 0, Bit 6] indicates inner cacheability: 01 = normal memory, inner write-back write-allocate cacheable
// [Bit 4, Bit 3] indicates outer cacheability: 01 = normal memory, outer write-back write-allocate cacheable
// Bit 1 indicates shareable
// 4A = 0100 1010
int attr = ((aa & 1) << 6) | (bb << 3) | (shareable << 1) | ((aa & 2) >> 1);
asm volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r" (attr | (unsigned) &PageTable));
#else
// set TTBR0 (page table walk inner cacheable, outer non-cacheable, shareable memory)
asm volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r" (0x03 | (unsigned) &PageTable));
#endif
if (_get_hardware_id() >= _RPI2) {
// set TTBR0 - page table walk memory cacheability/shareable
// [Bit 0, Bit 6] indicates inner cacheability: 01 = normal memory, inner write-back write-allocate cacheable
// [Bit 4, Bit 3] indicates outer cacheability: 01 = normal memory, outer write-back write-allocate cacheable
// Bit 1 indicates shareable
// 4A = 0100 1010
int attr = ((aa & 1) << 6) | (bb << 3) | (shareable << 1) | ((aa & 2) >> 1);
asm volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r" (attr | (unsigned) &PageTable));
} else {
// set TTBR0 (page table walk inner cacheable, outer non-cacheable, shareable memory)
asm volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r" (0x03 | (unsigned) &PageTable));
}
unsigned ttbr0;
asm volatile ("mrc p15, 0, %0, c2, c0, 0" : "=r" (ttbr0));
//log_debug("ttbr0 = %08x", ttbr0);
// Invalidate entire data cache
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
// asm volatile ("isb" ::: "memory");
asm volatile (".word 0xf57ff06f" ::: "memory");
InvalidateDataCache();
#else
// invalidate data cache and flush prefetch buffer
// NOTE: The below code seems to cause a Pi 2 to crash
asm volatile ("mcr p15, 0, %0, c7, c5, 4" :: "r" (0) : "memory");
asm volatile ("mcr p15, 0, %0, c7, c6, 0" :: "r" (0) : "memory");
#endif
if (_get_hardware_id() >= _RPI2) {
asm volatile (".word 0xf57ff06f" ::: "memory"); // asm volatile ("isb" ::: "memory"); (won't compile on arm v6)
InvalidateDataCache();
} else {
// invalidate data cache and flush prefetch buffer
// NOTE: The below code seems to cause a Pi 2 to crash
asm volatile ("mcr p15, 0, %0, c7, c5, 4" :: "r" (0) : "memory");
asm volatile ("mcr p15, 0, %0, c7, c6, 0" :: "r" (0) : "memory");
}
// enable MMU, L1 cache and instruction cache, L2 cache, write buffer,
// branch prediction and extended page table on

Wyświetl plik

@ -11,27 +11,15 @@
//do not leave USE_ARM_CAPTURE uncommented during a release build as all versions will be ARM
//#define USE_ARM_CAPTURE //uncomment to select ARM capture build
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#define USE_CACHED_SCREEN // caches the upper half of the screen area and uses it for mode7 deinterlace
#define CACHED_SCREEN_OFFSET 0x00B00000 // offset to cached screen area
#define CACHED_SCREEN_SIZE 0x00100000 // size of cached screen area
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#if defined(RPI2) || defined(RPI3)
#define HAS_MULTICORE // indicates multiple cores are available
#if defined(USE_ARM_CAPTURE)
#define WARN_12BIT // warn that 9bpp & 12bpp won't work
#define HIDE_12BIT_PROFILES // 12 bit profile won't work on Pi zero2 etc
#define INHIBIT_DOUBLE_HEIGHT // inhibit line doubling as it causes memory stalls
#endif
#endif
#if defined(RPI4)
#define HAS_MULTICORE // indicates multiple cores are available
#define USE_CACHED_SCREEN // caches the upper half of the screen area and uses it for mode7 deinterlace
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#define MODE7_ALWAYS_ARM // always runs mode7 capture code on ARM
#endif
//#define WARN_12BIT // warn that 9bpp & 12bpp won't work
//#define HIDE_12BIT_PROFILES // 12 bit profile won't work on Pi zero2 etc
//#define INHIBIT_DOUBLE_HEIGHT // inhibit line doubling as it causes memory stalls
//#define USE_MULTICORE //can be used to add code in an extra core

Wyświetl plik

@ -428,15 +428,11 @@ static param_t features[] = {
{ F_RETURN, "Return Position", "return", 0, 1, 1 },
{ F_DEBUG, "Debug", "debug", 0, 1, 1 },
{ F_DIRECTION, "Button Reverse", "button_reverse", 0, 1, 1 },
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
{ F_OCLOCK_CPU, "Overclock CPU", "overclock_cpu", 0, 100, 1 },
{ F_OCLOCK_CORE, "Overclock Core", "overclock_core", 0, 125, 1 },
{ F_OCLOCK_SDRAM, "Overclock SDRAM", "overclock_sdram", 0, 175, 1 },
#else
{ F_OCLOCK_CPU, "Overclock CPU", "overclock_cpu", 0, 75, 1 },
{ F_OCLOCK_CORE, "Overclock Core", "overclock_core", 0, 175, 1 },
{ F_OCLOCK_SDRAM, "Overclock SDRAM", "overclock_sdram", 0, 175, 1 },
#endif
{ F_RSTATUS, "Powerup Message", "powerup_message", 0, 1, 1 },
{ F_FRONTEND, "Interface", "interface", 0, NUM_FRONTENDS - 1, 1 },
{ -1, NULL, NULL, 0, 0, 0 }

Wyświetl plik

@ -54,16 +54,8 @@ typedef void (*func_ptr)();
#if defined(RPI4)
#define USE_PLLD4
#define SYS_CLK_DIVIDER 5
#elif defined(RPI3)
#define USE_PLLA
#define SYS_CLK_DIVIDER 3
#elif defined(RPI2)
#define USE_PLLA
#define SYS_CLK_DIVIDER 4
#else
#define USE_PLLA
#define SYS_CLK_DIVIDER 3 // should be 4 for Pi 1 depending on core clock speed
#endif
//PLL defaults for different Pi versions
@ -954,9 +946,22 @@ int calibrate_sampling_clock(int profile_changed) {
set_pll_frequency(((double) (pll_freq >> prediv)) / 1e6, PLL_CTRL, PLL_FRAC);
#ifdef USE_PLLC
#if defined(USE_PLLC)
int sys_clk_divider = 3;
switch (_get_hardware_id()) {
case 2:
sys_clk_divider = 4;
break;
case 4:
sys_clk_divider = 5;
break;
default:
sys_clk_divider = 3; // should be 4 for Pi 1 depending on core clock speed
break;
}
// Reinitialize the UART as the Core Clock has changed
RPI_AuxMiniUartInit_With_Freq(115200, 8, pll_freq / pll_scale / SYS_CLK_DIVIDER);
RPI_AuxMiniUartInit_With_Freq(115200, 8, pll_freq / pll_scale / sys_clk_divider);
#endif
// And remember for next time
@ -1854,13 +1859,11 @@ int extra_flags() {
return extra;
}
#ifdef HAS_MULTICORE
// Hand an entry point to another core and wake it.
//   core - index of the target core; selects the register at
//          0x4000008c + 0x10 * core
//          NOTE(review): presumably the per-core mailbox write-set register of
//          the BCM2836 ARM-local peripherals - confirm against the datasheet
//   func - address written to that register (the code the core should run)
static void start_core(int core, func_ptr func) {
   // volatile: this is a memory-mapped register write that must be emitted as written
   volatile unsigned int *mailbox = (volatile unsigned int *)(0x4000008c + 0x10 * core);
   printf("starting core %d\r\n", core);
   *mailbox = (unsigned int) func;
   // The "memory" clobber stops the compiler from moving the register write
   // past the event signal that wakes the waiting core.
   asm volatile ( "sev" ::: "memory" );
}
#endif
// =============================================================
// Public methods
@ -2421,7 +2424,7 @@ void swapBuffer(int buffer) {
#ifndef RPI4
if (capinfo->bpp == 16) {
// directly manipulate the display list in 16BPP mode otherwise display list gets reconstructed
int dli = ((int)capinfo->fb | 0xc0000000) + (buffer * capinfo->height * capinfo->pitch);
int dli = ((int)capinfo->fb | 0xc0000000) + (buffer * capinfo->height * capinfo->pitch);
do {
display_list[display_list_index + 5] = dli;
} while (dli != display_list[display_list_index + 5]);
@ -3595,24 +3598,24 @@ void kernel_main(unsigned int r0, unsigned int r1, unsigned int atags)
}
log_info("Pi Hardware detected as type %d", _get_hardware_id());
display_list = SCALER_DISPLAY_LIST;
gpioreg = (volatile uint32_t *)(_get_peripheral_base() + 0x101000UL);
gpioreg = (volatile uint32_t *)(_get_peripheral_base() + 0x101000UL);
init_hardware();
#ifdef HAS_MULTICORE
int i;
printf("main running on core %u\r\n", _get_core());
for (i = 0; i < 10000000; i++);
if (_get_hardware_id() >= _RPI2) {
int i;
printf("main running on core %u\r\n", _get_core());
for (i = 0; i < 10000000; i++);
#ifdef USE_MULTICORE
start_core(1, _init_core);
start_core(1, _init_core);
#else
start_core(1, _spin_core);
#endif
for (i = 0; i < 10000000; i++);
start_core(2, _spin_core);
for (i = 0; i < 10000000; i++);
start_core(3, _spin_core);
for (i = 0; i < 10000000; i++);
start_core(1, _spin_core);
#endif
for (i = 0; i < 10000000; i++);
start_core(2, _spin_core);
for (i = 0; i < 10000000; i++);
start_core(3, _spin_core);
for (i = 0; i < 10000000; i++);
}
rgb_to_hdmi_main();
}

Wyświetl plik

@ -38,7 +38,8 @@
// Raspberry Pi3 has a different way of controlling the LED
#if defined(RPIZERO) || defined(RPIBPLUS) || defined(RPI2) || defined(RPI3)|| defined(RPI4)
//#if defined(RPIZERO) || defined(RPIBPLUS) || defined(RPI2) || defined(RPI3)|| defined(RPI4)
#define LED_GPFSEL GPFSEL[4]
#define LED_GPFBIT 21
#define LED_GPSET GPSET1
@ -46,15 +47,16 @@
#define LED_GPIO_BIT 15
#define LED_ON() do { RPI_GpioBase->LED_GPCLR = (1 << LED_GPIO_BIT); } while(0)
#define LED_OFF() do { RPI_GpioBase->LED_GPSET = (1 << LED_GPIO_BIT); } while(0)
#else
#define LED_GPFSEL GPFSEL[1]
#define LED_GPFBIT 18
#define LED_GPSET GPSET0
#define LED_GPCLR GPCLR0
#define LED_GPIO_BIT 16
#define LED_ON() do { RPI_GpioBase->LED_GPSET = (1 << LED_GPIO_BIT); } while(0)
#define LED_OFF() do { RPI_GpioBase->LED_GPCLR = (1 << LED_GPIO_BIT); } while(0)
#endif
//#else
// #define LED_GPFSEL GPFSEL[1]
// #define LED_GPFBIT 18
// #define LED_GPSET GPSET0
// #define LED_GPCLR GPCLR0
// #define LED_GPIO_BIT 16
// #define LED_ON() do { RPI_GpioBase->LED_GPSET = (1 << LED_GPIO_BIT); } while(0)
// #define LED_OFF() do { RPI_GpioBase->LED_GPCLR = (1 << LED_GPIO_BIT); } while(0)
//#endif
typedef enum
{

Wyświetl plik

@ -56,13 +56,35 @@ set( CMAKE_OBJCOPY ${TC_PATH}${CROSS_COMPILE}objcopy
CACHE FILEPATH "The toolchain objcopy command " FORCE )
# Set the CMAKE C flags (which should also be used by the assembler!)
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp" )
#use hardware floating point
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfloat-abi=hard" )
#pi1 flags
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv6zk" )
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mtune=arm1176jzf-s" )
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp" )
#pi2 flags
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a" )
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mtune=cortex-a7" )
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon-vfpv4" )
#pi3 flags
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a" )
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mtune=cortex-a53" )
#set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=crypto-neon-fp-armv8" )
#current flags for pi2 & pi3 (same as the pi1 set above, so the C code is built for ARMv6)
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv6zk" )
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mtune=arm1176jzf-s" )
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp" )
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "" )
set( CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "" )
set( CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv7-a" )
set( CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=neon-vfpv4" )
set( CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS}" CACHE STRING "" )
set( KERNEL_NAME "./kernelrpi.img" )

Wyświetl plik

@ -81,8 +81,8 @@ set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mtune=arm1176jzf-s" )
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfp" )
set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "" )
set( CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv8-a" )
set( CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=crypto-neon-fp-armv8" )
set( CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -march=armv7-a" )
set( CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -mfpu=neon-vfpv4" )
set( CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS}" CACHE STRING "" )
# Add the raspberry-pi 3 definition so conditional compilation works