summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca Dariz <luca.dariz@gmail.com>2022-02-05 18:51:24 +0100
committerSamuel Thibault <samuel.thibault@ens-lyon.org>2022-08-27 19:09:19 +0200
commitf90ba97d5091ad85f089817231f53e34a0f6f259 (patch)
tree9586f5d9d0e11f508b150315b7466d5197726d56
parentf2626beb39dc416ceebfa35076d89cd291e2b903 (diff)
add support for booting from grub with x86_64
* configure: compile for native x86_64 by default instead of xen * x86_64/Makefrag.am: introduce KERNEL_MAP_BASE to reuse the constant in both code and linker script * x86_64/ldscript: use a .boot section for the very first operations, until we reach long mode. This section is not really allocated, so it doesn't need to be freed later. The vm system is later initialized starting from .text and not including .boot * link kernel at 0x4000000 as the xen version, higher values causes linker errors * we can't use full segmentation in long mode, so we need to create a temporary mapping during early boot to be able to jump to high addresses * build direct map for first 4G in boothdr, it seems required by Linux drivers * add INTEL_PTE_PS bit definition to enable 2MB pages during bootstrap * ensure write bit is set in PDP entry access rights. This only applies to PAE-enabled kernels, mandatory for x86_64. On xen platform it seems to be handled differently Signed-off-by: Luca Dariz <luca@orpolo.org> Message-Id: <20220205175129.309469-2-luca@orpolo.org>
-rw-r--r--configure.ac3
-rw-r--r--i386/configfrag.ac2
-rw-r--r--i386/i386/i386asm.sym1
-rw-r--r--i386/i386/vm_param.h2
-rw-r--r--i386/intel/pmap.c4
-rw-r--r--i386/intel/pmap.h1
-rw-r--r--x86_64/Makefrag.am17
-rw-r--r--x86_64/boothdr.S238
-rw-r--r--x86_64/ldscript28
9 files changed, 280 insertions, 16 deletions
diff --git a/configure.ac b/configure.ac
index 019842db..3aaa935c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -56,8 +56,7 @@ case $host_platform:$host_cpu in
default:i?86)
host_platform=at;;
default:x86_64)]
- AC_MSG_WARN([Platform set to Xen by default, this can not boot on non-Xen systems, you currently need a 32bit build for that.])
- [host_platform=xen;;
+ [host_platform=at;;
at:i?86 | xen:i?86 | at:x86_64 | xen:x86_64)
:;;
*)]
diff --git a/i386/configfrag.ac b/i386/configfrag.ac
index f697e277..f07a98ca 100644
--- a/i386/configfrag.ac
+++ b/i386/configfrag.ac
@@ -106,6 +106,8 @@ AC_ARG_ENABLE([apic],
enable_pae=${enable_pae-yes};;
*:i?86)
:;;
+ *:x86_64)
+ enable_pae=${enable_pae-yes};;
*)
if [ x"$enable_pae" = xyes ]; then]
AC_MSG_ERROR([can only enable the `PAE' feature on ix86.])
diff --git a/i386/i386/i386asm.sym b/i386/i386/i386asm.sym
index 0662aea0..9e1d13d7 100644
--- a/i386/i386/i386asm.sym
+++ b/i386/i386/i386asm.sym
@@ -122,6 +122,7 @@ expr sizeof(pt_entry_t) PTE_SIZE
expr INTEL_PTE_PFN PTE_PFN
expr INTEL_PTE_VALID PTE_V
expr INTEL_PTE_WRITE PTE_W
+expr INTEL_PTE_PS PTE_S
expr ~INTEL_PTE_VALID PTE_INVALID
expr NPTES PTES_PER_PAGE
expr INTEL_PTE_VALID|INTEL_PTE_WRITE INTEL_PTE_KERNEL
diff --git a/i386/i386/vm_param.h b/i386/i386/vm_param.h
index edd9522c..314fdb35 100644
--- a/i386/i386/vm_param.h
+++ b/i386/i386/vm_param.h
@@ -36,7 +36,7 @@
* for better trace support in kdb; the _START symbol has to be offset by the
* same amount. */
#ifdef __x86_64__
-#define VM_MIN_KERNEL_ADDRESS 0x40000000UL
+#define VM_MIN_KERNEL_ADDRESS KERNEL_MAP_BASE
#else
#define VM_MIN_KERNEL_ADDRESS 0xC0000000UL
#endif
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index 5302b092..57c18a0d 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -655,7 +655,7 @@ void pmap_bootstrap(void)
pa_to_pte(_kvtophys((void *) kernel_page_dir
+ i * INTEL_PGBYTES))
| INTEL_PTE_VALID
-#ifdef MACH_PV_PAGETABLES
+#if !defined(MACH_HYP) || defined(MACH_PV_PAGETABLES)
| INTEL_PTE_WRITE
#endif
);
@@ -1298,7 +1298,7 @@ pmap_t pmap_create(vm_size_t size)
WRITE_PTE(&p->pdpbase[i],
pa_to_pte(kvtophys((vm_offset_t) page_dir[i]))
| INTEL_PTE_VALID
-#ifdef MACH_PV_PAGETABLES
+#if !defined(MACH_HYP) || defined(MACH_PV_PAGETABLES)
| INTEL_PTE_WRITE
#endif
);
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 63683bc5..bad640c1 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -148,6 +148,7 @@ typedef phys_addr_t pt_entry_t;
#define INTEL_PTE_NCACHE 0x00000010
#define INTEL_PTE_REF 0x00000020
#define INTEL_PTE_MOD 0x00000040
+#define INTEL_PTE_PS 0x00000080
#ifdef MACH_PV_PAGETABLES
/* Not supported */
#define INTEL_PTE_GLOBAL 0x00000000
diff --git a/x86_64/Makefrag.am b/x86_64/Makefrag.am
index 6b6bb2cb..0139940a 100644
--- a/x86_64/Makefrag.am
+++ b/x86_64/Makefrag.am
@@ -212,10 +212,25 @@ nodist_libkernel_a_SOURCES += \
#
if PLATFORM_at
+# This should probably be 0xffffffff80000000 for mcmodel=kernel, but let's try
+# to stay in the first 8G first, otherwise we have to fix the pmap module to
+# actually use the l4 page level
+#KERNEL_MAP_BASE=0x100000000
+# but for nor try with < 4G, otherwise we have linker errors
+KERNEL_MAP_BASE=0x40000000
gnumach_LINKFLAGS += \
--defsym _START_MAP=$(_START_MAP) \
- --defsym _START=_START_MAP+0x40000000 \
+ --defsym _START=_START_MAP \
+ --defsym KERNEL_MAP_BASE=$(KERNEL_MAP_BASE) \
-T '$(srcdir)'/x86_64/ldscript
+
+AM_CFLAGS += -D_START_MAP=$(_START_MAP) \
+ -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE)
+AM_CCASFLAGS += -D_START_MAP=$(_START_MAP) \
+ -DKERNEL_MAP_BASE=$(KERNEL_MAP_BASE)
+
+AM_CCASFLAGS += \
+ -Ii386
endif
AM_CPPFLAGS += \
diff --git a/x86_64/boothdr.S b/x86_64/boothdr.S
new file mode 100644
index 00000000..12fc7ca2
--- /dev/null
+++ b/x86_64/boothdr.S
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2022 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <mach/machine/asm.h>
+
+#include <i386/i386asm.h>
+#include <i386/i386/proc_reg.h>
+#include <i386/i386/seg.h>
+ /*
+ * This section will be put first into .boot. See also x86_64/ldscript.
+ */
+ .section .boot.text,"ax"
+ .globl boot_start
+
+ /* We should never be entered this way. */
+ .code32
+boot_start:
+ jmp boot_entry
+
+ /* MultiBoot header - see multiboot.h. */
+#define MULTIBOOT_MAGIC 0x1BADB002
+#ifdef __ELF__
+#define MULTIBOOT_FLAGS 0x00000003
+#else /* __ELF__ */
+#define MULTIBOOT_FLAGS 0x00010003
+#endif /* __ELF__ */
+ P2ALIGN(2)
+boot_hdr:
+ .long MULTIBOOT_MAGIC
+ .long MULTIBOOT_FLAGS
+ /*
+ * The next item here is the checksum.
+ * XX this works OK until we need at least the 30th bit.
+ */
+ .long - (MULTIBOOT_MAGIC+MULTIBOOT_FLAGS)
+#ifndef __ELF__ /* a.out kludge */
+ .long boot_hdr /* header_addr */
+ .long _start /* load_addr */
+ .long _edata /* load_end_addr */
+ .long _end /* bss_end_addr */
+ .long boot_entry /* entry */
+#endif /* __ELF__ */
+
+boot_entry:
+ /*
+ * Prepare minimal page mapping to jump to 64 bit and to C code.
+ * The first 4GB is identity mapped, and the first 2GB are re-mapped
+ * to high addresses at KERNEL_MAP_BASE
+ */
+
+ movl $p3table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p4table)
+ /*
+ * Fill 4 entries in L3 table to cover the whole 32-bit 4GB address
+ * space. Part of it might be remapped later if the kernel is mapped
+ * below 4G.
+ */
+ movl $p2table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table)
+ movl $p2table1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 8)
+ movl $p2table2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 16)
+ movl $p2table3,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + 24)
+ /* point each page table level two entry to a page */
+ mov $0,%ecx
+.map_p2_table:
+ mov $0x200000,%eax // 2MiB page, should be always available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4k
+ mov %eax,p2table(,%ecx,8)
+ inc %ecx
+ cmp $2048,%ecx // 512 entries per table, map 4 L2 tables
+ jne .map_p2_table
+
+ /*
+ * KERNEL_MAP_BASE must me aligned to 2GB.
+ * Depending on kernel starting address, we might need to add another
+ * entry in the L4 table (controlling 512 GB chunks). In any case, we
+ * add two entries in L3 table to make sure we map 2GB for the kernel.
+ * Note that this may override part of the mapping create above.
+ */
+.kernel_map:
+#if KERNEL_MAP_BASE >= (1U << 39)
+ movl $p3ktable,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p4table + (8 * ((KERNEL_MAP_BASE >> 39) & 0x1FF))) // select 512G block
+ movl $p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3ktable + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3ktable + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#else
+ movl $p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(p3table + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#endif
+
+ mov $0,%ecx
+.map_p2k_table:
+ mov $0x200000,%eax // 2MiB page, should be always available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4K
+ mov %eax,p2ktable1(,%ecx,8)
+ inc %ecx
+ cmp $1024,%ecx // 512 entries per table, map 2 L2 tables
+ jne .map_p2k_table
+
+switch64:
+ /*
+ * Jump to 64 bit mode, we have to
+ * - enable PAE
+ * - enable long mode
+ * - enable paging and load the tables filled above in CR3
+ * - jump to a 64-bit code segment
+ */
+ mov %cr4,%eax
+ or $CR4_PAE,%eax
+ mov %eax,%cr4
+ mov $0xC0000080,%ecx // select EFER register
+ rdmsr
+ or $(1 << 8),%eax // long mode enable bit
+ wrmsr
+ mov $p4table,%eax
+ mov %eax,%cr3
+ mov %cr0,%eax
+ or $CR0_PG,%eax
+ or $CR0_WP,%eax
+ mov %eax,%cr0
+
+ lgdt gdt64pointer
+ movw $0,%ax
+ movw %ax,%fs
+ movw %ax,%gs
+ movw $16,%ax
+ movw %ax,%ds
+ movw %ax,%es
+ movw %ax,%ss
+ ljmp $8,$boot_entry64
+
+ .code64
+
+ /* why do we need this? it seems overwritten by linker */
+ .globl _start
+_start:
+
+boot_entry64:
+ /* Switch to our own interrupt stack. */
+ movq $(_intstack+INTSTACK_SIZE),%rax
+ andq $(~15),%rax
+ movq %rax,%rsp
+
+ /* Reset EFLAGS to a known state. */
+ pushq $0
+ popf
+ /* save multiboot info for later */
+ movq %rbx,%r8
+
+ /* Fix ifunc entries */
+ movq $__rela_iplt_start,%rsi
+ movq $__rela_iplt_end,%rdi
+iplt_cont:
+ cmpq %rdi,%rsi
+ jae iplt_done
+ movq (%rsi),%rbx /* r_offset */
+ movb 4(%rsi),%al /* info */
+ cmpb $42,%al /* IRELATIVE */
+ jnz iplt_next
+ call *(%ebx) /* call ifunc */
+ movq %rax,(%rbx) /* fixed address */
+iplt_next:
+ addq $8,%rsi
+ jmp iplt_cont
+iplt_done:
+
+ /* restore multiboot info */
+ movq %r8,%rdi
+ /* Jump into C code. */
+ call EXT(c_boot_entry)
+ /* not reached */
+ nop
+
+ .section .boot.data
+ .comm _intstack,INTSTACK_SIZE
+
+ .code32
+ .section .boot.data
+ .align 4096
+#define SEG_ACCESS_OFS 40
+#define SEG_GRANULARITY_OFS 52
+gdt64:
+ .quad 0
+gdt64code:
+ .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_CODE_R << SEG_ACCESS_OFS) | (SZ_64 << SEG_GRANULARITY_OFS)
+gdt64data:
+ .quad (ACC_P << SEG_ACCESS_OFS) | (ACC_DATA_W << SEG_ACCESS_OFS)
+gdt64end:
+ .skip (4096 - (gdt64end - gdt64))
+gdt64pointer:
+ .word gdt64end - gdt64 - 1
+ .quad gdt64
+
+ .section .boot.data
+ .align 4096
+p4table: .space 4096
+p3table: .space 4096
+p2table: .space 4096
+p2table1: .space 4096
+p2table2: .space 4096
+p2table3: .space 4096
+p3ktable: .space 4096
+p2ktable1: .space 4096
+p2ktable2: .space 4096
diff --git a/x86_64/ldscript b/x86_64/ldscript
index 375e8104..de99795e 100644
--- a/x86_64/ldscript
+++ b/x86_64/ldscript
@@ -2,7 +2,7 @@
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64",
"elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
-ENTRY(_start)
+ENTRY(boot_start)
SECTIONS
{
/*
@@ -11,22 +11,30 @@ SECTIONS
* be first in there. See also `i386/i386at/boothdr.S' and
* `gnumach_LINKFLAGS' in `i386/Makefrag.am'.
*/
- . = _START;
- .text :
- AT (_START_MAP)
+
+ . = _START_MAP;
+ .boot :
+ {
+ *(.boot.text)
+ *(.boot.data)
+ } =0x90909090
+
+ . += KERNEL_MAP_BASE;
+ _start = .;
+ .text : AT(((ADDR(.text)) - KERNEL_MAP_BASE))
{
- *(.text.start)
+ *(.text*)
*(.text .stub .text.* .gnu.linkonce.t.*)
*(.text.unlikely .text.*_unlikely)
KEEP (*(.text.*personality*))
/* .gnu.warning sections are handled specially by elf32.em. */
*(.gnu.warning)
} =0x90909090
- .init :
+ .init : AT(((ADDR(.init)) - KERNEL_MAP_BASE))
{
KEEP (*(.init))
} =0x90909090
- .fini :
+ .fini : AT(((ADDR(.fini)) - KERNEL_MAP_BASE))
{
KEEP (*(.fini))
} =0x90909090
@@ -69,7 +77,7 @@ SECTIONS
PROVIDE_HIDDEN (__rela_iplt_end = .);
}
.plt : { *(.plt) *(.iplt) }
- .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ .rodata : AT(((ADDR(.rodata)) - KERNEL_MAP_BASE)) { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
.eh_frame_hdr : { *(.eh_frame_hdr) }
.eh_frame : ONLY_IF_RO { KEEP (*(.eh_frame)) }
@@ -139,7 +147,7 @@ SECTIONS
.got : { *(.got) *(.igot) }
. = DATA_SEGMENT_RELRO_END (24, .);
.got.plt : { *(.got.plt) *(.igot.plt) }
- .data :
+ .data : AT(((ADDR(.data)) - KERNEL_MAP_BASE))
{
*(.data .data.* .gnu.linkonce.d.*)
SORT(CONSTRUCTORS)
@@ -147,7 +155,7 @@ SECTIONS
.data1 : { *(.data1) }
_edata = .; PROVIDE (edata = .);
__bss_start = .;
- .bss :
+ .bss : AT(((ADDR(.bss)) - KERNEL_MAP_BASE))
{
*(.dynbss)
*(.bss .bss.* .gnu.linkonce.b.*)