From 0b3504b6db86c531e8b53b8e9aa9030db6e72357 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Sun, 18 Mar 2012 20:31:22 +0100 Subject: pmap.h: Add 64bit variant * i386/intel/pmap.h (L4SHIFT, L4MASK, lin2l4num): New macros (PDPNUM, PDPMASK, set_pmap): Add 64bit variant. Make PAE use the 64bit mask too. (pmap): Add l4base, user_l4base, user_pdpbase fields. * i386/intel/pmap.c (pmap_bootstrap): Clear the whole PDP. Enable write bit in PDP. Set user pagetable to NULL. Initialize l4base. (pmap_clear_bootstrap_pagetable): Add 4th-level support. (pmap_create): Clear the whole PDP. Enable write bit in PDP. Initialize l4base, user_pdpbase, user_l4base. (pmap_destroy): Clear l4base, user_pdpbase, user_l4base. * i386/i386at/model_dep.c (i386at_init): Load l4base on 64bits. --- i386/i386at/model_dep.c | 10 ++-- i386/intel/pmap.c | 120 ++++++++++++++++++++++++++++++++++++++++++------ i386/intel/pmap.h | 41 ++++++++++++++++- 3 files changed, 153 insertions(+), 18 deletions(-) diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c index 61ed4748..02adec17 100644 --- a/i386/i386at/model_dep.c +++ b/i386/i386at/model_dep.c @@ -452,14 +452,18 @@ i386at_init(void) #endif /* PAE */ #endif /* MACH_PV_PAGETABLES */ #if PAE - set_cr3((unsigned)_kvtophys(kernel_pmap->pdpbase)); +#ifdef __x86_64__ + set_cr3((unsigned long)_kvtophys(kernel_pmap->l4base)); +#else + set_cr3((unsigned long)_kvtophys(kernel_pmap->pdpbase)); +#endif #ifndef MACH_HYP if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE)) panic("CPU doesn't have support for PAE."); set_cr4(get_cr4() | CR4_PAE); #endif /* MACH_HYP */ #else - set_cr3((unsigned)_kvtophys(kernel_page_dir)); + set_cr3((unsigned long)_kvtophys(kernel_page_dir)); #endif /* PAE */ #ifndef MACH_HYP /* Turn paging on. @@ -527,7 +531,7 @@ i386at_init(void) /* * C boot entrypoint - called by boot_entry in boothdr.S. - * Running in 32-bit flat mode, but without paging yet. + * Running in flat mode, but without paging yet. 
*/ void c_boot_entry(vm_offset_t bi) { diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c index c55b8f2d..4b223ec5 100644 --- a/i386/intel/pmap.c +++ b/i386/intel/pmap.c @@ -642,14 +642,25 @@ void pmap_bootstrap(void) kernel_page_dir = (pt_entry_t*)phystokv(addr); } kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page()); + memset(kernel_pmap->pdpbase, 0, INTEL_PGBYTES); { int i; for (i = 0; i < PDPNUM; i++) WRITE_PTE(&kernel_pmap->pdpbase[i], pa_to_pte(_kvtophys((void *) kernel_page_dir + i * INTEL_PGBYTES)) - | INTEL_PTE_VALID); + | INTEL_PTE_VALID | INTEL_PTE_WRITE); } +#ifdef __x86_64__ +#ifdef MACH_HYP + kernel_pmap->user_l4base = NULL; + kernel_pmap->user_pdpbase = NULL; +#endif + kernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page()); + memset(kernel_pmap->l4base, 0, INTEL_PGBYTES); + WRITE_PTE(&kernel_pmap->l4base[0], pa_to_pte(_kvtophys(kernel_pmap->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); + pmap_set_page_readonly_init(kernel_pmap->l4base); +#endif /* x86_64 */ #else /* PAE */ kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(pmap_grab_page()); #endif /* PAE */ @@ -681,6 +692,9 @@ void pmap_bootstrap(void) int n_l1map; for (n_l1map = 0, la = VM_MIN_KERNEL_ADDRESS; la >= VM_MIN_KERNEL_ADDRESS; la += NPTES * PAGE_SIZE) { #ifdef PAE +#ifdef __x86_64__ + base = (pt_entry_t*) ptetokv(base[0]); +#endif /* x86_64 */ pt_entry_t *l2_map = (pt_entry_t*) ptetokv(base[lin2pdpnum(la)]); #else /* PAE */ pt_entry_t *l2_map = base; @@ -848,6 +862,9 @@ void pmap_set_page_readonly_init(void *_vaddr) { vm_offset_t vaddr = (vm_offset_t) _vaddr; #if PAE pt_entry_t *pdpbase = (void*) boot_info.pt_base; +#ifdef __x86_64__ + pdpbase = (pt_entry_t *) ptetokv(pdpbase[lin2l4num(vaddr)]); +#endif /* The bootstrap table does not necessarily use contiguous pages for the pde tables */ pt_entry_t *dirbase = (void*) ptetokv(pdpbase[lin2pdpnum(vaddr)]); #else @@ -870,38 +887,68 @@ void pmap_clear_bootstrap_pagetable(pt_entry_t *base) { unsigned i; 
pt_entry_t *dir; vm_offset_t va = 0; +#ifdef __x86_64__ + int l4i, l3i; +#else #if PAE unsigned j; #endif /* PAE */ +#endif if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(base))) panic("pmap_clear_bootstrap_pagetable: couldn't unpin page %p(%lx)\n", base, (vm_offset_t) kv_to_ma(base)); +#ifdef __x86_64__ + /* 4-level page table */ + for (l4i = 0; l4i < NPTES && va < HYP_VIRT_START && va < 0x0000800000000000UL; l4i++) { + pt_entry_t l4e = base[l4i]; + pt_entry_t *l3; + if (!(l4e & INTEL_PTE_VALID)) { + va += NPTES * NPTES * NPTES * INTEL_PGBYTES; + continue; + } + l3 = (pt_entry_t *) ptetokv(l4e); + + for (l3i = 0; l3i < NPTES && va < HYP_VIRT_START; l3i++) { + pt_entry_t l3e = l3[l3i]; + if (!(l3e & INTEL_PTE_VALID)) { + va += NPTES * NPTES * INTEL_PGBYTES; + continue; + } + dir = (pt_entry_t *) ptetokv(l3e); +#else #if PAE - for (j = 0; j < PDPNUM; j++) + /* 3-level page table */ + for (j = 0; j < PDPNUM && va < HYP_VIRT_START; j++) { - pt_entry_t pdpe = base[j]; - if (pdpe & INTEL_PTE_VALID) { + pt_entry_t pdpe = base[j]; + if (!(pdpe & INTEL_PTE_VALID)) { + va += NPTES * NPTES * INTEL_PGBYTES; + continue; + } dir = (pt_entry_t *) ptetokv(pdpe); #else /* PAE */ + /* 2-level page table */ dir = base; #endif /* PAE */ - for (i = 0; i < NPTES; i++) { +#endif + for (i = 0; i < NPTES && va < HYP_VIRT_START; i++) { pt_entry_t pde = dir[i]; unsigned long pfn = atop(pte_to_pa(pde)); void *pgt = (void*) phystokv(ptoa(pfn)); if (pde & INTEL_PTE_VALID) hyp_free_page(pfn, pgt); va += NPTES * INTEL_PGBYTES; - if (va >= HYP_VIRT_START) - break; } +#ifndef __x86_64__ #if PAE hyp_free_page(atop(_kvtophys(dir)), dir); - } else - va += NPTES * NPTES * INTEL_PGBYTES; - if (va >= HYP_VIRT_START) - break; } #endif /* PAE */ +#else + hyp_free_page(atop(_kvtophys(dir)), dir); + } + hyp_free_page(atop(_kvtophys(l3)), l3); + } +#endif hyp_free_page(atop(_kvtophys(base)), base); } #endif /* MACH_PV_PAGETABLES */ @@ -1235,13 +1282,48 @@ pmap_t pmap_create(vm_size_t size) return 
PMAP_NULL; } + memset(p->pdpbase, 0, INTEL_PGBYTES); { for (i = 0; i < PDPNUM; i++) WRITE_PTE(&p->pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) page_dir[i])) - | INTEL_PTE_VALID); + | INTEL_PTE_VALID | INTEL_PTE_WRITE); } +#ifdef __x86_64__ + // FIXME: use kmem_cache_alloc instead + if (kmem_alloc_wired(kernel_map, + (vm_offset_t *)&p->l4base, INTEL_PGBYTES) + != KERN_SUCCESS) + panic("pmap_create"); + memset(p->l4base, 0, INTEL_PGBYTES); + WRITE_PTE(&p->l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); #ifdef MACH_PV_PAGETABLES + // FIXME: use kmem_cache_alloc instead + if (kmem_alloc_wired(kernel_map, + (vm_offset_t *)&p->user_pdpbase, INTEL_PGBYTES) + != KERN_SUCCESS) + panic("pmap_create"); + memset(p->user_pdpbase, 0, INTEL_PGBYTES); + { + int i; + for (i = 0; i < lin2pdpnum(VM_MAX_ADDRESS); i++) + WRITE_PTE(&p->user_pdpbase[i], pa_to_pte(kvtophys((vm_offset_t) page_dir[i])) | INTEL_PTE_VALID | INTEL_PTE_WRITE); + } + // FIXME: use kmem_cache_alloc instead + if (kmem_alloc_wired(kernel_map, + (vm_offset_t *)&p->user_l4base, INTEL_PGBYTES) + != KERN_SUCCESS) + panic("pmap_create"); + memset(p->user_l4base, 0, INTEL_PGBYTES); + WRITE_PTE(&p->user_l4base[0], pa_to_pte(kvtophys((vm_offset_t) p->user_pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); +#endif /* MACH_PV_PAGETABLES */ +#endif /* _x86_64 */ +#ifdef MACH_PV_PAGETABLES +#ifdef __x86_64__ + pmap_set_page_readonly(p->l4base); + pmap_set_page_readonly(p->user_l4base); + pmap_set_page_readonly(p->user_pdpbase); +#endif pmap_set_page_readonly(p->pdpbase); #endif /* MACH_PV_PAGETABLES */ #else /* PAE */ @@ -1339,8 +1421,20 @@ void pmap_destroy(pmap_t p) } #ifdef MACH_PV_PAGETABLES +#ifdef __x86_64__ + pmap_set_page_readwrite(p->l4base); + pmap_set_page_readwrite(p->user_l4base); + pmap_set_page_readwrite(p->user_pdpbase); +#endif pmap_set_page_readwrite(p->pdpbase); #endif /* MACH_PV_PAGETABLES */ +#ifdef __x86_64__ + kmem_free(kernel_map, (vm_offset_t)p->l4base, 
INTEL_PGBYTES); +#ifdef MACH_PV_PAGETABLES + kmem_free(kernel_map, (vm_offset_t)p->user_l4base, INTEL_PGBYTES); + kmem_free(kernel_map, (vm_offset_t)p->user_pdpbase, INTEL_PGBYTES); +#endif +#endif kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase); #endif /* PAE */ kmem_cache_free(&pmap_cache, (vm_offset_t) p); @@ -2904,7 +2998,7 @@ void pmap_update_interrupt(void) } #endif /* NCPUS > 1 */ -#if defined(__i386__) +#if defined(__i386__) || defined (__x86_64__) /* Unmap page 0 to trap NULL references. */ void pmap_unmap_page_zero (void) diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h index 5fa2a0c4..4c852543 100644 --- a/i386/intel/pmap.h +++ b/i386/intel/pmap.h @@ -48,7 +48,7 @@ * Define the generic in terms of the specific */ -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) #define INTEL_PGBYTES I386_PGBYTES #define INTEL_PGSHIFT I386_PGSHIFT #define intel_btop(x) i386_btop(x) @@ -71,9 +71,18 @@ typedef phys_addr_t pt_entry_t; #define INTEL_OFFMASK 0xfff /* offset within page */ #if PAE +#ifdef __x86_64__ +#define L4SHIFT 39 /* L4 shift */ +#define L4MASK 0x1ff /* mask for L4 index */ +#endif #define PDPSHIFT 30 /* page directory pointer */ +#ifdef __x86_64__ +/* Enough for 8GiB addressing space. 
*/ +#define PDPNUM 8 /* number of page directory pointers */ +#else #define PDPNUM 4 /* number of page directory pointers */ -#define PDPMASK 3 /* mask for page directory pointer index */ +#endif +#define PDPMASK 0x1ff /* mask for page directory pointer index */ #define PDESHIFT 21 /* page descriptor shift */ #define PDEMASK 0x1ff /* mask for page descriptor index */ #define PTESHIFT 12 /* page table shift */ @@ -86,6 +95,13 @@ typedef phys_addr_t pt_entry_t; #define PTEMASK 0x3ff /* mask for page table index */ #endif /* PAE */ +/* + * Convert linear offset to L4 pointer index + */ +#ifdef __x86_64__ +#define lin2l4num(a) (((a) >> L4SHIFT) & L4MASK) +#endif + /* * Convert linear offset to page descriptor index */ @@ -167,6 +183,13 @@ struct pmap { #else pt_entry_t *pdpbase; /* page directory pointer table */ #endif /* ! PAE */ +#ifdef __x86_64__ + pt_entry_t *l4base; /* l4 table */ +#ifdef MACH_HYP + pt_entry_t *user_l4base; /* Userland l4 table */ + pt_entry_t *user_pdpbase; /* Userland page directory pointer table */ +#endif /* MACH_HYP */ +#endif /* x86_64 */ int ref_count; /* reference count */ decl_simple_lock_data(,lock) /* lock on map */ @@ -187,7 +210,21 @@ extern void pmap_clear_bootstrap_pagetable(pt_entry_t *addr); #endif /* MACH_PV_PAGETABLES */ #if PAE +#ifdef __x86_64__ +#ifdef MACH_HYP +#define set_pmap(pmap) \ + MACRO_BEGIN \ + set_cr3(kvtophys((vm_offset_t)(pmap)->l4base)); \ + if (pmap->user_l4base) \ + if (!hyp_set_user_cr3(kvtophys((vm_offset_t)(pmap)->user_l4base))) \ + panic("set_user_cr3"); \ + MACRO_END +#else /* MACH_HYP */ +#define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->l4base)) +#endif /* MACH_HYP */ +#else /* x86_64 */ #define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->pdpbase)) +#endif /* x86_64 */ #else /* PAE */ #define set_pmap(pmap) set_cr3(kvtophys((vm_offset_t)(pmap)->dirbase)) #endif /* PAE */ -- cgit v1.2.3