author     Luca Dariz <luca@orpolo.org>                    2023-05-21 10:57:56 +0200
committer  Samuel Thibault <samuel.thibault@ens-lyon.org>  2023-05-21 20:55:01 +0200
commit     222020cff440921e987dcd92e308dd775e5d543d
tree       f12d67c7bcb96a6528604c8a5c881ddd792c55b4
parent     95bf57a0625140e4b60f817150cb516bda65b446
pmap: dynamically allocate the whole user page tree map
* i386/intel/pmap.c: switch to dynamic allocation of all the page tree map
  levels for the user-space address range, using a separate kmem cache for
  each level. This allows extending the usable memory space on x86_64 to
  more than one L3 page for user space. The kernel address map is left
  untouched for now, as it needs a different initialization.
* i386/intel/pmap.h: remove the hardcoded user pages and add a macro to
  reconstruct the page-to-virtual mapping.
Message-Id: <20230521085758.365640-1-luca@orpolo.org>
-rw-r--r--	i386/intel/pmap.c	544
-rw-r--r--	i386/intel/pmap.h	 21
2 files changed, 277 insertions, 288 deletions
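
The core of the change is easiest to see before reading the diff: every level
of the user page-table tree now comes from its own page-sized, page-aligned
kmem cache, so any level can be allocated and freed independently. Below is a
minimal sketch of that setup, using the cache names and kmem_cache_init calls
the patch introduces (pt_cache/pd_cache/pdpt_cache/l4_cache); the wrapper
function itself is hypothetical, not part of the commit.

	/* One cache per page-table level; each object is one physical page. */
	struct kmem_cache pt_cache;	/* L1: page tables */
	struct kmem_cache pd_cache;	/* L2: page directories */
	struct kmem_cache pdpt_cache;	/* L3: page directory pointer tables */
	struct kmem_cache l4_cache;	/* L4: root table (x86_64 only) */

	static void pmap_level_caches_init(void)	/* hypothetical wrapper */
	{
		kmem_cache_init(&pt_cache,   "pmap_L1", INTEL_PGBYTES,
				INTEL_PGBYTES, NULL, KMEM_CACHE_PHYSMEM);
		kmem_cache_init(&pd_cache,   "pmap_L2", INTEL_PGBYTES,
				INTEL_PGBYTES, NULL, KMEM_CACHE_PHYSMEM);
		kmem_cache_init(&pdpt_cache, "pmap_L3", INTEL_PGBYTES,
				INTEL_PGBYTES, NULL, KMEM_CACHE_PHYSMEM);
		kmem_cache_init(&l4_cache,   "pmap_L4", INTEL_PGBYTES,
				INTEL_PGBYTES, NULL, KMEM_CACHE_PHYSMEM);
	}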
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index e867ed59..3a30271e 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -398,6 +398,7 @@ struct pmap kernel_pmap_store;
pmap_t kernel_pmap;
struct kmem_cache pmap_cache; /* cache of pmap structures */
+struct kmem_cache pt_cache; /* cache of page tables */
struct kmem_cache pd_cache; /* cache of page directories */
#if PAE
struct kmem_cache pdpt_cache; /* cache of page directory pointer tables */
@@ -429,6 +430,14 @@ pt_entry_t *kernel_page_dir;
*/
static pmap_mapwindow_t mapwindows[PMAP_NMAPWINDOWS * NCPUS];
+#ifdef __x86_64__
+static inline pt_entry_t *
+pmap_l4base(const pmap_t pmap, vm_offset_t lin_addr)
+{
+ return &pmap->l4base[lin2l4num(lin_addr)];
+}
+#endif
+
#ifdef PAE
static inline pt_entry_t *
pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr)
@@ -443,7 +452,7 @@ pmap_ptp(const pmap_t pmap, vm_offset_t lin_addr)
#else /* __x86_64__ */
pdp_table = pmap->pdpbase;
#endif /* __x86_64__ */
- return pdp_table;
+ return &pdp_table[lin2pdpnum(lin_addr)];
}
#endif
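
This hunk gives every per-level getter the same contract: return the address
of the entry covering lin_addr at that level, or a null result when the level
above has no valid entry. The getters then compose one level at a time,
roughly as in this sketch (a hypothetical, simplified body modeled on the
pmap_pde change in the next hunk; the real function keeps its PAE/x86_64
conditionals):

	static inline pt_entry_t *
	pmap_pde_sketch(const pmap_t pmap, vm_offset_t addr)
	{
		pt_entry_t *ptp = pmap_ptp(pmap, addr);	/* entry in the L3 table */
		if (ptp == 0 || !(*ptp & INTEL_PTE_VALID))
			return PT_ENTRY_NULL;
		/* Follow it down and index into the L2 table it points to. */
		return &((pt_entry_t *) ptetokv(*ptp))[lin2pdenum(addr)];
	}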
@@ -456,7 +465,9 @@ pmap_pde(const pmap_t pmap, vm_offset_t addr)
#if PAE
pt_entry_t *pdp_table;
pdp_table = pmap_ptp(pmap, addr);
- pt_entry_t pde = pdp_table[lin2pdpnum(addr)];
+ if (pdp_table == 0)
+ return(PT_ENTRY_NULL);
+ pt_entry_t pde = *pdp_table;
if ((pde & INTEL_PTE_VALID) == 0)
return PT_ENTRY_NULL;
page_dir = (pt_entry_t *) ptetokv(pde);
@@ -1092,15 +1103,18 @@ void pmap_init(void)
*/
s = (vm_size_t) sizeof(struct pmap);
kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, 0);
- kmem_cache_init(&pd_cache, "pd",
+ kmem_cache_init(&pt_cache, "pmap_L1",
+ INTEL_PGBYTES, INTEL_PGBYTES, NULL,
+ KMEM_CACHE_PHYSMEM);
+ kmem_cache_init(&pd_cache, "pmap_L2",
INTEL_PGBYTES, INTEL_PGBYTES, NULL,
KMEM_CACHE_PHYSMEM);
#if PAE
- kmem_cache_init(&pdpt_cache, "pdpt",
+ kmem_cache_init(&pdpt_cache, "pmap_L3",
INTEL_PGBYTES, INTEL_PGBYTES, NULL,
KMEM_CACHE_PHYSMEM);
#ifdef __x86_64__
- kmem_cache_init(&l4_cache, "L4",
+ kmem_cache_init(&l4_cache, "pmap_L4",
INTEL_PGBYTES, INTEL_PGBYTES, NULL,
KMEM_CACHE_PHYSMEM);
#endif /* __x86_64__ */
@@ -1244,6 +1258,11 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
vm_object_lock(pmap_object);
m = vm_page_lookup(pmap_object, pa);
vm_page_lock_queues();
+#ifdef MACH_PV_PAGETABLES
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
+ panic("couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa));
+ pmap_set_page_readwrite((void*) phystokv(pa));
+#endif /* MACH_PV_PAGETABLES */
vm_page_free(m);
inuse_ptepages_count--;
vm_page_unlock_queues();
@@ -1265,7 +1284,7 @@ pmap_page_table_page_dealloc(vm_offset_t pa)
pmap_t pmap_create(vm_size_t size)
{
#ifdef __x86_64__
- // needs to be reworked if we want to dynamically allocate PDPs
+ // needs to be reworked if we want to dynamically allocate PDPs for kernel
const int PDPNUM = PDPNUM_KERNEL;
#endif
pt_entry_t *page_dir[PDPNUM];
@@ -1360,30 +1379,6 @@ pmap_t pmap_create(vm_size_t size)
memset(p->l4base, 0, INTEL_PGBYTES);
WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_KERNEL_ADDRESS)],
pa_to_pte(kvtophys((vm_offset_t) pdp_kernel)) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
-#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS)
- // kernel vm and user vm are not in the same l4 entry, so add the user one
- // TODO alloc only PDPTE for the user range VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS
- // and keep the same for kernel range, in l4 table we have different entries
- pt_entry_t *pdp_user = (pt_entry_t *) kmem_cache_alloc(&pdpt_cache);
- if (pdp_user == NULL) {
- panic("pmap create");
- }
- memset(pdp_user, 0, INTEL_PGBYTES);
- WRITE_PTE(&p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)],
- pa_to_pte(kvtophys((vm_offset_t) pdp_user)) | INTEL_PTE_VALID | INTEL_PTE_WRITE | INTEL_PTE_USER);
-#endif /* lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS) */
- for (int i = 0; i < PDPNUM_USER; i++) {
- pt_entry_t *user_page_dir = (pt_entry_t *) kmem_cache_alloc(&pd_cache);
- memset(user_page_dir, 0, INTEL_PGBYTES);
- WRITE_PTE(&pdp_user[i + lin2pdpnum(VM_MIN_USER_ADDRESS)], // pdp_user
- pa_to_pte(kvtophys((vm_offset_t)user_page_dir))
- | INTEL_PTE_VALID
-#if (defined(__x86_64__) && !defined(MACH_HYP)) || defined(MACH_PV_PAGETABLES)
- | INTEL_PTE_WRITE | INTEL_PTE_USER
-#endif
- );
- }
-
#ifdef MACH_PV_PAGETABLES
// FIXME: use kmem_cache_alloc instead
if (kmem_alloc_wired(kernel_map,
@@ -1443,15 +1438,7 @@ pmap_t pmap_create(vm_size_t size)
void pmap_destroy(pmap_t p)
{
-#if PAE
- int i;
-#endif
- boolean_t free_all;
- pt_entry_t *page_dir;
- pt_entry_t *pdep;
- phys_addr_t pa;
int c, s;
- vm_page_t m;
if (p == PMAP_NULL)
return;
@@ -1466,87 +1453,54 @@ void pmap_destroy(pmap_t p)
return; /* still in use */
}
+ /*
+ * Free the page table tree.
+ */
#if PAE
- for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) {
#ifdef __x86_64__
-#ifdef USER32
- /* In this case we know we have one PDP for user space */
- pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
-#else
-#warning "TODO do 64-bit userspace need more that 512G?"
- pt_entry_t *pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
-#endif /* USER32 */
- page_dir = (pt_entry_t *) ptetokv(pdp[i]);
+ for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) {
+ pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
+ if (!(pdp & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
+ for (int l3i = 0; l3i < 512; l3i++) {
#else /* __x86_64__ */
- page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
+ pt_entry_t *pdpbase = p->pdpbase;
+ for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) {
#endif /* __x86_64__ */
- free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
+ pt_entry_t pde = (pt_entry_t) pdpbase[l3i];
+ if (!(pde & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
+ for (int l2i = 0; l2i < 512; l2i++) {
#else /* PAE */
- free_all = FALSE;
- page_dir = p->dirbase;
+ pt_entry_t *pdebase = p->dirbase;
+ for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) {
#endif /* PAE */
-
-#ifdef __x86_64__
-#warning FIXME 64bit need to free l3
-#endif
- /*
- * Free the memory maps, then the
- * pmap structure.
- */
- for (pdep = page_dir;
- (free_all
- || pdep < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)])
- && pdep < &page_dir[NPTES];
- pdep += ptes_per_vm_page) {
- if (*pdep & INTEL_PTE_VALID) {
- pa = pte_to_pa(*pdep);
- assert(pa == (vm_offset_t) pa);
- vm_object_lock(pmap_object);
- m = vm_page_lookup(pmap_object, pa);
- if (m == VM_PAGE_NULL)
- panic("pmap_destroy: pte page not in object");
- vm_page_lock_queues();
-#ifdef MACH_PV_PAGETABLES
- if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, pa_to_mfn(pa)))
- panic("pmap_destroy: couldn't unpin page %llx(%lx)\n", pa, (vm_offset_t) kv_to_ma(pa));
- pmap_set_page_readwrite((void*) phystokv(pa));
-#endif /* MACH_PV_PAGETABLES */
- vm_page_free(m);
- inuse_ptepages_count--;
- vm_page_unlock_queues();
- vm_object_unlock(pmap_object);
- }
- }
-#ifdef MACH_PV_PAGETABLES
- pmap_set_page_readwrite((void*) page_dir);
-#endif /* MACH_PV_PAGETABLES */
- kmem_cache_free(&pd_cache, (vm_offset_t) page_dir);
+ pt_entry_t pte = (pt_entry_t) pdebase[l2i];
+ if (!(pte & INTEL_PTE_VALID))
+ continue;
+ kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
+ }
#if PAE
- }
-
-#ifdef MACH_PV_PAGETABLES
+ kmem_cache_free(&pd_cache, (vm_offset_t)pdebase);
+ }
#ifdef __x86_64__
- pmap_set_page_readwrite(p->l4base);
- pmap_set_page_readwrite(p->user_l4base);
- pmap_set_page_readwrite(p->user_pdpbase);
+ kmem_cache_free(&pdpt_cache, (vm_offset_t)pdpbase);
+ }
#endif /* __x86_64__ */
- pmap_set_page_readwrite(p->pdpbase);
-#endif /* MACH_PV_PAGETABLES */
+#endif /* PAE */
+ /* Finally, free the page table tree root and the pmap itself */
+#if PAE
#ifdef __x86_64__
- kmem_cache_free(&pdpt_cache, (vm_offset_t) pmap_ptp(p, VM_MIN_USER_ADDRESS));
-#if lin2l4num(VM_MIN_KERNEL_ADDRESS) != lin2l4num(VM_MAX_USER_ADDRESS)
- // TODO kernel vm and user vm are not in the same l4 entry
-#endif
kmem_cache_free(&l4_cache, (vm_offset_t) p->l4base);
-#ifdef MACH_PV_PAGETABLES
- kmem_free(kernel_map, (vm_offset_t)p->user_l4base, INTEL_PGBYTES);
- kmem_free(kernel_map, (vm_offset_t)p->user_pdpbase, INTEL_PGBYTES);
-#endif /* MACH_PV_PAGETABLES */
#else /* __x86_64__ */
- kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
+ kmem_cache_free(&pdpt_cache, (vm_offset_t) p->pdpbase);
#endif /* __x86_64__ */
-#endif /* PAE */
+#else /* PAE */
+ kmem_cache_free(&pd_cache, (vm_offset_t) p->dirbase);
+#endif /* PAE */
kmem_cache_free(&pmap_cache, (vm_offset_t) p);
}
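
The rewritten pmap_destroy above frees the user part of the tree bottom-up:
at each tier it skips invalid entries and returns each valid child table to
the cache of the level below, before finally freeing the root. The recurring
inner loop, written once here as a hypothetical helper (the patch open-codes
it per level, because the entry counts and caches differ across the
PAE/x86_64 configurations):

	/* Sketch: one tier of the teardown walk. */
	static void
	pmap_free_children(pt_entry_t *table, int nentries,
			   struct kmem_cache *child_cache)
	{
		for (int i = 0; i < nentries; i++) {
			if (!(table[i] & INTEL_PTE_VALID))
				continue;
			/* Children are freed before their parent table is. */
			kmem_cache_free(child_cache,
					(vm_offset_t) ptetokv(table[i]));
		}
	}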
@@ -1756,7 +1710,7 @@ void pmap_remove(
l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1);
if (l > e)
l = e;
- if (*pde & INTEL_PTE_VALID) {
+ if (pde && (*pde & INTEL_PTE_VALID)) {
spte = (pt_entry_t *)ptetokv(*pde);
spte = &spte[ptenum(s)];
epte = &spte[intel_btop(l-s)];
@@ -2036,86 +1990,24 @@ void pmap_protect(
SPLX(spl);
}
+typedef pt_entry_t* (*pmap_level_getter_t)(const pmap_t pmap, vm_offset_t addr);
/*
- * Insert the given physical page (p) at
- * the specified virtual address (v) in the
- * target physical map with the protection requested.
- *
- * If specified, the page will be wired down, meaning
- * that the related pte can not be reclaimed.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- * or lose information. That is, this routine must actually
- * insert this page into the given map NOW.
- */
-void pmap_enter(
- pmap_t pmap,
- vm_offset_t v,
- phys_addr_t pa,
- vm_prot_t prot,
- boolean_t wired)
+* Expand one single level of the page table tree
+*/
+static inline pt_entry_t* pmap_expand_level(pmap_t pmap, vm_offset_t v, int spl,
+ pmap_level_getter_t pmap_level,
+ pmap_level_getter_t pmap_level_upper,
+ int n_per_vm_page,
+ struct kmem_cache *cache)
{
- boolean_t is_physmem;
pt_entry_t *pte;
- pv_entry_t pv_h;
- unsigned long i, pai;
- pv_entry_t pv_e;
- pt_entry_t template;
- int spl;
- phys_addr_t old_pa;
-
- assert(pa != vm_page_fictitious_addr);
- if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa);
- if (pmap == PMAP_NULL)
- return;
-
-#if !MACH_KDB
- if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
- panic("pmap_enter(%zx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa);
-#endif
-#if !(__i486__ || __i586__ || __i686__)
- if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
- && !wired /* hack for io_wire */ ) {
- /*
- * Because the 386 ignores write protection in kernel mode,
- * we cannot enter a read-only kernel mapping, and must
- * remove an existing mapping if changing it.
- */
- PMAP_READ_LOCK(pmap, spl);
-
- pte = pmap_pte(pmap, v);
- if (pte != PT_ENTRY_NULL && *pte != 0) {
- /*
- * Invalidate the translation buffer,
- * then remove the mapping.
- */
- pmap_remove_range(pmap, v, pte,
- pte + ptes_per_vm_page);
- PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
- }
- PMAP_READ_UNLOCK(pmap, spl);
- return;
- }
-#endif
-
- /*
- * Must allocate a new pvlist entry while we're unlocked;
- * Allocating may cause pageout (which will lock the pmap system).
- * If we determine we need a pvlist entry, we will unlock
- * and allocate one. Then we will retry, throughing away
- * the allocated entry later (if we no longer need it).
- */
- pv_e = PV_ENTRY_NULL;
-Retry:
- PMAP_READ_LOCK(pmap, spl);
/*
* Expand pmap to include this pte. Assume that
* pmap is always expanded to include enough hardware
* pages to map one VM page.
*/
-
- while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
+ while ((pte = pmap_level(pmap, v)) == PT_ENTRY_NULL) {
/*
* Need to allocate a new page-table page.
*/
@@ -2136,7 +2028,9 @@ Retry:
*/
PMAP_READ_UNLOCK(pmap, spl);
- ptp = phystokv(pmap_page_table_page_alloc());
+ while (!(ptp = kmem_cache_alloc(cache)))
+ VM_PAGE_WAIT((void (*)()) 0);
+ memset((void *)ptp, 0, PAGE_SIZE);
/*
* Re-lock the pmap and check that another thread has
@@ -2146,12 +2040,12 @@ Retry:
*/
PMAP_READ_LOCK(pmap, spl);
- if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
+ if (pmap_level(pmap, v) != PT_ENTRY_NULL) {
/*
* Oops...
*/
PMAP_READ_UNLOCK(pmap, spl);
- pmap_page_table_page_dealloc(kvtophys(ptp));
+ kmem_cache_free(cache, ptp);
PMAP_READ_LOCK(pmap, spl);
continue;
}
@@ -2159,8 +2053,8 @@ Retry:
/*
* Enter the new page table page in the page directory.
*/
- i = ptes_per_vm_page;
- pdp = pmap_pde(pmap, v);
+ i = n_per_vm_page;
+ pdp = pmap_level_upper(pmap, v);
do {
#ifdef MACH_PV_PAGETABLES
pmap_set_page_readonly((void *) ptp);
@@ -2185,6 +2079,100 @@ Retry:
*/
continue;
}
+ return pte;
+}
+
+/*
+ * Expand, if required, the PMAP to include the virtual address V.
+ * PMAP needs to be locked, and it will be still locked on return. It
+ * can temporarily unlock the PMAP, during allocation or deallocation
+ * of physical pages.
+ */
+static inline pt_entry_t* pmap_expand(pmap_t pmap, vm_offset_t v, int spl)
+{
+#ifdef PAE
+#ifdef __x86_64__
+ pmap_expand_level(pmap, v, spl, pmap_ptp, pmap_l4base, ptes_per_vm_page, &pdpt_cache);
+#endif /* __x86_64__ */
+ pmap_expand_level(pmap, v, spl, pmap_pde, pmap_ptp, ptes_per_vm_page, &pd_cache);
+#endif /* PAE */
+ return pmap_expand_level(pmap, v, spl, pmap_pte, pmap_pde, ptes_per_vm_page, &pt_cache);
+}
+
+/*
+ * Insert the given physical page (p) at
+ * the specified virtual address (v) in the
+ * target physical map with the protection requested.
+ *
+ * If specified, the page will be wired down, meaning
+ * that the related pte can not be reclaimed.
+ *
+ * NB: This is the only routine which MAY NOT lazy-evaluate
+ * or lose information. That is, this routine must actually
+ * insert this page into the given map NOW.
+ */
+void pmap_enter(
+ pmap_t pmap,
+ vm_offset_t v,
+ phys_addr_t pa,
+ vm_prot_t prot,
+ boolean_t wired)
+{
+ boolean_t is_physmem;
+ pt_entry_t *pte;
+ pv_entry_t pv_h;
+ unsigned long i, pai;
+ pv_entry_t pv_e;
+ pt_entry_t template;
+ int spl;
+ phys_addr_t old_pa;
+
+ assert(pa != vm_page_fictitious_addr);
+ if (pmap_debug) printf("pmap(%zx, %llx)\n", v, (unsigned long long) pa);
+ if (pmap == PMAP_NULL)
+ return;
+
+#if !MACH_KDB
+ if (pmap == kernel_pmap && (v < kernel_virtual_start || v >= kernel_virtual_end))
+ panic("pmap_enter(%llx, %llx) falls in physical memory area!\n", v, (unsigned long long) pa);
+#endif
+#if !(__i486__ || __i586__ || __i686__)
+ if (pmap == kernel_pmap && (prot & VM_PROT_WRITE) == 0
+ && !wired /* hack for io_wire */ ) {
+ /*
+ * Because the 386 ignores write protection in kernel mode,
+ * we cannot enter a read-only kernel mapping, and must
+ * remove an existing mapping if changing it.
+ */
+ PMAP_READ_LOCK(pmap, spl);
+
+ pte = pmap_pte(pmap, v);
+ if (pte != PT_ENTRY_NULL && *pte != 0) {
+ /*
+ * Invalidate the translation buffer,
+ * then remove the mapping.
+ */
+ pmap_remove_range(pmap, v, pte,
+ pte + ptes_per_vm_page);
+ PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
+ }
+ PMAP_READ_UNLOCK(pmap, spl);
+ return;
+ }
+#endif
+
+ /*
+ * Must allocate a new pvlist entry while we're unlocked;
+ * Allocating may cause pageout (which will lock the pmap system).
+ * If we determine we need a pvlist entry, we will unlock
+ * and allocate one. Then we will retry, throughing away
+ * the allocated entry later (if we no longer need it).
+ */
+ pv_e = PV_ENTRY_NULL;
+Retry:
+ PMAP_READ_LOCK(pmap, spl);
+
+ pte = pmap_expand(pmap, v, spl);
if (vm_page_ready())
is_physmem = (vm_page_lookup_pa(pa) != NULL);
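
With this hunk, pmap_expand drives pmap_expand_level once per tier, top-down
(L4 to L3, L3 to L2, L2 to L1), each call allocating the level's table from
the matching cache if it is missing. The net effect at the pmap_enter call
site, heavily condensed (the real function also handles pv entries,
protection templates and the retry path; the WRITE_PTE flags here are purely
illustrative):

	PMAP_READ_LOCK(pmap, spl);
	/* Allocates any missing intermediate tables;
	   may temporarily drop and retake the lock. */
	pte = pmap_expand(pmap, v, spl);
	WRITE_PTE(pte, pa_to_pte(pa) | INTEL_PTE_VALID | INTEL_PTE_WRITE);
	PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
	PMAP_READ_UNLOCK(pmap, spl);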
@@ -2462,10 +2450,7 @@ void pmap_copy(
*/
void pmap_collect(pmap_t p)
{
- int i;
- boolean_t free_all;
- pt_entry_t *page_dir;
- pt_entry_t *pdp, *ptp;
+ pt_entry_t *ptp;
pt_entry_t *eptp;
phys_addr_t pa;
int spl, wired;
@@ -2476,119 +2461,104 @@ void pmap_collect(pmap_t p)
if (p == kernel_pmap)
return;
+ /*
+ * Free the page table tree.
+ */
#if PAE
- for (i = 0; i < lin2pdpnum(VM_MAX_USER_ADDRESS); i++) {
#ifdef __x86_64__
-#ifdef USER32
- /* In this case we know we have one PDP for user space */
- pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
-#else
-#warning "TODO do 64-bit userspace need more that 512G?"
- pdp = (pt_entry_t *) ptetokv(p->l4base[lin2l4num(VM_MIN_USER_ADDRESS)]);
-#endif /* USER32 */
- page_dir = (pt_entry_t *) ptetokv(pdp[i]);
+ for (int l4i = 0; l4i < lin2l4num(VM_MAX_USER_ADDRESS); l4i++) {
+ pt_entry_t pdp = (pt_entry_t) p->l4base[l4i];
+ if (!(pdp & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdpbase = (pt_entry_t*) ptetokv(pdp);
+ for (int l3i = 0; l3i < 512; l3i++) {
#else /* __x86_64__ */
- page_dir = (pt_entry_t *) ptetokv(p->pdpbase[i]);
+ pt_entry_t *pdpbase = p->pdpbase;
+ for (int l3i = 0; l3i < lin2pdpnum(VM_MAX_USER_ADDRESS); l3i++) {
#endif /* __x86_64__ */
- free_all = i < lin2pdpnum(LINEAR_MIN_KERNEL_ADDRESS);
-#else
- i = 0;
- free_all = FALSE;
- page_dir = p->dirbase;
-#endif
-
- /*
- * Garbage collect map.
- */
- PMAP_READ_LOCK(p, spl);
- for (pdp = page_dir;
- (free_all
- || pdp < &page_dir[lin2pdenum(LINEAR_MIN_KERNEL_ADDRESS)])
- && pdp < &page_dir[NPTES];
- pdp += ptes_per_vm_page) {
- if (*pdp & INTEL_PTE_VALID) {
-
- pa = pte_to_pa(*pdp);
- ptp = (pt_entry_t *)phystokv(pa);
- eptp = ptp + NPTES*ptes_per_vm_page;
-
- /*
- * If the pte page has any wired mappings, we cannot
- * free it.
- */
- wired = 0;
- {
- pt_entry_t *ptep;
- for (ptep = ptp; ptep < eptp; ptep++) {
- if (*ptep & INTEL_PTE_WIRED) {
- wired = 1;
- break;
- }
- }
- }
- if (!wired) {
- /*
- * Remove the virtual addresses mapped by this pte page.
- */
- { /*XXX big hack*/
- vm_offset_t va = pdenum2lin(pdp - page_dir
- + i * NPTES);
- if (p == kernel_pmap)
- va = lintokv(va);
- pmap_remove_range(p,
- va,
- ptp,
- eptp);
- }
-
- /*
- * Invalidate the page directory pointer.
- */
- {
- int i = ptes_per_vm_page;
- pt_entry_t *pdep = pdp;
- do {
+ pt_entry_t pde = (pt_entry_t ) pdpbase[l3i];
+ if (!(pde & INTEL_PTE_VALID))
+ continue;
+ pt_entry_t *pdebase = (pt_entry_t*) ptetokv(pde);
+ for (int l2i = 0; l2i < 512; l2i++) {
+#else /* PAE */
+ pt_entry_t *pdebase = p->dirbase;
+ for (int l2i = 0; l2i < lin2pdenum(VM_MAX_USER_ADDRESS); l2i++) {
+#endif /* PAE */
+ pt_entry_t pte = (pt_entry_t) pdebase[l2i];
+ if (!(pte & INTEL_PTE_VALID))
+ continue;
+
+ pa = pte_to_pa(pte);
+ ptp = (pt_entry_t *)phystokv(pa);
+ eptp = ptp + NPTES*ptes_per_vm_page;
+
+ /*
+ * If the pte page has any wired mappings, we cannot
+ * free it.
+ */
+ wired = 0;
+ {
+ pt_entry_t *ptep;
+ for (ptep = ptp; ptep < eptp; ptep++) {
+ if (*ptep & INTEL_PTE_WIRED) {
+ wired = 1;
+ break;
+ }
+ }
+ }
+ if (!wired) {
+ /*
+ * Remove the virtual addresses mapped by this pte page.
+ */
+ { /*XXX big hack*/
+ vm_offset_t va = pagenum2lin(l4i, l3i, l2i, 0);
+ if (p == kernel_pmap)
+ va = lintokv(va);
+ pmap_remove_range(p, va, ptp, eptp);
+ }
+
+ /*
+ * Invalidate the page directory pointer.
+ */
+ {
+ int i = ptes_per_vm_page;
+ pt_entry_t *pdep = &pdebase[l2i];
+ do {
#ifdef MACH_PV_PAGETABLES
- unsigned long pte = *pdep;
- void *ptable = (void*) ptetokv(pte);
- if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
- panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
- if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
- panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
- pmap_set_page_readwrite(ptable);
+ unsigned long pte = *pdep;
+ void *ptable = (void*) ptetokv(pte);
+ if (!(hyp_mmu_update_pte(pa_to_ma(kvtophys((vm_offset_t)pdep++)), 0)))
+ panic("%s:%d could not clear pde %p\n",__FILE__,__LINE__,pdep-1);
+ if (!hyp_mmuext_op_mfn (MMUEXT_UNPIN_TABLE, kv_to_mfn(ptable)))
+ panic("couldn't unpin page %p(%lx)\n", ptable, (vm_offset_t) pa_to_ma(kvtophys((vm_offset_t)ptable)));
+ pmap_set_page_readwrite(ptable);
#else /* MACH_PV_PAGETABLES */
- *pdep++ = 0;
+ *pdep++ = 0;
#endif /* MACH_PV_PAGETABLES */
- } while (--i > 0);
- }
+ } while (--i > 0);
+ }
- PMAP_READ_UNLOCK(p, spl);
+ PMAP_READ_UNLOCK(p, spl);
- /*
- * And free the pte page itself.
- */
- {
- vm_page_t m;
-
- vm_object_lock(pmap_object);
- assert(pa == (vm_offset_t) pa);
- m = vm_page_lookup(pmap_object, pa);
- if (m == VM_PAGE_NULL)
- panic("pmap_collect: pte page not in object");
- vm_page_lock_queues();
- vm_page_free(m);
- inuse_ptepages_count--;
- vm_page_unlock_queues();
- vm_object_unlock(pmap_object);
- }
+ /*
+ * And free the pte page itself.
+ */
+ kmem_cache_free(&pt_cache, (vm_offset_t)ptetokv(pte));
- PMAP_READ_LOCK(p, spl);
- }
- }
- }
+ PMAP_READ_LOCK(p, spl);
+
+ }
+ }
#if PAE
+ // TODO check l2?
+ }
+#ifdef __x86_64__
+ // TODO check l3?
}
-#endif
+#endif /* __x86_64__ */
+#endif /* PAE */
+
PMAP_UPDATE_TLBS(p, VM_MIN_USER_ADDRESS, VM_MAX_USER_ADDRESS);
PMAP_READ_UNLOCK(p, spl);
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 4c1b9bd5..5fc7fb25 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -75,7 +75,6 @@ typedef phys_addr_t pt_entry_t;
#define L4SHIFT 39 /* L4 shift */
#define L4MASK 0x1ff /* mask for L4 index */
#define PDPNUM_KERNEL (((VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) >> PDPSHIFT) + 1)
-#define PDPNUM_USER (((VM_MAX_USER_ADDRESS - VM_MIN_USER_ADDRESS) >> PDPSHIFT) + 1)
#define PDPMASK 0x1ff /* mask for page directory pointer index */
#else /* __x86_64__ */
#define PDPNUM 4 /* number of page directory pointers */
@@ -130,6 +129,26 @@ typedef phys_addr_t pt_entry_t;
*/
#define pdenum2lin(a) ((vm_offset_t)(a) << PDESHIFT)
+#if PAE
+#ifdef __x86_64__
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l4num) << L4SHIFT) + \
+ ((vm_offset_t)(l3num) << PDPSHIFT) + \
+ ((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#else /* __x86_64__ */
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l3num) << PDPSHIFT) + \
+ ((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#endif
+#else /* PAE */
+#define pagenum2lin(l4num, l3num, l2num, l1num) \
+ (((vm_offset_t)(l2num) << PDESHIFT) + \
+ ((vm_offset_t)(l1num) << PTESHIFT))
+#endif
+
+
/*
* Convert linear offset to page table index
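
The new pagenum2lin macro is the inverse of the lin2l4num/lin2pdpnum/
lin2pdenum index extractors: it rebuilds a linear address from the per-level
indices collected while walking the tree, which is how pmap_collect above now
computes the first address mapped by a pte page, via
pagenum2lin(l4i, l3i, l2i, 0). A worked instance on x86_64, using the shift
values from this header (L4SHIFT 39, PDPSHIFT 30, PDESHIFT 21, PTESHIFT 12);
the index values are illustrative only:

	/* Index tuple (l4, l3, l2, l1) -> linear address. */
	vm_offset_t va = pagenum2lin(1, 2, 3, 4);
	/* = (1UL << 39) + (2UL << 30) + (3UL << 21) + (4UL << 12)
	   = 0x8000000000 + 0x80000000 + 0x600000 + 0x4000
	   = 0x8080604000 */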
*/