author     Samuel Thibault <samuel.thibault@ens-lyon.org>   2019-11-11 23:54:14 +0100
committer  Samuel Thibault <samuel.thibault@ens-lyon.org>   2019-11-11 23:54:14 +0100
commit     ca9c75b234fea9a4fb0ed9f7d6a3f11df4ca11e0
tree       ccfdaf0e68cfa94c4479208f2850326dc1790437 /kern
parent     b0d9e31f9a1ee8fa0283f082f01467666d92a1f6
parent     74a84a56f24c17e98bc06eaa8b49195fbd5c0541
Merge branch 'master' into master-user_level_drivers
Diffstat (limited to 'kern')
-rw-r--r--   kern/ast.c          2
-rw-r--r--   kern/ast.h          5
-rw-r--r--   kern/atomic.h      54
-rw-r--r--   kern/bootstrap.c   10
-rw-r--r--   kern/cpu_number.h   3
-rw-r--r--   kern/gsync.c      368
-rw-r--r--   kern/host.c         5
-rw-r--r--   kern/kmutex.c      76
-rw-r--r--   kern/kmutex.h      52
-rw-r--r--   kern/machine.h      1
-rw-r--r--   kern/profile.c      6
-rw-r--r--   kern/sched.h       16
-rw-r--r--   kern/sched_prim.c  38
-rw-r--r--   kern/sched_prim.h   2
-rw-r--r--   kern/task.c        24
-rw-r--r--   kern/task.h         4
-rw-r--r--   kern/thread.c      36
17 files changed, 497 insertions, 205 deletions
diff --git a/kern/ast.c b/kern/ast.c
--- a/kern/ast.c
+++ b/kern/ast.c
@@ -227,7 +227,7 @@ ast_check(void)
 		break;
 
 	    default:
-		panic("ast_check: Bad processor state (cpu %d processor %08x) state: %d",
+		panic("ast_check: Bad processor state (cpu %d processor %p) state: %d",
 		      mycpu, myprocessor, myprocessor->state);
 	}
 
diff --git a/kern/ast.h b/kern/ast.h
--- a/kern/ast.h
+++ b/kern/ast.h
@@ -41,6 +41,7 @@
  */
 
 #include "cpu_number.h"
+#include <kern/kern_types.h>
 #include <kern/macros.h>
 #include <machine/ast.h>
 
@@ -131,4 +132,8 @@ extern void ast_init (void);
 
 extern void ast_check (void);
 
+#if NCPUS > 1
+extern void cause_ast_check(const processor_t processor);
+#endif
+
 #endif	/* _KERN_AST_H_ */
diff --git a/kern/atomic.h b/kern/atomic.h
new file mode 100644
index 00000000..00da1645
--- /dev/null
+++ b/kern/atomic.h
@@ -0,0 +1,54 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+   Contributed by Agustina Arzille <avarzille@riseup.net>, 2017.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either
+   version 2 of the license, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public
+   License along with this program; if not, see
+   <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _KERN_ATOMIC_H_
+#define _KERN_ATOMIC_H_   1
+
+/* Atomically compare *PTR with EXP and set it to NVAL if they're equal.
+ * Evaluates to a boolean, indicating whether the comparison was successful. */
+#define __atomic_cas_helper(ptr, exp, nval, mo)   \
+  ({   \
+     typeof(exp) __e = (exp);   \
+     __atomic_compare_exchange_n ((ptr), &__e, (nval), 0,   \
+       __ATOMIC_##mo, __ATOMIC_RELAXED);   \
+   })
+
+#define atomic_cas_acq(ptr, exp, nval)   \
+  __atomic_cas_helper (ptr, exp, nval, ACQUIRE)
+
+#define atomic_cas_rel(ptr, exp, nval)   \
+  __atomic_cas_helper (ptr, exp, nval, RELEASE)
+
+#define atomic_cas_seq(ptr, exp, nval)   \
+  __atomic_cas_helper (ptr, exp, nval, SEQ_CST)
+
+/* Atomically exchange the value of *PTR with VAL, evaluating to
+ * its previous value. */
+#define __atomic_swap_helper(ptr, val, mo)   \
+  __atomic_exchange_n ((ptr), (val), __ATOMIC_##mo)
+
+#define atomic_swap_acq(ptr, val)   \
+  __atomic_swap_helper (ptr, val, ACQUIRE)
+
+#define atomic_swap_rel(ptr, val)   \
+  __atomic_swap_helper (ptr, val, RELEASE)
+
+#define atomic_swap_seq(ptr, val)   \
+  __atomic_swap_helper (ptr, val, SEQ_CST)
+
+#endif
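Since kern/atomic.h is new in this merge, it may help to see the helpers in use. The sketch below builds a toy test-and-set spinlock from them; struct tslock and the tslock_* names are invented for illustration and are not part of this commit:

    #include <kern/atomic.h>

    /* Hypothetical example type, not part of this commit. */
    struct tslock { unsigned int locked; };

    static inline int tslock_try (struct tslock *lp)
    {
      /* CAS form: evaluates to nonzero only if we moved 0 -> 1. */
      return (atomic_cas_acq (&lp->locked, 0u, 1u));
    }

    static inline void tslock_acquire (struct tslock *lp)
    {
      /* Swap form: the macro evaluates to the previous value, so
       * spin until we are the ones who changed it from 0 to 1. */
      while (atomic_swap_acq (&lp->locked, 1u) != 0)
        continue;
    }

    static inline void tslock_release (struct tslock *lp)
    {
      atomic_swap_rel (&lp->locked, 0u);
    }

The acquire/release variants map directly onto the __ATOMIC_ACQUIRE and __ATOMIC_RELEASE memory orders, which is exactly the pairing a lock needs; the _seq variants are there for the rarer cases that need total ordering.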
diff --git a/kern/bootstrap.c b/kern/bootstrap.c
index 7398ea44..8b88d17d 100644
--- a/kern/bootstrap.c
+++ b/kern/bootstrap.c
@@ -180,6 +180,12 @@ void bootstrap_create(void)
       if (losers)
 	panic ("cannot set boot-script variable device-port: %s",
 	       boot_script_error_string (losers));
+      losers = boot_script_set_variable
+	("kernel-task", VAL_PORT,
+	 (long) kernel_task->itk_self);
+      if (losers)
+	panic ("cannot set boot-script variable kernel-task: %s",
+	       boot_script_error_string (losers));
 
       losers = boot_script_set_variable ("kernel-command-line", VAL_STR,
 					 (long) kernel_cmdline);
@@ -487,7 +493,7 @@ read_exec(void *handle, vm_offset_t file_ofs, vm_size_t file_size,
 static void copy_bootstrap(void *e, exec_info_t *boot_exec_info)
 {
-	//register vm_map_t	user_map = current_task()->map;
+	/* vm_map_t user_map = current_task()->map; */
 	int err;
 
 	if ((err = exec_load(boot_read, read_exec, e, boot_exec_info)))
@@ -813,7 +819,7 @@ boot_script_free (void *ptr, unsigned int size)
 int
 boot_script_task_create (struct cmd *cmd)
 {
-  kern_return_t rc = task_create(TASK_NULL, FALSE, &cmd->task);
+  kern_return_t rc = task_create_kernel(TASK_NULL, FALSE, &cmd->task);
   if (rc)
     {
       printf("boot_script_task_create failed with %x\n", rc);
diff --git a/kern/cpu_number.h b/kern/cpu_number.h
index 650f4042..5d3e4bd1 100644
--- a/kern/cpu_number.h
+++ b/kern/cpu_number.h
@@ -37,7 +37,8 @@ int	master_cpu;	/* 'master' processor - keeps time */
 
 /* cpu number is always 0 on a single processor system */
 #define	cpu_number()	(0)
+#endif	/* NCPUS == 1 */
 
 #define CPU_L1_SIZE (1 << CPU_L1_SHIFT)
 
-#endif	/* NCPUS == 1 */
 #endif	/* _KERN_CPU_NUMBER_H_ */
diff --git a/kern/gsync.c b/kern/gsync.c
index e70e1199..e73a6cf0 100644
--- a/kern/gsync.c
+++ b/kern/gsync.c
@@ -17,36 +17,61 @@
  */
 
 #include <kern/gsync.h>
+#include <kern/kmutex.h>
 #include <kern/sched_prim.h>
 #include <kern/thread.h>
-#include <kern/lock.h>
 #include <kern/list.h>
 #include <vm/vm_map.h>
+#include <vm/vm_kern.h>
 
 /* An entry in the global hash table. */
 struct gsync_hbucket
 {
   struct list entries;
-  decl_simple_lock_data (, lock)
+  struct kmutex lock;
 };
 
 /* A key used to uniquely identify an address that a thread is
  * waiting on. Its members' values depend on whether said
- * address is shared or task-local. */
-struct gsync_key
+ * address is shared or task-local. Note that different types of keys
+ * should never compare equal, since a task map should never have
+ * the same address as a VM object. */
+union gsync_key
 {
-  unsigned long u;
-  unsigned long v;
+  struct
+    {
+      vm_map_t map;
+      vm_offset_t addr;
+    } local;
+
+  struct
+    {
+      vm_object_t obj;
+      vm_offset_t off;
+    } shared;
+
+  struct
+    {
+      unsigned long u;
+      unsigned long v;
+    } any;
 };
 
 /* A thread that is blocked on an address with 'gsync_wait'. */
struct gsync_waiter
 {
   struct list link;
-  struct gsync_key key;
+  union gsync_key key;
   thread_t waiter;
 };
 
+/* Needed data for temporary mappings. */
+struct vm_args
+{
+  vm_object_t obj;
+  vm_offset_t off;
+};
+
 #define GSYNC_NBUCKETS   512
 static struct gsync_hbucket gsync_buckets[GSYNC_NBUCKETS];
 
@@ -56,97 +81,93 @@ void gsync_setup (void)
   for (i = 0; i < GSYNC_NBUCKETS; ++i)
     {
       list_init (&gsync_buckets[i].entries);
-      simple_lock_init (&gsync_buckets[i].lock);
+      kmutex_init (&gsync_buckets[i].lock);
     }
 }
 
 /* Convenience comparison functions for gsync_key's. */
 
 static inline int
-gsync_key_eq (const struct gsync_key *lp,
-  const struct gsync_key *rp)
+gsync_key_eq (const union gsync_key *lp,
+  const union gsync_key *rp)
 {
-  return (lp->u == rp->u && lp->v == rp->v);
+  return (lp->any.u == rp->any.u && lp->any.v == rp->any.v);
 }
 
 static inline int
-gsync_key_lt (const struct gsync_key *lp,
-  const struct gsync_key *rp)
+gsync_key_lt (const union gsync_key *lp,
+  const union gsync_key *rp)
 {
-  return (lp->u < rp->u || (lp->u == rp->u && lp->v < rp->v));
+  return (lp->any.u < rp->any.u ||
+    (lp->any.u == rp->any.u && lp->any.v < rp->any.v));
 }
 
 #define MIX2_LL(x, y)   ((((x) << 5) | ((x) >> 27)) ^ (y))
 
 static inline unsigned int
-gsync_key_hash (const struct gsync_key *keyp)
+gsync_key_hash (const union gsync_key *keyp)
 {
   unsigned int ret = sizeof (void *);
 #ifndef __LP64__
-  ret = MIX2_LL (ret, keyp->u);
-  ret = MIX2_LL (ret, keyp->v);
+  ret = MIX2_LL (ret, keyp->any.u);
+  ret = MIX2_LL (ret, keyp->any.v);
 #else
-  ret = MIX2_LL (ret, keyp->u & ~0U);
-  ret = MIX2_LL (ret, keyp->u >> 32);
-  ret = MIX2_LL (ret, keyp->v & ~0U);
-  ret = MIX2_LL (ret, keyp->v >> 32);
+  ret = MIX2_LL (ret, keyp->any.u & ~0U);
+  ret = MIX2_LL (ret, keyp->any.u >> 32);
+  ret = MIX2_LL (ret, keyp->any.v & ~0U);
+  ret = MIX2_LL (ret, keyp->any.v >> 32);
 #endif
   return (ret);
 }
 
-/* Test if the passed VM Map can access the address ADDR. The
- * parameter FLAGS is used to specify the width and protection
- * of the address. */
+/* Perform a VM lookup for the address in the map. The FLAGS
+ * parameter is used to specify some attributes for the address,
+ * such as protection. Place the corresponding VM object/offset pair
+ * in VAP. Returns 0 if successful, -1 otherwise. */
 static int
-valid_access_p (vm_map_t map, vm_offset_t addr, int flags)
+probe_address (vm_map_t map, vm_offset_t addr,
+  int flags, struct vm_args *vap)
 {
   vm_prot_t prot = VM_PROT_READ |
     ((flags & GSYNC_MUTATE) ? VM_PROT_WRITE : 0);
-  vm_offset_t size = sizeof (unsigned int) *
-    ((flags & GSYNC_QUAD) ? 2 : 1);
+  vm_map_version_t ver;
+  vm_prot_t rprot;
+  boolean_t wired_p;
+
+  if (vm_map_lookup (&map, addr, prot, &ver,
+      &vap->obj, &vap->off, &rprot, &wired_p) != KERN_SUCCESS)
+    return (-1);
+  else if ((rprot & prot) != prot)
+    {
+      vm_object_unlock (vap->obj);
+      return (-1);
+    }
 
-  vm_map_entry_t entry;
-  return (vm_map_lookup_entry (map, addr, &entry) &&
-    entry->vme_end >= addr + size &&
-    (prot & entry->protection) == prot);
+  return (0);
 }
 
-/* Given a task and an address, initialize the key at *KEYP and
- * return the corresponding bucket in the global hash table. */
+/* Initialize the key with its needed members, depending on whether the
+ * address is local or shared. Also stores the VM object and offset inside
+ * the argument VAP for future use. */
 static int
-gsync_fill_key (task_t task, vm_offset_t addr,
-  int flags, struct gsync_key *keyp)
+gsync_prepare_key (task_t task, vm_offset_t addr, int flags,
+  union gsync_key *keyp, struct vm_args *vap)
 {
-  if (flags & GSYNC_SHARED)
+  if (probe_address (task->map, addr, flags, vap) < 0)
+    return (-1);
+  else if (flags & GSYNC_SHARED)
     {
       /* For a shared address, we need the VM object
        * and offset as the keys. */
-      vm_map_t map = task->map;
-      vm_prot_t prot = VM_PROT_READ |
-        ((flags & GSYNC_MUTATE) ? VM_PROT_WRITE : 0);
-      vm_map_version_t ver;
-      vm_prot_t rpr;
-      vm_object_t obj;
-      vm_offset_t off;
-      boolean_t wired_p;
-
-      if (unlikely (vm_map_lookup (&map, addr, prot, &ver,
-          &obj, &off, &rpr, &wired_p) != KERN_SUCCESS))
-        return (-1);
-
-      /* The VM object is returned locked. However, we check the
-       * address' accessibility later, so we can release it. */
-      vm_object_unlock (obj);
-
-      keyp->u = (unsigned long)obj;
-      keyp->v = (unsigned long)off;
+      keyp->shared.obj = vap->obj;
+      keyp->shared.off = vap->off;
     }
   else
     {
       /* Task-local address. The keys are the task's map and
        * the virtual address itself. */
-      keyp->u = (unsigned long)task->map;
-      keyp->v = (unsigned long)addr;
+      keyp->local.map = task->map;
+      keyp->local.addr = addr;
     }
 
   return ((int)(gsync_key_hash (keyp) % GSYNC_NBUCKETS));
@@ -160,7 +181,7 @@ node_to_waiter (struct list *nodep)
 
 static inline struct list*
 gsync_find_key (const struct list *entries,
-  const struct gsync_key *keyp, int *exactp)
+  const union gsync_key *keyp, int *exactp)
 {
   /* Look for a key that matches. We take advantage of the fact
    * that the entries are sorted to break out of the loop as
@@ -182,57 +203,105 @@ gsync_find_key (const struct list *entries,
   return (runp);
 }
 
-kern_return_t gsync_wait (task_t task, vm_offset_t addr,
-  unsigned int lo, unsigned int hi, natural_t msec, int flags)
+/* Create a temporary mapping in the kernel. */
+static inline vm_offset_t
+temp_mapping (struct vm_args *vap, vm_offset_t addr, vm_prot_t prot)
 {
-  if (unlikely (task != current_task()))
-    /* Not implemented yet. */
-    return (KERN_FAILURE);
+  vm_offset_t paddr = VM_MIN_KERNEL_ADDRESS;
+  /* Adjust the offset for addresses that aren't page-aligned. */
+  vm_offset_t off = vap->off - (addr - trunc_page (addr));
 
-  struct gsync_waiter w;
-  int bucket = gsync_fill_key (task, addr, flags, &w.key);
+  if (vm_map_enter (kernel_map, &paddr, PAGE_SIZE,
+      0, TRUE, vap->obj, off, FALSE, prot, VM_PROT_ALL,
+      VM_INHERIT_DEFAULT) != KERN_SUCCESS)
+    paddr = 0;
+
+  return (paddr);
+}
 
-  if (unlikely (bucket < 0))
+kern_return_t gsync_wait (task_t task, vm_offset_t addr,
+  unsigned int lo, unsigned int hi, natural_t msec, int flags)
+{
+  if (task == 0)
+    return (KERN_INVALID_TASK);
+  else if (addr % sizeof (int) != 0)
     return (KERN_INVALID_ADDRESS);
 
-  /* Test that the address is actually valid for the
-   * given task. Do so with the read-lock held in order
-   * to prevent memory deallocations. */
   vm_map_lock_read (task->map);
 
-  struct gsync_hbucket *hbp = gsync_buckets + bucket;
-  simple_lock (&hbp->lock);
+  struct gsync_waiter w;
+  struct vm_args va;
+  boolean_t remote = task != current_task ();
+  int bucket = gsync_prepare_key (task, addr, flags, &w.key, &va);
 
-  if (unlikely (!valid_access_p (task->map, addr, flags)))
+  if (bucket < 0)
    {
-      simple_unlock (&hbp->lock);
       vm_map_unlock_read (task->map);
       return (KERN_INVALID_ADDRESS);
     }
+  else if (remote)
+    /* The VM object is returned locked. However, we are about to acquire
+     * a sleeping lock for a bucket, so we must not hold any simple
+     * locks. To prevent this object from going away, we add a reference
+     * to it when requested. */
+    vm_object_reference_locked (va.obj);
+
+  /* We no longer need the lock on the VM object. */
+  vm_object_unlock (va.obj);
+
+  struct gsync_hbucket *hbp = gsync_buckets + bucket;
+  kmutex_lock (&hbp->lock, FALSE);
 
   /* Before doing any work, check that the expected value(s)
    * match the contents of the address. Otherwise, the waiting
    * thread could potentially miss a wakeup. */
-  if (((unsigned int *)addr)[0] != lo ||
-    ((flags & GSYNC_QUAD) &&
-      ((unsigned int *)addr)[1] != hi))
+
+  boolean_t equal;
+  if (! remote)
+    equal = ((unsigned int *)addr)[0] == lo &&
+      ((flags & GSYNC_QUAD) == 0 ||
+       ((unsigned int *)addr)[1] == hi);
+  else
     {
-      simple_unlock (&hbp->lock);
-      vm_map_unlock_read (task->map);
-      return (KERN_INVALID_ARGUMENT);
+      vm_offset_t paddr = temp_mapping (&va, addr, VM_PROT_READ);
+      if (unlikely (paddr == 0))
+        {
+          kmutex_unlock (&hbp->lock);
+          vm_map_unlock_read (task->map);
+          /* Make sure to remove the reference we added. */
+          vm_object_deallocate (va.obj);
+          return (KERN_MEMORY_FAILURE);
+        }
+
+      vm_offset_t off = addr & (PAGE_SIZE - 1);
+      paddr += off;
+
+      equal = ((unsigned int *)paddr)[0] == lo &&
+        ((flags & GSYNC_QUAD) == 0 ||
+         ((unsigned int *)paddr)[1] == hi);
+
+      paddr -= off;
+
+      /* Note that the call to 'vm_map_remove' will unreference
+       * the VM object, so we don't have to do it ourselves. */
+      vm_map_remove (kernel_map, paddr, paddr + PAGE_SIZE);
     }
 
+  /* Done with the task's map. */
   vm_map_unlock_read (task->map);
 
+  if (! equal)
+    {
+      kmutex_unlock (&hbp->lock);
+      return (KERN_INVALID_ARGUMENT);
+    }
+
   /* Look for the first entry in the hash bucket that
    * compares strictly greater than this waiter. */
   struct list *runp;
   list_for_each (&hbp->entries, runp)
-    {
-      struct gsync_waiter *p = node_to_waiter (runp);
-      if (gsync_key_lt (&w.key, &p->key))
-        break;
-    }
+    if (gsync_key_lt (&w.key, &node_to_waiter(runp)->key))
+      break;
 
   /* Finally, add ourselves to the list and go to sleep. */
   list_add (runp->prev, runp, &w.link);
@@ -243,24 +312,23 @@ kern_return_t gsync_wait (task_t task, vm_offset_t addr,
   else
     thread_will_wait (w.waiter);
 
-  thread_sleep (0, (simple_lock_t)&hbp->lock, TRUE);
+  kmutex_unlock (&hbp->lock);
+  thread_block (thread_no_continuation);
 
   /* We're back. */
-  kern_return_t ret = current_thread()->wait_result;
-  if (ret != THREAD_AWAKENED)
+  kern_return_t ret = KERN_SUCCESS;
+  if (current_thread()->wait_result != THREAD_AWAKENED)
     {
      /* We were interrupted or timed out. */
-      simple_lock (&hbp->lock);
-      if (w.link.next != 0)
+      kmutex_lock (&hbp->lock, FALSE);
+      if (!list_node_unlinked (&w.link))
         list_remove (&w.link);
-      simple_unlock (&hbp->lock);
+      kmutex_unlock (&hbp->lock);
 
       /* Map the error code. */
-      ret = ret == THREAD_INTERRUPTED ?
+      ret = current_thread()->wait_result == THREAD_INTERRUPTED ?
         KERN_INTERRUPTED : KERN_TIMEDOUT;
     }
-  else
-    ret = KERN_SUCCESS;
 
   return (ret);
 }
@@ -281,34 +349,60 @@ dequeue_waiter (struct list *nodep)
 
 kern_return_t gsync_wake (task_t task,
   vm_offset_t addr, unsigned int val, int flags)
 {
-  if (unlikely (task != current_task()))
-    /* Not implemented yet. */
-    return (KERN_FAILURE);
-
-  struct gsync_key key;
-  int bucket = gsync_fill_key (task, addr, flags, &key);
-
-  if (unlikely (bucket < 0))
+  if (task == 0)
+    return (KERN_INVALID_TASK);
+  else if (addr % sizeof (int) != 0)
     return (KERN_INVALID_ADDRESS);
 
-  kern_return_t ret = KERN_INVALID_ARGUMENT;
-
   vm_map_lock_read (task->map);
-  struct gsync_hbucket *hbp = gsync_buckets + bucket;
-  simple_lock (&hbp->lock);
 
-  if (unlikely (!valid_access_p (task->map, addr, flags)))
+  union gsync_key key;
+  struct vm_args va;
+  int bucket = gsync_prepare_key (task, addr, flags, &key, &va);
+
+  if (bucket < 0)
     {
-      simple_unlock (&hbp->lock);
      vm_map_unlock_read (task->map);
       return (KERN_INVALID_ADDRESS);
     }
+  else if (current_task () != task && (flags & GSYNC_MUTATE) != 0)
+    /* See above on why we do this. */
+    vm_object_reference_locked (va.obj);
+
+  /* Done with the VM object lock. */
+  vm_object_unlock (va.obj);
+
+  kern_return_t ret = KERN_INVALID_ARGUMENT;
+  struct gsync_hbucket *hbp = gsync_buckets + bucket;
+
+  kmutex_lock (&hbp->lock, FALSE);
 
   if (flags & GSYNC_MUTATE)
-    /* Set the contents of the address to the specified value,
-     * even if we don't end up waking any threads. Note that
-     * the buckets' simple locks give us atomicity. */
-    *(unsigned int *)addr = val;
+    {
+      /* Set the contents of the address to the specified value,
+       * even if we don't end up waking any threads. Note that
+       * the buckets' simple locks give us atomicity. */
+
+      if (task != current_task ())
+        {
+          vm_offset_t paddr = temp_mapping (&va, addr,
+            VM_PROT_READ | VM_PROT_WRITE);
+
+          if (paddr == 0)
+            {
+              kmutex_unlock (&hbp->lock);
+              vm_map_unlock_read (task->map);
+              vm_object_deallocate (va.obj);
+              return (KERN_MEMORY_FAILURE);
+            }
+
+          addr = paddr + (addr & (PAGE_SIZE - 1));
+        }
+
+      *(unsigned int *)addr = val;
+      if (task != current_task ())
+        vm_map_remove (kernel_map, addr, addr + sizeof (int));
+    }
 
   vm_map_unlock_read (task->map);
 
@@ -325,37 +419,35 @@ kern_return_t gsync_wake (task_t task,
       ret = KERN_SUCCESS;
     }
 
-  simple_unlock (&hbp->lock);
+  kmutex_unlock (&hbp->lock);
   return (ret);
 }
 
 kern_return_t gsync_requeue (task_t task, vm_offset_t src,
   vm_offset_t dst, boolean_t wake_one, int flags)
 {
-  if (unlikely (task != current_task()))
-    /* Not implemented yet. */
-    return (KERN_FAILURE);
+  if (task == 0)
+    return (KERN_INVALID_TASK);
+  else if (src % sizeof (int) != 0 || dst % sizeof (int) != 0)
+    return (KERN_INVALID_ADDRESS);
 
-  struct gsync_key src_k, dst_k;
-  int src_bkt = gsync_fill_key (task, src, flags, &src_k);
-  int dst_bkt = gsync_fill_key (task, dst, flags, &dst_k);
+  union gsync_key src_k, dst_k;
+  struct vm_args va;
 
-  if ((src_bkt | dst_bkt) < 0)
+  int src_bkt = gsync_prepare_key (task, src, flags, &src_k, &va);
+  if (src_bkt < 0)
    return (KERN_INVALID_ADDRESS);
 
-  vm_map_lock_read (task->map);
+  /* Unlock the VM object before the second lookup. */
+  vm_object_unlock (va.obj);
 
-  /* We don't actually dereference or modify the contents
-   * of the addresses, but we still check that they can
-   * be accessed by the task. */
-  if (unlikely (!valid_access_p (task->map, src, flags) ||
-      !valid_access_p (task->map, dst, flags)))
-    {
-      vm_map_unlock_read (task->map);
-      return (KERN_INVALID_ADDRESS);
-    }
+  int dst_bkt = gsync_prepare_key (task, dst, flags, &dst_k, &va);
+  if (dst_bkt < 0)
+    return (KERN_INVALID_ADDRESS);
 
-  vm_map_unlock_read (task->map);
+  /* We never create any temporary mappings in 'requeue', so we
+   * can unlock the VM object right now. */
+  vm_object_unlock (va.obj);
 
   /* If we're asked to unconditionally wake up a waiter, then
    * we need to remove a maximum of two threads from the queue. */
@@ -365,23 +457,23 @@ kern_return_t gsync_requeue (task_t task, vm_offset_t src,
 
   /* Acquire the locks in order, to prevent any potential deadlock. */
   if (bp1 == bp2)
-    simple_lock (&bp1->lock);
+    kmutex_lock (&bp1->lock, FALSE);
   else if ((unsigned long)bp1 < (unsigned long)bp2)
     {
-      simple_lock (&bp1->lock);
-      simple_lock (&bp2->lock);
+      kmutex_lock (&bp1->lock, FALSE);
+      kmutex_lock (&bp2->lock, FALSE);
     }
   else
     {
-      simple_lock (&bp2->lock);
-      simple_lock (&bp1->lock);
+      kmutex_lock (&bp2->lock, FALSE);
+      kmutex_lock (&bp1->lock, FALSE);
     }
 
   kern_return_t ret = KERN_SUCCESS;
   int exact;
   struct list *inp = gsync_find_key (&bp1->entries, &src_k, &exact);
 
-  if (!exact)
+  if (! exact)
     /* There are no waiters in the source queue. */
     ret = KERN_INVALID_ARGUMENT;
   else
@@ -416,9 +508,9 @@ kern_return_t gsync_requeue (task_t task, vm_offset_t src,
     }
 
   /* Release the locks and we're done. */
-  simple_unlock (&bp1->lock);
+  kmutex_unlock (&bp1->lock);
   if (bp1 != bp2)
-    simple_unlock (&bp2->lock);
+    kmutex_unlock (&bp2->lock);
 
   return (ret);
 }
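The rewritten gsync.c gives gsync_wait/gsync_wake futex-like semantics that now also work on remote tasks: the value check in gsync_wait happens under the bucket kmutex, and GSYNC_MUTATE makes gsync_wake's store and wakeup atomic with respect to waiters. A sketch of the userland locking pattern this supports, treating gsync_wait/gsync_wake as the corresponding RPC stubs; the futex_* helpers are invented, error handling is omitted, and this is the simplified always-wake variant rather than the canonical three-state futex mutex:

    /* One-word lock: 0 = free, nonzero = held (a waiter may exist). */
    static unsigned int futex_word;

    static void futex_lock (task_t self)
    {
      /* Mark the word held; the previous value tells us if it was free. */
      while (__atomic_exchange_n (&futex_word, 1u, __ATOMIC_ACQUIRE) != 0)
        /* Sleep only while the word still reads 1; gsync_wait re-checks
         * it under the bucket lock, so a wakeup racing with this call
         * cannot be lost - it just returns KERN_INVALID_ARGUMENT. */
        gsync_wait (self, (vm_offset_t) &futex_word, 1, 0, 0, 0);
    }

    static void futex_unlock (task_t self)
    {
      /* GSYNC_MUTATE stores 0 and wakes one waiter under the same
       * bucket lock, keeping the two steps atomic w.r.t. waiters. */
      gsync_wake (self, (vm_offset_t) &futex_word, 0, GSYNC_MUTATE);
    }

With the remote-task support above, the same calls can name another task's address space, which is what the user-level drivers branch needs.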
diff --git a/kern/host.c b/kern/host.c
index 57280c49..3271b0cd 100644
--- a/kern/host.c
+++ b/kern/host.c
@@ -154,7 +154,7 @@ kern_return_t host_info(
 	{
 	    host_sched_info_t	sched_info;
 	    extern int		min_quantum;
-					/* minimum quantum, in microseconds */
+					/* minimum quantum, in ticks */
 
 	    /*
 	     *	Return scheduler information.
@@ -165,8 +165,9 @@ kern_return_t host_info(
 
 	    sched_info = (host_sched_info_t) info;
 
 	    sched_info->min_timeout = tick / 1000;
-	    sched_info->min_quantum = min_quantum / 1000;	/* convert microseconds to milliseconds */
+	    sched_info->min_quantum = min_quantum * tick / 1000;
+					/* convert ticks to milliseconds */
 
 	    *count = HOST_SCHED_INFO_COUNT;
 	    return KERN_SUCCESS;
diff --git a/kern/kmutex.c b/kern/kmutex.c
new file mode 100644
index 00000000..5926d1d9
--- /dev/null
+++ b/kern/kmutex.c
@@ -0,0 +1,76 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+   Contributed by Agustina Arzille <avarzille@riseup.net>, 2017.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either
+   version 2 of the license, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public
+   License along with this program; if not, see
+   <http://www.gnu.org/licenses/>.
+*/
+
+#include <kern/kmutex.h>
+#include <kern/atomic.h>
+#include <kern/sched_prim.h>
+#include <kern/thread.h>
+
+void kmutex_init (struct kmutex *mtxp)
+{
+  mtxp->state = KMUTEX_AVAIL;
+  simple_lock_init (&mtxp->lock);
+}
+
+kern_return_t kmutex_lock (struct kmutex *mtxp, boolean_t interruptible)
+{
+  check_simple_locks ();
+
+  if (atomic_cas_acq (&mtxp->state, KMUTEX_AVAIL, KMUTEX_LOCKED))
+    /* Unowned mutex - We're done. */
+    return (KERN_SUCCESS);
+
+  /* The mutex is locked. We may have to sleep. */
+  simple_lock (&mtxp->lock);
+  if (atomic_swap_acq (&mtxp->state, KMUTEX_CONTENDED) == KMUTEX_AVAIL)
+    {
+      /* The mutex was released in-between. */
+      simple_unlock (&mtxp->lock);
+      return (KERN_SUCCESS);
+    }
+
+  /* Sleep and check the result value of the waiting, in order to
+   * inform our caller if we were interrupted or not. Note that
+   * we don't need to set again the mutex state. The owner will
+   * handle that in every case. */
+  thread_sleep ((event_t)mtxp, (simple_lock_t)&mtxp->lock, interruptible);
+  return (current_thread()->wait_result == THREAD_AWAKENED ?
+    KERN_SUCCESS : KERN_INTERRUPTED);
+}
+
+kern_return_t kmutex_trylock (struct kmutex *mtxp)
+{
+  return (atomic_cas_acq (&mtxp->state, KMUTEX_AVAIL, KMUTEX_LOCKED) ?
+    KERN_SUCCESS : KERN_FAILURE);
+}
+
+void kmutex_unlock (struct kmutex *mtxp)
+{
+  if (atomic_cas_rel (&mtxp->state, KMUTEX_LOCKED, KMUTEX_AVAIL))
+    /* No waiters - We're done. */
+    return;
+
+  simple_lock (&mtxp->lock);
+
+  if (!thread_wakeup_one ((event_t)mtxp))
+    /* Any threads that were waiting on this mutex were
+     * interrupted and left - Reset the mutex state. */
+    mtxp->state = KMUTEX_AVAIL;
+
+  simple_unlock (&mtxp->lock);
+}
diff --git a/kern/kmutex.h b/kern/kmutex.h
new file mode 100644
index 00000000..29815156
--- /dev/null
+++ b/kern/kmutex.h
@@ -0,0 +1,52 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+   Contributed by Agustina Arzille <avarzille@riseup.net>, 2017.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either
+   version 2 of the license, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public
+   License along with this program; if not, see
+   <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef _KERN_KMUTEX_H_
+#define _KERN_KMUTEX_H_   1
+
+#include <kern/lock.h>
+#include <mach/kern_return.h>
+
+struct kmutex
+{
+  unsigned int state;
+  decl_simple_lock_data (, lock)
+};
+
+/* Possible values for the mutex state. */
+#define KMUTEX_AVAIL       0
+#define KMUTEX_LOCKED      1
+#define KMUTEX_CONTENDED   2
+
+/* Initialize mutex in *MTXP. */
+extern void kmutex_init (struct kmutex *mtxp);
+
+/* Acquire lock MTXP. If INTERRUPTIBLE is true, the sleep may be
+ * prematurely terminated, in which case the function returns
+ * KERN_INTERRUPTED. Otherwise, KERN_SUCCESS is returned. */
+extern kern_return_t kmutex_lock (struct kmutex *mtxp,
+  boolean_t interruptible);
+
+/* Try to acquire the lock MTXP without sleeping.
+ * Returns KERN_SUCCESS if successful, KERN_FAILURE otherwise. */
+extern kern_return_t kmutex_trylock (struct kmutex *mtxp);
+
+/* Unlock the mutex MTXP. */
+extern void kmutex_unlock (struct kmutex *mtxp);
+
+#endif
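kmutex is a deliberately small sleeping lock: kmutex_lock may block (interruptibly if requested), kmutex_trylock never blocks, and kmutex_unlock resets the state when every waiter has already been interrupted away. A minimal usage sketch; struct example_counter and its functions are invented for illustration:

    #include <kern/kmutex.h>

    struct example_counter
    {
      struct kmutex lock;   /* serializes updates to 'value' */
      unsigned long value;
    };

    void example_counter_init (struct example_counter *cp)
    {
      kmutex_init (&cp->lock);
      cp->value = 0;
    }

    /* Interruptible: the caller must cope with an early return. */
    kern_return_t example_counter_bump (struct example_counter *cp)
    {
      kern_return_t kr = kmutex_lock (&cp->lock, TRUE);
      if (kr != KERN_SUCCESS)
        return (kr);   /* KERN_INTERRUPTED: no update was performed */

      cp->value++;
      kmutex_unlock (&cp->lock);
      return (KERN_SUCCESS);
    }

Because kmutex_lock can sleep, it must not be called with simple locks held (kmutex_lock asserts this via check_simple_locks), which is exactly why the gsync buckets above switched from simple locks to kmutexes.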
diff --git a/kern/machine.h b/kern/machine.h
index c67213a2..5c55d2cd 100644
--- a/kern/machine.h
+++ b/kern/machine.h
@@ -54,5 +54,6 @@ extern kern_return_t processor_shutdown (processor_t);
  *	action_thread() shuts down processors or changes their assignment. */
 extern void action_thread_continue (void) __attribute__((noreturn));
+extern void action_thread(void) __attribute__((noreturn));
 
 #endif	/* _MACHINE_H_ */
diff --git a/kern/profile.c b/kern/profile.c
index 1381b1a5..b33d6953 100644
--- a/kern/profile.c
+++ b/kern/profile.c
@@ -71,7 +71,7 @@ void profile_thread()
 	    int		arg[SIZE_PROF_BUFFER+1];
 	} msg;
 
-	register spl_t	s;
+	spl_t	s;
 	buf_to_send_t	buf_entry;
 	queue_entry_t	prof_queue_entry;
 	prof_data_t	pbuf;
@@ -113,7 +113,7 @@ void profile_thread()
 		else {
 			task_t		curr_task;
 			thread_t	curr_th;
-			register int	*sample;
+			int		*sample;
 			int 		curr_buf;
 			int 		imax;
 
@@ -183,7 +183,7 @@ void send_last_sample_buf(th)
 	thread_t th;
 {
-	register spl_t	s;
+	spl_t	s;
 	buf_to_send_t	buf_entry;
 	vm_offset_t	vm_buf_entry;
diff --git a/kern/sched.h b/kern/sched.h
index f82f9f56..588e0aa6 100644
--- a/kern/sched.h
+++ b/kern/sched.h
@@ -47,10 +47,10 @@
 #if	STAT_TIME
 
 /*
- *	Statistical timing uses microseconds as timer units.  18 bit shift
+ *	Statistical timing uses microseconds as timer units.  17 bit shift
 *	yields priorities.  PRI_SHIFT_2 isn't needed.
 */
-#define PRI_SHIFT	18
+#define PRI_SHIFT	17
 
 #else	/* STAT_TIME */
 
@@ -60,7 +60,7 @@
 #include <machine/sched_param.h>
 
 #endif	/* STAT_TIME */
 
-#define NRQS	50			/* 50 run queues per cpu */
+#define NRQS	64			/* 64 run queues per cpu */
 
 struct run_queue {
 	queue_head_t		runq[NRQS];	/* one for each priority */
@@ -113,6 +113,7 @@ extern queue_head_t	action_queue;	/* assign/shutdown queue */
 decl_simple_lock_data(extern,action_lock);
 
 extern int	min_quantum;	/* defines max context switch rate */
+#define MIN_QUANTUM (hz / 33)	/* context switch 33 times/second */
 
 /*
 *	Default base priorities for threads.
@@ -165,13 +166,4 @@ MACRO_BEGIN						\
 	(thread)->processor_set->sched_load;		\
 MACRO_END
 
-#if	SIMPLE_CLOCK
-/*
- *	sched_usec is an exponential average of number of microseconds
- *	in a second for clock drift compensation.
- */
-
-extern int	sched_usec;
-#endif	/* SIMPLE_CLOCK */
-
 #endif	/* _KERN_SCHED_H_ */
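The MIN_QUANTUM definition above and the sched_prim.c hunk below change min_quantum from hz/10 to hz/33 ticks, and the earlier host.c hunk fixes the unit conversion when reporting it. A worked example, assuming a typical hz = 100 (so one clock tick = 10000 microseconds):

    /* min_quantum is measured in clock ticks:
     *   MIN_QUANTUM = hz / 33 = 100 / 33 = 3 ticks  (~33 switches/s)
     *   old setting = hz / 10 = 10 ticks  (10 switches/s)
     *
     * host_info() converts ticks to milliseconds:
     *   min_quantum * tick / 1000 = 3 * 10000 / 1000 = 30 ms
     * whereas the old "min_quantum / 1000" treated the tick count as
     * microseconds and would have reported 0 ms. */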
diff --git a/kern/sched_prim.c b/kern/sched_prim.c
index bb767352..63a0437c 100644
--- a/kern/sched_prim.c
+++ b/kern/sched_prim.c
@@ -64,10 +64,6 @@
 int		min_quantum;	/* defines max context switch rate */
 
 unsigned	sched_tick;
 
-#if	SIMPLE_CLOCK
-int		sched_usec;
-#endif	/* SIMPLE_CLOCK */
-
 thread_t	sched_thread_id;
 
 timer_elt_data_t recompute_priorities_timer;
@@ -153,15 +149,12 @@ void sched_init(void)
 	recompute_priorities_timer.fcn = recompute_priorities;
 	recompute_priorities_timer.param = NULL;
 
-	min_quantum = hz / 10;		/* context switch 10 times/second */
+	min_quantum = MIN_QUANTUM;
 	wait_queue_init();
 	pset_sys_bootstrap();		/* initialize processor mgmt. */
 	queue_init(&action_queue);
 	simple_lock_init(&action_lock);
 	sched_tick = 0;
-#if	SIMPLE_CLOCK
-	sched_usec = 0;
-#endif	/* SIMPLE_CLOCK */
 	ast_init();
 }
 
@@ -231,7 +224,7 @@ void assert_wait(
 	thread = current_thread();
 	if (thread->wait_event != 0) {
-		panic("assert_wait: already asserted event %#x\n",
+		panic("assert_wait: already asserted event %p\n",
 		      thread->wait_event);
 	}
 	s = splsched();
@@ -376,13 +369,14 @@ void clear_wait(
 *	and thread_wakeup_one.
 *
 */
-void thread_wakeup_prim(
+boolean_t thread_wakeup_prim(
 	event_t		event,
 	boolean_t	one_thread,
 	int		result)
 {
 	queue_t			q;
 	int			index;
+	boolean_t		woke = FALSE;
 	thread_t		thread, next_th;
 	decl_simple_lock_data( , *lock);
 	spl_t			s;
@@ -435,6 +429,7 @@ void thread_wakeup_prim(
 				break;
 			}
 			thread_unlock(thread);
+			woke = TRUE;
 			if (one_thread)
 				break;
 		}
@@ -442,6 +437,7 @@ void thread_wakeup_prim(
 	}
 	simple_unlock(lock);
 	splx(s);
+	return (woke);
 }
 
 /*
@@ -1086,21 +1082,8 @@ void compute_my_priority(
 */
 void recompute_priorities(void *param)
 {
-#if	SIMPLE_CLOCK
-	int	new_usec;
-#endif	/* SIMPLE_CLOCK */
-
 	sched_tick++;		/* age usage one more time */
 	set_timeout(&recompute_priorities_timer, hz);
-#if	SIMPLE_CLOCK
-	/*
-	 *	Compensate for clock drift.  sched_usec is an
-	 *	exponential average of the number of microseconds in
-	 *	a second.  It decays in the same fashion as cpu_usage.
-	 */
-	new_usec = sched_usec_elapsed();
-	sched_usec = (5*sched_usec + 3*new_usec)/8;
-#endif	/* SIMPLE_CLOCK */
 	/*
	 *	Wakeup scheduler thread.
	 */
@@ -1347,17 +1330,12 @@ void thread_setrun(
 
 	/*
 	 *	Cause ast on processor if processor is on line.
-	 *
-	 *	XXX Don't do this remotely to master because this will
-	 *	XXX send an interprocessor interrupt, and that's too
-	 *	XXX expensive for all the unparallelized U*x code.
 	 */
 	if (processor == current_processor()) {
 		ast_on(cpu_number(), AST_BLOCK);
 	}
-	else if ((processor != master_processor) &&
-		 (processor->state != PROCESSOR_OFF_LINE)) {
-		cause_ast_check(processor);
+	else if ((processor->state != PROCESSOR_OFF_LINE)) {
+	    cause_ast_check(processor);
 	}
 }
 #else	/* NCPUS > 1 */
diff --git a/kern/sched_prim.h b/kern/sched_prim.h
index dfb2f54b..405e5456 100644
--- a/kern/sched_prim.h
+++ b/kern/sched_prim.h
@@ -72,7 +72,7 @@ extern void	thread_sleep(
 	simple_lock_t	lock,
 	boolean_t	interruptible);
 extern void	thread_wakeup(void);		/* for function pointers */
-extern void	thread_wakeup_prim(
+extern boolean_t thread_wakeup_prim(
 	event_t		event,
 	boolean_t	one_thread,
 	int		result);
diff --git a/kern/task.c b/kern/task.c
index 1874af69..735b9e59 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -73,7 +73,7 @@ void task_init(void)
 	 *	Task_create must assign to kernel_task as a side effect,
 	 *	for other initialization. (:-()
 	 */
-	(void) task_create(TASK_NULL, FALSE, &kernel_task);
+	(void) task_create_kernel(TASK_NULL, FALSE, &kernel_task);
 	(void) task_set_name(kernel_task, "gnumach");
 	vm_map_set_name(kernel_map, kernel_task->name);
 }
@@ -83,6 +83,19 @@ kern_return_t task_create(
 	boolean_t	inherit_memory,
 	task_t		*child_task)		/* OUT */
 {
+	if (parent_task == TASK_NULL)
+		return KERN_INVALID_TASK;
+
+	return task_create_kernel (parent_task, inherit_memory,
+				   child_task);
+}
+
+kern_return_t
+task_create_kernel(
+	task_t		parent_task,
+	boolean_t	inherit_memory,
+	task_t		*child_task)		/* OUT */
+{
 	task_t		new_task;
 	processor_set_t	pset;
 #if FAST_TAS
@@ -189,14 +202,16 @@ kern_return_t task_create(
 			  new_task);
 	else
 		snprintf (new_task->name, sizeof new_task->name, "(%.*s)",
-			  sizeof new_task->name - 3, parent_task->name);
+			  (int) (sizeof new_task->name - 3), parent_task->name);
 
 	if (new_task_notification != NULL) {
 		task_reference (new_task);
 		task_reference (parent_task);
 		mach_notify_new_task (new_task_notification,
 				      convert_task_to_port (new_task),
-				      convert_task_to_port (parent_task));
+				      parent_task
+				      ? convert_task_to_port (parent_task)
+				      : IP_NULL);
 	}
 
 	ipc_task_enable(new_task);
@@ -1209,7 +1224,8 @@ void consider_task_collect(void)
 		task_collect_max_rate = hz;
 
 	if (task_collect_allowed &&
-	    (sched_tick > (task_collect_last_tick + task_collect_max_rate))) {
+	    (sched_tick > (task_collect_last_tick +
+			   task_collect_max_rate / (hz / 1)))) {
 		task_collect_last_tick = sched_tick;
 		task_collect_scan();
 	}
diff --git a/kern/task.h b/kern/task.h
index 2a4c28fc..0b746aff 100644
--- a/kern/task.h
+++ b/kern/task.h
@@ -140,6 +140,10 @@ extern kern_return_t	task_create(
 	task_t		parent_task,
 	boolean_t	inherit_memory,
 	task_t		*child_task);
+extern kern_return_t	task_create_kernel(
+	task_t		parent_task,
+	boolean_t	inherit_memory,
+	task_t		*child_task);
 extern kern_return_t	task_terminate(
 	task_t		task);
 extern kern_return_t	task_suspend(
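The task.c/task.h hunks above split task creation in two: in-kernel callers (task_init, the boot script) use task_create_kernel and keep the old parentless behavior, while the RPC-visible task_create now rejects a null parent. A sketch of the resulting contract; the wrapper function is invented for illustration:

    static void example_task_create_contract (void)
    {
      task_t child;
      kern_return_t kr;

      /* In-kernel path: a parentless task is still allowed; the
       * new-task notification will pass IP_NULL for the parent. */
      kr = task_create_kernel (TASK_NULL, FALSE, &child);  /* KERN_SUCCESS */

      /* RPC-visible path: a null parent is now rejected outright. */
      kr = task_create (TASK_NULL, FALSE, &child);         /* KERN_INVALID_TASK */

      (void) kr;
    }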
diff --git a/kern/thread.c b/kern/thread.c
index 0ac7c535..680e72c2 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -444,6 +444,17 @@ kern_return_t thread_create(
 	task_unlock(parent_task);
 
 	/*
+	 *	This thread will mosty probably start working, assume it
+	 *	will take its share of CPU, to avoid having to find it out
+	 *	slowly. Decaying will however fix that quickly if it actually
+	 *	does not work
+	 */
+	new_thread->cpu_usage = TIMER_RATE * SCHED_SCALE /
+		(pset->load_average >= SCHED_SCALE ?
+		 pset->load_average : SCHED_SCALE);
+	new_thread->sched_usage = TIMER_RATE * SCHED_SCALE;
+
+	/*
 	 *	Lock both the processor set and the task,
 	 *	so that the thread can be added to both
 	 *	simultaneously.  Processor set must be
@@ -1527,13 +1538,6 @@ kern_return_t thread_info(
 	    basic_info->cpu_usage = thread->cpu_usage /
 					(TIMER_RATE/TH_USAGE_SCALE);
 	    basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5;
-#if	SIMPLE_CLOCK
-	    /*
-	     *	Clock drift compensation.
-	     */
-	    basic_info->cpu_usage =
-		(basic_info->cpu_usage * 1000000)/sched_usec;
-#endif	/* SIMPLE_CLOCK */
 
 	    flags = 0;
 	    if (thread->state & TH_SWAPPED)
@@ -1576,8 +1580,11 @@ kern_return_t thread_info(
 	else if (flavor == THREAD_SCHED_INFO) {
 	    thread_sched_info_t	sched_info;
 
-	    if (*thread_info_count < THREAD_SCHED_INFO_COUNT) {
-		return KERN_INVALID_ARGUMENT;
+	    /* Allow *thread_info_count to be one smaller than the
+	       usual amount, because last_processor is a
+	       new member that some callers might not know about. */
+	    if (*thread_info_count < THREAD_SCHED_INFO_COUNT -1) {
+		return KERN_INVALID_ARGUMENT;
 	    }
 
 	    sched_info = (thread_sched_info_t) thread_info_out;
@@ -1605,6 +1612,12 @@ kern_return_t thread_info(
 	    sched_info->depressed = (thread->depress_priority >= 0);
 	    sched_info->depress_priority = thread->depress_priority;
 
+#if	NCPUS > 1
+	    sched_info->last_processor = thread->last_processor;
+#else
+	    sched_info->last_processor = 0;
+#endif
+
 	    thread_unlock(thread);
 	    splx(s);
 
@@ -2257,7 +2270,7 @@ thread_wire(
 void thread_collect_scan(void)
 {
-	register thread_t	thread, prev_thread;
+	thread_t		thread, prev_thread;
 	processor_set_t		pset, prev_pset;
 
 	prev_thread = THREAD_NULL;
@@ -2333,7 +2346,8 @@ void consider_thread_collect(void)
 
 	if (thread_collect_allowed &&
 	    (sched_tick >
-	     (thread_collect_last_tick + thread_collect_max_rate))) {
+	     (thread_collect_last_tick +
+	      thread_collect_max_rate / (hz / 1)))) {
 		thread_collect_last_tick = sched_tick;
 		thread_collect_scan();
 	}