BarryServer : Git

All the code for all my projects
// BarryServer : Git / Nucleus / commit / 6217f0db2c8f2513994f4cc773aaa4171a049963

// Related

Nucleus

Barry Kernel threads + threads share address space 6217f0d (3 years, 1 month ago)
diff --git a/include/nucleus/cpu.h b/include/nucleus/cpu.h
index cb41a28..60753ef 100644
--- a/include/nucleus/cpu.h
+++ b/include/nucleus/cpu.h
@@ -14,6 +14,7 @@ struct Processor {
 	cpu_t id;
 	uint32_t inCriticalSection;
 	uint32_t critFlags;
+	struct InterruptFrame *frame;
 	Scheduler *scheduler;
 	struct IPIQueue *ipiq;
 };
@@ -86,5 +87,6 @@ void send_ipiq(cpu_t targid, ipiq_func_t func, void *arg, enum IPIQFlag flags);
 
 void set_fs_base(uintptr_t base);
 void set_gs_base(uintptr_t base);
+void set_kernel_stack(uintptr_t top);
 
 #endif
diff --git a/include/nucleus/memory.h b/include/nucleus/memory.h
index 057ff0b..481070c 100644
--- a/include/nucleus/memory.h
+++ b/include/nucleus/memory.h
@@ -66,6 +66,6 @@ void *map_page(Page *page);
 
 int verify_access(const void *addr, size_t len, int prot);
 
-VMRegion *vm_create_stack(void);
+void switch_to_mm(VirtualMemory *vm);
 
 #endif
diff --git a/include/nucleus/task.h b/include/nucleus/task.h
index f7825a8..8c33f55 100644
--- a/include/nucleus/task.h
+++ b/include/nucleus/task.h
@@ -31,21 +31,19 @@ enum State {
 /* Structure for a Task */
 struct Task {
 	Object obj;
-	pid_t tid, tgid;
-	uid_t uid, euid, suid;
-	gid_t gid, egid, sgid;
-	int status;
-	uint32_t inSyscall;
-
 	Scheduler *scheduler;
 	enum Priority priority;
 	enum State state;
 	uintptr_t esi, edi, ebx;
 	uintptr_t esp, ebp, eip;
-	page_dir_t pageDir;
+
+	pid_t tid, tgid;
+	uid_t uid, euid, suid;
+	gid_t gid, egid, sgid;
+	int status;
+	uint32_t inSyscall;
 
 	File *executable;
-	VMRegion *stack;
 	Task *target;
 	ObjectList *wait;
 
@@ -68,6 +66,8 @@ struct Scheduler {
 
 #define current cpu->scheduler->task
 
+#define KERNEL_STACK_SIZE (2 * PAGE_SIZE)
+
 extern ObjectType taskType;
 extern ObjectType schedulerType;
 extern ObjectType signalsType;
@@ -95,10 +95,12 @@ exit_syscall_context(void)
 }
 
 void init_tasking(void);
-void enqueue_task(Task *task);
 void block_task(enum State reason, ObjectList *list);
 void unblock_task(Task *task);
 Task *find_task(pid_t tid);
+void enqueue_task(Task *task);
 void schedule(void);
+Task *create_kthread(void (*func)(void), enum Priority p);
+_Noreturn void terminate(void);
 
 #endif
diff --git a/include/sys/sched.h b/include/sys/sched.h
index 6cf7f94..125c9bd 100644
--- a/include/sys/sched.h
+++ b/include/sys/sched.h
@@ -16,7 +16,7 @@ enum CloneFlag {
 };
 
 pid_t getpid(void);
-pid_t clone(int flags);
+pid_t clone(int flags, void *stack);
 _Noreturn void exit(int status);
 
 uid_t getuid(void);
diff --git a/kernel/acpi/apic.c b/kernel/acpi/apic.c
index 6051636..7f9935c 100644
--- a/kernel/acpi/apic.c
+++ b/kernel/acpi/apic.c
@@ -58,6 +58,7 @@ int apic = 0;
 size_t ncpus = 1;
 uintptr_t lapicPtr, ioapicPtr;
 cpu_t lapicIds[MAX_CPUS], lapicNums[MAX_CPUS];
+extern uintptr_t stacks[];
 
 /* Enable APIC */
 static void
@@ -176,10 +177,11 @@ init_apic(struct SDTHeader *header)
 	uint32_t i, j;
 	uintptr_t apTrampoline = 0x1000, stack;
 	memcpy((void *) apTrampoline, &ap_trampoline, PAGE_SIZE);
+	*((uint32_t *) (apTrampoline + 0xFFC)) = (uintptr_t) stacks;
 	for (i = 1; i < ncpus; i++) {
 		/* Give each processor a separate stack */
-		stack = alloc_frame() + PAGE_SIZE - sizeof(uintptr_t);
-		*((uint32_t *) (apTrampoline + 0xF00 + i)) = stack;
+		stacks[i] = alloc_frame() + PAGE_SIZE;
+//		*((uint32_t *) (apTrampoline + 0xC00) + i) = stacks[i];
 		/* Send INIT IPI */
 		LAPIC(0x280) = 0;
 		LAPIC(0x310) = (LAPIC(0x310) & 0x00FFFFFF)
@@ -239,5 +241,5 @@ send_ipi(cpu_t target, uint8_t num)
 	             | (lapicIds[target] << 24);
 	LAPIC(0x300) = (LAPIC(0x300) & 0xFFF32000)
 	             | (0x5000 + num + 48);
-	while ((LAPIC(0x300) >> 12) & 1);
+	do asm("pause":::"memory"); while (LAPIC(0x300) & (1 << 12));
 }
diff --git a/kernel/acpi/trampoline.S b/kernel/acpi/trampoline.S
index 3230e85..c807524 100644
--- a/kernel/acpi/trampoline.S
+++ b/kernel/acpi/trampoline.S
@@ -48,9 +48,8 @@ ap_pm:
 	incl %eax
 	movl %eax, apId
 
-	movl $0x1F00, %esi
-	movl (%esi,%eax), %ebx
-	movl %ebx, %esp
+	movl 0x1FFC, %esi
+	movl (%esi,%eax,4), %esp
 	movl %esp, %ebp
 
 	sti
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e7ab334..ba59986 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -34,6 +34,7 @@ void send_ipi(cpu_t target, uint8_t num);
 
 Processor __seg_gs *const cpu = 0;
 Processor *cpus[MAX_CPUS];
+uintptr_t stacks[MAX_CPUS];
 
 /* Per-CPU setup */
 void
diff --git a/kernel/gdt.c b/kernel/gdt.c
index 7e8a1a4..fbdfb82 100644
--- a/kernel/gdt.c
+++ b/kernel/gdt.c
@@ -33,7 +33,7 @@ static struct GDTEntry {
 	uint8_t baseMiddle, access, gran, baseHigher;
 } __attribute__((packed)) *gdt[MAX_CPUS]; /* Per CPU */
 
-#define GDT_OFFSET(i) (i * sizeof(struct GDTEntry))
+#define GDT_OFFSET(i) ((i) * sizeof(struct GDTEntry))
 
 /* Structure for a TSS Entry */
 static struct TSSEntry {
@@ -83,6 +83,7 @@ cpu_load_gdt(void)
 
 	memset(gdt[CPUID], 0, size);
 
+	/* Create gate entries */
 	gdt_set_gate(GDT_NULL,      0x00000000, 0x00000000, 0x00, 0x00);
 	gdt_set_gate(GDT_KERN_CODE, 0x00000000, 0xFFFFFFFF, 0x9A, 0xCF);
 	gdt_set_gate(GDT_KERN_DATA, 0x00000000, 0xFFFFFFFF, 0x92, 0xCF);
@@ -91,20 +92,22 @@ cpu_load_gdt(void)
 	gdt_set_gate(GDT_FS,        0x00000000, 0xFFFFFFFF, 0xF2, 0xCF);
 	gdt_set_gate(GDT_GS,        0x00000000, 0xFFFFFFFF, 0xF2, 0xCF);
 
+	/* Create TSS entry */
 	uint32_t addr = (uint32_t) (gdt[CPUID] + (GDT_TSS + 1));
 	gdt_set_gate(GDT_TSS, addr, sizeof(struct TSSEntry) - 1, 0xE9, 0);
 
+	/* TSS */
 	tss[CPUID] = (void *) addr;
 	tss[CPUID]->ss0 = 0x10;
-	tss[CPUID]->esp0 = 0xF0800000 - sizeof(uintptr_t);
 	tss[CPUID]->cs = GDT_OFFSET(GDT_KERN_CODE) | 0;
 	tss[CPUID]->ds = GDT_OFFSET(GDT_KERN_DATA) | 0;
 	tss[CPUID]->es = tss[CPUID]->ss = tss[CPUID]->ds;
 	tss[CPUID]->fs = tss[CPUID]->gs = GDT_OFFSET(GDT_FS) | 3;
 	tss[CPUID]->iomapBase = sizeof(struct TSSEntry);
 
+	/* Load table */
 	struct DescRecord ptr = {
-		.limit = sizeof(struct GDTEntry) * (GDT_TSS + 1),
+		.limit = GDT_OFFSET(GDT_TSS + 1),
 		.base = (uintptr_t) gdt[CPUID],
 	};
 	asm volatile("lgdt %0" :: "m" (ptr));
@@ -132,3 +135,11 @@ set_gs_base(uintptr_t base)
 	gs->baseHigher = (base >> 24) & 0xFF;
 	asm volatile("mov %0, %%gs" :: "r" (GDT_OFFSET(GDT_GS) | 3));
 }
+
+/* Point this CPU's TSS esp0 at the kernel stack top (16-byte aligned) */
+void
+set_kernel_stack(uintptr_t top)
+{
+	ASSERT((top & 0xF) == 0);
+	tss[cpu->id]->esp0 = top;
+}
diff --git a/kernel/idt.c b/kernel/idt.c
index 0f2bdfd..6e04543 100644
--- a/kernel/idt.c
+++ b/kernel/idt.c
@@ -31,10 +31,13 @@ install_idt_entry(uint8_t num, void *addr)
 	if (num < 32)
 		type = 0x8F; /* Trap gate for exceptions */
 
+	if (num == 0x80)
+		type |= 3 << 5; /* Allowed from ring 3 */
+
 	IDT[num].offsetLower = (uintptr_t) addr & 0xFFFF;
 	IDT[num].selector = 0x08;
 	IDT[num].zero = 0;
-	IDT[num].typeAttr = type | 0x60; /* Allowed from ring 3 */
+	IDT[num].typeAttr = type;
 	IDT[num].offsetHigher = (uintptr_t) addr >> 16;
 }
 
@@ -58,8 +61,13 @@ isr_handler(struct InterruptFrame frame)
 
 	/* Run registered handler */
 	int_handler_t handler = interrupts[frame.intnum];
-	if (handler)
+	struct InterruptFrame *oldFrame;
+	if (handler) {
+		oldFrame = cpu->frame;
+		cpu->frame = &frame;
 		handler(&frame);
+		cpu->frame = oldFrame;
+	}
 }
 
 /* Register an exception handler */
diff --git a/kernel/start.S b/kernel/start.S
index 96048e2..fe842e8 100644
--- a/kernel/start.S
+++ b/kernel/start.S
@@ -9,9 +9,10 @@ header:
 	.long 0, 1280, 1024, 32
 
 .section .bss, "aw", @nobits
+.global stackBottom
 .global stackTop
 stackBottom:
-	.skip 16384
+	.skip 8192
 stackTop:
 
 .section .text
diff --git a/memory/fault.c b/memory/fault.c
index 7dfae10..cbff7b2 100644
--- a/memory/fault.c
+++ b/memory/fault.c
@@ -142,14 +142,10 @@ not_present_write(VMRegion *region, uintptr_t addr)
 	ASSERT(front);
 	inode = front->inode;
 	page = find_page(inode->pages, offset);
+	if (page)
+		return install_page(addr, page, region->prot);
 	newPage = create_page(inode->pages, alloc_frame(), offset);
 	install_page(addr, newPage, region->prot);
-	if (page) {
-		copy_page_frame(PAGE_ADDR(page->frame),
-		                PAGE_ADDR(newPage->frame));
-		remove(inode->pages, page);
-		return;
-	}
 
 	/* Anonymous region, zero-fill */
 	if (region->flags & MAP_ANONYMOUS) {
@@ -179,14 +175,20 @@ page_fault_handler(struct InterruptFrame *frame)
 	uint8_t present = frame->err & (1 << 0);
 	uint8_t write   = frame->err & (1 << 1);
 	uint8_t user    = frame->err & (1 << 2);
+	page_t pg = get_page(addr);
 
 	ASSERT(current && current->vm);
 
+	/* Handle lazy invalidation */
+	if (!present && (pg & PTE_PRESENT))
+		return flush_tlb(addr);
+	if (write && (pg & PTE_WRITE))
+		return flush_tlb(addr);
+
 	/* Iterate VM Regions */
 	VMRegion *region = find_region(addr);
 	if (__builtin_expect(!region, 0)) {
 		/* Not in a region */
-		page_t pg = get_page(addr);
 		panic("Page Fault [%d:%d] (%#.8x -> %#.8x [tbl:%d, pg:%d][%#.8x], %s, %s, %s)",
 		      current->tgid, current->tid, frame->eip,
 		      addr, (addr >> 12) / 1024, (addr >> 12) % 1024, pg,
@@ -195,9 +197,12 @@ page_fault_handler(struct InterruptFrame *frame)
 		      user ? "user" : "kernel");
 	}
 
+	/* Protection violation, kill process */
 	if (user && write && !(region->prot & PROT_WRITE))
-		panic("Segmentation violation");
+		panic("Segmentation violation : %#.8x[%#.8x] (%#.8x -> %#.8x)",
+		      region, region->prot, frame->eip, addr);
 
+	/* Update paging structures correctly */
 	if (present && write)
 		return copy_on_write(region, addr);
 	if (!present && write)
diff --git a/memory/frame.c b/memory/frame.c
index 428aae9..0e0f8fd 100644
--- a/memory/frame.c
+++ b/memory/frame.c
@@ -174,6 +174,7 @@ init_frames(uint32_t size, void *addr)
 	struct {uint32_t start, end;} remaps[] = {
 		{.start = 0x0000,   .end = bumpAlloc}, /* PMM bitmaps */
 		{.start = 0x100000, .end = 0x180000 }, /* Kernel */
+		{.start = 0x200000, .end = 0x800000 }, /* Kernel heap */
 	};
 
 	/* Check bitmaps */
@@ -183,10 +184,11 @@ init_frames(uint32_t size, void *addr)
 		numFrames += region->numFrames;
 		uintptr_t end = region->base + (region->numFrames * PAGE_SIZE);
 		/* Iterate the remaps[] to find overlapping regions */
-		for (i = 0; i < sizeof(remaps)/sizeof(remaps[0]); i++)
+		for (i = 0; i < sizeof(remaps)/sizeof(remaps[0]); i++) {
 			for (j = remaps[i].start;
 			     j < remaps[i].end && j >= region->base && j < end;
 			     j += PAGE_SIZE)
 				set_frame(region, (j - region->base) >> 12);
+		}
 	}
 }
diff --git a/memory/mmap.c b/memory/mmap.c
index 2093bf3..f6e94c7 100644
--- a/memory/mmap.c
+++ b/memory/mmap.c
@@ -26,7 +26,7 @@ mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
 				break;
 			region = head;
 		}
-		if (!head && region->end + len >= 0xDFC00000)
+		if (region && region->end + len >= 0xDFC00000)
 			return (void *) -ENOMEM;
 		addr = (void *) region->end;
 	}
diff --git a/memory/namespace.h b/memory/namespace.h
index bd5b0a1..b0f3528 100644
--- a/memory/namespace.h
+++ b/memory/namespace.h
@@ -1,7 +1,9 @@
 #ifndef MEMORY_NAMESPACE_H
 #define MEMORY_NAMESPACE_H
 
+#include <nucleus/memory.h>
 #include <nucleus/object.h>
+#include <nucleus/types.h>
 #include <nucleus/vfs.h>
 
 /* Structure for a Page in a Cache */
@@ -15,6 +17,8 @@ struct Page {
 struct VirtualMemory {
 	Object obj;
 	ObjectList *regions;
+	VMRegion *stack;
+	page_dir_t pageDir;
 };
 
 /* Structure for a Virtual Memory Region object */
@@ -27,6 +31,7 @@ struct VMRegion {
 	File *front, *back;
 };
 
+void switch_dir(page_dir_t dir);
 VMRegion *vm_create_region(void *addr, size_t len, int prot, int flags,
                            off_t offset, File *back);
 
diff --git a/memory/paging.c b/memory/paging.c
index e9a8a83..aa5fa17 100644
--- a/memory/paging.c
+++ b/memory/paging.c
@@ -16,7 +16,7 @@ page_t zeroFrame;
 static page_dir_t kernelDir;
 
 /* Switch page directory */
-static void
+void
 switch_dir(page_dir_t dir)
 {
 	asm volatile("mov %0, %%cr3" :: "r" (dir));
@@ -43,6 +43,8 @@ set_page(uintptr_t vaddr, page_t page)
 	page_table_t *tables = (void *) 0xFFFFF000;
 	uintptr_t address = vaddr >> 12;
 	uint32_t tbl = address / 1024;
+	if (!(tables[tbl] & PDE_PRESENT) && !page)
+		return;
 	/* Create table if not present */
 	if (!(tables[tbl] & PDE_PRESENT)) {
 		tables[tbl] = alloc_frame() | PDE_PRESENT | PDE_WRITE;
@@ -91,18 +93,9 @@ clone_dir(void)
 			}
 
 			/* Link the pages for Copy-On-Write */
-			if (tbl < 960) { /* 0xF0000000 */
-				oldTable[pg] &= ~PTE_WRITE;
-				flush_tlb(((tbl * 1024) + pg) << 12);
-				newTable[pg] = oldTable[pg];
-			} else {
-				/* Copy the kernel stack area immediately */
-				newTable[pg] = alloc_frame()
-				             | PAGE_ATTR(oldTable[pg]);
-				flush_tlb(((tbl * 1024) + pg) << 12);
-				copy_page_frame(PAGE_ADDR(oldTable[pg]),
-				                PAGE_ADDR(newTable[pg]));
-			}
+			oldTable[pg] &= ~PTE_WRITE;
+			flush_tlb(((tbl * 1024) + pg) << 12);
+			newTable[pg] = oldTable[pg];
 		}
 	}
 	newTables[1023] = oldTables[1022];
@@ -142,23 +135,17 @@ init_paging(void)
 	/*
 	 * By mapping the page directory as the last page table, the page
 	 * directory entries are read as page table entries, and the page
-	 * table entries become the pages. This means that each page contains
-	 * the contents of a page table, and the region in memory represented by
-	 * the last page table contains a contiguous list of all pages in
-	 * memory.  The very last page contains the contents of the page
-	 * directory itself.  This means that each virtual address space
-	 * contains it's own paging structures.
+	 * tables become the pages. This means that each page contains the
+	 * contents of a page table, and the region in memory represented by the
+	 * last page table contains a contiguous list of all pages in memory.
+	 * The very last page contains the contents of the page directory
+	 * itself.  This means that each virtual address space contains its own
+	 * paging structures.
 	 */
 
 	/* Use kernel directory */
 	register_exception(14, page_fault_handler);
 	cpu_load_paging();
-
-	/* Allocate a kernel stack */
-	uintptr_t stk;
-	for (stk = 0xF0400000; stk < 0xF0800000; stk += PAGE_SIZE)
-		set_page(stk, alloc_frame() | PTE_PRESENT | PTE_WRITE);
-
 	zeroFrame = alloc_frame();
 }
 
diff --git a/memory/region.c b/memory/region.c
index 26cbc37..469f6b4 100644
--- a/memory/region.c
+++ b/memory/region.c
@@ -17,6 +17,7 @@ static void vm_copy(Object *, Object *);
 static void region_new(Object *);
 static void region_delete(Object *);
 static void region_copy(Object *, Object *);
+static int region_compare(void *, void *);
 
 /* Virtual Memory Namespace object type */
 ObjectType virtualMemoryType = {
@@ -40,7 +41,17 @@ static void
 vm_new(Object *obj)
 {
 	VirtualMemory *vm = (void *) obj;
-	vm->regions = create_list(&vmRegionType, LIST_NORMAL);
+	vm->regions = create_list(&vmRegionType, LIST_ORDERED, region_compare);
+
+	/* Create stack region */
+	VMRegion *stack = new(&vmRegionType);
+	stack->start = 0xDFC00000;
+	stack->end = 0xE0000000;
+	stack->prot = PROT_READ | PROT_WRITE;
+	stack->flags = MAP_PRIVATE | MAP_ANONYMOUS;
+	vm->stack = stack;
+
+	asm volatile("mov %%cr3, %0" : "=r" (vm->pageDir));
 }
 
 /* Destroy a Virtual Memory object */
@@ -49,6 +60,8 @@ vm_delete(Object *obj)
 {
 	VirtualMemory *vm = (void *) obj;
 	destroy_list(vm->regions);
+	if (vm->stack)
+		put(vm->stack);
 }
 
 /* Copy a Virtual Memory object */
@@ -62,6 +75,9 @@ vm_copy(Object *a, Object *b)
 		add(child->regions, insert);
 		put(insert);
 	}
+	if (parent->stack)
+		child->stack = copy(parent->stack);
+	child->pageDir = clone_dir();
 }
 
 /* Delete a Virtual Memory Region */
@@ -105,6 +121,14 @@ region_copy(Object *a, Object *b)
 		child->back = get(parent->back);
 }
 
+/* Order regions by start address: >0 if b is above a, <0 if below, else 0 */
+static int
+region_compare(void *a, void *b)
+{
+	VMRegion *ra = a, *rb = b;
+	return (rb->start > ra->start) - (rb->start < ra->start);
+}
+
 /* Remove a range of pages from a region's page cache */
 static void
 remove_cache_range(VMRegion *region, uintptr_t start, uintptr_t end)
@@ -130,6 +154,18 @@ remove_cache_range(VMRegion *region, uintptr_t start, uintptr_t end)
 	}
 }
 
+/* Switch to a virtual memory namespace; a NULL vm keeps the active one */
+void
+switch_to_mm(VirtualMemory *vm)
+{
+	page_dir_t active; /* current->vm may be NULL (kthread), so use CR3 */
+	if (!vm || !vm->pageDir)
+		return;
+	asm volatile("mov %%cr3, %0" : "=r" (active));
+	if (active != vm->pageDir)
+		switch_dir(vm->pageDir);
+}
+
 /* Find a Virtual Memory Region by address */
 VMRegion *
 find_region(uintptr_t addr)
@@ -140,7 +176,7 @@ find_region(uintptr_t addr)
 			return region;
 	}
 
-	region = current->stack;
+	region = current->vm->stack;
 	if (region->start <= addr && region->end > addr)
 		return region;
 
@@ -203,14 +239,3 @@ vm_create_region(void *addr, size_t len, int prot, int flags, off_t offset,
 	add(current->vm->regions, region);
 	return region;
 }
-
-/* Create a Virtual Memory Region for the stack */
-VMRegion *
-vm_create_stack(void)
-{
-	VMRegion *stack = vm_create_region((void *) 0xDFC00000, 0x400000,
-	                                   PROT_READ | PROT_WRITE,
-	                                   MAP_PRIVATE | MAP_ANONYMOUS, 0, NULL);
-	remove(current->vm->regions, stack);
-	return stack;
-}
diff --git a/task/clone.c b/task/clone.c
index 377b869..5fdcfef 100644
--- a/task/clone.c
+++ b/task/clone.c
@@ -6,12 +6,39 @@
 #include <sys/sched.h>
 #include <sys/types.h>
 #include <nucleus/io.h>
+#include <nucleus/lib.h>
 #include <nucleus/task.h>
 #include <nucleus/vfs.h>
 
+/* Copy the used part of the parent's kernel stack into the child's stack */
+static void
+copy_kernel_stack(Task *parent, Task *child)
+{
+        size_t offset = (size_t) child - (uintptr_t) parent;
+	uintptr_t oldTop = (uintptr_t) parent + KERNEL_STACK_SIZE,
+	          oldStack = child->esp;
+	uintptr_t newTop = (uintptr_t) child + KERNEL_STACK_SIZE,
+	          newStack = child->esp + offset;
+
+	/* Copy [esp, top) and rebase the child's esp/ebp by the same offset */
+	memcpy((void *) newStack, (void *) oldStack,
+	       (size_t) oldTop - oldStack);
+        child->esp += offset;
+        child->ebp += offset;
+
+        /* Rebase any copied word that points into the parent's stack range */
+        uintptr_t i, tmp;
+        for (i = newStack & ~(sizeof(uintptr_t) - 1);
+             i < newTop; i += sizeof(uintptr_t)) {
+                tmp = *(uintptr_t *) i;
+                if (tmp > oldStack && tmp < (uintptr_t) oldTop)
+                        *(uintptr_t *) i = tmp + offset;
+        }
+}
+
 /* Clone a task */
 pid_t
-clone(int flags)
+clone(int flags, void *stack)
 {
 	enter_critical_section();
 
@@ -23,6 +50,21 @@ clone(int flags)
 		flags |= CLONE_VM;
 	}
 
+	/* Copy thread information */
+	if (flags & CLONE_THREAD)
+		child->tgid = parent->tgid;
+	child->uid  = parent->uid;
+	child->euid = parent->euid;
+	child->suid = parent->suid;
+	child->gid  = parent->gid;
+	child->egid = parent->egid;
+	child->sgid = parent->sgid;
+	child->inSyscall = parent->inSyscall;
+
+	/* Get executable file */
+	if (parent->executable)
+		child->executable = get(parent->executable);
+
 	/* Clone parent's file system namespace */
 	if (flags & CLONE_FS)
 		child->fs = get(parent->fs);
@@ -40,9 +82,6 @@ clone(int flags)
 		child->vm = get(parent->vm);
 	else
 		child->vm = copy(parent->vm);
-	/* Copy stack */
-	if (parent->stack)
-		child->stack = copy(parent->stack);
 
 	/* Clone parent's signals namespace */
 	if (flags & CLONE_SIGHAND)
@@ -50,26 +89,23 @@ clone(int flags)
 	else
 		child->signals = copy(parent->signals);
 
-	/* Get executable file */
-	if (parent->executable)
-		child->executable = get(parent->executable);
-
-	child->inSyscall = parent->inSyscall;
-
-	/* After this, anything on the stack is desynchronised */
-	child->pageDir = clone_dir();
-
 	/* Split tasks here */
 	asm volatile("mov %%esi, %0" : "=r" (child->esi));
 	asm volatile("mov %%edi, %0" : "=r" (child->edi));
 	asm volatile("mov %%ebx, %0" : "=r" (child->ebx));
 	asm volatile("mov %%esp, %0" : "=r" (child->esp));
 	asm volatile("mov %%ebp, %0" : "=r" (child->ebp));
+	copy_kernel_stack(parent, child);
 	child->eip = (uintptr_t) &&end;
 	enqueue_task(child);
 	tid = child->tid;
 	put(child);
 	exit_critical_section();
+
 end:
+	if ((flags & CLONE_VM) && current == child) {
+		/* Set the child's user stack */
+		cpu->frame->esp = (uintptr_t) stack;
+	}
 	return tid;
 }
diff --git a/task/exec.c b/task/exec.c
index ff80d21..8134a3d 100644
--- a/task/exec.c
+++ b/task/exec.c
@@ -216,9 +216,6 @@ execve(const char *file, char *argv[], char *envp[])
 	close(fd);
 
 	/* Stack area */
-	if (current->stack)
-		put(current->stack);
-	current->stack = vm_create_stack();
 	memcpy((void *) esp, istack, ssz);
 	kfree(istack);
 
diff --git a/task/kthread.c b/task/kthread.c
new file mode 100644
index 0000000..2c91f5a
--- /dev/null
+++ b/task/kthread.c
@@ -0,0 +1,34 @@
+/*
+ * This file implements kernel threads.  Each kernel thread has a stack
+ * allocated in kernel memory and will run a specified function within the
+ * kernel.  When that function completes, the thread will terminate.  The
+ * thread has no resources allocated and should only access the kernel.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <nucleus/lib.h>
+#include <nucleus/task.h>
+
+#define PUSH(s,v) ({ \
+	(s) -= sizeof(uintptr_t); \
+	*(uintptr_t *) (s) = (uintptr_t) (v); \
+})
+
+/* Create and enqueue a kernel thread that runs func() at priority p */
+Task *
+create_kthread(void (*func)(void), enum Priority p)
+{
+	/* Create new task */
+	Task *task = new(&taskType);
+	task->priority = p;
+	task->eip = (uintptr_t) func;
+
+	/* Stack top is the end of the task block; return into terminate() */
+	task->esp = (uintptr_t) task + KERNEL_STACK_SIZE;
+	PUSH(task->esp, terminate);
+
+	/* Schedule the task */
+	enqueue_task(task);
+	return task;
+}
diff --git a/task/scheduler.c b/task/scheduler.c
index f3bf5b8..c927eb3 100644
--- a/task/scheduler.c
+++ b/task/scheduler.c
@@ -14,7 +14,7 @@
 
 static void scheduler_new(Object *);
 static void scheduler_delete(Object *);
-void context_switch(uintptr_t eip, page_dir_t pageDir, uintptr_t ebx,
+void context_switch(uintptr_t eip, uintptr_t ebx,
                     uintptr_t esi, uintptr_t edi,
                     uintptr_t ebp, uintptr_t esp);
 
@@ -54,8 +54,6 @@ scheduler_delete(Object *obj)
 static void
 switch_to_task(Task *task)
 {
-	page_dir_t pageDir = task->pageDir;
-
 	/* Save current task state */
 	if (__builtin_expect(!!current, 1)) {
 		lock(current);
@@ -66,15 +64,16 @@ switch_to_task(Task *task)
 		asm volatile("mov %%ebp, %0" : "=r" (current->ebp));
 		current->eip = (uintptr_t) &&end;
 		unlock(current);
-		if (pageDir == current->pageDir)
-			pageDir = 0;
 		put(current);
 	}
 
 	/* Switch to new context */
+	switch_to_mm(task->vm);
 	current = task; /* Given reference, so no get() */
-	context_switch(current->eip, pageDir, current->ebx,
-	               current->esi, current->edi, current->ebp, current->esp);
+	set_kernel_stack((uintptr_t) current + KERNEL_STACK_SIZE);
+	context_switch(current->eip, current->ebx,
+	               current->esi, current->edi,
+	               current->ebp, current->esp);
 end:
 	/* This prevents GCC from optimising the jump to be after the return */
 	asm volatile("":::"memory");
@@ -86,7 +85,7 @@ highest_priority_queue(Scheduler *s)
 {
 	enum Priority p;
 	for (p = PRIORITY_COUNT - 1; p > 0; p--) {
-		if (count(s->queue[p ]))
+		if (count(s->queue[p]))
 			return p;
 	}
 	return 0;
diff --git a/task/switch.S b/task/switch.S
index 07f1e56..7c6c09d 100644
--- a/task/switch.S
+++ b/task/switch.S
@@ -13,15 +13,10 @@ context_switch:
 .code32
 	cli
 	mov 4(%esp), %ecx
-	mov 8(%esp), %eax
-	mov 12(%esp), %ebx
-	mov 16(%esp), %esi
-	mov 20(%esp), %edi
-	mov 24(%esp), %ebp
-	mov 28(%esp), %esp
-	test %eax, %eax
-	je 1f
-	mov %eax, %cr3
-1:
+	mov 8(%esp), %ebx
+	mov 12(%esp), %esi
+	mov 16(%esp), %edi
+	mov 20(%esp), %ebp
+	mov 24(%esp), %esp
 	sti
 	jmp *%ecx
diff --git a/task/task.c b/task/task.c
index 539026a..c1e744e 100644
--- a/task/task.c
+++ b/task/task.c
@@ -21,7 +21,7 @@ static void task_delete(Object *);
 /* Task object type */
 ObjectType taskType = {
 	.name = "TASK",
-	.size = sizeof(Task),
+	.size = KERNEL_STACK_SIZE, //sizeof(Task),
 	.new = task_new,
 	.delete = task_delete,
 };
@@ -47,8 +47,6 @@ task_delete(Object *obj)
 	Task *task = (void *) obj;
 	if (task->executable)
 		put(task->executable);
-	if (task->stack)
-		put(task->stack);
 	if (task->target)
 		put(task->target);
 	if (task->wait)
@@ -63,44 +61,11 @@ task_delete(Object *obj)
 		put(task->signals);
 }
 
-/* Move the stack */
-static void
-move_stack(uintptr_t top, size_t size)
-{
-	size_t offset;
-	uintptr_t oldStack, oldBase;
-	uintptr_t newStack, newBase;
-	top -= sizeof(uintptr_t);
-	asm volatile("mov %%esp, %0" : "=r" (oldStack));
-	asm volatile("mov %%ebp, %0" : "=r" (oldBase));
-	offset = top - (uintptr_t) stackTop;
-	newStack = oldStack + offset;
-	newBase = oldBase + offset;
-
-	memcpy((void *) newStack, (void *) oldStack, (size_t) stackTop - oldStack);
-
-	/* Update pointers on the stack */
-	uintptr_t i, tmp;
-	for (i = top; i > top - size; i -= sizeof(uintptr_t)) {
-		tmp = *(uintptr_t *) i;
-		if (tmp > oldStack && tmp < (uintptr_t) stackTop) {
-			tmp += offset;
-			*(uintptr_t *) i = tmp;
-		}
-	}
-
-	asm volatile("mov %0, %%esp" :: "r" (newStack));
-	asm volatile("mov %0, %%ebp" :: "r" (newBase));
-}
-
 /* Initialise tasking */
 void
 init_tasking(void)
 {
-	move_stack(0xF0800000, 0x2000);
-
 	current = new(&taskType);
-	asm volatile("mov %%cr3, %0" : "=r" (current->pageDir));
 	current->state = RUNNING;
 
 	/* File System namespace */
@@ -115,6 +80,7 @@ init_tasking(void)
 	cpu->scheduler->tasks = 1;
 	register_interrupt(0, timer_handler);
 	register_exception(128, syscall_handler);
+	set_kernel_stack((uintptr_t) current + KERNEL_STACK_SIZE);
 }
 
 /* Get the current task's PID */