Orion
Barry Importing existing Orion kernel d41a53c (3 years, 2 months ago)
diff --git a/mem/frame.c b/mem/frame.c
new file mode 100644
index 0000000..ee88fd8
--- /dev/null
+++ b/mem/frame.c
@@ -0,0 +1,207 @@
+/*
+ * This file handles allocating and freeing of page frames. It is the core of
+ * the physical memory manager. The Kernel heap and paging systems both sit on
+ * top of this. It keeps track of which frames of physical memory are free and
+ * hands them out as needed. To do this it keeps a simple bitmap of the frames.
+ */
+
+#include <stdint.h>
+#include "mem.h"
+#include "paging.h"
+#include "frame.h"
+#include "../screen.h"
+
+#define INDEX(a) ((a)/32)
+#define OFFSET(a) ((a)%32)
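+/* e.g. frame index 70 lives in bitmap word INDEX(70) == 2, bit OFFSET(70) == 6 */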
+
+/* Descriptor of a Frame Region */
+typedef struct FrameRegion FrameRegion;
+struct FrameRegion {
+ uint32_t base;
+ size_t numFrames;
+ size_t usedFrames;
+ FrameRegion *next;
+ uint32_t bitmap[];
+};
+
+size_t numFrames, usedFrames;
+FrameRegion *regions;
+
+/* Count the number of pages a number of bytes occupies */
+static size_t
+page_count(size_t bytes)
+{
+ size_t pages = bytes/0x1000;
+ if (bytes & 0xFFF)
+ pages++;
+ return pages;
+}
+
+/* Set a bit in the frame bitset */
+static void
+set_frame(FrameRegion *region, uint32_t idx)
+{
+ region->bitmap[INDEX(idx)] |= (1 << OFFSET(idx));
+ region->usedFrames++;
+ usedFrames++;
+}
+
+/* Clear a bit in the frame bitset */
+static void
+clear_frame(FrameRegion *region, uint32_t idx)
+{
+ region->bitmap[INDEX(idx)] &= ~(1 << OFFSET(idx));
+ region->usedFrames--;
+ usedFrames--;
+}
+
+/* Test a bit in the frame bitset */
+static uint32_t
+test_frame(FrameRegion *region, uint32_t idx)
+{
+ return (region->bitmap[INDEX(idx)] & (1 << OFFSET(idx)));
+}
+
+/* Get the n bits ending at bit position p */
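+/* e.g. get_bits(0xD6, 4, 3) == 0x5, i.e. bits 4 down to 2 of 11010110b */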
+static inline uint32_t
+get_bits(uint32_t x, uint8_t p, uint8_t n)
+{
+ return (x >> (p+1-n)) & ~(~0 << n);
+}
+
+/* Find first free frame */
+static uint32_t
+find_frames(FrameRegion *region, size_t frames)
+{
+ /* TODO: best fit */
+ /* FIXME: frames can be at most 32 */
+ uint32_t i, j;
+	for (i = 0; i * 32 < region->numFrames; i++) {
+		if (!~region->bitmap[i])
+			continue;
+		for (j = frames - 1; j < 32; j++) {
+ if (get_bits(~region->bitmap[i], j, frames) == ~(~0 <<
+ frames))
+ return (i*32+j)-frames+1;
+ /* TODO: check across uint32_t boundaries */
+ }
+ }
+ return (uint32_t) -1;
+}
+
+/* Allocate a set of contiguous page frames */
+uint32_t
+alloc_frames(size_t frames)
+{
+	uint32_t idx = (uint32_t) -1;
+ size_t i;
+ /* Walk the regions, first fit */
+ FrameRegion *region;
+ for (region = regions; region; region = region->next) {
+ idx = find_frames(region, frames);
+ if (idx != -1)
+ break;
+ }
+ if (idx == -1)
+ return -1;
+
+ for (i = 0; i < frames; i++)
+ set_frame(region, idx + i);
+
+ return (uint32_t) region->base + (idx << 12);
+}
+
+/* Free a page frame */
+void
+free_frame(uint32_t frame)
+{
+ /* Walk the regions */
+ FrameRegion *region = regions;
+ while (region) {
+		if (frame >= region->base && frame <
+		    region->base + (region->numFrames * 0x1000)) {
+			frame -= region->base;
+			clear_frame(region, frame >> 12);
+			break;
+		}
+ region = region->next;
+ }
+}
+
+/* Setup the frame allocator */
+void
+init_frames(uint32_t memMapSize, struct E820Entry *memMap)
+{
+ /* Relocate the memory map */
+ struct E820Entry *top = (void *) 0x20000;
+ if (memMap < top) {
+ memcpy(top, memMap, memMapSize);
+ memMap = (void *) top;
+ }
+
+ /*
+ * When the OS starts, the bootloader passes the Kernel a map of memory.
+	 * This map must be read so that the memory manager can avoid bad areas
+	 * of memory, such as those mapped to hardware, ACPI, etc.
+	 * The frame allocator should only create a bitmap for frames that it
+	 * can actually allocate. This means avoiding bad areas, and avoiding
+	 * the Kernel.
+ */
+ uint32_t i, j;
+ FrameRegion *prev = 0, *head = regions;
+ uint32_t bumpAlloc = 0x1000;
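+	/*
+	 * FrameRegion descriptors and their bitmaps are bump-allocated from
+	 * physical address 0x1000 upwards; this range is later marked as used
+	 * via the remaps[] table so it is never handed out.
+	 */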
+ for (i = 0; i < memMapSize / sizeof(struct E820Entry); i++) {
+ kprintf("MemMap: base=%#.8x%.8x, length=%#.8x%.8x, type=%d",
+ memMap[i].baseHigh, memMap[i].base,
+ memMap[i].lengthHigh, memMap[i].length,
+ memMap[i].type);
+ if (memMap[i].baseHigh > 0
+ || memMap[i].base > 0xFFFFFFFF
+ || memMap[i].type != 1)
+ continue;
+
+ /* Usable region - create bitmap */
+		size_t frameCount = memMap[i].length / 0x1000;
+		size_t bitmapSize = ((frameCount + 31) / 32) * sizeof(uint32_t);
+		head = (FrameRegion *) bumpAlloc;
+		bumpAlloc += bitmapSize + sizeof(FrameRegion);
+		head->base = memMap[i].base;
+		head->numFrames = frameCount;
+		head->usedFrames = 0;
+		memset(head->bitmap, 0, bitmapSize);
+		/* Mark the padding bits in the last word as used, so they're
+		 * never allocated */
+		for (j = head->numFrames; j < bitmapSize * 8; j++)
+			set_frame(head, j);
+ if (prev) prev->next = head;
+ else regions = head;
+ prev = head;
+ }
+
+ /* Regions to be remapped */
+ struct {uint32_t start, end;} remaps[] = {
+ {.start = 0x0000, .end = bumpAlloc}, /* PMM bitmaps */
+ {.start = 0xB0000, .end = 0xC0000 }, /* VGA memory */
+ {.start = 0x100000, .end = 0x180000 }, /* Kernel */
+ };
+
+ kprintf("Bump allocator top @ %#.8x", bumpAlloc);
+
+ /* Check bitmaps */
+ usedFrames = 0;
+ numFrames = 0;
+ FrameRegion *region = regions;
+ uint32_t regionEnd;
+ while (region) {
+ numFrames += region->numFrames;
+ regionEnd = region->base + (region->numFrames * 0x1000);
+ /* Iterate the remaps[] to find overlapping regions */
+ for (i = 0; i < sizeof(remaps)/sizeof(remaps[0]); i++)
+ for (j = remaps[i].start; j < remaps[i].end
+ && j >= region->base && j < regionEnd;
+ j += 0x1000)
+ set_frame(region, (j - region->base) >> 12);
+ region = region->next;
+ }
+
+ if (numFrames < 1024) /* 4MB */
+ panic("Not enough memory");
+}
diff --git a/mem/frame.h b/mem/frame.h
new file mode 100644
index 0000000..ba68567
--- /dev/null
+++ b/mem/frame.h
@@ -0,0 +1,25 @@
+#ifndef KERNEL_MEM_FRAME_H
+#define KERNEL_MEM_FRAME_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Structure of an E820 entry */
+struct E820Entry {
+ uint32_t size;
+ uint32_t base, baseHigh;
+ uint32_t length, lengthHigh;
+ uint32_t type;
+} __attribute__((packed));
+
+/* Alternative 64-bit E820 entry layout (unused) */
+//struct E820Entry {
+// uint64_t base, length;
+// uint32_t type, attr;
+//} __attribute__((packed));
+
+uint32_t alloc_frames(size_t frames);
+void free_frame(uint32_t frame);
+void init_frames(uint32_t memMapSize, struct E820Entry *memMap);
+
+#endif
diff --git a/mem/heap.c b/mem/heap.c
new file mode 100644
index 0000000..9f9bbe2
--- /dev/null
+++ b/mem/heap.c
@@ -0,0 +1,125 @@
+/*
+ * This file contains the functions related to the Kernel's heap. It uses a
+ * simple method of allocating and freeing blocks from a pool in the Kernel's
+ * memory space. This heap will be present in every Page Directory, so can be
+ * used to store any Kernel data structures.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include "mem.h"
+#include "heap.h"
+#include "frame.h"
+#include "../vfs/cache.h"
+#include "../screen.h"
+#include "../spinlock.h"
+
+#define KHEAP_START 0x200000 /* 2 MB */
+#define KHEAP_END 0x800000 /* 8 MB */
+#define BLOCK_SIZE 16
+
+/* Structure for a Memory Header */
+typedef struct Header {
+ struct Header *next, *prev;
+ size_t size;
+ char magic[4];
+} Header; /* 16 bytes */
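+
+/*
+ * Each allocation is laid out as [Header][payload of 'size' bytes]. Blocks
+ * form a doubly-linked list ordered by address between KHEAP_START and
+ * KHEAP_END, and kmalloc() places new blocks in the first gap that fits.
+ */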
+
+size_t objectsAllocated = 0;
+size_t freeSpace = KHEAP_END - KHEAP_START;
+
+/* Allocate a region of the heap */
+void *
+kmalloc(size_t size)
+{
+ size_t blockSize, gapSize;
+ uintptr_t blockEnd;
+ Header *prev, *head, *next;
+ head = prev = (void *) KHEAP_START;
+ next = NULL;
+ objectsAllocated++;
+
+ /* Minimum allocation */
+ if (size % BLOCK_SIZE)
+ size += BLOCK_SIZE - (size % BLOCK_SIZE);
+ freeSpace -= size + sizeof(Header);
+
+ /* Low-memory VFS cache reaper */
+ if (freeSpace < 0x10000) /* 64 KB */
+ cache_reaper();
+
+ /* Block does not exist, create heap */
+ if (head->prev != head) {
+ head->prev = head;
+ head->next = NULL;
+ head->size = size;
+ memcpy(head->magic, "HEAP", 4);
+ memset((void *) (head + 1), 0, size);
+ return (void *) (head + 1);
+ }
+
+ /* Find gap */
+ while (head->next) {
+ next = head->next;
+ blockSize = sizeof(Header) + head->size;
+ blockEnd = (uintptr_t) head + blockSize;
+ gapSize = (size_t) next - blockEnd;
+ prev = head;
+
+ /* Fit in gap */
+ if (gapSize >= size + sizeof(Header)) {
+ head = (void *) blockEnd;
+ head->next = next;
+ head->prev = prev;
+ prev->next = head;
+ next->prev = head;
+ head->size = size;
+ memcpy(head->magic, "HEAP", 4);
+ memset((void *) (head + 1), 0, size);
+ return (void *) (head + 1);
+ }
+
+ head = head->next;
+ }
+
+ /* Add to end */
+ blockSize = sizeof(Header) + head->size;
+ blockEnd = (uintptr_t) head + blockSize;
+ gapSize = (size_t) KHEAP_END - blockEnd;
+ /* Fit in gap */
+ if (gapSize >= size + sizeof(Header)) {
+ prev = head;
+ head = (void *) blockEnd;
+ head->next = NULL;
+ head->prev = prev;
+ prev->next = head;
+ head->size = size;
+ memcpy(head->magic, "HEAP", 4);
+ memset((void *) (head + 1), 0, size);
+ return (void *) (head + 1);
+ }
+
+ panic("Kernel heap exhausted");
+}
+
+/* Free an allocated region of the heap */
+void
+_kfree(void *addr, char *file, int line)
+{
+ Header *prev, *head, *next;
+ head = (Header *) addr - 1;
+ objectsAllocated--;
+ freeSpace += head->size + sizeof(Header);
+
+ if (memcmp(head->magic, "HEAP", 4))
+ panic("Bad Kernel heap reference\n"
+ "Invalid target @ %#.8x (%s:%d)", addr, file, line);
+ prev = head->prev;
+ next = head->next;
+ memset(head, 0, sizeof(Header));
+
+ if (prev != head)
+ prev->next = next;
+ if (next)
+ next->prev = prev;
+}
diff --git a/mem/heap.h b/mem/heap.h
new file mode 100644
index 0000000..1923583
--- /dev/null
+++ b/mem/heap.h
@@ -0,0 +1,11 @@
+#ifndef KERNEL_MEM_HEAP_H
+#define KERNEL_MEM_HEAP_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+void *kmalloc(size_t size);
+void _kfree(void *addr, char *file, int line);
+#define kfree(a) _kfree(a, __FILE__, __LINE__)
+
+#endif
diff --git a/mem/mem.c b/mem/mem.c
new file mode 100644
index 0000000..80a7c89
--- /dev/null
+++ b/mem/mem.c
@@ -0,0 +1,106 @@
+/*
+ * This file contains a few routines for the manipulation of memory and strings.
+ * The functions would normally be part of a C library, but this is for the
+ * Kernel. The functions are standalone, and have no dependencies - they can be
+ * called immediately after boot.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Fill a region of memory with the specified byte */
+void *
+memset(void *s, int c, size_t n)
+{
+ unsigned char *a = s;
+ if (n > 0) {
+ while (n-- > 0)
+ *a++ = c;
+ }
+ return s;
+}
+
+/* Copy one region of memory to another */
+void *
+memcpy(void *dest, void *src, size_t n)
+{
+ unsigned char *a = (unsigned char *) dest,
+ *b = (unsigned char *) src;
+ while (n-- > 0)
+ *a++ = *b++;
+ return dest;
+}
+
+/* Compare two regions of memory */
+int
+memcmp(void *s1, void *s2, size_t n)
+{
+ unsigned char *a = (unsigned char *) s1,
+ *b = (unsigned char *) s2;
+ while (n-- > 0)
+ if (*a++ != *b++)
+ return a[-1] - b[-1];
+ return 0;
+}
+
+/* Find the length of a string */
+size_t
+strlen(char *s)
+{
+ if (!s)
+ return 0;
+ size_t i;
+ for (i = 0; s[i]; i++);
+ return i;
+}
+
+/* Find the length of a string up to maximum */
+size_t
+strnlen(char *s, size_t maxlen)
+{
+ if (!s)
+ return 0;
+ size_t i;
+	for (i = 0; s[i] && i < maxlen; i++);
+ return i;
+}
+
+/* Compare two strings */
+int
+strcmp(const char *s1, const char *s2)
+{
+ for (; *s1 == *s2 && *s1 && *s2; s1++, s2++);
+ return *(unsigned char *) s1 - *(unsigned char *) s2;
+}
+
+/* Compare two limited strings */
+int
+strncmp(const char *s1, const char *s2, size_t n)
+{
+ if (!n--) return 0;
+ for (; *s1 == *s2 && *s1 && *s2 && n; s1++, s2++, n--);
+ return *(unsigned char *) s1 - *(unsigned char *) s2;
+}
+
+/* Copy a string */
+char *
+strcpy(char *dest, const char *src)
+{
+ char *ret = dest;
+ while (*src)
+ *dest++ = *src++;
+ *dest = '\0';
+ return ret;
+}
+
+/* Copy a limited string */
+char *
+strncpy(char *dest, const char *src, size_t n)
+{
+	char *ret = dest;
+	while (n && *src) {
+		*dest++ = *src++;
+		n--;
+	}
+	/* Pad the remainder with NULs, never writing past n bytes */
+	while (n--)
+		*dest++ = '\0';
+	return ret;
+}
+
diff --git a/mem/mem.h b/mem/mem.h
new file mode 100644
index 0000000..fa6dfc4
--- /dev/null
+++ b/mem/mem.h
@@ -0,0 +1,17 @@
+#ifndef KERNEL_MEM_H
+#define KERNEL_MEM_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+void *memset(void *s, int c, size_t n);
+void *memcpy(void *dest, void *src, size_t n);
+int memcmp(void *s1, void *s2, size_t n);
+size_t strlen(char *s);
+size_t strnlen(char *s, size_t maxlen);
+int strcmp(const char *s1, const char *s2);
+int strncmp(const char *s1, const char *s2, size_t n);
+char *strcpy(char *dest, const char *src);
+char *strncpy(char *dest, const char *src, size_t n);
+
+#endif
diff --git a/mem/page.S b/mem/page.S
new file mode 100644
index 0000000..5e1dd4a
--- /dev/null
+++ b/mem/page.S
@@ -0,0 +1,60 @@
+; This file implements some simple low level assembly helper functions for the
+; paging system. They are written in assembly for speed, since they may be
+; called frequently by the memory manager.
+
+[bits 32]
+
+; Enable paging
+[global enable_paging]
+enable_paging:
+ mov edx, cr0
+ or edx, 0x80000000
+ mov cr0, edx
+ ret
+
+; Disable paging
+[global disable_paging]
+disable_paging:
+ mov edx, cr0
+ and edx, 0x7FFFFFFF
+ mov cr0, edx
+ ret
+
+; Copy the contents of a page frame
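+; cdecl calling convention: copy_page_frame(void *src, void *dest). Both
+; arguments are physical addresses; interrupts are masked and paging is
+; disabled around a 1024-dword copy so the raw frames can be addressed
+; directly.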
+[global copy_page_frame]
+copy_page_frame:
+; push ebx
+; pushf
+; mov esi, [esp + 12]
+; mov edi, [esp + 16]
+; call disable_paging
+; mov ecx, 1024
+; rep movsd
+; call enable_paging
+; popf
+; pop ebx
+; ret
+ push ebx
+ pushf
+ cli
+ mov ebx, [esp + 12]
+ mov ecx, [esp + 16]
+ mov edx, cr0
+ and edx, 0x7FFFFFFF
+ mov cr0, edx
+ mov edx, 1024
+.loop:
+ mov eax, [ebx]
+ mov [ecx], eax
+ add ebx, 4
+ add ecx, 4
+ dec edx
+ jnz .loop
+ mov edx, cr0
+ or edx, 0x80000000
+ mov cr0, edx
+ sti
+ popf
+ pop ebx
+ ret
diff --git a/mem/pagefault.c b/mem/pagefault.c
new file mode 100644
index 0000000..14219b7
--- /dev/null
+++ b/mem/pagefault.c
@@ -0,0 +1,264 @@
+/*
+ * This is the page fault handler. It dispatches the handlers for all page
+ * faults, including the demand-paging and copy-on-write paths.
+ */
+
+#include <stdint.h>
+#include <signal.h>
+#include "paging.h"
+#include "../vfs/cache.h"
+#include "../vfs/inode.h"
+#include "../vfs/tmpfs/fs.h"
+#include "../mem/heap.h"
+#include "../mem/mem.h"
+#include "../task/task.h"
+#include "../proc/proc.h"
+#include "../screen.h"
+
+extern size_t numFrames, usedFrames;
+
+void copy_page_frame(void *src, void *dest);
+
+/* Copy-On-Write */
+static void
+copy_on_write(VMRegion *region, uintptr_t addr)
+{
+ Page *page = NULL;
+ File *front = region->front,
+ *back = region->back;
+ off_t offset = ((addr & ~0xFFF) - region->start) + region->offset;
+ page_t *pg = get_page((void *) addr);
+
+ /* Create front if it doesn't exist and is needed */
+ uint8_t private = region->flags & MAP_PRIVATE;
+ uint8_t sharedanon = (region->flags & MAP_SHARED) &&
+ (region->flags & MAP_ANONYMOUS);
+ uint8_t created = 0;
+ if (!front && (private || sharedanon)) {
+ /*
+ * A private mapping will always write to the front. A shared
+ * mapping will write to the back. If a shared mapping is
+ * anonymous, then the back is the front. The front must be
+ * created if it is required - which means if the mapping is
+ * private, or if the mapping is shared & anonymous.
+ */
+ front = kmalloc(sizeof(File));
+ front->inode = inode_get(kmalloc(sizeof(Inode)));
+ front->ops = &tmpfsFileOps;
+ region->front = file_get(front);
+ created++;
+ }
+
+ /* Find original page frame */
+ Inode *inode;
+ if (!page && front) {
+ inode = front->inode;
+ ASSERT(inode);
+ page = page_find(inode, offset);
+ }
+ if (!page && back) {
+ inode = back->inode;
+ ASSERT(inode);
+ page = page_find(inode, offset);
+ }
+ ASSERT(page);
+
+ /* Copy already happened, just link */
+ if (page->usage == 1 && page->frame != zeroFrame) {
+ *pg |= PTE_WRITE;
+ return;
+ }
+ /* Put that page, and create a new one */
+ *pg = 0;
+ alloc_page(pg, PTE_PRESENT | PTE_USER | PTE_WRITE, -1);
+ copy_page_frame((void *) PG_ADDR(page->frame),
+ (void *) PG_ADDR(*pg));
+ page_remove(inode, page);
+ page = page_create(front->inode, PG_ADDR(*pg), offset);
+}
+
+/* Handle a not-present read page fault */
+static void
+not_present_read(VMRegion *region, uintptr_t addr)
+{
+ Page *page;
+ File *front = region->front,
+ *back = region->back;
+ off_t offset = ((addr & ~0xFFF) - region->start) + region->offset;
+ page_t *pg = get_page((void *) addr);
+
+ /* Handle uninitialised anonymous regions */
+ if (!front && (region->flags & MAP_ANONYMOUS)) {
+ front = kmalloc(sizeof(File));
+ front->inode = inode_get(kmalloc(sizeof(Inode)));
+ front->ops = &tmpfsFileOps;
+ region->front = file_get(front);
+ }
+
+ /* Attempt to use front */
+ if (front) {
+ page = page_find(front->inode, offset);
+ if (page) {
+ page_get(page);
+ alloc_page(pg, PTE_PRESENT | PTE_USER, page->frame);
+ return;
+ }
+ if (region->flags & MAP_ANONYMOUS) {
+ /* Must be anonymous, zero-fill */
+ alloc_page(pg, PTE_PRESENT | PTE_USER, zeroFrame);
+ page_create(front->inode, zeroFrame, offset);
+ return;
+ }
+ }
+
+ /* Use back */
+ ASSERT(back);
+ page = page_find(back->inode, offset);
+ if (page) {
+ page_get(page);
+ alloc_page(pg, PTE_PRESENT | PTE_USER, page->frame);
+ return;
+ }
+ /* Create new block cache entry */
+ alloc_page(pg, PTE_PRESENT | PTE_USER, -1);
+ file_mmap(back, (void *) PG_ADDR(addr), 0x1000, offset);
+ page_create(back->inode, PG_ADDR(*pg), offset);
+}
+
+/* Handle a not-present write page fault */
+static void
+not_present_write(VMRegion *region, uintptr_t addr)
+{
+ Page *page = NULL;
+ File *front = region->front,
+ *back = region->back;
+ off_t offset = ((addr & ~0xFFF) - region->start) + region->offset;
+ page_t *pg = get_page((void *) addr);
+
+ /* Handle uninitialised anonymous regions */
+ if (!front && ((region->flags & MAP_PRIVATE)
+ || (region->flags & MAP_ANONYMOUS))) {
+ /*
+ * This applies to all private regions, anonymous or not.
+ * Unless the region is shared, the process should write to the
+ * front, which will be the private copy. If the region is
+ * shared, and also anonymous, then the write will occur to the
+ * front too.
+ */
+ front = kmalloc(sizeof(File));
+ front->inode = inode_get(kmalloc(sizeof(Inode)));
+ front->ops = &tmpfsFileOps;
+ region->front = file_get(front);
+ }
+
+ /* Shared region, write-through to back */
+ if (region->flags & MAP_SHARED) {
+ if (region->flags & MAP_ANONYMOUS)
+ back = front;
+ ASSERT(back);
+ page = page_find(back->inode, offset);
+ if (page) {
+ page_get(page);
+ alloc_page(pg, PTE_PRESENT | PTE_USER | PTE_WRITE,
+ page->frame);
+ return;
+ }
+ *pg = 0;
+ alloc_page(pg, PTE_PRESENT | PTE_USER | PTE_WRITE, -1);
+ memset((void *) PG_ADDR(addr), 0, 0x1000);
+ page_create(back->inode, PG_ADDR(*pg), offset);
+ return;
+ }
+
+ /* Private region, copy to front */
+ alloc_page(pg, PTE_PRESENT | PTE_USER | PTE_WRITE, -1);
+ if (front)
+ page = page_find(front->inode, offset);
+ if (page) {
+ copy_page_frame((void *) PG_ADDR(page->frame),
+ (void *) PG_ADDR(*pg));
+ page_remove(front->inode, page);
+ page_create(front->inode, PG_ADDR(*pg), offset);
+ return;
+ }
+
+ /* Anonymous region, zero-fill */
+ if (region->flags & MAP_ANONYMOUS) {
+ memset((void *) PG_ADDR(addr), 0, 0x1000);
+ page_create(front->inode, PG_ADDR(*pg), offset);
+ return;
+ }
+
+ /* Use back */
+ ASSERT(back);
+ page = page_find(back->inode, offset);
+ if (page) {
+ copy_page_frame((void *) PG_ADDR(page->frame),
+ (void *) PG_ADDR(*pg));
+ page_remove(back->inode, page);
+ } else {
+ file_mmap(back, (void *) PG_ADDR(addr), 0x1000, offset);
+ }
+ page_create(front->inode, PG_ADDR(*pg), offset);
+}
+
+/* Page fault handler */
+void
+page_fault_handler(InterruptFrame *frame)
+{
+ uintptr_t addr;
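+	/* CR2 holds the faulting linear address; the error code encodes the
+	 * present / write / user status of the access */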
+ asm volatile("mov %%cr2, %0" : "=r" (addr));
+ uint8_t present = frame->errCode & (1 << 0);
+ uint8_t write = frame->errCode & (1 << 1);
+ uint8_t user = frame->errCode & (1 << 2);
+
+ /* Iterate VM Regions */
+ VMRegion *region;
+ for (region = current->vm->regions; region; region = region->next) {
+ if (region->start <= addr && region->end > addr)
+ break;
+ }
+ if (!region && current->stack) {
+ region = current->stack;
+ if (region->start > addr || region->end <= addr)
+ region = NULL;
+ }
+ if (!region && current->tls) {
+ region = current->tls;
+ if (region->start > addr || region->end <= addr)
+ region = NULL;
+ }
+ /* Not in a region */
+ if (!region) {
+ page_t *pg = get_page((void *) addr);
+ panic("Page Fault [%d:%d] (%#.8x -> %#.8x [tbl:%d, pg:%d][%#.8x]), %s, %s, %s",
+ current->tgid, current->tid, frame->eip,
+ addr, (addr>>12) / 1024, (addr>>12) % 1024, *pg,
+ present ? "present" : "not present",
+ write ? "write" : "read",
+ user ? "user" : "kernel");
+ }
+
+	if (user && write && !(region->prot & PROT_WRITE)) {
+		kill(current->tgid, SIGSEGV);
+		return;
+	}
+
+	if (present && write)
+		copy_on_write(region, addr);
+	else if (write)
+		not_present_write(region, addr);
+	else
+		not_present_read(region, addr);
+}
+
+/* Early (pre-VFS/tasking) page fault handler */
+void
+early_page_fault_handler(InterruptFrame *frame)
+{
+ uintptr_t addr;
+ asm volatile("mov %%cr2, %0" : "=r" (addr));
+ if (!PG_ADDR(addr))
+ panic("Null dereference @ %#.8x", frame->eip);
+ alloc_page(get_page((void *) addr),
+ PTE_PRESENT | PTE_WRITE | PTE_GLOBAL, -1);
+}
diff --git a/mem/paging.c b/mem/paging.c
new file mode 100644
index 0000000..b44e843
--- /dev/null
+++ b/mem/paging.c
@@ -0,0 +1,228 @@
+/*
+ * This file contains all functions used to manipulate the virtual address
+ * spaces. It has a static Kernel page directory and table, which it uses to
+ * initialise an identity-mapped environment for the Kernel to work in. This is
+ * enough for the heap to function. The file also exposes several functions
+ * that allow a page directory to be manipulated and have pages added and moved.
+ * These functions are used by other components of the Kernel - mostly the heap
+ * and IPC. There are also functions to create new and destroy existing page
+ * directories. The paging system also implements features like copy-on-write.
+ */
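+
+/*
+ * The current page directory is mapped into itself at slot 1023, so every
+ * page table is visible in the 4 MB window at 0xFFC00000 and the directory
+ * entries themselves appear in the top page at 0xFFFFF000. The helpers below
+ * rely on this recursive mapping to reach any PTE or PDE.
+ */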
+
+#include <stdint.h>
+#include <sys/mman.h>
+#include "frame.h"
+#include "heap.h"
+#include "mem.h"
+#include "paging.h"
+#include "../vfs/vfs.h"
+#include "../proc/proc.h"
+#include "../io.h"
+#include "../screen.h"
+
+Spinlock quickPageLock;
+
+page_table_t kernelDir;
+page_t zeroFrame;
+
+void enable_paging(void);
+void disable_paging(void);
+void copy_page_frame(void *src, void *dest);
+
+/* Switch page directory */
+static void
+switch_dir(page_dir_t dir)
+{
+ asm volatile("mov %0, %%cr3" :: "r" (dir));
+}
+
+/* Allocate a frame for a page */
+void
+alloc_page(page_t *page, uint16_t flags, page_t frame)
+{
+ page_t *mappings = (void *) 0xFFC00000;
+ page_table_t *tables = (void *) 0xFFFFF000;
+ if ((tables[(page - mappings) / 1024] & PDE_PRESENT) == 0)
+ return;
+ if (*page & 0xFFFFF000)
+ return;
+
+ if (frame == (page_t) -1)
+ frame = alloc_frames(1);
+ if (frame == (page_t) -1)
+ return;
+
+ *page = frame | flags;
+ flush_tlb((page - mappings) << 12);
+}
+
+/* Release a page's frame */
+void
+free_page(page_t *page)
+{
+ page_t *mappings = (void *) 0xFFC00000;
+ page_table_t *tables = (void *) 0xFFFFF000;
+ if ((tables[(page - mappings) / 1024] & PDE_PRESENT) == 0)
+ return;
+ if ((*page & 0xFFFFF000) == 0)
+ return;
+
+ free_frame(*page & 0xFFFFF000);
+ *page = 0x00000000;
+ flush_tlb((page - mappings) << 12);
+}
+
+/* Get Page Table Entry from virtual address */
+page_t *
+get_page(void *addr)
+{
+ page_t *mappings = (void *) 0xFFC00000;
+ page_table_t *tables = (void *) 0xFFFFF000;
+ uint32_t address = (uint32_t) addr >> 12;
+ uint32_t tbl = address / 1024;
+ /* Create table not present */
+ if ((tables[tbl] & PDE_PRESENT) == 0) {
+ tables[tbl] = alloc_frames(1)
+ | PDE_PRESENT | PDE_WRITE | PDE_USER;
+ memset((void *) mappings + (tbl * 0x1000), 0, 0x1000);
+ }
+ return &mappings[address];
+}
+
+/* Clone a page directory */
+page_dir_t
+clone_dir(void)
+{
+ page_table_t *oldTables = (void *) 0xFFFFF000;
+ page_table_t *newTables = (void *) 0xFFFFE000;
+ page_t *oldTable, *newTable;
+ page_dir_t dir = alloc_frames(1);
+ uint16_t i, tbl, pg;
+
+ /* Temporarily link new paging structures into current directory */
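+	/*
+	 * With the new directory installed in PDE slot 1022, its entries become
+	 * visible at 0xFFFFE000 (newTables) and the tables it points to appear
+	 * in the 4 MB window at 0xFF800000 (newTable below).
+	 */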
+ page_table_t restore = oldTables[1022];
+ oldTables[1022] = dir | PDE_PRESENT | PDE_WRITE;
+ for (i = 0; i < 1024; i++)
+ flush_tlb((uintptr_t) newTables + (0x1000 * i));
+
+ /* Iterate tables */
+ for (tbl = 0; tbl < 1022; tbl++) {
+ if ((oldTables[tbl] & PDE_PRESENT) == 0)
+ continue;
+
+ /* Link Kernel tables */
+ if (tbl < 2 || tbl >= 1008) { /* TODO: define kernel mem */
+ newTables[tbl] = oldTables[tbl];
+ continue;
+ }
+
+ /* Copy everything else */
+ newTables[tbl] = alloc_frames(1) | PG_ATTR(oldTables[tbl]);
+ oldTable = (page_t *) 0xFFC00000 + (tbl * 1024);
+ newTable = (page_t *) 0xFF800000 + (tbl * 1024);
+ for (pg = 0; pg < 1024; pg++) {
+ if ((oldTable[pg] & PTE_PRESENT) == 0) {
+ newTable[pg] = 0;
+ continue;
+ }
+
+ /* Copy-On-Write behaviour */
+ if (tbl < 960) {
+ oldTable[pg] &= ~PTE_WRITE;
+ flush_tlb((uintptr_t) (((tbl * 1024) + pg) << 12));
+ newTable[pg] = oldTable[pg];
+ } else {
+ newTable[pg] = alloc_frames(1) | PG_ATTR(oldTable[pg]);
+ copy_page_frame((void *) PG_ADDR(oldTable[pg]),
+ (void *) PG_ADDR(newTable[pg]));
+ }
+ /* FIXME */
+ }
+ }
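+	/* Recursively map the new directory into itself (slot 1023) */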
+ newTables[1023] = oldTables[1022];
+
+ /* Unlink paging structures */
+ oldTables[1022] = restore;
+ for (i = 0; i < 1024; i++)
+ flush_tlb((uintptr_t) newTables + (0x1000 * i));
+
+ return dir;
+}
+
+/* Free all (copied) pages in the current directory */
+void
+clean_dir(void)
+{
+ page_t *mappings = (void *) 0xFFC00000;
+ page_table_t *tables = (void *) 0xFFFFF000;
+ page_t *pages;
+ uint16_t tbl, pg;
+ for (tbl = 2; tbl < 1008; tbl++) {
+ if ((tables[tbl] & PDE_PRESENT) == 0)
+ continue;
+ pages = mappings + (tbl * 1024);
+ for (pg = 0; pg < 1024; pg++) {
+ if ((pages[pg] & PDE_PRESENT) == 0)
+ continue;
+ free_page(pages + pg);
+ }
+ }
+}
+
+/* Quickly map a page frame into view for temporary use */
+page_t
+quick_page(uintptr_t frame)
+{
+ page_t *mappings = (void *) 0xFFC00000;
+ page_t old;
+ old = mappings[2047];
+ mappings[2047] = PG_ADDR(frame) | PG_ATTR(old);
+ flush_tlb(0x7FF000);
+ return PG_ADDR(old);
+}
+
+/* Initialise paging */
+void
+init_paging(void)
+{
+ zeroFrame = alloc_frames(1);
+ memset((void *) zeroFrame, 0, 0x1000);
+
+ uint16_t tbl, pg;
+ page_t *table;
+ page_table_t *kernelTables;
+ kernelDir = alloc_frames(1);
+ kernelTables = (page_table_t *) kernelDir;
+ for (tbl = 0; tbl < 1024; tbl++)
+ kernelTables[tbl] = 0x00000000 | PDE_WRITE;
+ for (tbl = 0; tbl < 2; tbl++) {
+ table = (void *) alloc_frames(1);
+ kernelTables[tbl] = ((page_table_t) table)
+ | PDE_WRITE | PDE_PRESENT;
+ for (pg = 0; pg < 1024; pg++) {
+ if (!tbl && !pg)
+ continue;
+ table[pg] = (((tbl * 1024) + pg) << 12)
+ | PTE_WRITE | PTE_PRESENT | PTE_GLOBAL;
+ }
+ }
+ /* Map the directory into itself */
+ kernelTables[1023] = kernelDir | PDE_WRITE | PDE_PRESENT;
+
+ /* Use Kernel directory */
+ switch_dir(kernelDir);
+ register_exception(14, early_page_fault_handler);
+ enable_paging();
+
+ /* Identity page the APIC registers */
+ *get_page((void *) lapicPtr) = lapicPtr
+ | PTE_PRESENT | PTE_WRITE | PTE_GLOBAL;
+ *get_page((void *) ioapicPtr) = ioapicPtr
+ | PTE_PRESENT | PTE_WRITE | PTE_GLOBAL;
+
+ /* Allocate Kernel stack */
+ uintptr_t stk;
+ for (stk = 0xF0400000; stk < 0xF0800000; stk += 0x1000)
+ alloc_page(get_page((void *) stk),
+ PTE_PRESENT | PTE_WRITE | PTE_USER, -1);
+}
diff --git a/mem/paging.h b/mem/paging.h
new file mode 100644
index 0000000..a3acaf2
--- /dev/null
+++ b/mem/paging.h
@@ -0,0 +1,58 @@
+#ifndef KERNEL_MEM_PAGING_H
+#define KERNEL_MEM_PAGING_H
+
+#include <stdint.h>
+#include "../proc/proc.h"
+#include "../spinlock.h"
+
+#define PG_ADDR(pg) ((pg) & 0xFFFFF000)
+#define PG_ATTR(pg) ((pg) & 0x00000FFF)
+
+#define QUICK_PAGE ((void *) 0x7FF000)
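+/* 0x7FF000 is the last page of the identity-mapped low 8 MB; quick_page()
+ * temporarily repoints its PTE at an arbitrary frame */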
+
+typedef uint32_t page_t;
+typedef uint32_t page_table_t;
+typedef uint32_t page_dir_t;
+
+/* Page flags */
+enum PageFlag {
+ PTE_PRESENT = (1 << 0),
+ PTE_WRITE = (1 << 1),
+ PTE_USER = (1 << 2),
+ PTE_THROUGH = (1 << 3),
+ PTE_NOCACHE = (1 << 4),
+ PTE_ACCESS = (1 << 5),
+ PTE_DIRTY = (1 << 6),
+ PTE_GLOBAL = (1 << 8),
+};
+/* Page Table flags */
+enum PageTableFlag {
+ PDE_PRESENT = (1 << 0),
+ PDE_WRITE = (1 << 1),
+ PDE_USER = (1 << 2),
+ PDE_THROUGH = (1 << 3),
+ PDE_NOCACHE = (1 << 4),
+ PDE_ACCESS = (1 << 5),
+};
+
+/* Flush Translation Lookaside Buffer */
+static inline void
+flush_tlb(uintptr_t addr)
+{
+ asm volatile("invlpg (%0)" :: "r" (addr) : "memory");
+}
+
+extern Spinlock quickPageLock;
+extern page_t zeroFrame;
+
+void init_paging(void);
+void early_page_fault_handler(InterruptFrame *frame);
+void page_fault_handler(InterruptFrame *frame);
+void alloc_page(page_t *page, uint16_t flags, page_t frame);
+void free_page(page_t *page);
+page_t *get_page(void *addr);
+page_dir_t clone_dir(void);
+void clean_dir(void);
+page_t quick_page(page_t frame);
+
+#endif
diff --git a/mem/user.c b/mem/user.c
new file mode 100644
index 0000000..b9ed273
--- /dev/null
+++ b/mem/user.c
@@ -0,0 +1,37 @@
+/*
+ * This file handles safely getting data from userspace for the Kernel. This is
+ * for security reasons to prevent the user from tricking a syscall into
+ * manipulating/leaking Kernel data structures. User memory is defined as any
+ * address range that completely sits in a Virtual Memory Region.
+ */
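+
+/*
+ * Typical use (illustrative): a syscall handler validates a user-supplied
+ * buffer with verify_access(buf, len, PROT_READ) or PROT_WRITE before
+ * dereferencing it, and rejects the call if the check fails.
+ */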
+
+#include <stdint.h>
+#include <string.h>
+#include "vm.h"
+#include "../task/task.h"
+
+/* User can read this address */
+int
+verify_access(const void *addr, size_t len, int prot)
+{
+ if (!in_syscall() || !addr || !len)
+ return 1;
+
+ /* Iterate all user memory regions */
+ VMRegion *head;
+ for (head = current->vm->regions; head; head = head->next) {
+ if ((uintptr_t) addr >= head->start
+ && ((uintptr_t) addr + len) < head->end)
+ break;
+ }
+	if (!head && current->stack) {
+ head = current->stack;
+ if ((uintptr_t) addr < head->start
+ || ((uintptr_t) addr + len) >= head->end)
+ head = NULL;
+ }
+ /* No fitting region */
+ if (!head)
+ return 0;
+ return (head->prot & prot);
+}
diff --git a/mem/vm.c b/mem/vm.c
new file mode 100644
index 0000000..46b40fc
--- /dev/null
+++ b/mem/vm.c
@@ -0,0 +1,261 @@
+/*
+ * This file handles the Virtual Memory system for processes. It splits each
+ * process into several memory regions, and points each of those regions to a
+ * memory object. Each object can be modified on demand, and can be made up of
+ * several pages, and backed by various stores. This allows objects such as
+ * files to be easily mapped into an address space, or for large regions to be
+ * shared between processes.
+ */
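+
+/*
+ * Each region may reference two files: 'back' is the mapped file itself (if
+ * any), while 'front' is a tmpfs-backed object created lazily to hold
+ * anonymous and private (copy-on-write) pages for the region.
+ */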
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include "heap.h"
+#include "paging.h"
+#include "vm.h"
+#include "../vfs/vfs.h"
+#include "../vfs/inode.h"
+#include "../vfs/cache.h"
+#include "../vfs/tmpfs/fs.h"
+#include "../task/task.h"
+#include "../proc/proc.h"
+#include "../screen.h"
+
+/* Unmap a range of pages from page directory */
+static void
+unmap_page_range(uintptr_t start, uintptr_t end)
+{
+ uintptr_t addr;
+ for (addr = start; addr < end; addr += 0x1000) {
+ *get_page((void *) addr) = 0x00000000;
+ flush_tlb(addr);
+ }
+}
+
+/* Remove a range of pages from a region's page cache */
+static void
+remove_cache_range(VMRegion *region, uintptr_t start, uintptr_t end)
+{
+ Page *page;
+ Inode *inode;
+ uintptr_t p;
+ for (p = 0; p < end - start; p += 0x1000) {
+ page = NULL;
+ if (!page && region->front) {
+ inode = region->front->inode;
+ page = page_find(inode, region->offset + p);
+ }
+ if (!page && region->back) {
+ inode = region->back->inode;
+ page = page_find(inode, region->offset + p);
+ }
+ if (page)
+ page_remove(inode, page);
+ }
+}
+
+/* Create a new VM Region */
+VMRegion *
+vm_create_region(void *addr, size_t len, int prot, int flags, off_t offset,
+ File *back)
+{
+ /* Create new region */
+ VMRegion *head, *next, *insert, *region = kmalloc(sizeof(VMRegion));
+ region->end = (uintptr_t) addr + len;
+ if (region->end % 0x1000)
+ region->end += 0x1000 - (region->end % 0x1000);
+ region->start = (uintptr_t) addr & ~0xFFF;
+ region->prot = prot;
+ region->flags = flags;
+ region->offset = offset;
+ region->front = NULL;
+ region->back = NULL;
+ if (back)
+ region->back = file_get(back);
+
+ /* Create new list */
+ if (!current->vm->regions) {
+ current->vm->regions = region;
+ return region;
+ }
+
+ /* Fix overlaps */
+ uintptr_t p;
+ for (head = current->vm->regions; head; head = next) {
+ next = head->next; /* head may be destroyed during iteration */
+ if (head->start >= region->end || head->end <= region->start)
+ continue;
+
+ /* Middle eclipsed */
+ if (head->start < region->start && head->end > region->end) {
+ /* Create region after current */
+ insert = kmalloc(sizeof(VMRegion));
+ insert->end = head->end;
+ insert->start = head->end = region->start;
+ insert->prot = head->prot;
+ insert->flags = head->flags;
+ insert->offset = head->offset;
+ insert->offset += (insert->start - head->start);
+ if (head->front)
+ insert->front = file_get(head->front);
+ if (head->back)
+ insert->back = file_get(head->back);
+ /* Insert into list */
+ insert->next = head->next;
+ head->next = insert;
+ insert->prev = head;
+			if (insert->next)
+				insert->next->prev = insert;
+ /* Inserted region will be dealt with on next pass */
+ }
+ /* Start eclipsed */
+ if (head->start >= region->start && head->end > region->end) {
+ unmap_page_range(head->start, region->end);
+ remove_cache_range(head, head->start, region->end);
+			head->offset += (region->end - head->start);
+			head->start = region->end;
+ }
+ /* End eclipsed */
+ if (head->start < region->start && head->end <= region->end) {
+ unmap_page_range(region->start, head->end);
+ remove_cache_range(head, region->start, head->end);
+ head->end = region->start;
+ }
+ /* Total eclipse */
+ if (head->start >= region->start && head->end <= region->end)
+ vm_destroy_region(head);
+ }
+ /* Add to ordered list */
+ for (head = current->vm->regions; head->next; head = head->next)
+ if (head->end <= region->start
+ && head->next->start >= region->end)
+ break;
+ region->next = head->next;
+ region->prev = head;
+ region->prev->next = region;
+ if (region->next)
+ region->next->prev = region;
+
+ return region;
+}
+
+/* Remove a VM Region */
+void
+vm_remove_region(VMRegion *region)
+{
+ /* Remove from list */
+ if (current->vm->regions == region)
+ current->vm->regions = region->next;
+ if (region->prev)
+ region->prev->next = region->next;
+ if (region->next)
+ region->next->prev = region->prev;
+// region->prev = region->next = NULL;
+}
+
+/* Destroy a VM Region */
+void
+vm_destroy_region(VMRegion *region)
+{
+ /* Unlink files */
+ if (region->front)
+ file_put(region->front);
+ if (region->back)
+ file_put(region->back);
+
+ /* Clean page directory */
+ unmap_page_range(region->start, region->end);
+
+ vm_remove_region(region);
+ kfree(region);
+}
+
+/* Clone a set of VM Regions */
+VMRegion *
+vm_clone_regions(VMRegion *head)
+{
+ if (!head)
+ return NULL;
+
+ VMRegion *newhead = NULL, *newcurr, *newprev = NULL;
+ VMRegion *curr = head;
+ off_t i;
+ Page *page;
+ File *file;
+
+ while (curr) {
+ newcurr = kmalloc(sizeof(VMRegion));
+ if (!newhead)
+ newhead = newcurr;
+
+ newcurr->prev = newprev;
+ newcurr->next = NULL;
+ if (newprev)
+ newprev->next = newcurr;
+
+ newcurr->start = curr->start;
+ newcurr->end = curr->end;
+ newcurr->prot = curr->prot;
+ newcurr->flags = curr->flags;
+ newcurr->offset = curr->offset;
+ /* Front (anonymous regions) */
+ if (curr->front && (curr->flags & MAP_PRIVATE)) {
+ /* Copy the file */
+ file = kmalloc(sizeof(File));
+ file->inode = inode_get(kmalloc(sizeof(Inode)));
+ file->ops = &tmpfsFileOps;
+ newcurr->front = file_get(file);
+ for (i = 0; i < curr->end - curr->start; i += 0x1000) {
+ page = page_find(curr->front->inode,
+ i + curr->offset);
+ if (page)
+ page_add(file->inode, page);
+ }
+ } else if (curr->front) {
+ newcurr->front = file_get(curr->front);
+ }
+ /* Back (always a file) */
+ if (curr->back)
+ newcurr->back = file_get(curr->back);
+
+ curr = curr->next;
+ newprev = newcurr;
+	}
+
+ return newhead;
+}
+
+/* Map an object into memory */
+void *
+mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
+{
+ VMRegion *region;
+
+ /* Find gap big enough */
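+	/* (first fit after an existing region; assumes the region list is
+	 * non-empty) */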
+ if (!addr) {
+ for (region = current->vm->regions;
+ region->next; region = region->next) {
+ if (region->next->start - region->end >= len)
+ break;
+ }
+ addr = (void *) region->end;
+ }
+
+ /* Map anonymous memory */
+ if (flags & MAP_ANONYMOUS) {
+ region = vm_create_region(addr, len, prot, flags, 0, NULL);
+ goto end;
+ }
+
+ /* Map a file */
+ if (fildes < 0 || fildes >= NFILES)
+ return (void *) -EBADF;
+ File *file = current->files->fd[fildes];
+ if (!file)
+ return (void *) -EBADF;
+ region = vm_create_region(addr, len, prot, flags, off, file);
+end:
+ if (!region)
+ return (void *) -ENOMEM;
+ return (void *) region->start;
+}
diff --git a/mem/vm.h b/mem/vm.h
new file mode 100644
index 0000000..eab852d
--- /dev/null
+++ b/mem/vm.h
@@ -0,0 +1,37 @@
+#ifndef KERNEL_MEM_VM_H
+#define KERNEL_MEM_VM_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include "../vfs/vfs.h"
+
+typedef struct VirtualMemory VirtualMemory;
+typedef struct VMRegion VMRegion;
+typedef struct VMObject VMObject;
+
+/* Virtual Memory Namespace */
+struct VirtualMemory {
+ VMRegion *regions;
+ refcount_t usage;
+};
+
+/* Structure for a Virtual Memory Map Entry */
+struct VMRegion {
+ VMRegion *prev, *next;
+ uintptr_t start, end;
+ int prot;
+ int flags;
+ off_t offset;
+ File *front, *back;
+};
+
+VMRegion *vm_create_region(void *addr, size_t len, int prot, int flags,
+ off_t offset, File *back);
+void vm_remove_region(VMRegion *region);
+void vm_destroy_region(VMRegion *region);
+VMRegion *vm_clone_regions(VMRegion *head);
+
+int verify_access(const void *addr, size_t len, int prot);
+
+#endif