Orion
Barry Importing existing Orion kernel d41a53c (2 years, 4 months ago)diff --git a/mem/frame.c b/mem/frame.c new file mode 100644 index 0000000..ee88fd8 --- /dev/null +++ b/mem/frame.c @@ -0,0 +1,207 @@ +/* + * This file handles allocating and freeing of page frames. It it the core of + * the physical memory manager. The Kernel heap and paging systems both sit on + * top of this. It keeps track of which frames of physical memory are free and + * hands them out as needed. To do this it keeps a simple bitmap of the frames. + */ + +#include <stdint.h> +#include "mem.h" +#include "paging.h" +#include "frame.h" +#include "../screen.h" + +#define INDEX(a) ((a)/32) +#define OFFSET(a) ((a)%32) + +/* Descriptor of a Frame Region */ +typedef struct FrameRegion FrameRegion; +struct FrameRegion { + uint32_t base; + size_t numFrames; + size_t usedFrames; + FrameRegion *next; + uint32_t bitmap[]; +}; + +size_t numFrames, usedFrames; +FrameRegion *regions; + +/* Count the number of pages a number of bytes occupies */ +static size_t +page_count(size_t bytes) +{ + size_t pages = bytes/0x1000; + if (bytes & 0xFFF) + pages++; + return pages; +} + +/* Set a bit in the frame bitset */ +static void +set_frame(FrameRegion *region, uint32_t idx) +{ + region->bitmap[INDEX(idx)] |= (1 << OFFSET(idx)); + region->usedFrames++; + usedFrames++; +} + +/* Clear a bit in the frame bitset */ +static void +clear_frame(FrameRegion *region, uint32_t idx) +{ + region->bitmap[INDEX(idx)] &= ~(1 << OFFSET(idx)); + region->usedFrames--; + usedFrames--; +} + +/* Test a bit in the frame bitset */ +static uint32_t +test_frame(FrameRegion *region, uint32_t idx) +{ + return (region->bitmap[INDEX(idx)] & (1 << OFFSET(idx))); +} + +/* Get n bits from position p */ +static inline uint32_t +get_bits(uint32_t x, uint8_t p, uint8_t n) +{ + return (x >> (p+1-n)) & ~(~0 << n); +} + +/* Find first free frame */ +static uint32_t +find_frames(FrameRegion *region, size_t frames) +{ + /* TODO: best fit */ + /* FIXME: frames can be at most 32 */ + uint32_t i, j; + for (i = 0; i < INDEX(region->numFrames); i++) { + if (!~region->bitmap[i]) + continue; + for (j = 0; j < 32; j++) { + if (get_bits(~region->bitmap[i], j, frames) == ~(~0 << + frames)) + return (i*32+j)-frames+1; + /* TODO: check across uint32_t boundaries */ + } + } + return (uint32_t) -1; +} + +/* Allocate a set of contiguous page frames */ +uint32_t +alloc_frames(size_t frames) +{ + uint32_t idx; + size_t i; + /* Walk the regions, first fit */ + FrameRegion *region; + for (region = regions; region; region = region->next) { + idx = find_frames(region, frames); + if (idx != -1) + break; + } + if (idx == -1) + return -1; + + for (i = 0; i < frames; i++) + set_frame(region, idx + i); + + return (uint32_t) region->base + (idx << 12); +} + +/* Free a page frame */ +void +free_frame(uint32_t frame) +{ + /* Walk the regions */ + FrameRegion *region = regions; + while (region) { + if ((uint32_t) region > frame) { + frame -= region->base; + clear_frame(region, frame >> 12); + break; + } + region = region->next; + } +} + +/* Setup the frame allocator */ +void +init_frames(uint32_t memMapSize, struct E820Entry *memMap) +{ + /* Relocate the memory map */ + struct E820Entry *top = (void *) 0x20000; + if (memMap < top) { + memcpy(top, memMap, memMapSize); + memMap = (void *) top; + } + + /* + * When the OS starts, the bootloader passes the Kernel a map of memory. + * This map must be read, so the memory manager can avoid bad areas of + * memory, that as mapped to hardware, ACPI, etc. 
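For reference, the bitmap bookkeeping above packs one bit per 4 KiB frame into 32-bit words: INDEX() picks the word, OFFSET() picks the bit, and find_frames() scans for a run of clear bits. A minimal standalone sketch of the single-frame case follows (names here are illustrative, not kernel symbols); the kernel sidesteps the partial final word by pre-setting its unused top bits in init_frames().

#include <stdint.h>
#include <stddef.h>

#define IDX(a) ((a) / 32)   /* which 32-bit word holds frame a */
#define OFF(a) ((a) % 32)   /* which bit inside that word */

/* Return the index of the first clear bit, or (uint32_t)-1 if the
 * bitmap is full.  Equivalent in spirit to find_frames() with a
 * run length of one. */
static uint32_t
first_free(const uint32_t *bitmap, size_t nframes)
{
    for (uint32_t i = 0; i < IDX(nframes); i++) {
        if (!~bitmap[i])                /* every bit set - word is full */
            continue;
        for (uint32_t j = 0; j < 32; j++)
            if (!(bitmap[i] & (1u << j)))
                return i * 32 + j;
    }
    return (uint32_t) -1;
}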
+ * The frame allocator should only create a bitmap for frames that it + * can actually allocate. This means setting avoiding bad areas, and + * avoiding the Kernel. + */ + uint32_t i, j; + FrameRegion *prev = 0, *head = regions; + uint32_t bumpAlloc = 0x1000; + for (i = 0; i < memMapSize / sizeof(struct E820Entry); i++) { + kprintf("MemMap: base=%#.8x%.8x, length=%#.8x%.8x, type=%d", + memMap[i].baseHigh, memMap[i].base, + memMap[i].lengthHigh, memMap[i].length, + memMap[i].type); + if (memMap[i].baseHigh > 0 + || memMap[i].base > 0xFFFFFFFF + || memMap[i].type != 1) + continue; + + /* Usable region - create bitmap */ + size_t frameSize = memMap[i].length / 0x1000; + size_t bitmapSize = (frameSize / 8); + head = (FrameRegion *) bumpAlloc; + bumpAlloc += bitmapSize + sizeof(FrameRegion); + head->base = memMap[i].base; + head->numFrames = frameSize; + head->usedFrames = 0; + memset(head->bitmap, 0, bitmapSize); + /* Set top bits to 1, so they're never allocated */ + for (j = OFFSET(head->numFrames); j < 32; j++) + set_frame(head, head->numFrames + j); + if (prev) prev->next = head; + else regions = head; + prev = head; + } + + /* Regions to be remapped */ + struct {uint32_t start, end;} remaps[] = { + {.start = 0x0000, .end = bumpAlloc}, /* PMM bitmaps */ + {.start = 0xB0000, .end = 0xC0000 }, /* VGA memory */ + {.start = 0x100000, .end = 0x180000 }, /* Kernel */ + }; + + kprintf("Bump allocator top @ %#.8x", bumpAlloc); + + /* Check bitmaps */ + usedFrames = 0; + numFrames = 0; + FrameRegion *region = regions; + uint32_t regionEnd; + while (region) { + numFrames += region->numFrames; + regionEnd = region->base + (region->numFrames * 0x1000); + /* Iterate the remaps[] to find overlapping regions */ + for (i = 0; i < sizeof(remaps)/sizeof(remaps[0]); i++) + for (j = remaps[i].start; j < remaps[i].end + && j >= region->base && j < regionEnd; + j += 0x1000) + set_frame(region, (j - region->base) >> 12); + region = region->next; + } + + if (numFrames < 1024) /* 4MB */ + panic("Not enough memory"); +} diff --git a/mem/frame.h b/mem/frame.h new file mode 100644 index 0000000..ba68567 --- /dev/null +++ b/mem/frame.h @@ -0,0 +1,25 @@ +#ifndef KERNEL_MEM_FRAME_H +#define KERNEL_MEM_FRAME_H + +#include <stdint.h> +#include <stddef.h> + +/* Structure of an E820 entry */ +struct E820Entry { + uint32_t size; + uint32_t base, baseHigh; + uint32_t length, lengthHigh; + uint32_t type; +} __attribute__((packed)); + +/* Structure of an E820 Entry */ +//struct E820Entry { +// uint64_t base, length; +// uint32_t type, attr; +//} __attribute__((packed)); + +uint32_t alloc_frames(size_t frames); +void free_frame(uint32_t frame); +void init_frames(uint32_t memMapSize, struct E820Entry *memMap); + +#endif diff --git a/mem/heap.c b/mem/heap.c new file mode 100644 index 0000000..9f9bbe2 --- /dev/null +++ b/mem/heap.c @@ -0,0 +1,125 @@ +/* + * This file contains the functions related to the Kernel's heap. It uses a + * simple method of allocating and freeing blocks from a pool in the Kernel's + * memory space. This heap will be present in every Page Directory, so can be + * used to store any Kernel data structures. 
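init_frames() above only turns type-1 (usable) E820 entries below 4 GiB into FrameRegions and skips everything else. A hedged, self-contained sketch of that filter, reusing the E820Entry layout declared in frame.h (the helper name is made up for illustration):

#include <stdint.h>

struct E820Entry {
    uint32_t size;
    uint32_t base, baseHigh;
    uint32_t length, lengthHigh;
    uint32_t type;
} __attribute__((packed));

/* Sum the bytes of all usable (type 1) entries below 4 GiB -
 * the same entries init_frames() turns into FrameRegions. */
static uint64_t
usable_below_4g(const struct E820Entry *map, uint32_t mapSize)
{
    uint64_t total = 0;
    for (uint32_t i = 0; i < mapSize / sizeof(*map); i++) {
        if (map[i].type != 1 || map[i].baseHigh > 0)
            continue;
        total += map[i].length;
    }
    return total;
}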
+ */ + +#include <stdint.h> +#include <stddef.h> +#include "mem.h" +#include "heap.h" +#include "frame.h" +#include "../vfs/cache.h" +#include "../screen.h" +#include "../spinlock.h" + +#define KHEAP_START 0x200000 /* 2 MB */ +#define KHEAP_END 0x800000 /* 8 MB */ +#define BLOCK_SIZE 16 + +/* Structure for a Memory Header */ +typedef struct Header { + struct Header *next, *prev; + size_t size; + char magic[4]; +} Header; /* 16 bytes */ + +size_t objectsAllocated = 0; +size_t freeSpace = KHEAP_END - KHEAP_START; + +/* Allocate a region of the heap */ +void * +kmalloc(size_t size) +{ + size_t blockSize, gapSize; + uintptr_t blockEnd; + Header *prev, *head, *next; + head = prev = (void *) KHEAP_START; + next = NULL; + objectsAllocated++; + + /* Minimum allocation */ + if (size % BLOCK_SIZE) + size += BLOCK_SIZE - (size % BLOCK_SIZE); + freeSpace -= size + sizeof(Header); + + /* Low-memory VFS cache reaper */ + if (freeSpace < 0x10000) /* 64 KB */ + cache_reaper(); + + /* Block does not exist, create heap */ + if (head->prev != head) { + head->prev = head; + head->next = NULL; + head->size = size; + memcpy(head->magic, "HEAP", 4); + memset((void *) (head + 1), 0, size); + return (void *) (head + 1); + } + + /* Find gap */ + while (head->next) { + next = head->next; + blockSize = sizeof(Header) + head->size; + blockEnd = (uintptr_t) head + blockSize; + gapSize = (size_t) next - blockEnd; + prev = head; + + /* Fit in gap */ + if (gapSize >= size + sizeof(Header)) { + head = (void *) blockEnd; + head->next = next; + head->prev = prev; + prev->next = head; + next->prev = head; + head->size = size; + memcpy(head->magic, "HEAP", 4); + memset((void *) (head + 1), 0, size); + return (void *) (head + 1); + } + + head = head->next; + } + + /* Add to end */ + blockSize = sizeof(Header) + head->size; + blockEnd = (uintptr_t) head + blockSize; + gapSize = (size_t) KHEAP_END - blockEnd; + /* Fit in gap */ + if (gapSize >= size + sizeof(Header)) { + prev = head; + head = (void *) blockEnd; + head->next = NULL; + head->prev = prev; + prev->next = head; + head->size = size; + memcpy(head->magic, "HEAP", 4); + memset((void *) (head + 1), 0, size); + return (void *) (head + 1); + } + + panic("Kernel heap exhausted"); +} + +/* Free an allocated region of the heap */ +void +_kfree(void *addr, char *file, int line) +{ + Header *prev, *head, *next; + head = (Header *) addr - 1; + objectsAllocated--; + freeSpace += head->size + sizeof(Header); + + if (memcmp(head->magic, "HEAP", 4)) + panic("Bad Kernel heap reference\n" + "Invalid target @ %#.8x (%s:%d)", addr, file, line); + prev = head->prev; + next = head->next; + memset(head, 0, sizeof(Header)); + + if (prev != head) + prev->next = next; + if (next) + next->prev = prev; +} diff --git a/mem/heap.h b/mem/heap.h new file mode 100644 index 0000000..1923583 --- /dev/null +++ b/mem/heap.h @@ -0,0 +1,11 @@ +#ifndef KERNEL_MEM_HEAP_H +#define KERNEL_MEM_HEAP_H + +#include <stdint.h> +#include <stddef.h> + +void *kmalloc(size_t size); +void _kfree(void *addr, char *file, int line); +#define kfree(a) _kfree(a, __FILE__, __LINE__) + +#endif diff --git a/mem/mem.c b/mem/mem.c new file mode 100644 index 0000000..80a7c89 --- /dev/null +++ b/mem/mem.c @@ -0,0 +1,106 @@ +/* + * This file contains a few routines for the manipulation of memory and strings. + * The functions would normally be part of a C library, but this is for the + * Kernel. The functions are standalone, and have no dependencies - they can be + * called immediately after boot. 
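In the heap above, every allocation is a 16-byte Header followed directly by its payload, and request sizes are rounded up to BLOCK_SIZE, so kfree() recovers the bookkeeping with a single pointer subtraction and a magic check. A small standalone illustration of that arithmetic (helper names are not kernel symbols):

#include <stddef.h>

typedef struct Header {
    struct Header *next, *prev;
    size_t size;
    char magic[4];
} Header;                       /* 16 bytes on a 32-bit build */

/* Round a request up to the 16-byte BLOCK_SIZE, as kmalloc() does. */
static size_t
round_up(size_t size)
{
    return (size + 15) & ~(size_t) 15;
}

/* Payload address for a header, and header address for a payload -
 * the same (head + 1) / ((Header *) addr - 1) trick used above. */
static void *payload(Header *h)  { return (void *) (h + 1); }
static Header *header(void *p)   { return (Header *) p - 1; }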
+ */ + +#include <stdint.h> +#include <stddef.h> + +/* Fill a region of memory with the specified byte */ +void * +memset(void *s, int c, size_t n) +{ + unsigned char *a = s; + if (n > 0) { + while (n-- > 0) + *a++ = c; + } + return s; +} + +/* Copy one region of memory to another */ +void * +memcpy(void *dest, void *src, size_t n) +{ + unsigned char *a = (unsigned char *) dest, + *b = (unsigned char *) src; + while (n-- > 0) + *a++ = *b++; + return dest; +} + +/* Compare two regions of memory */ +int +memcmp(void *s1, void *s2, size_t n) +{ + unsigned char *a = (unsigned char *) s1, + *b = (unsigned char *) s2; + while (n-- > 0) + if (*a++ != *b++) + return a[-1] - b[-1]; + return 0; +} + +/* Find the length of a string */ +size_t +strlen(char *s) +{ + if (!s) + return 0; + size_t i; + for (i = 0; s[i]; i++); + return i; +} + +/* Find the length of a string up to maximum */ +size_t +strnlen(char *s, size_t maxlen) +{ + if (!s) + return 0; + size_t i; + for (i = 0; s[i] && i <= maxlen; i++); + return i; +} + +/* Compare two strings */ +int +strcmp(char *s1, char *s2) +{ + for (; *s1 == *s2 && *s1 && *s2; s1++, s2++); + return *(unsigned char *) s1 - *(unsigned char *) s2; +} + +/* Compare two limited strings */ +int +strncmp(char *s1, char *s2, size_t n) +{ + if (!n--) return 0; + for (; *s1 == *s2 && *s1 && *s2 && n; s1++, s2++, n--); + return *(unsigned char *) s1 - *(unsigned char *) s2; +} + +/* Copy a string */ +char * +strcpy(char *dest, const char *src) +{ + char *ret = dest; + while (*src) + *dest++ = *src++; + *dest = '\0'; + return ret; +} + +/* Copy a limited string */ +char * +strncpy(char *dest, const char *src, size_t n) +{ + char *ret = dest; + while (*src && n--) + *dest++ = *src++; + *dest = '\0'; + return ret; +} + diff --git a/mem/mem.h b/mem/mem.h new file mode 100644 index 0000000..fa6dfc4 --- /dev/null +++ b/mem/mem.h @@ -0,0 +1,17 @@ +#ifndef KERNEL_MEM_H +#define KERNEL_MEM_H + +#include <stdint.h> +#include <stddef.h> + +void *memset(void *s, int c, size_t n); +void *memcpy(void *dest, void *src, size_t n); +int memcmp(void *s1, void *s2, size_t n); +size_t strlen(char *s); +size_t strnlen(char *s, size_t maxlen); +int strcmp(const char *s1, const char *s2); +int strncmp(const char *s1, const char *s2, size_t n); +char *strcpy(char *dest, const char *src); +char *strncpy(char *dest, const char *src, size_t n); + +#endif diff --git a/mem/page.S b/mem/page.S new file mode 100644 index 0000000..5e1dd4a --- /dev/null +++ b/mem/page.S @@ -0,0 +1,60 @@ +; This file implements some simple low level assembly helper functions for the +; paging system. The functions are done in assembly for the speed benefit. +; They may be called frequently by the memory manager, so the benefit will be +; significant. 
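Two of the string helpers above deviate slightly from their ISO C namesakes: strnlen() loops while i <= maxlen, so for longer strings it can return maxlen + 1, and strncpy() always NUL-terminates (writing up to n + 1 bytes) instead of zero-padding. For comparison, a standard-shaped strnlen (illustrative only, not kernel code):

#include <stddef.h>

/* ISO-style strnlen: never examines more than maxlen bytes and
 * never returns more than maxlen. */
static size_t
strnlen_ref(const char *s, size_t maxlen)
{
    size_t i;
    for (i = 0; i < maxlen && s[i]; i++)
        ;
    return i;
}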
+ +[bits 32] + +; Enable paging +[global enable_paging] +enable_paging: + mov edx, cr0 + or edx, 0x80000000 + mov cr0, edx + ret + +; Disable paging +[global disable_paging] +disable_paging: + mov edx, cr0 + and edx, 0x7FFFFFFF + mov cr0, edx + ret + +; Copy the contents of a page frame +[global copy_page_frame] +copy_page_frame: +; push ebx +; pushf +; mov esi, [esp + 12] +; mov edi, [esp + 16] +; call disable_paging +; mov ecx, 1024 +; rep movsd +; call enable_paging +; popf +; pop ebx +; ret + push ebx + pushf + cli + mov ebx, [esp + 12] + mov ecx, [esp + 16] + mov edx, cr0 + and edx, 0x7FFFFFFF + mov cr0, edx + mov edx, 1024 +.loop: + mov eax, [ebx] + mov [ecx], eax + add ebx, 4 + add ecx, 4 + dec edx + jnz .loop + mov edx, cr0 + or edx, 0x80000000 + mov cr0, edx + sti + popf + pop ebx + ret diff --git a/mem/pagefault.c b/mem/pagefault.c new file mode 100644 index 0000000..14219b7 --- /dev/null +++ b/mem/pagefault.c @@ -0,0 +1,264 @@ +/* + * This is the page fault handler. It handles/dispatches all handlers for page + * faults. This includes various tasking functions. + */ + +#include <stdint.h> +#include <signal.h> +#include "paging.h" +#include "../vfs/cache.h" +#include "../vfs/inode.h" +#include "../vfs/tmpfs/fs.h" +#include "../mem/heap.h" +#include "../mem/mem.h" +#include "../task/task.h" +#include "../proc/proc.h" +#include "../screen.h" + +extern size_t numFrames, usedFrames; + +void copy_page_frame(void *src, void *dest); + +/* Copy-On-Write */ +static void +copy_on_write(VMRegion *region, uintptr_t addr) +{ + Page *page = NULL; + File *front = region->front, + *back = region->back; + off_t offset = ((addr & ~0xFFF) - region->start) + region->offset; + page_t *pg = get_page((void *) addr); + + /* Create front if it doesn't exist and is needed */ + uint8_t private = region->flags & MAP_PRIVATE; + uint8_t sharedanon = (region->flags & MAP_SHARED) && + (region->flags & MAP_ANONYMOUS); + uint8_t created = 0; + if (!front && (private || sharedanon)) { + /* + * A private mapping will always write to the front. A shared + * mapping will write to the back. If a shared mapping is + * anonymous, then the back is the front. The front must be + * created if it is required - which means if the mapping is + * private, or if the mapping is shared & anonymous. 
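copy_page_frame above copies 1024 dwords (one 4 KiB frame) from its first argument to its second with interrupts masked and paging switched off via CR0, so both pointers are treated as physical addresses. A C-level sketch of what the copy loop amounts to (the real routine stays in assembly precisely because it runs with paging disabled):

#include <stdint.h>

/* What the assembly loop boils down to: copy one 4 KiB frame,
 * one 32-bit word at a time.  src/dest are physical addresses,
 * which is why the real routine turns paging off around the copy. */
static void
copy_frame_words(uint32_t src, uint32_t dest)
{
    volatile uint32_t *s = (volatile uint32_t *) src;
    volatile uint32_t *d = (volatile uint32_t *) dest;
    for (int i = 0; i < 1024; i++)
        d[i] = s[i];
}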
+ */ + front = kmalloc(sizeof(File)); + front->inode = inode_get(kmalloc(sizeof(Inode))); + front->ops = &tmpfsFileOps; + region->front = file_get(front); + created++; + } + + /* Find original page frame */ + Inode *inode; + if (!page && front) { + inode = front->inode; + ASSERT(inode); + page = page_find(inode, offset); + } + if (!page && back) { + inode = back->inode; + ASSERT(inode); + page = page_find(inode, offset); + } + ASSERT(page); + + /* Copy already happened, just link */ + if (page->usage == 1 && page->frame != zeroFrame) { + *pg |= PTE_WRITE; + return; + } + /* Put that page, and create a new one */ + *pg = 0; + alloc_page(pg, PTE_PRESENT | PTE_USER | PTE_WRITE, -1); + copy_page_frame((void *) PG_ADDR(page->frame), + (void *) PG_ADDR(*pg)); + page_remove(inode, page); + page = page_create(front->inode, PG_ADDR(*pg), offset); +} + +/* Handle a not-present read page fault */ +static void +not_present_read(VMRegion *region, uintptr_t addr) +{ + Page *page; + File *front = region->front, + *back = region->back; + off_t offset = ((addr & ~0xFFF) - region->start) + region->offset; + page_t *pg = get_page((void *) addr); + + /* Handle uninitialised anonymous regions */ + if (!front && (region->flags & MAP_ANONYMOUS)) { + front = kmalloc(sizeof(File)); + front->inode = inode_get(kmalloc(sizeof(Inode))); + front->ops = &tmpfsFileOps; + region->front = file_get(front); + } + + /* Attempt to use front */ + if (front) { + page = page_find(front->inode, offset); + if (page) { + page_get(page); + alloc_page(pg, PTE_PRESENT | PTE_USER, page->frame); + return; + } + if (region->flags & MAP_ANONYMOUS) { + /* Must be anonymous, zero-fill */ + alloc_page(pg, PTE_PRESENT | PTE_USER, zeroFrame); + page_create(front->inode, zeroFrame, offset); + return; + } + } + + /* Use back */ + ASSERT(back); + page = page_find(back->inode, offset); + if (page) { + page_get(page); + alloc_page(pg, PTE_PRESENT | PTE_USER, page->frame); + return; + } + /* Create new block cache entry */ + alloc_page(pg, PTE_PRESENT | PTE_USER, -1); + file_mmap(back, (void *) PG_ADDR(addr), 0x1000, offset); + page_create(back->inode, PG_ADDR(*pg), offset); +} + +/* Handle a not-present write page fault */ +static void +not_present_write(VMRegion *region, uintptr_t addr) +{ + Page *page = NULL; + File *front = region->front, + *back = region->back; + off_t offset = ((addr & ~0xFFF) - region->start) + region->offset; + page_t *pg = get_page((void *) addr); + + /* Handle uninitialised anonymous regions */ + if (!front && ((region->flags & MAP_PRIVATE) + || (region->flags & MAP_ANONYMOUS))) { + /* + * This applies to all private regions, anonymous or not. + * Unless the region is shared, the process should write to the + * front, which will be the private copy. If the region is + * shared, and also anonymous, then the write will occur to the + * front too. 
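The front/back rules spelled out in these handlers reduce to a small decision: a private mapping writes to its front (the private copy), a shared mapping writes through to its back, and a shared anonymous mapping uses its front as the back. A hedged sketch of that rule as a helper (the helper and enum are illustrative, not kernel symbols):

#include <sys/mman.h>

/* Which object should absorb a write fault for this region?
 * Mirrors the comments in copy_on_write()/not_present_write(). */
enum write_dest { WRITE_FRONT, WRITE_BACK };

static enum write_dest
write_target(int flags)
{
    if (flags & MAP_SHARED) {
        /* Shared + anonymous: the front doubles as the back. */
        return (flags & MAP_ANONYMOUS) ? WRITE_FRONT : WRITE_BACK;
    }
    /* MAP_PRIVATE: writes land in the private front copy. */
    return WRITE_FRONT;
}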
+ */ + front = kmalloc(sizeof(File)); + front->inode = inode_get(kmalloc(sizeof(Inode))); + front->ops = &tmpfsFileOps; + region->front = file_get(front); + } + + /* Shared region, write-through to back */ + if (region->flags & MAP_SHARED) { + if (region->flags & MAP_ANONYMOUS) + back = front; + ASSERT(back); + page = page_find(back->inode, offset); + if (page) { + page_get(page); + alloc_page(pg, PTE_PRESENT | PTE_USER | PTE_WRITE, + page->frame); + return; + } + *pg = 0; + alloc_page(pg, PTE_PRESENT | PTE_USER | PTE_WRITE, -1); + memset((void *) PG_ADDR(addr), 0, 0x1000); + page_create(back->inode, PG_ADDR(*pg), offset); + return; + } + + /* Private region, copy to front */ + alloc_page(pg, PTE_PRESENT | PTE_USER | PTE_WRITE, -1); + if (front) + page = page_find(front->inode, offset); + if (page) { + copy_page_frame((void *) PG_ADDR(page->frame), + (void *) PG_ADDR(*pg)); + page_remove(front->inode, page); + page_create(front->inode, PG_ADDR(*pg), offset); + return; + } + + /* Anonymous region, zero-fill */ + if (region->flags & MAP_ANONYMOUS) { + memset((void *) PG_ADDR(addr), 0, 0x1000); + page_create(front->inode, PG_ADDR(*pg), offset); + return; + } + + /* Use back */ + ASSERT(back); + page = page_find(back->inode, offset); + if (page) { + copy_page_frame((void *) PG_ADDR(page->frame), + (void *) PG_ADDR(*pg)); + page_remove(back->inode, page); + } else { + file_mmap(back, (void *) PG_ADDR(addr), 0x1000, offset); + } + page_create(front->inode, PG_ADDR(*pg), offset); +} + +/* Page fault handler */ +void +page_fault_handler(InterruptFrame *frame) +{ + uintptr_t addr; + asm volatile("mov %%cr2, %0" : "=r" (addr)); + uint8_t present = frame->errCode & (1 << 0); + uint8_t write = frame->errCode & (1 << 1); + uint8_t user = frame->errCode & (1 << 2); + + /* Iterate VM Regions */ + VMRegion *region; + for (region = current->vm->regions; region; region = region->next) { + if (region->start <= addr && region->end > addr) + break; + } + if (!region && current->stack) { + region = current->stack; + if (region->start > addr || region->end <= addr) + region = NULL; + } + if (!region && current->tls) { + region = current->tls; + if (region->start > addr || region->end <= addr) + region = NULL; + } + /* Not in a region */ + if (!region) { + page_t *pg = get_page((void *) addr); + panic("Page Fault [%d:%d] (%#.8x -> %#.8x [tbl:%d, pg:%d][%#.8x]), %s, %s, %s", + current->tgid, current->tid, frame->eip, + addr, (addr>>12) / 1024, (addr>>12) % 1024, *pg, + present ? "present" : "not present", + write ? "write" : "read", + user ? "user" : "kernel"); + } + + if (user && write && !(region->prot & PROT_WRITE)) + return (void) kill(current->tgid, SIGSEGV); + + if (present && write) + return copy_on_write(region, addr); + + if (write) + return not_present_write(region, addr); + else + return not_present_read(region, addr); +} + +/* Early (pre-VFS/tasking) page fault handler */ +void +early_page_fault_handler(InterruptFrame *frame) +{ + uintptr_t addr; + asm volatile("mov %%cr2, %0" : "=r" (addr)); + if (!PG_ADDR(addr)) + panic("Null dereference @ %#.8x", frame->eip); + alloc_page(get_page((void *) addr), + PTE_PRESENT | PTE_WRITE | PTE_GLOBAL, -1); +} diff --git a/mem/paging.c b/mem/paging.c new file mode 100644 index 0000000..b44e843 --- /dev/null +++ b/mem/paging.c @@ -0,0 +1,228 @@ +/* + * This file contains all functions used to manipulate the virtual address + * spaces. 
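page_fault_handler() above reads the faulting address from CR2 and decodes the x86 error code directly: bit 0 set means a protection fault on a present page, bit 1 a write access, bit 2 a fault taken in user mode. A compact sketch of that decoding step (the struct and helper are illustrative):

#include <stdint.h>

struct fault_info {
    uintptr_t addr;     /* faulting linear address (CR2) */
    uint8_t present;    /* 1 = protection fault on a present page */
    uint8_t write;      /* 1 = write access, 0 = read */
    uint8_t user;       /* 1 = fault raised in user mode */
};

static struct fault_info
decode_fault(uint32_t errCode)
{
    struct fault_info f;
    asm volatile("mov %%cr2, %0" : "=r" (f.addr));
    f.present = errCode & (1 << 0);
    f.write   = !!(errCode & (1 << 1));
    f.user    = !!(errCode & (1 << 2));
    return f;
}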
It has a static Kernel page directory and table, which it uses to + * initialsed an identity-paged environment for the Kernel to work in. This is + * enough for the heap to function in. The file also exposes several functions + * that allow a page directory to be manipulated and have pages added and moved. + * These functions are used by other components of the Kernel - mostly the heap + * and IPC. There are also functions to create new and destroy existing page + * directories. The paging system also implements features like copy-on-write. + */ + +#include <stdint.h> +#include <sys/mman.h> +#include "frame.h" +#include "heap.h" +#include "mem.h" +#include "paging.h" +#include "../vfs/vfs.h" +#include "../proc/proc.h" +#include "../io.h" +#include "../screen.h" + +Spinlock quickPageLock; + +page_table_t kernelDir; +page_t zeroFrame; + +void enable_paging(void); +void disable_paging(void); +void copy_page_frame(void *src, void *dest); + +/* Switch page directory */ +static void +switch_dir(page_dir_t dir) +{ + asm volatile("mov %0, %%cr3" :: "r" (dir)); +} + +/* Allocate a page a frame */ +void +alloc_page(page_t *page, uint16_t flags, page_t frame) +{ + page_t *mappings = (void *) 0xFFC00000; + page_table_t *tables = (void *) 0xFFFFF000; + if ((tables[(page - mappings) / 1024] & PDE_PRESENT) == 0) + return; + if (*page & 0xFFFFF000) + return; + + if (frame == (page_t) -1) + frame = alloc_frames(1); + if (frame == (page_t) -1) + return; + + *page = frame | flags; + flush_tlb((page - mappings) << 12); +} + +/* Release a page's frame */ +void +free_page(page_t *page) +{ + page_t *mappings = (void *) 0xFFC00000; + page_table_t *tables = (void *) 0xFFFFF000; + if ((tables[(page - mappings) / 1024] & PDE_PRESENT) == 0) + return; + if ((*page & 0xFFFFF000) == 0) + return; + + free_frame(*page & 0xFFFFF000); + *page = 0x00000000; + flush_tlb((page - mappings) << 12); +} + +/* Get Page Table Entry from virtual address */ +page_t * +get_page(void *addr) +{ + page_t *mappings = (void *) 0xFFC00000; + page_table_t *tables = (void *) 0xFFFFF000; + uint32_t address = (uint32_t) addr >> 12; + uint32_t tbl = address / 1024; + /* Create table not present */ + if ((tables[tbl] & PDE_PRESENT) == 0) { + tables[tbl] = alloc_frames(1) + | PDE_PRESENT | PDE_WRITE | PDE_USER; + memset((void *) mappings + (tbl * 0x1000), 0, 0x1000); + } + return &mappings[address]; +} + +/* Clone a page directory */ +page_dir_t +clone_dir(void) +{ + page_table_t *oldTables = (void *) 0xFFFFF000; + page_table_t *newTables = (void *) 0xFFFFE000; + page_t *oldTable, *newTable; + page_dir_t dir = alloc_frames(1); + uint16_t i, tbl, pg; + + /* Temporarily link new paging structures into current directory */ + page_table_t restore = oldTables[1022]; + oldTables[1022] = dir | PDE_PRESENT | PDE_WRITE; + for (i = 0; i < 1024; i++) + flush_tlb((uintptr_t) newTables + (0x1000 * i)); + + /* Iterate tables */ + for (tbl = 0; tbl < 1022; tbl++) { + if ((oldTables[tbl] & PDE_PRESENT) == 0) + continue; + + /* Link Kernel tables */ + if (tbl < 2 || tbl >= 1008) { /* TODO: define kernel mem */ + newTables[tbl] = oldTables[tbl]; + continue; + } + + /* Copy everything else */ + newTables[tbl] = alloc_frames(1) | PG_ATTR(oldTables[tbl]); + oldTable = (page_t *) 0xFFC00000 + (tbl * 1024); + newTable = (page_t *) 0xFF800000 + (tbl * 1024); + for (pg = 0; pg < 1024; pg++) { + if ((oldTable[pg] & PTE_PRESENT) == 0) { + newTable[pg] = 0; + continue; + } + + /* Copy-On-Write behaviour */ + if (tbl < 960) { + oldTable[pg] &= ~PTE_WRITE; + 
flush_tlb((uintptr_t) (((tbl * 1024) + pg) << 12)); + newTable[pg] = oldTable[pg]; + } else { + newTable[pg] = alloc_frames(1) | PG_ATTR(oldTable[pg]); + copy_page_frame((void *) PG_ADDR(oldTable[pg]), + (void *) PG_ADDR(newTable[pg])); + } + /* FIXME */ + } + } + newTables[1023] = oldTables[1022]; + + /* Unlink paging structures */ + oldTables[1022] = restore; + for (i = 0; i < 1024; i++) + flush_tlb((uintptr_t) newTables + (0x1000 * i)); + + return dir; +} + +/* Free all (copied) pages in the current directory */ +void +clean_dir(void) +{ + page_t *mappings = (void *) 0xFFC00000; + page_table_t *tables = (void *) 0xFFFFF000; + page_t *pages; + uint16_t tbl, pg; + for (tbl = 2; tbl < 1008; tbl++) { + if ((tables[tbl] & PDE_PRESENT) == 0) + continue; + pages = mappings + (tbl * 1024); + for (pg = 0; pg < 1024; pg++) { + if ((pages[pg] & PDE_PRESENT) == 0) + continue; + free_page(pages + pg); + } + } +} + +/* Quickly map a page frame into view for temporary use */ +page_t +quick_page(uintptr_t frame) +{ + page_t *mappings = (void *) 0xFFC00000; + page_t old; + old = mappings[2047]; + mappings[2047] = PG_ADDR(frame) | PG_ATTR(old); + flush_tlb(0x7FF000); + return PG_ADDR(old); +} + +/* Initialise paging */ +void +init_paging(void) +{ + zeroFrame = alloc_frames(1); + memset((void *) zeroFrame, 0, 0x1000); + + uint16_t tbl, pg; + page_t *table; + page_table_t *kernelTables; + kernelDir = alloc_frames(1); + kernelTables = (page_table_t *) kernelDir; + for (tbl = 0; tbl < 1024; tbl++) + kernelTables[tbl] = 0x00000000 | PDE_WRITE; + for (tbl = 0; tbl < 2; tbl++) { + table = (void *) alloc_frames(1); + kernelTables[tbl] = ((page_table_t) table) + | PDE_WRITE | PDE_PRESENT; + for (pg = 0; pg < 1024; pg++) { + if (!tbl && !pg) + continue; + table[pg] = (((tbl * 1024) + pg) << 12) + | PTE_WRITE | PTE_PRESENT | PTE_GLOBAL; + } + } + /* Map the directory into itself */ + kernelTables[1023] = kernelDir | PDE_WRITE | PDE_PRESENT; + + /* Use Kernel directory */ + switch_dir(kernelDir); + register_exception(14, early_page_fault_handler); + enable_paging(); + + /* Identity page the APIC registers */ + *get_page((void *) lapicPtr) = lapicPtr + | PTE_PRESENT | PTE_WRITE | PTE_GLOBAL; + *get_page((void *) ioapicPtr) = ioapicPtr + | PTE_PRESENT | PTE_WRITE | PTE_GLOBAL; + + /* Allocate Kernel stack */ + uintptr_t stk; + for (stk = 0xF0400000; stk < 0xF0800000; stk += 0x1000) + alloc_page(get_page((void *) stk), + PTE_PRESENT | PTE_WRITE | PTE_USER, -1); +} diff --git a/mem/paging.h b/mem/paging.h new file mode 100644 index 0000000..a3acaf2 --- /dev/null +++ b/mem/paging.h @@ -0,0 +1,58 @@ +#ifndef KERNEL_MEM_PAGING_H +#define KERNEL_MEM_PAGING_H + +#include <stdint.h> +#include "../proc/proc.h" +#include "../spinlock.h" + +#define PG_ADDR(pg) (pg & 0xFFFFF000) +#define PG_ATTR(pg) (pg & 0x00000FFF) + +#define QUICK_PAGE ((void *) 0x7FF000) + +typedef uint32_t page_t; +typedef uint32_t page_table_t; +typedef uint32_t page_dir_t; + +/* Page flags */ +enum PageFlag { + PTE_PRESENT = (1 << 0), + PTE_WRITE = (1 << 1), + PTE_USER = (1 << 2), + PTE_THROUGH = (1 << 3), + PTE_NOCACHE = (1 << 4), + PTE_ACCESS = (1 << 5), + PTE_DIRTY = (1 << 6), + PTE_GLOBAL = (1 << 8), +}; +/* Page Table flags */ +enum PageTableFlag { + PDE_PRESENT = (1 << 0), + PDE_WRITE = (1 << 1), + PDE_USER = (1 << 2), + PDE_THROUGH = (1 << 3), + PDE_NOCACHE = (1 << 4), + PDE_ACCESS = (1 << 5), +}; + +/* Flush Translation Lookaside Buffer */ +static inline void +flush_tlb(uintptr_t addr) +{ + asm volatile("invlpg (%0)" :: "r" (addr) : "memory"); +} + 
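The constants in get_page() work because init_paging() maps each directory into itself through its last entry (kernelTables[1023] = kernelDir ...): with that recursive slot, every page table becomes visible at 0xFFC00000 and the directory itself at 0xFFFFF000. A sketch of the resulting address arithmetic, assuming that recursive mapping is in place (helper names are illustrative):

#include <stdint.h>

typedef uint32_t page_t;
typedef uint32_t page_table_t;

/* With the last PDE pointing at the directory itself, the PTE for a
 * virtual address is indexed by its page number, and the PDE by its
 * table number - the same arithmetic get_page() uses. */
static page_t *
pte_for(void *addr)
{
    page_t *mappings = (page_t *) 0xFFC00000;
    return &mappings[(uint32_t) addr >> 12];   /* one PTE per 4 KiB page */
}

static page_table_t *
pde_for(void *addr)
{
    page_table_t *tables = (page_table_t *) 0xFFFFF000;
    return &tables[(uint32_t) addr >> 22];     /* one PDE per 4 MiB table */
}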
+extern Spinlock quickPageLock;
+extern page_t zeroFrame;
+
+void init_paging(void);
+void early_page_fault_handler(InterruptFrame *frame);
+void page_fault_handler(InterruptFrame *frame);
+void alloc_page(page_t *page, uint16_t flags, page_t frame);
+void free_page(page_t *page);
+page_t *get_page(void *addr);
+page_dir_t clone_dir(void);
+void clean_dir(void);
+page_t quick_page(page_t frame);
+
+#endif
diff --git a/mem/user.c b/mem/user.c
new file mode 100644
index 0000000..b9ed273
--- /dev/null
+++ b/mem/user.c
@@ -0,0 +1,37 @@
+/*
+ * This file handles safely getting data from userspace for the Kernel. This is
+ * for security reasons to prevent the user from tricking a syscall into
+ * manipulating/leaking Kernel data structures. User memory is defined as any
+ * address range that completely sits in a Virtual Memory Region.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include "vm.h"
+#include "../task/task.h"
+
+/* User can read this address */
+int
+verify_access(const void *addr, size_t len, int prot)
+{
+	if (!in_syscall() || !addr || !len)
+		return 1;
+
+	/* Iterate all user memory regions */
+	VMRegion *head;
+	for (head = current->vm->regions; head; head = head->next) {
+		if ((uintptr_t) addr >= head->start
+		    && ((uintptr_t) addr + len) < head->end)
+			break;
+	}
+	if (!head) {
+		head = current->stack;
+		if ((uintptr_t) addr < head->start
+		    || ((uintptr_t) addr + len) >= head->end)
+			head = NULL;
+	}
+	/* No fitting region */
+	if (!head)
+		return 0;
+	return (head->prot & prot);
+}
diff --git a/mem/vm.c b/mem/vm.c
new file mode 100644
index 0000000..46b40fc
--- /dev/null
+++ b/mem/vm.c
@@ -0,0 +1,261 @@
+/*
+ * This file handles the Virtual Memory system for processes. It splits each
+ * process into several memory regions, and points each of those regions to a
+ * memory object. Each object can be modified on demand, and can be made up of
+ * several pages, and backed by various stores. This allows objects such as
+ * files to be easily mapped into an address space, or for large regions to be
+ * shared between processes.
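verify_access() above is the guard a syscall handler is expected to run before dereferencing a user-supplied pointer. A hedged example of how a handler might use it; the syscall and its arguments are hypothetical, only verify_access() and PROT_READ are real:

#include <stddef.h>
#include <errno.h>
#include <sys/mman.h>

int verify_access(const void *addr, size_t len, int prot);

/* Hypothetical syscall body: refuse to read from a user buffer that
 * the caller's VM regions do not actually grant read access to. */
static int
sys_example_write(const void *ubuf, size_t len)
{
    if (!verify_access(ubuf, len, PROT_READ))
        return -EFAULT;
    /* ... safe to copy from ubuf now ... */
    return 0;
}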
+ */ + +#include <stdint.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <errno.h> +#include "heap.h" +#include "paging.h" +#include "vm.h" +#include "../vfs/vfs.h" +#include "../vfs/inode.h" +#include "../vfs/cache.h" +#include "../vfs/tmpfs/fs.h" +#include "../task/task.h" +#include "../proc/proc.h" +#include "../screen.h" + +/* Unmap a range of pages from page directory */ +static void +unmap_page_range(uintptr_t start, uintptr_t end) +{ + uintptr_t addr; + for (addr = start; addr < end; addr += 0x1000) { + *get_page((void *) addr) = 0x00000000; + flush_tlb(addr); + } +} + +/* Remove a range of pages from a region's page cache */ +static void +remove_cache_range(VMRegion *region, uintptr_t start, uintptr_t end) +{ + Page *page; + Inode *inode; + uintptr_t p; + for (p = 0; p < end - start; p += 0x1000) { + page = NULL; + if (!page && region->front) { + inode = region->front->inode; + page = page_find(inode, region->offset + p); + } + if (!page && region->back) { + inode = region->back->inode; + page = page_find(inode, region->offset + p); + } + if (page) + page_remove(inode, page); + } +} + +/* Create a new VM Region */ +VMRegion * +vm_create_region(void *addr, size_t len, int prot, int flags, off_t offset, + File *back) +{ + /* Create new region */ + VMRegion *head, *next, *insert, *region = kmalloc(sizeof(VMRegion)); + region->end = (uintptr_t) addr + len; + if (region->end % 0x1000) + region->end += 0x1000 - (region->end % 0x1000); + region->start = (uintptr_t) addr & ~0xFFF; + region->prot = prot; + region->flags = flags; + region->offset = offset; + region->front = NULL; + region->back = NULL; + if (back) + region->back = file_get(back); + + /* Create new list */ + if (!current->vm->regions) { + current->vm->regions = region; + return region; + } + + /* Fix overlaps */ + uintptr_t p; + for (head = current->vm->regions; head; head = next) { + next = head->next; /* head may be destroyed during iteration */ + if (head->start >= region->end || head->end <= region->start) + continue; + + /* Middle eclipsed */ + if (head->start < region->start && head->end > region->end) { + /* Create region after current */ + insert = kmalloc(sizeof(VMRegion)); + insert->end = head->end; + insert->start = head->end = region->start; + insert->prot = head->prot; + insert->flags = head->flags; + insert->offset = head->offset; + insert->offset += (insert->start - head->start); + if (head->front) + insert->front = file_get(head->front); + if (head->back) + insert->back = file_get(head->back); + /* Insert into list */ + insert->next = head->next; + head->next = insert; + insert->prev = head; + insert->next->prev = insert; + /* Inserted region will be dealt with on next pass */ + } + /* Start eclipsed */ + if (head->start >= region->start && head->end > region->end) { + unmap_page_range(head->start, region->end); + remove_cache_range(head, head->start, region->end); + head->start = region->end; + head->offset += (region->end - head->start); + } + /* End eclipsed */ + if (head->start < region->start && head->end <= region->end) { + unmap_page_range(region->start, head->end); + remove_cache_range(head, region->start, head->end); + head->end = region->start; + } + /* Total eclipse */ + if (head->start >= region->start && head->end <= region->end) + vm_destroy_region(head); + } + /* Add to ordered list */ + for (head = current->vm->regions; head->next; head = head->next) + if (head->end <= region->start + && head->next->start >= region->end) + break; + region->next = head->next; + region->prev = head; + 
region->prev->next = region; + if (region->next) + region->next->prev = region; + + return region; +} + +/* Remove a VM Region */ +void +vm_remove_region(VMRegion *region) +{ + /* Remove from list */ + if (current->vm->regions == region) + current->vm->regions = region->next; + if (region->prev) + region->prev->next = region->next; + if (region->next) + region->next->prev = region->prev; +// region->prev = region->next = NULL; +} + +/* Destroy a VM Region */ +void +vm_destroy_region(VMRegion *region) +{ + /* Unlink files */ + if (region->front) + file_put(region->front); + if (region->back) + file_put(region->back); + + /* Clean page directory */ + unmap_page_range(region->start, region->end); + + vm_remove_region(region); + kfree(region); +} + +/* Clone a set of VM Regions */ +VMRegion * +vm_clone_regions(VMRegion *head) +{ + if (!head) + return NULL; + + VMRegion *newhead = NULL, *newcurr, *newprev = NULL; + VMRegion *curr = head; + off_t i; + Page *page; + File *file; + + while (curr) { + newcurr = kmalloc(sizeof(VMRegion)); + if (!newhead) + newhead = newcurr; + + newcurr->prev = newprev; + newcurr->next = NULL; + if (newprev) + newprev->next = newcurr; + + newcurr->start = curr->start; + newcurr->end = curr->end; + newcurr->prot = curr->prot; + newcurr->flags = curr->flags; + newcurr->offset = curr->offset; + /* Front (anonymous regions) */ + if (curr->front && (curr->flags & MAP_PRIVATE)) { + /* Copy the file */ + file = kmalloc(sizeof(File)); + file->inode = inode_get(kmalloc(sizeof(Inode))); + file->ops = &tmpfsFileOps; + newcurr->front = file_get(file); + for (i = 0; i < curr->end - curr->start; i += 0x1000) { + page = page_find(curr->front->inode, + i + curr->offset); + if (page) + page_add(file->inode, page); + } + } else if (curr->front) { + newcurr->front = file_get(curr->front); + } + /* Back (always a file) */ + if (curr->back) + newcurr->back = file_get(curr->back); + + curr = curr->next; + newprev = newcurr; + }; + + return newhead; +} + +/* Map an object into memory */ +void * +mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off) +{ + VMRegion *region; + + /* Find gap big enough */ + if (!addr) { + for (region = current->vm->regions; + region->next; region = region->next) { + if (region->next->start - region->end >= len) + break; + } + addr = (void *) region->end; + } + + /* Map anonymous memory */ + if (flags & MAP_ANONYMOUS) { + region = vm_create_region(addr, len, prot, flags, 0, NULL); + goto end; + } + + /* Map a file */ + if (fildes < 0 || fildes >= NFILES) + return (void *) -EBADF; + File *file = current->files->fd[fildes]; + if (!file) + return (void *) -EBADF; + region = vm_create_region(addr, len, prot, flags, off, file); +end: + if (!region) + return (void *) -ENOMEM; + return (void *) region->start; +} diff --git a/mem/vm.h b/mem/vm.h new file mode 100644 index 0000000..eab852d --- /dev/null +++ b/mem/vm.h @@ -0,0 +1,37 @@ +#ifndef KERNEL_MEM_VM_H +#define KERNEL_MEM_VM_H + +#include <stddef.h> +#include <stdint.h> +#include <sys/mman.h> +#include "../vfs/vfs.h" + +typedef struct VirtualMemory VirtualMemory; +typedef struct VMRegion VMRegion; +typedef struct VMObject VMObject; + +/* Virtual Memory Namespace */ +struct VirtualMemory { + VMRegion *regions; + refcount_t usage; +}; + +/* Structure for a Virtual Memory Map Entry */ +struct VMRegion { + VMRegion *prev, *next; + uintptr_t start, end; + int prot; + int flags; + off_t offset; + File *front, *back; +}; + +VMRegion *vm_create_region(void *addr, size_t len, int prot, int flags, + off_t 
offset, File *back); +void vm_remove_region(VMRegion *region); +void vm_destroy_region(VMRegion *region); +VMRegion *vm_clone_regions(VMRegion *head); + +int verify_access(const void *addr, size_t len, int prot); + +#endif
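Putting the pieces together: for MAP_ANONYMOUS requests, mmap() above only records a VMRegion; no page frames are touched until the page-fault handlers zero-fill them on first access. A hedged kernel-side sketch of that path using the declarations in vm.h (the wrapper name and hard-coded flags are illustrative):

#include <stddef.h>
#include <errno.h>
#include <sys/mman.h>
#include "vm.h"

/* Anonymous mapping: record the region and return its start.
 * Physical frames arrive later, one page fault at a time. */
static void *
map_anon(void *addr, size_t len, int prot)
{
    VMRegion *region = vm_create_region(addr, len, prot,
                                        MAP_PRIVATE | MAP_ANONYMOUS,
                                        0, NULL);
    return region ? (void *) region->start : (void *) -ENOMEM;
}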