diff --git a/arch/Kconfig b/arch/Kconfig index 4218c4500b3..a247ddc32c8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -32,6 +32,10 @@ config X86 select ATOMIC_OPERATIONS_BUILTIN select HAS_DTS +config X86_64 + bool "x86_64 architecture" + select ATOMIC_OPERATIONS_BUILTIN + config NIOS2 bool "Nios II Gen 2 architecture" select ATOMIC_OPERATIONS_C diff --git a/arch/x86_64/CMakeLists.txt b/arch/x86_64/CMakeLists.txt new file mode 100644 index 00000000000..7975866fdeb --- /dev/null +++ b/arch/x86_64/CMakeLists.txt @@ -0,0 +1,12 @@ +set(X86_64_BASE_CFLAGS + -ffreestanding + -fno-pic + -fno-asynchronous-unwind-tables + -mno-sse + -mno-red-zone) + +add_subdirectory(core) + +zephyr_compile_options(${X86_64_BASE_CFLAGS} -mx32) + +zephyr_link_libraries(-mx32) diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig new file mode 100644 index 00000000000..d8c7441ffef --- /dev/null +++ b/arch/x86_64/Kconfig @@ -0,0 +1,31 @@ +config ARCH + default "x86_64" + +config XUK_DEBUG + bool "Debug logging at lowest level" + default n + help + When true, enables debug logging from the XUK layer in very + early boot situations (including the 16 and 32 bit stub + code) on the first serial port (115200 8n1) and VGA text + console. Also wires that output stream to the printk() + function so it can be used before any console drivers are + initialized. + +config XUK_APIC_TSC_SHIFT + int "Power-of-two divisor between TSC and APIC timer" + default 6 + help + Configures the precision of the APIC timer as a bit shift of + the TSC frequency. High values "slow down" the tick rate of + the APIC timer and allow for longer timeouts at the expense + of precision. + +config IRQ_OFFLOAD_VECTOR + int "Interrupt vector for irq_offload" + default 255 + help + This is the interrupt vector to use for the self-directed + IPIs used to implement irq_offload(). Most apps will never + change this. It's configurable in case someone wants to + play with its priority. diff --git a/arch/x86_64/core/CMakeLists.txt b/arch/x86_64/core/CMakeLists.txt new file mode 100644 index 00000000000..93ce5cbf2ed --- /dev/null +++ b/arch/x86_64/core/CMakeLists.txt @@ -0,0 +1,71 @@ +zephyr_library() + +zephyr_library_sources( + x86_64.c + xuk.c + xuk-stubs-copy.c # <-- generated, see below +) + +set(incdir ${PROJECT_BINARY_DIR}/include/generated) + +# We want to include two non-x86_64 stubs as sections/symbols in our +# link (one 16 bit code for SMP real mode bootstraping, the other a 32 +# bit hook for OS protected mode entry). This is tedious to do with +# the linker directly, so the mechanism picked here is to have a C +# file (which really is all assembly) import them with ".incbin" +# statements. But I can't figure out how to add a dependency to a C +# file directly, so we copy the file so it can live as a separate +# dependency node we control. 
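# (For reference, the C-side import is just an assembler directive; the
# xuk-stubs.c file further down in this patch does, for each stub,
#     __asm__(".incbin \"xuk-stub16.bin\"");
# so making the copied file DEPEND on the generated .bin outputs gives
# cmake the dependency edge it needs.)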
+# +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xuk-stubs-copy.c + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stubs.c + ${CMAKE_CURRENT_BINARY_DIR}/xuk-stubs-copy.c + DEPENDS ${incdir}/xuk-stub16.bin + ${incdir}/xuk-stub32.bin +) + +add_custom_command( + OUTPUT ${incdir}/xuk-stub16.bin + COMMAND ${CMAKE_C_COMPILER} -m16 -Os ${X86_64_BASE_CFLAGS} -imacros ${AUTOCONF_H} + -c ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub16.c + -o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub16.o + COMMAND ${CMAKE_OBJCOPY} -O binary -j .text + ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub16.o + ${incdir}/xuk-stub16.bin +) + +add_custom_command( + OUTPUT ${incdir}/xuk-stub32.bin + COMMAND ${CMAKE_C_COMPILER} -m32 -Os ${X86_64_BASE_CFLAGS} -imacros ${AUTOCONF_H} + -c ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub32.c + -o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.o + COMMAND ${CMAKE_C_COMPILER} -m32 ${X86_64_BASE_CFLAGS} + -Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles + -T ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub32.ld + ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.o + -o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.elf + COMMAND ${CMAKE_OBJCOPY} -O binary + ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.elf + ${incdir}/xuk-stub32.bin +) + +# The zephyr.elf file generated for an x86_64 binary is a 64 bit +# binary, but Qemu requires a traditional i386 file (because the entry +# point from multiboot is in 386 protected mode). Do a relink dance +# with objcopy to convert. Note use of the same .incbin trick with +# copy, per above. +# +set(qkernel_file ${CMAKE_BINARY_DIR}/zephyr-qemu.elf) +add_custom_target(qemu_kernel_target DEPENDS ${qkernel_file}) +add_custom_command( + OUTPUT ${qkernel_file} + DEPENDS zephyr_prebuilt + COMMAND ${CMAKE_OBJCOPY} -O binary ${CMAKE_BINARY_DIR}/zephyr/zephyr.elf ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.bin + COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/qemuinc.c ${CMAKE_CURRENT_BINARY_DIR} + COMMAND ${CMAKE_C_COMPILER} -m32 -c ${CMAKE_CURRENT_BINARY_DIR}/qemuinc.c -o ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.o + COMMAND ${CMAKE_C_COMPILER} -m32 -T ${CMAKE_CURRENT_SOURCE_DIR}/xuk64.ld + -Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles + -o ${qkernel_file} ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.o + ) diff --git a/arch/x86_64/core/Makefile.xuk b/arch/x86_64/core/Makefile.xuk new file mode 100644 index 00000000000..a9b393f6dec --- /dev/null +++ b/arch/x86_64/core/Makefile.xuk @@ -0,0 +1,60 @@ +# Any linux host toolchain should work as a default +CC ?= gcc +OBJCOPY ?= objcopy +QEMU ?= qemu-system-x86_64 + +# No unwind tables is just to save size. No SSE is allowed because GCC +# uses it for miscellaneous optimizations that aren't related to +# floating point, and we don't want to take the traps except on +# threads that definitely need it. No red zone because it's +# incompatible with traditional stack-based interrupt entry. +CFLAGS = -Os -I../include -std=c11 -ffreestanding -fno-pic -fno-asynchronous-unwind-tables -mno-sse -mno-red-zone + +LDFLAGS = -Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles + +# This works great. But note that distros ship no libgcc for the +# target, so once we start to need stuff from that we'll need to move +# to a custom cross compiler. +ARCHFLAGS = -mx32 + +# The default build target just links the stub files. Broader OS +# builds just care about these files. The xuk.elf target is a +# demonstration kernel. +stubs: xuk-stub32.bin xuk-stub16.bin + +# First link the initial 32 bit stub, which goes at the front of our +# image. 
+xuk-stub32.bin: xuk-stub32.c *.h xuk-stub32.ld + $(CC) -Wall -m32 $(CFLAGS) -c xuk-stub32.c + $(CC) -m32 -T xuk-stub32.ld $(LDFLAGS) -o stub32.elf $(CFLAGS) xuk-stub32.o + $(OBJCOPY) -O binary stub32.elf $@ + +# This is the main OS image, starting with the 32 bit stub and +# containing all the 64 bit code. +xuk.elf64: xuk-stub32.bin xuk-stub16.bin xuk.c xuk-stubs.c demo-kernel.c *.h xuk64.ld + $(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c xuk.c + $(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c xuk-stubs.c + $(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c demo-kernel.c + $(CC) $(ARCHFLAGS) -T xuk64.ld $(LDFLAGS) -o $@ $(CFLAGS) xuk.o xuk-stubs.o demo-kernel.o + +# Final step. We now have an x86_64 ELF binary, which is not a valid +# multiboot image as the entry point is of course 32 bit. It needs to +# be a i386 image, so copy out the segment and relink the blob one +# last time. +xuk.elf: xuk.elf64 xuk64.ld + $(OBJCOPY) -O binary $< xuk.bin + echo '.incbin "xuk.bin"' | as --32 -c - -o xuk32.o + $(CC) -m32 -T xuk64.ld $(LDFLAGS) -o $@ $(CFLAGS) xuk32.o + +# We can rely on the bootloader to handover a machine running in 386 +# protected mode, but SMP cores start in real mode and need a tiny +# bootstrap layer of 16 bit code. +xuk-stub16.bin: xuk-stub16.c + $(CC) -m16 $(CFLAGS) -c $< + $(OBJCOPY) -O binary -j .text xuk-stub16.o $@ + +run: xuk.elf + $(QEMU) -serial mon:stdio -smp cpus=2 -icount shift=1 -no-reboot -no-shutdown -d unimp,pcall,guest_errors -kernel $< + +clean: + rm -f *.elf *.elf64 *.o *~ *.bin *.disasm diff --git a/arch/x86_64/core/demo-kernel.c b/arch/x86_64/core/demo-kernel.c new file mode 100644 index 00000000000..7aaed2f2444 --- /dev/null +++ b/arch/x86_64/core/demo-kernel.c @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "serial.h" +#include "vgacon.h" +#include "printf.h" +#include "xuk.h" + +/* Tiny demonstration of the core64 code. Implements enough of an + * "OS" layer to do some simple unit testing. + */ + +static void putchar(int c) +{ + serial_putc(c); + vgacon_putc(c); +} + +void test_timers(void) +{ + /* Quickly calibrate the timers against each other. Note that + * the APIC is counting DOWN instead of up! Seems like on + * qemu, the APIC base frequency is 3.7x slower than the tsc. + * Looking at source, it seems like APIC is uniformly shifted + * down from a nominal 1Ghz reference + * (i.e. qemu_get_time_ns()), where the TSC is based on + * cpu_get_ticks() and thus pulls in wall clock time & such. + * If you specify "-icount shift=1", then they synchronize + * properly. + */ + int tsc0, apic0, tsc1, apic1; + + __asm__ volatile("rdtsc" : "=a"(tsc0) : : "rdx"); + apic0 = _apic.CURR_COUNT; + do { + /* Qemu misbehaves if I spam these registers. 
*/ + for (int i = 0; i < 1000; i++) { + __asm__ volatile("nop"); + } + + __asm__ volatile("rdtsc" : "=a"(tsc1) : : "rdx"); + apic1 = _apic.CURR_COUNT; + } while ((tsc1 - tsc0) < 10000 || (apic0 - apic1) < 10000); + printf("tsc %d apic %d\n", tsc1 - tsc0, apic0 - apic1); +} + +unsigned int _init_cpu_stack(int cpu) +{ + return (long)alloc_page(0) + 4096; +} + +void handler_timer(void *arg, int err) +{ + printf("Timer expired on CPU%d\n", (int)(long)xuk_get_f_ptr()); +} + +void handler_f3(void *arg, int err) +{ + printf("f3 handler on cpu%d arg %x, triggering INT 0xff\n", + (int)(long)xuk_get_f_ptr(), (int)(long)arg); + __asm__ volatile("int $0xff"); + printf("end f3 handler\n"); +} + +void _unhandled_vector(int vector, int err, struct xuk_entry_frame *f) +{ + (void)f; + _putchar = putchar; + printf("Unhandled vector %d (err %xh) on CPU%d\n", + vector, err, (int)(long)xuk_get_f_ptr()); +} + +void _isr_entry(void) +{ +} + +void *_isr_exit_restore_stack(void *interrupted) +{ + /* Somewhat hacky test of the ISR exit modes. Two ways of + * specifying "this stack", one of which does the full spill + * and restore and one shortcuts that due to the NULL + * return + */ + if (rdtsc() & 1) { + return interrupted; + } else { + return 0; + } +} + +void *switch_back_to; + +void switch_back(int arg1, int arg2, int arg3) +{ + printf("Switching back (%d, %d, %d) sbt %xh\n", + arg1, arg2, arg3, (int)(long)switch_back_to); + xuk_switch(switch_back_to, &switch_back_to); +} + +void test_switch(void) +{ + static unsigned long long stack[256]; + long args[] = { 5, 4, 3 }; + int eflags = 0x20; /* interrupts disabled */ + + long handle = xuk_setup_stack((long)(sizeof(stack) + (char *)stack), + switch_back, eflags, args, 3); + + printf("Switching to %xh (stack %xh)\n", + (int)handle, (int)(long)&stack[0]); + __asm__ volatile("cli"); + xuk_switch((void *)handle, &switch_back_to); + __asm__ volatile("sti"); + printf("Back from switch\n"); +} + +void local_ipi_handler(void *arg, int err) +{ + printf("local IPI handler on CPU%d\n", (int)(long)xuk_get_f_ptr()); +} + +/* Sends an IPI to the current CPU and validates it ran */ +void test_local_ipi(void) +{ + printf("Testing a local IPI on CPU%d\n", (int)(long)xuk_get_f_ptr()); + + _apic.ICR_HI = (struct apic_icr_hi) {}; + _apic.ICR_LO = (struct apic_icr_lo) { + .delivery_mode = FIXED, + .vector = 0x90, + .shorthand = SELF, + }; +} + +void _cpu_start(int cpu) +{ + _putchar = putchar; + printf("Entering demo kernel\n"); + + /* Make sure the FS/GS pointers work, then set F to store our + * CPU ID + */ + xuk_set_f_ptr(cpu, (void *)(long)(0x19283700 + cpu)); + xuk_set_g_ptr(cpu, (void *)(long)(0xabacad00 + cpu)); + printf("fptr %p gptr %p\n", xuk_get_f_ptr(), xuk_get_g_ptr()); + + xuk_set_f_ptr(cpu, (void *)(long)cpu); + + /* Set up this CPU's timer */ + /* FIXME: this sets up a separate vector for every CPU's + * timer, and we'll run out. They should share the vector but + * still have individually-set APIC config. 
Probably wants a + * "timer" API + */ + xuk_set_isr(INT_APIC_LVT_TIMER, 10, handler_timer, 0); + _apic.INIT_COUNT = 5000000; + test_timers(); + + if (cpu == 0) { + xuk_set_isr(0x1f3, 0, (void *)handler_f3, (void *)0x12345678); + } + + __asm__ volatile("int $0xf3"); + + /* Fire it all up */ + printf("Enabling Interrupts\n"); + __asm__ volatile("sti"); + printf("Interrupts are unmasked (eflags %xh), here we go...\n", + eflags()); + + /* Wait a teeny bit then send an IPI to CPU0, which will hit + * the unhandled_vector handler + */ + if (cpu == 1) { + int t0 = rdtsc(); + + while (rdtsc() - t0 < 1000000) { + } + + _apic.ICR_HI = (struct apic_icr_hi) { + .destination = 0 + }; + _apic.ICR_LO = (struct apic_icr_lo) { + .delivery_mode = FIXED, + .vector = 66, + }; + while (_apic.ICR_LO.send_pending) { + } + } + + test_switch(); + + xuk_set_isr(XUK_INT_RAW_VECTOR(0x90), -1, local_ipi_handler, 0); + test_local_ipi(); + + printf("CPU%d initialized, sleeping\n", cpu); + while (1) { + __asm__ volatile("hlt"); + } +} diff --git a/arch/x86_64/core/offsets/offsets.c b/arch/x86_64/core/offsets/offsets.c new file mode 100644 index 00000000000..d5921dc50c1 --- /dev/null +++ b/arch/x86_64/core/offsets/offsets.c @@ -0,0 +1,5 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ diff --git a/arch/x86_64/core/printf.h b/arch/x86_64/core/printf.h new file mode 100644 index 00000000000..0d7fe02bb53 --- /dev/null +++ b/arch/x86_64/core/printf.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include + +/* Tiny, but not-as-primitive-as-it-looks implementation of something + * like s/n/printf(). Handles %d, %x, %c and %s only, no precision + * specifiers or type modifiers. + */ + +struct _pfr { + char *buf; + int len; + int idx; +}; + +/* Set this function pointer to something that generates output */ +static void (*_putchar)(int c); + +static void pc(struct _pfr *r, int c) +{ + if (r->buf) { + if (r->idx <= r->len) + r->buf[r->idx] = c; + } else { + _putchar(c); + } + r->idx++; +} + +static void prdec(struct _pfr *r, int v) +{ + if (v < 0) { + pc(r, '-'); + v = -v; + } + + char digs[11]; + int i = 10; + + digs[i--] = 0; + while (v || i == 9) { + digs[i--] = '0' + (v % 10); + v /= 10; + } + + while (digs[++i]) + pc(r, digs[i]); +} + +static void endrec(struct _pfr *r) +{ + if (r->buf && r->idx < r->len) + r->buf[r->idx] = 0; +} + +static int _vpf(struct _pfr *r, const char *f, va_list ap) +{ + for (/**/; *f; f++) { + if (*f != '%') { + pc(r, *f); + continue; + } + + switch (*(++f)) { + case '%': + pc(r, '%'); + break; + case 'c': + pc(r, va_arg(ap, int)); + break; + case 's': { + char *s = va_arg(ap, char *); + + while (*s) + pc(r, *s++); + break; + } + case 'p': + pc(r, '0'); + pc(r, 'x'); /* fall through... */ + case 'x': { + int sig = 0; + unsigned int v = va_arg(ap, unsigned int); + + for (int i = 7; i >= 0; i--) { + int d = (v >> (i*4)) & 0xf; + + sig += !!d; + if (sig || i == 0) + pc(r, "0123456789abcdef"[d]); + } + break; + } + case 'd': + prdec(r, va_arg(ap, int)); + break; + default: + pc(r, '%'); + pc(r, *f); + } + } + endrec(r); + return r->idx; +} + +#define CALL_VPF(rec) \ + va_list ap; \ + va_start(ap, f); \ + int ret = _vpf(&r, f, ap); \ + va_end(ap); \ + return ret + +static inline int snprintf(char *buf, unsigned long len, const char *f, ...) +{ + struct _pfr r = { .buf = buf, .len = len }; + + CALL_VPF(&r); +} + +static inline int sprintf(char *buf, const char *f, ...) 
+{ + struct _pfr r = { .buf = buf, .len = 0x7fffffff }; + + CALL_VPF(&r); +} + +static inline int printf(const char *f, ...) +{ + struct _pfr r = {0}; + + CALL_VPF(&r); +} diff --git a/arch/x86_64/core/qemuinc.c b/arch/x86_64/core/qemuinc.c new file mode 100644 index 00000000000..4dbccede4ce --- /dev/null +++ b/arch/x86_64/core/qemuinc.c @@ -0,0 +1,11 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* This file exists solely to include a single binary blob in a link, + * used by the qemu kernel file architecture swap code in the cmake + * configuration. + */ + +__asm__(".incbin \"zephyr-qemu.bin\""); diff --git a/arch/x86_64/core/serial.h b/arch/x86_64/core/serial.h new file mode 100644 index 00000000000..fcc682c5a9c --- /dev/null +++ b/arch/x86_64/core/serial.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "x86_64-hw.h" + +/* Super-primitive 8250 serial output-only driver, 115200 8n1 */ + +#define _PORT 0x3f8 + +static inline void _serout(int c) +{ + while (!(ioport_in8(_PORT + 5) & 0x20)) { + } + ioport_out8(_PORT, c); +} + +static inline void serial_putc(int c) +{ + if (c == '\n') { + _serout('\r'); + } + _serout(c); +} + +static inline void serial_puts(const char *s) +{ + while (*s) { + serial_putc(*s++); + } +} + +static inline void serial_init(void) +{ + /* In fact Qemu already has most of this set up and works by + * default + */ + ioport_out8(_PORT+1, 0); /* IER = 0 */ + ioport_out8(_PORT+3, 0x80); /* LCR = 8n1 + DLAB select */ + ioport_out8(_PORT, 1); /* Divisor Latch low byte */ + ioport_out8(_PORT+1, 0); /* Divisor Latch high byte */ + ioport_out8(_PORT+3, 0x03); /* LCR = 8n1 + DLAB off */ + ioport_out8(_PORT+4, 0x03); /* MCR = DTR & RTS asserted */ +} diff --git a/arch/x86_64/core/shared-page.h b/arch/x86_64/core/shared-page.h new file mode 100644 index 00000000000..253ab442b7b --- /dev/null +++ b/arch/x86_64/core/shared-page.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _SHARED_PAGE_H +#define _SHARED_PAGE_H + +/* Defines a simple interface for sharing a single page of data across + * CPU modes and SMP cores where it can be easily found and relied + * upon. + */ + +#include "xuk-config.h" +#include "x86_64-hw.h" + +/* The shared block lives in the 5th page of memory, immediately after + * the 16k null guard region + */ +#define SHARED_ADDR 0x4000 + +/* Magic cookies passed to stub32 to tell it what's going on */ +#define BOOT_MAGIC_MULTIBOOT 0x2badb002 /* initial handoff from bootloader */ +#define BOOT_MAGIC_STUB16 0xaaf08df7 /* AP cpu initialization */ + +struct xuk_shared_mem { + /* Stack to be used by SMP cpus at startup. MUST BE FIRST. */ + unsigned int smpinit_stack; + + /* Spinlock used to serialize SMP initialization */ + int smpinit_lock; + + /* Byte address of next page to allocate */ + unsigned int next_page; + + /* Top-level page table address */ + unsigned int base_cr3; + + /* 64 bit GDT */ + struct gdt64 gdt[3 + (2 * CONFIG_MP_NUM_CPUS)]; + + /* 64 bit IDT */ + unsigned int idt_addr; + + /* Precomputed GDT for the 16 bit stub */ + unsigned int gdt16_addr; + + /* Each pointer in these arrays is the base of the FS/GS + * segment for the indexed CPU. 
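 * (These slots are what the xuk_set_f_ptr()/xuk_get_f_ptr() and
 * xuk_set_g_ptr()/xuk_get_g_ptr() accessors presumably read and write,
 * since setup_fg_segs() in xuk.c points each CPU's FS/GS segment base
 * at the corresponding entry.)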
+ */ + long long fs_ptrs[CONFIG_MP_NUM_CPUS]; + long long gs_ptrs[CONFIG_MP_NUM_CPUS]; + + int num_active_cpus; + + /* Current output column in the VGA console */ + int vgacol; +}; + +#define _shared (*((struct xuk_shared_mem *)(long)SHARED_ADDR)) + +static inline void shared_init(void) +{ + for (int i = 0; i < sizeof(_shared)/sizeof(int); i++) { + ((int *)&_shared)[i] = 0; + } + + _shared.next_page = 0x5000; + _shared.vgacol = 80; +} + +static inline void *alloc_page(int clear) +{ + int *p = (int *)(long)_shared.next_page; + + _shared.next_page += 4096; + + for (int i = 0; clear && i < 1024; i++) { + p[i] = 0; + } + + return p; +} + +#endif /* _SHARED_PAGE_H */ diff --git a/arch/x86_64/core/vgacon.h b/arch/x86_64/core/vgacon.h new file mode 100644 index 00000000000..86b1c852aa2 --- /dev/null +++ b/arch/x86_64/core/vgacon.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "shared-page.h" + +/* Super-primitive VGA text console output-only "terminal" driver */ + +static inline unsigned short *_vga_row(int row) +{ + return ((unsigned short *)0xb8000) + 80 * row; +} + +/* Foreground color is four bit, high to low: "intensity", red, green, + * blue. Normal text is low intensity, so 0b0111 (7) is standard. + * The high nybble is the background color. + */ +static inline void vga_put(int ch, int color, int row, int col) +{ + unsigned short *rp = _vga_row(row); + + rp[col] = (color << 8) | ch; +} + +static inline void vgacon_putc(char c) +{ + if (_shared.vgacol == 80) { + for (int r = 0; r < 24; r++) { + for (int c = 0; c < 80; c++) { + _vga_row(r)[c] = _vga_row(r+1)[c]; + } + } + for (int c = 0; c < 80; c++) { + _vga_row(24)[c] = 0x9000; + } + _shared.vgacol = 0; + } + + if (c == '\n') { + _shared.vgacol = 80; + } else if (c == '\r') { + _shared.vgacol = 0; + } else { + vga_put(c, 0x1f, 24, _shared.vgacol++); + } +} diff --git a/arch/x86_64/core/x86_64-hw.h b/arch/x86_64/core/x86_64-hw.h new file mode 100644 index 00000000000..229cb9a0552 --- /dev/null +++ b/arch/x86_64/core/x86_64-hw.h @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _X86_64_HW_H +#define _X86_64_HW_H + +/* + * Struct declarations and helper inlines for core x86_64 hardware + * functionality. Anything related to ioports, CR's MSR's, I/L/GDTs, + * PTEs or (IO-)APICs can be found here. Note that because this + * header is included in limited stub contexts, it should include + * declarations and inlines only: no data definitions, even extern + * ones! + */ + +static inline unsigned long eflags(void) +{ + int eflags; + + __asm__ volatile("pushfq; pop %%rax" : "=a"(eflags)); + return eflags; +} + +/* PAE page table record. Note that "addr" is aligned naturally as an + * address, but of course must be masked to change only significant + * bits (which depend on whether it's storing a 4k, 2M or 1G memory + * block) so as to not clobber the bitfields (remember "negative" + * addresses must mask off the top bits too!). The natural idiom is + * to assign addr first, then write the bitfields. 
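 *
 * A minimal sketch of that idiom (the address value is made up for
 * illustration; see init_page_tables() in xuk-stub32.c for real use):
 *
 *     struct pte64 e = {};
 *     e.addr = 0x200000;            <- 2M-aligned block address first
 *     e.present = 1;
 *     e.writable = 1;
 *     e.pagesize_pat = 1;           <- then the flag bits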
+ */ +struct pte64 { + union { + unsigned long long addr; + struct { + unsigned long long present : 1; + unsigned long long writable : 1; + unsigned long long usermode : 1; + unsigned long long writethrough : 1; + unsigned long long uncached : 1; + unsigned long long accessed : 1; + unsigned long long dirty : 1; + unsigned long long pagesize_pat : 1; + unsigned long long global : 1; + unsigned long long _UNUSED1 : 3; + unsigned long long pat : 1; + unsigned long long _UNUSED2 : 50; + unsigned long long exdisable : 1; + }; + }; +}; + +struct gdt64 { + union { + unsigned int dwords[2]; + struct { + unsigned long long limit_lo16 : 16; + unsigned long long base_lo16 : 16; + unsigned long long base_mid8 : 8; + unsigned long long accessed : 1; + unsigned long long readable : 1; + unsigned long long conforming : 1; + unsigned long long codeseg : 1; + unsigned long long notsystem : 1; + unsigned long long ring : 2; + unsigned long long present : 1; + unsigned long long limit_hi4 : 4; + unsigned long long available : 1; + unsigned long long long64 : 1; + unsigned long long default_size : 1; + unsigned long long page_granularity : 1; + unsigned long long base_hi8 : 8; + }; + }; +}; + +static inline void gdt64_set_base(struct gdt64 *g, unsigned int base) +{ + g->base_lo16 = base & 0xffff; + g->base_mid8 = (base >> 16) & 0xff; + g->base_hi8 = base >> 24; +} + +#define GDT_SELECTOR(seg) ((seg) << 3) + +struct idt64 { + unsigned short offset_lo16; + unsigned short segment; + unsigned int ist : 3; + unsigned int _UNUSED1 : 5; + unsigned int type : 4; + unsigned int _UNUSED2 : 1; + unsigned int ring : 2; + unsigned int present : 1; + unsigned short offset_mid16; + unsigned int offset_hi32; + unsigned int _UNUSED3; +}; + +static inline void idt64_set_isr(struct idt64 *desc, void *isr) +{ + unsigned long long addr = (unsigned long)isr; + + desc->offset_lo16 = addr & 0xffff; + desc->offset_mid16 = (addr >> 16) & 0xffff; + desc->offset_hi32 = addr >> 32; +} + +enum apic_delivery_mode { + FIXED = 0, LOWEST = 1, SMI = 2, NMI = 4, + INIT = 5, STARTUP = 6, EXTINT = 7, +}; + +struct apic_icr_lo { + unsigned int vector : 8; + enum apic_delivery_mode delivery_mode : 3; + unsigned int logical : 1; + unsigned int send_pending : 1; + unsigned int _unused : 1; + unsigned int assert : 1; + unsigned int level_trig : 1; + unsigned int _unused2 : 2; + enum { NONE, SELF, ALL, NOTSELF } shorthand : 2; +}; + +struct apic_icr_hi { + unsigned int _unused : 24; + unsigned int destination : 8; +}; + +/* Generic struct, not all field applicable to all LVT interrupts */ +struct apic_lvt { + unsigned int vector : 8; + enum apic_delivery_mode delivery_mode : 4; + unsigned int _UNUSED : 1; + unsigned int send_pending : 1; + unsigned int polarity : 1; + unsigned int remote_irr : 1; + unsigned int level_trig : 1; + unsigned int masked : 1; + enum { ONESHOT, PERIODIC, TSCDEADLINE } mode : 2; +}; + +/* Memory-mapped local APIC registers. Note that the registers are + * always the first dword in a 16 byte block, the other 3 being + * unused. So each line represents one of these registers, or an + * array thereof. Lots of (_u)nused fields in the layout, but the usage + * becomes pleasingly clean. 
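 *
 * For example, code elsewhere in this patch can treat the registers as
 * plain struct fields:
 *
 *     _apic.EOI = 0;
 *     _apic.INIT_COUNT = 5000000;
 *     while (_apic.ICR_LO.send_pending) {
 *     }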
+ */ +struct apic_regs { + unsigned int _u1[4][2]; + unsigned int ID, _u2[3]; + unsigned int VER, _u3[3]; + unsigned int _u4[4][4]; + unsigned int TPR, _u5[3]; + unsigned int APR, _u6[3]; + unsigned int PPR, _u7[3]; + unsigned int EOI, _u8[3]; + unsigned int RRD, _u9[3]; + unsigned int LDR, _u10[3]; + unsigned int DFR, _u11[3]; + unsigned int SPURIOUS, _u12[3]; + unsigned int ISR_BITS[4][8]; + unsigned int TMR_BITS[4][8]; + unsigned int IRR_BITS[4][8]; + unsigned int ERR_STATUS, _u13[3]; + unsigned int _u14[4][6]; + struct apic_lvt LVT_CMCI; unsigned int _u15[3]; + struct apic_icr_lo ICR_LO, _u16[3]; + struct apic_icr_hi ICR_HI, _u17[3]; + struct apic_lvt LVT_TIMER; unsigned int _u18[3]; + struct apic_lvt LVT_THERMAL; unsigned int _u19[3]; + struct apic_lvt LVT_PERF; unsigned int _u20[3]; + struct apic_lvt LVT_LINT0; unsigned int _u21[3]; + struct apic_lvt LVT_LINT1; unsigned int _u22[3]; + struct apic_lvt LVT_ERROR; unsigned int _u23[3]; + unsigned int INIT_COUNT, _u24[3]; + unsigned int CURR_COUNT, _u25[3]; + unsigned int _u26[4][4]; + unsigned int DIVIDE_CONF, _u27[3]; +}; + +#define _apic (*((volatile struct apic_regs *)0xfee00000ll)) + +/* Crazy encoding for this, but susceptable to a formula. Returns the + * DIVIDE_CONF register value that divides the input clock by 2^n (n + * in the range 0-7). + */ +#define APIC_DIVISOR(n) (((((n) - 1) << 1) & 8)|(((n) - 1) & 3)) + +#define IOREGSEL (*(volatile unsigned int *)0xfec00000l) +#define IOREGWIN (*(volatile unsigned int *)0xfec00010l) + +/* Assumes one IO-APIC. Note that because of the way the register API + * works, this must be spinlocked or otherwise protected against other + * CPUs (e.g. do it all on cpu0 at startup, etc...). + */ +static inline unsigned int ioapic_read(int reg) +{ + IOREGSEL = reg; + return IOREGWIN; +} + +static inline void ioapic_write(int reg, unsigned int val) +{ + IOREGSEL = reg; + IOREGWIN = val; +} + +/* IOAPIC redirection table entry */ +struct ioapic_red { + union { + unsigned int regvals[2]; + struct { + unsigned int vector : 8; + enum apic_delivery_mode : 3; + unsigned int logical : 1; + unsigned int send_pending : 1; + unsigned int active_low : 1; + unsigned int remote_irr : 1; + unsigned int level_triggered : 1; + unsigned int masked : 1; + unsigned int _UNUSED1 : 15; + unsigned int _UNUSED2 : 24; + unsigned int destination : 8; + }; + }; +}; + +#define GET_CR(reg) ({ unsigned int _r; \ + __asm__ volatile("movl %%" reg ", %0\n\t" \ + : "=r"(_r)); \ + _r; }) + +#define SET_CR(reg, val) \ + do { \ + int tmp = val; \ + __asm__ volatile("movl %0, %%" reg "\n\t" :: "r"(tmp)); \ + } while (0) + +#define SET_CR_BIT(reg, bit) SET_CR(reg, GET_CR(reg) | (1 << bit)) + +static inline void ioport_out8(unsigned short port, unsigned char b) +{ + __asm__ volatile("outb %0, %1;\n\t" : : "a"(b), "d"(port)); +} + + +static inline unsigned char ioport_in8(unsigned short port) +{ + unsigned char ret; + + __asm__ volatile("inb %1, %0;\n\t" : "=a"(ret) : "d"(port)); + return ret; +} + +static inline void set_msr_bit(unsigned int msr, int bit) +{ + unsigned int mask = 1 << bit; + + __asm__ volatile("rdmsr; or %0, %%eax; wrmsr" + :: "r"(mask), "c"(msr) : "eax", "edx"); +} + +static inline unsigned int get_msr(unsigned int msr) +{ + unsigned int val; + + __asm__ volatile("rdmsr" : "=a"(val) : "c"(msr) : "edx"); + return val; +} + +static inline unsigned long long rdtsc(void) +{ + unsigned long long rax, rdx; + + __asm__ volatile("rdtsc" : "=a"(rax), "=d"(rdx)); + return rdx << 32 | rax; +} + +#endif /* _X86_64_HW_H */ diff --git 
a/arch/x86_64/core/x86_64.c b/arch/x86_64/core/x86_64.c new file mode 100644 index 00000000000..9fa48455252 --- /dev/null +++ b/arch/x86_64/core/x86_64.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include +#include +#include +#include "xuk.h" + +struct device; + +struct NANO_ESF { +}; + +void _new_thread(struct k_thread *t, k_thread_stack_t *stack, + size_t sz, k_thread_entry_t entry, + void *p1, void *p2, void *p3, + int prio, unsigned int opts) +{ + void *args[] = { entry, p1, p2, p3 }; + int nargs = 4; + int eflags = 0x200; + char *base = K_THREAD_STACK_BUFFER(stack); + char *top = base + sz; + + _new_thread_init(t, base, sz, prio, opts); + + t->switch_handle = (void *)xuk_setup_stack((long) top, + (void *)_thread_entry, + eflags, (long *)args, + nargs); +} + +void k_cpu_idle(void) +{ + z_sys_trace_idle(); + __asm__ volatile("sti; hlt"); +} + +void _unhandled_vector(int vector, int err, struct xuk_entry_frame *f) +{ + /* Yes, there are five regsiters missing. See notes on + * xuk_entry_frame/xuk_stack_frame. + */ + printk("*** FATAL ERROR vector %d code %d\n", vector, err); + printk("*** RIP %d:0x%llx RSP %d:0x%llx RFLAGS 0x%llx\n", + (int)f->cs, f->rip, (int)f->ss, f->rsp, f->rflags); + printk("*** RAX 0x%llx RCX 0x%llx RDX 0x%llx RSI 0x%llx RDI 0x%llx\n", + f->rax, f->rcx, f->rdx, f->rsi, f->rdi); + printk("*** R8 0x%llx R9 0x%llx R10 0x%llx R11 0x%llx\n", + f->r8, f->r9, f->r10, f->r11); + + _NanoFatalErrorHandler(x86_64_except_reason, NULL); +} + +void _isr_entry(void) +{ + _arch_curr_cpu()->nested++; +} + +void *_isr_exit_restore_stack(void *interrupted) +{ + bool nested = (--_arch_curr_cpu()->nested) > 0; + void *next = _get_next_switch_handle(interrupted); + + return (nested || next == interrupted) ? NULL : next; +} + +struct { + void (*fn)(int, void*); + void *arg; + unsigned int esp; +} cpu_init[CONFIG_MP_NUM_CPUS]; + +/* Called from Zephyr initialization */ +void _arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz, + void (*fn)(int, void *), void *arg) +{ + cpu_init[cpu_num].arg = arg; + cpu_init[cpu_num].esp = (int)(long)(sz + (char *)stack); + + /* This is our flag to the spinning CPU. Do this last */ + cpu_init[cpu_num].fn = fn; +} + +#ifdef CONFIG_IRQ_OFFLOAD +static irq_offload_routine_t offload_fn; +static void *offload_arg; + +static void irq_offload_handler(void *arg, int err) +{ + ARG_UNUSED(arg); + ARG_UNUSED(err); + offload_fn(offload_arg); +} + +void irq_offload(irq_offload_routine_t fn, void *arg) +{ + offload_fn = fn; + offload_arg = arg; + __asm__ volatile("int %0" : : "i"(CONFIG_IRQ_OFFLOAD_VECTOR)); +} +#endif + +/* Default. Can be overridden at link time by a timer driver */ +void __weak x86_apic_timer_isr(void *arg, int code) +{ + ARG_UNUSED(arg); + ARG_UNUSED(code); +} + +/* Called from xuk layer on actual CPU start */ +void _cpu_start(int cpu) +{ + xuk_set_f_ptr(cpu, &_kernel.cpus[cpu]); + + /* Set up the timer ISR, but ensure the timer is disabled */ + xuk_set_isr(INT_APIC_LVT_TIMER, 13, x86_apic_timer_isr, 0); + _apic.INIT_COUNT = 0; + +#ifdef CONFIG_IRQ_OFFLOAD + xuk_set_isr(XUK_INT_RAW_VECTOR(CONFIG_IRQ_OFFLOAD_VECTOR), + -1, irq_offload_handler, 0); +#endif + + if (cpu <= 0) { + /* The SMP CPU startup function pointers act as init + * flags. Zero them here because this code is running + * BEFORE .bss is zeroed! Should probably move that + * out of _Cstart() for this architecture... 
+ */ + for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) { + cpu_init[i].fn = 0; + } + + /* Enter Zephyr */ + _Cstart(); + + } else if (cpu < CONFIG_MP_NUM_CPUS) { + /* SMP initialization. First spin, waiting for + * _arch_start_cpu() to be called from the main CPU + */ + while (!cpu_init[cpu].fn) { + } + + /* Enter Zephyr, which will switch away and never return */ + cpu_init[cpu].fn(0, cpu_init[cpu].arg); + } + + /* Spin forever as a fallback */ + while (1) { + } +} + +/* Returns the initial stack to use for CPU startup on auxiliary (not + * cpu 0) processors to the xuk layer, which gets selected by the + * non-arch Zephyr kernel and stashed by _arch_start_cpu() + */ +unsigned int _init_cpu_stack(int cpu) +{ + return cpu_init[cpu].esp; +} + +int _arch_irq_connect_dynamic(unsigned int irq, unsigned int priority, + void (*routine)(void *parameter), void *parameter, + u32_t flags) +{ + ARG_UNUSED(flags); + __ASSERT(priority >= 2 && priority <= 15, + "APIC interrupt priority must be 2-15"); + + xuk_set_isr(irq, priority, (void *)routine, parameter); + return 0; +} + +void _arch_irq_disable(unsigned int irq) +{ + xuk_set_isr_mask(irq, 1); +} + +void _arch_irq_enable(unsigned int irq) +{ + xuk_set_isr_mask(irq, 0); +} + +void x86_apic_set_timeout(u32_t cyc_from_now) +{ + _apic.INIT_COUNT = cyc_from_now; +} + +const NANO_ESF _default_esf; + +int x86_64_except_reason; + +void _NanoFatalErrorHandler(unsigned int reason, const NANO_ESF *esf) +{ + _SysFatalErrorHandler(reason, esf); +} + +/* App-overridable handler. Does nothing here */ +void __weak _SysFatalErrorHandler(unsigned int reason, const NANO_ESF *esf) +{ + ARG_UNUSED(reason); + ARG_UNUSED(esf); + k_thread_abort(_current); +} diff --git a/arch/x86_64/core/xuk-config.h b/arch/x86_64/core/xuk-config.h new file mode 100644 index 00000000000..7c1b9a38c21 --- /dev/null +++ b/arch/x86_64/core/xuk-config.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _XUK_CONFIG_H +#define _XUK_CONFIG_H + +/* This file defines "kconfig" variables used by the xuk layer only in + * unit test situations where we aren't using pulling in the true + * autoconf.h + */ +#ifndef CONFIG_X86_64 + +/* #define CONFIG_XUK_DEBUG 1 */ + +/* The APIC timer will run 2^X times slower than the TSC. (X = 0-7) */ +#define CONFIG_XUK_APIC_TSC_SHIFT 5 + +#define CONFIG_MP_NUM_CPUS 2 + +#define CONFIG_XUK_64_BIT_ABI 1 + +#endif /* CONFIG_X86_64 */ +#endif /* _XUK_CONFIG_H */ diff --git a/arch/x86_64/core/xuk-stub16.c b/arch/x86_64/core/xuk-stub16.c new file mode 100644 index 00000000000..0e341d1d60e --- /dev/null +++ b/arch/x86_64/core/xuk-stub16.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "serial.h" +#include "x86_64-hw.h" +#include "shared-page.h" + +/* + * 16 bit boot stub. This code gets copied into a low memory page and + * used as the bootstrap code for SMP processors, which always start + * in real mode. It is compiled with gcc's -m16 switch, which is a + * wrapper around the assembler's .code16gcc directive which cleverly + * takes 32 bit assembly and "fixes" it with appropriate address size + * prefixes to run in real mode on a 386. + * + * It is just code! We have the .text segment and NOTHING ELSE. No + * static or global variables can be used, nor const read-only data. + * Neither is the linker run, so nothing can be relocated and all + * symbolic references need to be to addresses within this file. 
In + * fact, any relocations that do sneak in will be left at zero at + * runtime! + */ + +__asm__(" cli\n" + " xor %ax, %ax\n" + " mov %ax, %ss\n" + " mov %ax, %ds\n" + " mov $80000, %esp\n" /* FIXME: put stack someplace officiallerish */ + " jmp _start16\n"); + +void _start16(void) +{ +#ifdef XUK_DEBUG + serial_putc('1'); serial_putc('6'); serial_putc('\n'); +#endif + + /* First, serialize on a simple spinlock. Note there's a + * theoretical flaw here in that we are on a shared stack with the + * other CPUs here and we don't *technically* know that "oldlock" + * does not get written to the (clobberable!) stack memory. But + * in practice the compiler does the right thing here and we spin + * in registers until exiting the loop, at which point we are the + * only users of the stack, and thus safe. + */ + int oldlock; + + do { + __asm__ volatile("pause; mov $1, %%eax; xchg %%eax, (%1)" + : "=a"(oldlock) : "m"(_shared.smpinit_lock)); + } while (oldlock); + + /* Put a red banner at the top of the screen to announce our + * presence + */ + volatile unsigned short *vga = (unsigned short *)0xb8000; + + for (int i = 0; i < 240; i++) + vga[i] = 0xcc20; + + /* Spin again waiting on the BSP processor to give us a stack. We + * won't use it until the entry code of stub32, but we want to + * make sure it's there before we jump. + */ + while (!_shared.smpinit_stack) { + } + + /* Load the GDT the CPU0 already prepared for us */ + __asm__ volatile ("lgdtw (%0)\n" : : "r"(_shared.gdt16_addr)); + + /* Enter protected mode by setting the bottom bit of CR0 */ + int cr0; + + __asm__ volatile ("mov %%cr0, %0\n" : "=r"(cr0)); + cr0 |= 1; + __asm__ volatile ("mov %0, %%cr0\n" : : "r"(cr0)); + + /* Set up data and stack segments */ + short ds = GDT_SELECTOR(2); + + __asm__ volatile ("mov %0, %%ds; mov %0, %%ss" : : "r"(ds)); + + /* Far jump to the 32 bit entry point, passing a cookie in EAX to + * tell it what we're doing + */ + int magic = BOOT_MAGIC_STUB16; + + __asm__ volatile ("ljmpl $0x8,$0x100000" : : "a"(magic)); + + while (1) { + __asm__("hlt"); + } +} diff --git a/arch/x86_64/core/xuk-stub32.c b/arch/x86_64/core/xuk-stub32.c new file mode 100644 index 00000000000..f718cc746ee --- /dev/null +++ b/arch/x86_64/core/xuk-stub32.c @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "xuk-config.h" +#include "shared-page.h" +#include "x86_64-hw.h" + +#ifdef CONFIG_XUK_DEBUG +#include "printf.h" +#include "vgacon.h" +#include "serial.h" +#else +int printf(const char *fmt, ...) +{ + return 0; +} +#endif + +/* This i386 code stub is designed to link internally (i.e. it shares + * nothing with the 64 bit world) and be loaded into RAM in high + * memory (generally at 0x100000) in a single (R/W/X) block with its + * .text, .rodata, .data and .bss included. Its stack lives in the + * fifth page of memory at 0x04000-0x4fff. After finishing 64 bit + * initialization, it will JMP to the 16-byte-aligned address that + * immediately follows this block in memory (exposed by the linker as + * _start64), which should then be able to run in an environment where + * all of physical RAM is mapped, except for the bottom 16kb. + * + * Memory layout on exit: + * + * + Pages 0-3 are an unmapped NULL guard + * + Page 4: contains stack and bss for the setup code, and a GDT. + * After 64 bit setup, it's likely this will be reused . + * + Pages 5-11: are the bootstrap page table + * + * Note that the initial page table makes no attempt to identify + * memory regions. 
Everything in the first 4G is mapped as cachable + * RAM. MMIO drivers will need to remap their memory based on PCI BAR + * regions or whatever. + */ + +/* Cute trick to turn a preprocessor macro containing a number literal + * into a string immediate in gcc basic asm context + */ +#define _ASM_IMM(s) #s +#define ASM_IMM(s) "$" _ASM_IMM(s) + +/* Entry point, to be linked at the very start of the image. Set a + * known-good stack (either the top of the shared page for the boot + * CPU, or one provided by stub16 on others), push the multiboot + * arguments in EAX, EBX and call into C code. + */ +__asm__(".pushsection .start32\n" + " mov $0x5000, %esp\n" + " xor %edx, %edx\n" + " cmp " ASM_IMM(BOOT_MAGIC_STUB16) ", %eax\n" + " cmove 0x4000(%edx), %esp\n" + " pushl %ebx\n" + " pushl %eax\n" + " call cstart\n" + ".popsection\n"); + +/* The multiboot header can be anywhere in the first 4k of the file. + * This stub doesn't get that big, so we don't bother with special + * linkage. + */ +#define MULTIBOOT_MAGIC 0x1badb002 +#define MULTIBOOT_FLAGS (1<<1) /* 2nd bit is "want memory map" */ +const int multiboot_header[] = { + MULTIBOOT_MAGIC, + MULTIBOOT_FLAGS, + -(MULTIBOOT_MAGIC + MULTIBOOT_FLAGS), /* csum: -(magic+flags) */ +}; + +/* Creates and returns a generic/sane page table for 64 bit startup + * (64 bit mode requires paging enabled). All of the bottom 4G + * (whether backing memory is present or not) gets a mapping with 2M + * pages, except that the bottom 2M are mapped with 4k pages and leave + * the first four pages unmapped as a NULL guard. + * + * Makes no attempt to identify non-RAM/MMIO regions, it just maps + * everything. We rely on the firmware to have set up MTRRs for us + * where needed, otherwise that will all be cacheable memory. + */ +void *init_page_tables(void) +{ + /* Top level PML4E points to a single PDPTE in its first entry */ + struct pte64 *pml4e = alloc_page(1); + struct pte64 *pdpte = alloc_page(1); + + pml4e[0].addr = (unsigned long)pdpte; + pml4e[0].present = 1; + pml4e[0].writable = 1; + + /* The PDPTE has four entries covering the first 4G of memory, + * each pointing to a PDE + */ + for (unsigned int gb = 0; gb < 4; gb++) { + struct pte64 *pde = alloc_page(0); + + pdpte[gb].addr = (unsigned long)pde; + pdpte[gb].present = 1; + pdpte[gb].writable = 1; + + /* Each PDE filled with 2M supervisor pages */ + for (int i = 0; i < 512; i++) { + if (!(gb == 0 && i == 0)) { + pde[i].addr = (gb << 30) | (i << 21); + pde[i].present = 1; + pde[i].writable = 1; + pde[i].pagesize_pat = 1; + } else { + /* EXCEPT the very first entry of the + * first GB, which is a pointer to a + * PTE of 4k pages so that we can have + * a 16k (4-page) NULL guard unmapped. + */ + struct pte64 *pte = alloc_page(0); + + pde[0].addr = (unsigned long)pte; + pde[0].present = 1; + pde[0].writable = 1; + + for (int j = 0; j < 512; j++) { + if (j < 4) { + pte[j].addr = 0; + } else { + pte[j].addr = j << 12; + pte[j].present = 1; + pte[j].writable = 1; + } + } + } + } + } + + /* Flush caches out of paranoia. In theory, x86 page walking + * happens downstream of the system-coherent dcache and this + * isn't needed. + */ + __asm__ volatile("wbinvd"); + return pml4e; +} + +#ifdef CONFIG_XUK_DEBUG +void putchar(int c) +{ + serial_putc(c); + vgacon_putc(c); +} +#endif + +void cstart(unsigned int magic, unsigned int arg) +{ + if (magic == BOOT_MAGIC_STUB16) { + printf("SMP CPU up in 32 bit protected mode. 
Stack ~%xh\n", + &magic); + } + + if (magic != BOOT_MAGIC_STUB16) { + shared_init(); +#ifdef CONFIG_XUK_DEBUG + serial_init(); + _putchar = putchar; +#endif + + printf("Entering stub32 on boot cpu, magic %xh stack ~%xh\n", + magic, (int)&magic); + } + + /* The multiboot memory map turns out not to be very useful. + * The basic numbers logged here are only a subset of the true + * memory map if it has holes or >4G memory, and the full map + * passed in the second argument tends to live in low memory + * and get easily clobbered by our own muckery. If we care + * about reading memory maps at runtime we probably want to be + * using BIOS e820 like Linux does. + */ + if (magic == BOOT_MAGIC_MULTIBOOT) { + printf("Hi there!\n"); + printf("This is a second line!\n"); + printf("And this line was generated from %s\n", "printf!"); + + printf("Magic: %p MBI Addr: %p\n", magic, arg); + + int mem_lower = *(int *)(arg + 4); + int mem_upper = *(int *)(arg + 8); + int mmap_length = *(int *)(arg + 44); + int *mmap_addr = *(void **)(arg + 48); + + printf("mem lower %d upper %d mmap_len %d mmap_addr %p\n", + mem_lower, mem_upper, mmap_length, mmap_addr); + } + + /* Choose a stack pointer and CPU ID for the 64 bit code to + * use. Then if we're not the boot CPU, release the spinlock + * (taken in stub16) so the other CPUs can continue. + */ + int cpu_id = 0; + unsigned int init_stack = 0x5000; + + if (magic == BOOT_MAGIC_STUB16) { + cpu_id = _shared.num_active_cpus++; + init_stack = _shared.smpinit_stack; + _shared.smpinit_stack = 0; + __asm__ volatile("movl $0, (%0)" : : "m"(_shared.smpinit_lock)); + } + + /* Page table goes in CR3. This is a noop until paging is + * enabled later + */ + if (magic != BOOT_MAGIC_STUB16) { + _shared.base_cr3 = (unsigned int)init_page_tables(); + } + SET_CR("cr3", _shared.base_cr3); + + /* Enable PAE bit (5) in CR4, required because in long mode + * we'll be using the 64 bit page entry format. Likewise a + * noop until the CPU starts loading pages. + */ + SET_CR_BIT("cr4", 5); + + /* Set LME (long mode enable) in IA32_EFER. Still not a mode + * transition, simply tells the CPU that, once paging is + * enabled, we should enter long mode. At that point the LMA + * bit (10) will be set to indicate that it's active. + */ + const int MSR_IA32_EFER = 0xc0000080; + + set_msr_bit(MSR_IA32_EFER, 8); + + /* NOW we transition by turning paging on. The CPU will start + * page translation (which has been carefully + * identity-mapped!) and enter the 32 bit compatibility + * submode of long mode. So we're reading 64 bit page tables + * but still executing 32 bit instructions. + */ + SET_CR_BIT("cr0", 31); + + printf("Hello memory mapped world!\n"); + + /* Now we can enter true 64 bit long mode via a far call to a + * code segment with the 64 bit flag set. Allocate a 2-entry + * GDT (entry 0 is always a "null segment" architecturally and + * can't be used) here on the stack and throw it away after + * the jump. The 64 bit OS code will need to set the + * descriptors up for itself anyway + */ + struct gdt64 cs[] = { + { }, + { + .readable = 1, + .codeseg = 1, + .notsystem = 1, + .present = 1, + .long64 = 1, + }, + }; + + /* The limit comes first, but is 16 bits. 
The dummy is there + * for alignment, though docs aren't clear on whether it's + * required or not + */ + struct { + unsigned short dummy; + unsigned short limit; + unsigned int addr; + } gdtp = { .limit = sizeof(cs), .addr = (int)&cs[0], }; + + printf("CS descriptor 0x%x 0x%x\n", cs[1].dwords[1], cs[1].dwords[0]); + __asm__ volatile("lgdt %0" : : "m"(gdtp.limit) : "memory"); + + /* Finally, make a far jump into the 64 bit world. The entry + * point is a 16-byte-aligned address that immediately follows + * our stub, and is exposed by our linkage as "_start64". + * + * Indirect far jumps have a similar crazy setup to descriptor + * tables, but here the segment selector comes last so no + * alignment worries. + * + * The 64 bit entry reuses the same stack we're on, and takes + * the cpu_id in its first argument. + */ + extern int _start64; + unsigned int jmpaddr = (unsigned int) &_start64; + struct { + unsigned int addr; + unsigned short segment; + } farjmp = { .segment = GDT_SELECTOR(1), .addr = jmpaddr }; + + printf("Making far jump to 64 bit mode @%xh...\n", &_start64); + __asm__ volatile("mov %0, %%esp; ljmp *%1" :: + "r"(init_stack), "m"(farjmp), "D"(cpu_id) + : "memory"); +} diff --git a/arch/x86_64/core/xuk-stub32.ld b/arch/x86_64/core/xuk-stub32.ld new file mode 100644 index 00000000000..16e50761c63 --- /dev/null +++ b/arch/x86_64/core/xuk-stub32.ld @@ -0,0 +1,21 @@ +ENTRY(_start) + +PHDRS { + stub32 PT_LOAD; +} + +SECTIONS { + . = 0x100000; + _start = .; + .stub32 : { + *(.start32) + *(.text*) + *(.rodata*) + *(.data*) + *(.bss*) + *(COMMON) + } :stub32 + + . = ALIGN(16); + _start64 = .; +} diff --git a/arch/x86_64/core/xuk-stubs.c b/arch/x86_64/core/xuk-stubs.c new file mode 100644 index 00000000000..a4bc2ef9f8d --- /dev/null +++ b/arch/x86_64/core/xuk-stubs.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* This "C" file exists solely to include the contents of + * separately-compiled binary stubs into the link. It's easier than + * trying to objcopy the contents into linkable object files, + * especially when combined with cmake's somewhat odd special-cased + * dependency handling (which works fine with C files, of course). + */ + +/* The 32 bit stub is our entry point and goes into a separate linker + * section so it can be placed correctly + */ +__asm__(".section .xuk_stub32\n" + ".incbin \"xuk-stub32.bin\"\n"); + +/* The 16 bit stub is the start of execution for auxiliary SMP CPUs + * (also for real mode traps if we ever want to expose that + * capability) and just lives in rodata. It has to be copied into low + * memory by the kernel once it is running. + */ +__asm__(".section .rodata\n" + ".globl _xuk_stub16_start\n" + "_xuk_stub16_start:\n" + ".incbin \"xuk-stub16.bin\"\n" + ".globl _xuk_stub16_end\n" + "_xuk_stub16_end:\n"); + diff --git a/arch/x86_64/core/xuk.c b/arch/x86_64/core/xuk.c new file mode 100644 index 00000000000..8ffc06e9fd7 --- /dev/null +++ b/arch/x86_64/core/xuk.c @@ -0,0 +1,629 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "xuk-config.h" +#include "x86_64-hw.h" +#include "xuk.h" +#include "serial.h" + +#ifdef CONFIG_XUK_DEBUG +#include "vgacon.h" +#include "printf.h" +#else +#define printf(...) +#endif + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) + +/* Defined at the linker level in xuk-stubs.c */ +extern char _xuk_stub16_start, _xuk_stub16_end; + +/* 64 bit entry point. Lives immediately after the 32 bit stub. 
+ * Expects to have its stack already set up. + */ +__asm__(".pushsection .xuk_start64\n" + ".align 16\n" + " jmp _cstart64\n" + ".popsection\n"); + +/* Interrupt/exception entry points stored in the IDT. + * + * FIXME: the assembly below uses XCHG r/m, because I'm lazy and this + * was SO much easier than hand coding the musical chairs required to + * emulate it. But that instruction is outrageously slow (like 20+ + * cycle latency on most CPUs!), and this is interrupt entry. + * Replace, once we have a test available to detect bad register + * contents + */ +extern char _isr_entry_err, _isr_entry_noerr; +__asm__(/* Exceptions that push an error code arrive here. */ + ".align 16\n" + "_isr_entry_err:\n" + " xchg %rdx, (%rsp)\n" + " jmp _isr_entry2\n" + + /* IRQs with no error code land here, then fall through */ + ".align 16\n" + "_isr_entry_noerr:\n" + " push %rdx\n" + + /* Arrive here with RDX already pushed to the stack below the + * interrupt frame and (if needed) populated with the error + * code from the exception. It will become the third argument + * to the C handler. Stuff the return address from the call + * in the stub table into RDI (the first argument). + */ + "_isr_entry2:\n" + " xchg %rdi, 8(%rsp)\n" + " push %rax\n" + " push %rcx\n" + " push %rsi\n" + " push %r8\n" + " push %r9\n" + " push %r10\n" + " push %r11\n" + " mov %rsp, %rsi\n" /* stack in second arg */ + " call _isr_c_top\n" + + /* We have pushed only the caller-save registers at this + * point. Check return value to see if we are returning back + * into the same context or if we need to do a full dump and + * restore. + */ + " test %rax, %rax\n" + " jnz _switch_bottom\n" + " pop %r11\n" + " pop %r10\n" + " pop %r9\n" + " pop %r8\n" + " pop %rsi\n" + " pop %rcx\n" + " pop %rax\n" + " pop %rdx\n" + " pop %rdi\n" + " iretq\n"); + +/* Top half of a context switch. Arrive here with the "CPU pushed" + * part of the exception frame (SS, RSP, RFLAGS, CS, RIP) already on + * the stack, the context pointer to which to switch stored in RAX and + * a pointer into which to store the current context in RDX (NOTE: + * this will be a pointer to a 32 bit memory location if we are in x32 + * mode!). It will push the first half of the register set (the same + * caller-save registers pushed by an ISR) and then continue on to + * _switch_bottom to finish up. + */ +__asm__(".align 16\n" + ".global _switch_top\n" + "_switch_top:\n" + " push %rdi\n" + " push %rdx\n" + " push %rax\n" + " push %rcx\n" + " push %rsi\n" + " push %r8\n" + " push %r9\n" + " push %r10\n" + " push %r11\n" + " mov %rsp, %r8\n" + " sub $48, %r8\n" +#ifdef CONFIG_XUK_64_BIT_ABI + " movq %r8, (%rdx)\n" +#else + " movl %r8d, (%rdx)\n" +#endif + /* Fall through... */ + /* Bottom half of a switch, used by both ISR return and + * context switching. Arrive here with the exception frame + * and caller-saved registers already on the stack and the + * stack pointer to use for the restore in RAX. It will push + * the remaining registers and then restore. 
+ */ + ".align 16\n" + "_switch_bottom:\n" + " push %rbx\n" + " push %rbp\n" + " push %r12\n" + " push %r13\n" + " push %r14\n" + " push %r15\n" + " mov %rax, %rsp\n" + " pop %r15\n" + " pop %r14\n" + " pop %r13\n" + " pop %r12\n" + " pop %rbp\n" + " pop %rbx\n" + " pop %r11\n" + " pop %r10\n" + " pop %r9\n" + " pop %r8\n" + " pop %rsi\n" + " pop %rcx\n" + " pop %rax\n" + " pop %rdx\n" + " pop %rdi\n" + " iretq\n"); + +static unsigned int isr_stub_base; + +struct vhandler { + void (*fn)(void*, int); + void *arg; +}; + +static struct vhandler *vector_handlers; + +static void putchar(int c) +{ + serial_putc(c); +#ifdef XUK_DEBUG + vgacon_putc(c); +#endif +} + +long _isr_c_top(unsigned long vecret, unsigned long rsp, + unsigned long err) +{ + /* The vector stubs are 8-byte-aligned, so to get the vector + * index from the return address we just shift off the bottom + * bits + */ + int vector = (vecret - isr_stub_base) >> 3; + struct vhandler *h = &vector_handlers[vector]; + struct xuk_entry_frame *frame = (void *)rsp; + + _isr_entry(); + + /* Set current priority in CR8 to the currently-serviced IRQ + * and re-enable interrupts + */ + unsigned long long cr8, cr8new = vector >> 4; + + __asm__ volatile("movq %%cr8, %0;" + "movq %1, %%cr8;" + "sti" + : "=r"(cr8) : "r"(cr8new)); + + if (h->fn) { + h->fn(h->arg, err); + } else { + _unhandled_vector(vector, err, frame); + } + + /* Mask interrupts to finish processing (they'll get restored + * in the upcoming IRET) and restore CR8 + */ + __asm__ volatile("cli; movq %0, %%cr8" : : "r"(cr8)); + + /* Signal EOI if it's an APIC-managed interrupt */ + if (vector > 0x1f) { + _apic.EOI = 0; + } + + /* Subtle: for the "interrupted context pointer", we pass in + * the value our stack pointer WILL have once we finish + * spilling registers after this function returns. If this + * hook doesn't want to switch, it will return null and never + * save the value of the pointer. + */ + return (long)_isr_exit_restore_stack((void *)(rsp - 48)); +} + +static long choose_isr_entry(int vector) +{ + /* Constructed with 1's in the vector indexes defined to + * generate an error code. Couldn't find a clean way to make + * the compiler generate this code + */ + const int mask = 0x27d00; /* 0b00100111110100000000 */ + + if (vector < 32 && ((1 << vector) & mask)) { + return (long)&_isr_entry_err; + } else { + return (long)&_isr_entry_noerr; + } +} + +void xuk_set_isr(int interrupt, int priority, + void (*handler)(void *, int), void *arg) +{ + int v = interrupt - 0x100; + + /* Need to choose a vector number? Try all vectors at the + * specified priority. Clobber one if we have to. + */ + if (interrupt < 0x100 || interrupt > 0x1ff) { + for (int pi = 0; pi <= 0xf; pi++) { + v = (priority << 4) | pi; + if (!vector_handlers[v].fn) { + break; + } + } + } + + /* Need to set up IO-APIC? Set it up to deliver to all CPUs + * here (another API later will probably allow for IRQ + * affinity). Do a read/write cycle to avoid clobbering + * settings like edge triggering & polarity that might have + * been set up by other platform layers. We only want to muck + * with routing. + */ + if (interrupt < 0x100) { + struct ioapic_red red; + int regidx = 0x10 + 2 * interrupt; + + red.regvals[0] = ioapic_read(regidx); + red.regvals[1] = ioapic_read(regidx + 1); + red.vector = v; + red.logical = 0; + red.destination = 0xff; + red.masked = 1; + ioapic_write(regidx, red.regvals[0]); + ioapic_write(regidx + 1, red.regvals[1]); + } + + /* Is it a special interrupt? 
*/ + if (interrupt == INT_APIC_LVT_TIMER) { + struct apic_lvt lvt = { + .vector = v, + .mode = ONESHOT, + }; + + _apic.LVT_TIMER = lvt; + } + + printf("set_isr v %d\n", v); + + vector_handlers[v].fn = handler; + vector_handlers[v].arg = arg; +} + +/* Note: "raw vector" interrupt numbers cannot be masked, as the APIC + * doesn't have a per-vector mask bit. Only specific LVT interrupts + * (we handle timer below) and IOAPIC-generated interrupts can be + * masked on x86. In practice, this isn't a problem as that API is a + * special-purpose kind of thing. Real devices will always go through + * the supported channel. + */ +void xuk_set_isr_mask(int interrupt, int masked) +{ + if (interrupt == INT_APIC_LVT_TIMER) { + struct apic_lvt lvt = _apic.LVT_TIMER; + + lvt.masked = masked; + _apic.LVT_TIMER = lvt; + } else if (interrupt < 0x100) { + struct ioapic_red red; + int regidx = 0x10 + 2 * interrupt; + + red.regvals[0] = ioapic_read(regidx); + red.regvals[1] = ioapic_read(regidx + 1); + red.masked = masked; + ioapic_write(regidx, red.regvals[0]); + ioapic_write(regidx + 1, red.regvals[1]); + } +} + +/* Note: these base pointers live together in a big block. Eventually + * we will probably want one of them for userspace TLS, which means it + * will need to be retargetted to point somewhere within the + * application memory. But this is fine for now. + */ +static void setup_fg_segs(int cpu) +{ + int fi = 3 + 2 * cpu, gi = 3 + 2 * cpu + 1; + struct gdt64 *fs = &_shared.gdt[fi]; + struct gdt64 *gs = &_shared.gdt[gi]; + + gdt64_set_base(fs, (long)&_shared.fs_ptrs[cpu]); + gdt64_set_base(gs, (long)&_shared.gs_ptrs[cpu]); + + int fsel = GDT_SELECTOR(fi), gsel = GDT_SELECTOR(gi); + + __asm__("mov %0, %%fs; mov %1, %%gs" : : "r"(fsel), "r"(gsel)); +} + +static void init_gdt(void) +{ + printf("Initializing 64 bit IDT\n"); + + /* Need a GDT for ourselves, not whatever the previous layer + * set up. The scheme is that segment zero is the null + * segment (required and enforced architecturally), segment + * one (selector 8) is the code segment, two (16) is a + * data/stack segment (ignored by code at runtime, but + * required to be present in the L/GDT when executing an + * IRET), and remaining segments come in pairs to provide + * FS/GS segment bases for each CPU. + */ + _shared.gdt[0] = (struct gdt64) {}; + _shared.gdt[1] = (struct gdt64) { + .readable = 1, + .codeseg = 1, + .notsystem = 1, + .present = 1, + .long64 = 1, + }; + _shared.gdt[2] = (struct gdt64) { + .readable = 1, + .codeseg = 0, + .notsystem = 1, + .present = 1, + .long64 = 1, + }; + for (int i = 3; i < ARRAY_SIZE(_shared.gdt); i++) { + _shared.gdt[i] = (struct gdt64) { + .readable = 1, + .codeseg = 0, + .notsystem = 1, + .present = 1, + .long64 = 1, + }; + } +} + +static void init_idt(void) +{ + printf("Initializing 64 bit IDT\n"); + + /* Make an IDT in the next unused page and fill in all 256 + * entries + */ + struct idt64 *idt = alloc_page(0); + + _shared.idt_addr = (unsigned int)(long)idt; + for (int i = 0; i < 256; i++) { + idt[i] = (struct idt64) { + .segment = GDT_SELECTOR(1), + .type = 14, /* == 64 bit interrupt gate */ + .present = 1, + }; + } + + /* Hand-encode stubs for each vector that are a simple 5-byte + * CALL instruction to the single handler entry point. That's + * an opcode of 0xe8 followd by a 4-byte offset from the start + * of the next (!) instruction. The call is used to push a + * return address on the stack that points into the stub, + * allowing us to extract the vector index by what stub it + * points into. 
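 * (That recovery is exactly what _isr_c_top() above does:
 *     vector = (return_address - isr_stub_base) >> 3;
 * because each stub occupies one 8-byte slot.)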
+ */ + struct istub { + unsigned char opcode; /* 0xe8 == CALLQ */ + int off; + unsigned char _unused[3]; + } __attribute__((packed)) *stubtab = alloc_page(0); + + isr_stub_base = (long)stubtab; + + /* FIXME: on x32, the entries in this handlers table are half + * the size as a native 64 bit build, and could be packed into + * the same page as the stubs above, saving the page of low + * memory. + */ + vector_handlers = alloc_page(1); + + for (int i = 0; i < 256; i++) { + struct istub *st = &stubtab[i]; + + st->opcode = 0xe8; + st->off = choose_isr_entry(i) - (long)st - 5; + idt64_set_isr(&idt[i], st); + } +} + +static void smp_init(void) +{ + /* Generate a GDT for the 16 bit stub to use when + * transitioning to 32 bit protected mode (so the poor thing + * doesn't have to do it itself). It can live right here on + * our stack. + */ + struct gdt64 gdt16[] = { + {}, + { + .codeseg = 1, + .default_size = 1, + .readable = 1, + .notsystem = 1, + .present = 1, + .limit_lo16 = 0xffff, + .limit_hi4 = 0xf, + .page_granularity = 1, + }, + { + .readable = 1, + .default_size = 1, + .notsystem = 1, + .present = 1, + .limit_lo16 = 0xffff, + .limit_hi4 = 0xf, + .page_granularity = 1, + }, + }; + struct { + short dummy; + short limit; + unsigned int addr; + } gdtp16 = { .limit = sizeof(gdt16), .addr = (long)&gdt16[0] }; + _shared.gdt16_addr = (long)&gdtp16.limit; + + /* FIXME: this is only used at startup, and only for a ~150 + * byte chunk of code. Find a way to return it, or maybe put + * it in the low memory null guard instead? + */ + char *sipi_page = alloc_page(1); + + int s16bytes = &_xuk_stub16_end - &_xuk_stub16_start; + + printf("Copying %d bytes of 16 bit code into page %p\n", + s16bytes, (int)(long)sipi_page); + for (int i = 0; i < s16bytes; i++) { + sipi_page[i] = ((char *)&_xuk_stub16_start)[i]; + } + + /* First send an INIT interrupt to all CPUs. This resets them + * regardless of whatever they were doing and they enter a + * "wait for SIPI" state + */ + printf("Sending INIT IPI\n"); + _apic.ICR_LO = (struct apic_icr_lo) { + .delivery_mode = INIT, + .shorthand = NOTSELF, + }; + while (_apic.ICR_LO.send_pending) { + } + + /* Now send the startup IPI (SIPI) to all CPUs. They will + * begin executing in real mode with IP=0 and CS pointing to + * the page we allocated. 
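+ * The SIPI vector field carries the physical page number of that
+ * code, so a page at 0x6000, for instance, becomes vector 6 and the
+ * APs begin fetching at CS=0x600, IP=0.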
+ */ + _shared.smpinit_lock = 0; + _shared.smpinit_stack = 0; + _shared.num_active_cpus = 1; + + printf("Sending SIPI IPI\n"); + _apic.ICR_LO = (struct apic_icr_lo) { + .delivery_mode = STARTUP, + .shorthand = NOTSELF, + .vector = ((long)sipi_page) >> 12, + }; + while (_apic.ICR_LO.send_pending) { + } + + for (int i = 1; i < CONFIG_MP_NUM_CPUS; i++) { + _shared.smpinit_stack = _init_cpu_stack(i); + printf("Granting stack @ %xh to CPU %d\n", + _shared.smpinit_stack, i); + + while (_shared.num_active_cpus <= i) { + __asm__("pause"); + } + } +} + +void _cstart64(int cpu_id) +{ + if (cpu_id == 0) { + extern char __bss_start, __bss_end; + + __builtin_memset(&__bss_start, 0, &__bss_end - &__bss_start); + } + +#ifdef CONFIG_XUK_DEBUG + _putchar = putchar; +#endif + printf("\n==\nHello from 64 bit C code on CPU%d (stack ~%xh)\n", + cpu_id, (int)(long)&cpu_id); + printf("sizeof(int) = %d, sizeof(long) = %d, sizeof(void*) = %d\n", + sizeof(int), sizeof(long), sizeof(void *)); + + if (cpu_id == 0) { + init_gdt(); + } + + struct { + unsigned short dummy[3]; + unsigned short limit; + unsigned long long addr; + } gdtp = { .limit = sizeof(_shared.gdt), .addr = (long)_shared.gdt }; + + printf("Loading 64 bit GDT\n"); + __asm__ volatile("lgdt %0" : : "m"(gdtp.limit)); + + /* Need to actually set the data & stack segments with those + * indexes. Whatever we have in those hidden registers works + * for data access *now*, but the next interrupt will push + * whatever the selector index was, and we need to know that + * our table contains the same layout! + */ + int selector = GDT_SELECTOR(2); + + __asm__ volatile("mov %0, %%ds; mov %0, %%ss" : : "r"(selector)); + + if (cpu_id == 0) { + init_idt(); + } + + struct { + unsigned short dummy[3]; + unsigned short limit; + unsigned long long addr; + } idtp = { .limit = 4096, .addr = _shared.idt_addr }; + + printf("Loading IDT lim %d addr %xh\n", idtp.limit, idtp.addr); + __asm__ volatile("lidt %0" : : "m"(idtp.limit)); + + /* Classic PC architecture gotcha: disable 8259 PICs before + * they fires a timer interrupt into our exception table. + * Write 1's into the interrupt masks. + */ + if (cpu_id == 0) { + printf("Disabling 8259 PICs\n"); + ioport_out8(0xa1, 0xff); /* slave */ + ioport_out8(0x21, 0xff); /* master */ + } + + /* Enable APIC. Set both the MSR bit and the "software + * enable" bit in the spurious interrupt vector register. + */ + const unsigned int IA32_APIC_BASE = 0x1b; + + printf("Enabling APIC id %xh ver %xh\n", _apic.ID, _apic.VER); + set_msr_bit(IA32_APIC_BASE, 11); + _apic.SPURIOUS |= 1<<8; + _apic.LDR = cpu_id << 24; + _apic.DIVIDE_CONF = APIC_DIVISOR(CONFIG_XUK_APIC_TSC_SHIFT); + + printf("Initializing FS/GS segments for local CPU%d\n", cpu_id); + setup_fg_segs(cpu_id); + + if (cpu_id == 0) { + printf("Brining up auxiliary CPUs...\n"); + smp_init(); + } + + printf("Calling _cpu_start on CPU %d\n", cpu_id); + _cpu_start(cpu_id); +} + +long xuk_setup_stack(long sp, void *fn, unsigned int eflags, + long *args, int nargs) +{ + long long *f = (long long *)(sp & ~7) - 20; + + /* FIXME: this should extend naturally to setting up usermode + * frames too: the frame should have a SS and RSP at the top + * that specifies the user stack into which to return (can be + * this same stack as long as the mapping is correct), and the + * CS should be a separate ring 3 segment. + */ + + f[19] = GDT_SELECTOR(2); + f[18] = sp; + f[17] = eflags; + f[16] = GDT_SELECTOR(1); + f[15] = (long)fn; + f[14] = nargs >= 1 ? args[0] : 0; /* RDI */ + f[13] = nargs >= 3 ? 
args[2] : 0; /* RDX */ + f[12] = 0; /* RAX */ + f[11] = nargs >= 4 ? args[3] : 0; /* RCX */ + f[10] = nargs >= 2 ? args[1] : 0; /* RSI */ + f[9] = nargs >= 5 ? args[4] : 0; /* R8 */ + f[8] = nargs >= 6 ? args[5] : 0; /* R9 */ + + /* R10, R11, RBX, RBP, R12, R13, R14, R15 */ + for (int i = 7; i >= 0; i--) { + f[i] = 0; + } + + return (long)f; +} + +int z_arch_printk_char_out(int c) +{ + putchar(c); + return 0; +} diff --git a/arch/x86_64/core/xuk.h b/arch/x86_64/core/xuk.h new file mode 100644 index 00000000000..9d21b9ce054 --- /dev/null +++ b/arch/x86_64/core/xuk.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _XUK_H +#define _XUK_H + +#include +#include "shared-page.h" + +/* + * APIs exposed by the xuk layer to OS integration: + */ + +/* Set a single CPU-specific pointer which can be retrieved (on that + * CPU!) with get_f_ptr() + */ +static inline void xuk_set_f_ptr(int cpu, void *p) +{ + _shared.fs_ptrs[cpu] = (long)p; +} + +/* Likewise, but "G" */ +static inline void xuk_set_g_ptr(int cpu, void *p) +{ + _shared.gs_ptrs[cpu] = (long)p; +} + +/* Retrieves the pointer set by set_f_ptr() for the current CPU */ +static inline void *xuk_get_f_ptr() +{ + long long ret, off = 0; + + __asm__("movq %%fs:(%1), %0" : "=r"(ret) : "r"(off)); + return (void *)(long)ret; +} + +/* Retrieves the pointer set by set_g_ptr() for the current CPU */ +static inline void *xuk_get_g_ptr() +{ + long long ret, off = 0; + + __asm__("movq %%gs:(%1), %0" : "=r"(ret) : "r"(off)); + return (void *)(long)ret; +} + +/** + * @brief Sets a global handler for the specified interrupt. + * + * Interrupt numbers live in a partitioned space: + * + * + Values from 0 - 0xff are mapped to INTIx interrupts in the global + * index of IO-APIC inputs, which on many systems correspond to + * legacy IRQ0-IRQ15 interrupts at the bottom of the interrupt + * range. These handlers are not passed a meaningful value in their + * first argument, though the function pointer type declares one. + * + * + Values from 0x100 to 0x1ff are mapped to raw vectors 0x00-0xff + * and can be used for handling exceptions, for INT instructions, or + * for MSI- or IPI-directed interrupts that specifiy specific + * vectors. + * + * + Values outside this range may be exposed symbolically for other + * interrupts sources, for example local APIC LVT interrupts. + * + * If there is a pre-existing handler specified for a specified raw + * vector, this function will replace it. + * + * @param interrupt Interrupt number. See above for interpretation. + * @param priority Integer in the range 2-15. Higher-valued interrupts + * can interrupt lower ones. Ignored for raw vector + * numbers, as their priority is encoded in the top + * four bits of the vector number. A priority of zero + * is treated as "don't care" and the interrupt will + * be assigned the lowest available vector. + * @param handler Function pointer to invoke on interrupt receipt. It + * will be passed the specified argument as the first + * argument and the x86 exception error code (if any) + * in the second. + * @param arg Opaque value to pass to the handler when invoked. + * + */ +void xuk_set_isr(int interrupt, int priority, + void (*handler)(void *, int), void *arg); + +#define INT_APIC_LVT_TIMER 0x200 + +#define XUK_INT_RAW_VECTOR(vector) ((vector)+0x100) + +void xuk_set_isr_mask(int interrupt, int masked); + +/* Stack frame on interrupt entry. 
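+ * This is the layout the interrupt entry assembly builds and that
+ * _isr_c_top() sees through its stack pointer argument.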
Obviously they get pushed onto the + * stack in the opposite order than they appear here; the last few + * entries are the hardware frame. Note that not all registers are + * present, the ABI caller-save registers don't get pushed until after + * the handler as an optimization. + */ +struct xuk_entry_frame { + unsigned long long r11; + unsigned long long r10; + unsigned long long r9; + unsigned long long r8; + unsigned long long rsi; + unsigned long long rcx; + unsigned long long rax; + unsigned long long rdx; + unsigned long long rdi; + unsigned long long rip; + unsigned long long cs; + unsigned long long rflags; + unsigned long long rsp; + unsigned long long ss; +}; + +/* Full stack frame, i.e. the one used as the handles in xuk_switch(). + * Once more, the registers declared here are NOT POPULATED during the + * execution of an interrupt service routine. + */ +struct xuk_stack_frame { + unsigned long long r15; + unsigned long long r14; + unsigned long long r13; + unsigned long long r12; + unsigned long long rbp; + unsigned long long rbx; + struct xuk_entry_frame entry; +}; + +/* Sets up a new stack. The sp argument should point to the quadword + * above (!) the allocated stack area (i.e. the frame will be pushed + * below it). The frame will be set up to enter the function in the + * specified code segment with the specified flags register. An array + * of up to 6 function arguments may also be provided. Returns a + * handle suitable for passing to switch() or for returning from + * isr_exit_restore_stack(). + */ +long xuk_setup_stack(long sp, void *fn, unsigned int eflags, + long *args, int nargs); + +/* + * OS-defined utilities required by the xuk layer: + */ + +/* Returns the address of a stack pointer in 32 bit memory to be used + * by AP processor bootstraping and startup. + */ +unsigned int _init_cpu_stack(int cpu); + +/* OS CPU startup entry point, running on the stack returned by + * init_cpu_stack() + */ +void _cpu_start(int cpu); + +/* Called on receipt of an unregistered interrupt/exception. Passes + * the vector number and the CPU error code, if any. + */ +void _unhandled_vector(int vector, int err, struct xuk_entry_frame *f); + +/* Called on ISR entry before nested interrupts are enabled so the OS + * can arrange bookeeping. Really should be exposed as an inline and + * not a function call; cycles on interrupt entry are precious. + */ +void _isr_entry(void); + +/* Called on ISR exit to choose a next thread to run. The argument is + * a context pointer to the thread that was interrupted. + */ +void *_isr_exit_restore_stack(void *interrupted); + +#endif /* _XUK_H */ diff --git a/arch/x86_64/core/xuk64.ld b/arch/x86_64/core/xuk64.ld new file mode 100644 index 00000000000..235b576e648 --- /dev/null +++ b/arch/x86_64/core/xuk64.ld @@ -0,0 +1,15 @@ +SECTIONS { + . = 0x100000; + + .text : { + *(.xuk_stub32) + . 
= ALIGN(16); + *(.xuk_start64*) + *(.text*) + } + .rodata : { *(.rodata*) } + .data : { *(.data*) } + __bss_start = .; + .bss : { *(.bss*) *(COMMON) } + __bss_end = .; +} diff --git a/arch/x86_64/include/kernel_arch_data.h b/arch/x86_64/include/kernel_arch_data.h new file mode 100644 index 00000000000..827bb3cd08d --- /dev/null +++ b/arch/x86_64/include/kernel_arch_data.h @@ -0,0 +1,11 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _KERNEL_ARCH_DATA_H +#define _KERNEL_ARCH_DATA_H + +struct _kernel_arch { }; + +#endif /* _KERNEL_ARCH_DATA_H */ diff --git a/arch/x86_64/include/kernel_arch_func.h b/arch/x86_64/include/kernel_arch_func.h new file mode 100644 index 00000000000..0268da3498b --- /dev/null +++ b/arch/x86_64/include/kernel_arch_func.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _KERNEL_ARCH_FUNC_H +#define _KERNEL_ARCH_FUNC_H + +#include +#include + +static inline void kernel_arch_init(void) +{ + /* This is a noop, we already took care of things before + * _Cstart() is entered + */ +} + +static inline struct _cpu *_arch_curr_cpu(void) +{ + long long ret, off = 0; + + /* The struct _cpu pointer for the current CPU lives at the + * start of the the FS segment + */ + __asm__("movq %%fs:(%1), %0" : "=r"(ret) : "r"(off)); + return (struct _cpu *)(long)ret; +} + +static inline unsigned int _arch_irq_lock(void) +{ + unsigned long long key; + + __asm__ volatile("pushfq; cli; popq %0" : "=r"(key)); + return (int)key; +} + +static inline void _arch_irq_unlock(unsigned int key) +{ + if (key & 0x200) { + __asm__ volatile("sti"); + } +} + +static inline void arch_nop(void) +{ + __asm__ volatile("nop"); +} + +void _arch_irq_disable(unsigned int irq); +void _arch_irq_enable(unsigned int irq); + +/* Not a standard Zephyr function, but probably will be */ +static inline unsigned long long _arch_k_cycle_get_64(void) +{ + unsigned int hi, lo; + + __asm__ volatile("rdtsc" : "=d"(hi), "=a"(lo)); + return (((unsigned long long)hi) << 32) | lo; +} + +static inline unsigned int _arch_k_cycle_get_32(void) +{ +#ifdef CONFIG_HPET_TIMER + extern u32_t _timer_cycle_get_32(void); + return _timer_cycle_get_32(); +#else + return (u32_t)_arch_k_cycle_get_64(); +#endif +} + +#define _is_in_isr() (_arch_curr_cpu()->nested != 0) + +static inline void _arch_switch(void *switch_to, void **switched_from) +{ + xuk_switch(switch_to, switched_from); +} + +static inline u32_t x86_apic_scaled_tsc(void) +{ + u32_t lo, hi; + u64_t tsc; + + __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi)); + tsc = (((u64_t)hi) << 32) | lo; + return (u32_t)(tsc >> CONFIG_XUK_APIC_TSC_SHIFT); +} + +void x86_apic_set_timeout(u32_t cyc_from_now); + +#define _ARCH_IRQ_CONNECT(irq, pri, isr, arg, flags) \ + _arch_irq_connect_dynamic(irq, pri, isr, arg, flags) + +extern int x86_64_except_reason; + + +/* Vector 5 is the "bounds" exception which is otherwise vestigial + * (BOUND is an illegal instruction in long mode) + */ +#define _ARCH_EXCEPT(reason) do { \ + x86_64_except_reason = reason; \ + __asm__ volatile("int $5"); \ + } while (false) + +#endif /* _KERNEL_ARCH_FUNC_H */ diff --git a/arch/x86_64/include/kernel_arch_thread.h b/arch/x86_64/include/kernel_arch_thread.h new file mode 100644 index 00000000000..78fb5ee900f --- /dev/null +++ b/arch/x86_64/include/kernel_arch_thread.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef 
_KERNEL_ARCH_THREAD_H +#define _KERNEL_ARCH_THREAD_H + +/* Vestigial boilerplate. This must exist to it can be included in + * kernel.h to define these structs to provide types for fields in the + * Zephyr thread struct. But we don't need that for this arch. + */ + +struct _caller_saved { }; +struct _callee_saved { }; +struct _thread_arch { }; + +#endif /* _KERNEL_ARCH_THREAD_H */ diff --git a/arch/x86_64/include/offsets_short_arch.h b/arch/x86_64/include/offsets_short_arch.h new file mode 100644 index 00000000000..d5921dc50c1 --- /dev/null +++ b/arch/x86_64/include/offsets_short_arch.h @@ -0,0 +1,5 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ diff --git a/arch/x86_64/include/xuk-switch.h b/arch/x86_64/include/xuk-switch.h new file mode 100644 index 00000000000..8c3fe9d1ca1 --- /dev/null +++ b/arch/x86_64/include/xuk-switch.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _XUK_SWITCH_H +#define _XUK_SWITCH_H + +/* This lives separate from the rest of the xuk API, as it has + * to be inlined into Zephyr code. + */ + +static inline void xuk_switch(void *switch_to, void **switched_from) +{ + /* Constructs an IRETQ interrupt frame, the final CALL pushes + * the RIP to which to return + */ + __asm__ volatile("mov %%rsp, %%rcx;" + "pushq $0x10;" /* SS */ + "pushq %%rcx;" /* RSP */ + "pushfq;" /* RFLAGS */ + "pushq $0x08;" /* CS */ + "callq _switch_top" + : : "a"(switch_to), "d"(switched_from) + : "ecx", "memory"); +} + +#endif /* _XUK_SWITCH_H */ diff --git a/boards/x86_64/qemu_x86_64/Kconfig.board b/boards/x86_64/qemu_x86_64/Kconfig.board new file mode 100644 index 00000000000..fa74dacff9a --- /dev/null +++ b/boards/x86_64/qemu_x86_64/Kconfig.board @@ -0,0 +1,4 @@ +config BOARD_QEMU_X86_64 + bool "QEMU x86_64" + depends on SOC_X86_64 + select QEMU_TARGET diff --git a/boards/x86_64/qemu_x86_64/Kconfig.defconfig b/boards/x86_64/qemu_x86_64/Kconfig.defconfig new file mode 100644 index 00000000000..172733baf74 --- /dev/null +++ b/boards/x86_64/qemu_x86_64/Kconfig.defconfig @@ -0,0 +1,9 @@ +if BOARD_QEMU_X86_64 + +config BUILD_OUTPUT_BIN + default n + +config BOARD + default "qemu_x86_64" + +endif # BOARD_QEMU_X86_64 diff --git a/boards/x86_64/qemu_x86_64/board.cmake b/boards/x86_64/qemu_x86_64/board.cmake new file mode 100644 index 00000000000..a33665082e1 --- /dev/null +++ b/boards/x86_64/qemu_x86_64/board.cmake @@ -0,0 +1,2 @@ +set(EMU_PLATFORM qemu) +set(QEMU_FLAGS_${ARCH} -nographic) diff --git a/boards/x86_64/qemu_x86_64/board.h b/boards/x86_64/qemu_x86_64/board.h new file mode 100644 index 00000000000..d5921dc50c1 --- /dev/null +++ b/boards/x86_64/qemu_x86_64/board.h @@ -0,0 +1,5 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ diff --git a/boards/x86_64/qemu_x86_64/qemu_x86_64.yaml b/boards/x86_64/qemu_x86_64/qemu_x86_64.yaml new file mode 100644 index 00000000000..07cc94d47e6 --- /dev/null +++ b/boards/x86_64/qemu_x86_64/qemu_x86_64.yaml @@ -0,0 +1,9 @@ +identifier: qemu_x86_64 +name: QEMU Emulation for X86_64 +type: qemu +simulation: qemu +arch: x86_64 +toolchain: + - zephyr +testing: + default: true diff --git a/boards/x86_64/qemu_x86_64/qemu_x86_64_defconfig b/boards/x86_64/qemu_x86_64/qemu_x86_64_defconfig new file mode 100644 index 00000000000..5b7b548e22a --- /dev/null +++ b/boards/x86_64/qemu_x86_64/qemu_x86_64_defconfig @@ -0,0 +1,12 @@ +CONFIG_X86_64=y +CONFIG_SOC_X86_64=y +CONFIG_BOARD_QEMU_X86_64=y 
+CONFIG_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_TEST_RANDOM_GENERATOR=y +CONFIG_XIP=y +CONFIG_HPET_TIMER=y +CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC=1000000000 +CONFIG_MAIN_STACK_SIZE=1024 +CONFIG_IDLE_STACK_SIZE=1024 +CONFIG_TEST_EXTRA_STACKSIZE=2048 diff --git a/cmake/emu/qemu.cmake b/cmake/emu/qemu.cmake index b6ebad3ede3..7eabe9f61a2 100644 --- a/cmake/emu/qemu.cmake +++ b/cmake/emu/qemu.cmake @@ -219,6 +219,10 @@ if(CONFIG_X86_IAMCU) ) endif() +if(CONFIG_X86_64) + set(QEMU_KERNEL_FILE "${CMAKE_BINARY_DIR}/zephyr-qemu.elf") +endif() + if(NOT QEMU_PIPE) set(QEMU_PIPE_COMMENT "\nTo exit from QEMU enter: 'CTRL+a, x'\n") endif() @@ -230,9 +234,15 @@ list(APPEND QEMU_EXTRA_FLAGS ${env_qemu}) list(APPEND MORE_FLAGS_FOR_debugserver -s -S) -set_ifndef(QEMU_KERNEL_OPTION - "-kernel;$" - ) +# Architectures can define QEMU_KERNEL_FILE to use a specific output +# file to pass to qemu (and a "qemu_kernel_target" target to generate +# it), or set QEMU_KERNEL_OPTION if they want to replace the "-kernel +# ..." option entirely. +if(DEFINED QEMU_KERNEL_FILE) + set(QEMU_KERNEL_OPTION "-kernel;${QEMU_KERNEL_FILE}") +elseif(NOT DEFINED QEMU_KERNEL_OPTION) + set(QEMU_KERNEL_OPTION "-kernel;$") +endif() foreach(target ${qemu_targets}) add_custom_target(${target} @@ -250,4 +260,7 @@ foreach(target ${qemu_targets}) COMMENT "${QEMU_PIPE_COMMENT}[QEMU] CPU: ${QEMU_CPU_TYPE_${ARCH}}" USES_TERMINAL ) + if(DEFINED QEMU_KERNEL_FILE) + add_dependencies(${target} qemu_kernel_target) + endif() endforeach() diff --git a/drivers/timer/Kconfig b/drivers/timer/Kconfig index 7a01097cc24..a29cc50bd7e 100644 --- a/drivers/timer/Kconfig +++ b/drivers/timer/Kconfig @@ -12,9 +12,9 @@ menu "Timer Drivers" config HPET_TIMER bool "HPET timer" - depends on X86 - select IOAPIC - select LOAPIC + depends on (X86 || X86_64) + select IOAPIC if X86 + select LOAPIC if X86 select TIMER_READS_ITS_FREQUENCY_AT_RUNTIME select TICKLESS_CAPABLE help diff --git a/include/arch/cpu.h b/include/arch/cpu.h index 5c12d9c1d34..7b6320ac047 100644 --- a/include/arch/cpu.h +++ b/include/arch/cpu.h @@ -11,6 +11,8 @@ #if defined(CONFIG_X86) #include +#elif defined(CONFIG_X86_64) +#include #elif defined(CONFIG_ARM) #include #elif defined(CONFIG_ARC) diff --git a/include/arch/x86_64/arch.h b/include/arch/x86_64/arch.h new file mode 100644 index 00000000000..f3b52ae291c --- /dev/null +++ b/include/arch/x86_64/arch.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef _X86_64_ARCH_H +#define _X86_64_ARCH_H + +#include +#include + +#define STACK_ALIGN 8 + +typedef struct NANO_ESF NANO_ESF; +extern const NANO_ESF _default_esf; +void _SysFatalErrorHandler(unsigned int reason, const NANO_ESF *esf); +void _NanoFatalErrorHandler(unsigned int reason, const NANO_ESF *esf); + +/* Existing code requires only these particular symbols be defined, + * but doesn't put them in a global header. Needs cleaner + * cross-architecture standardization. Implement only the minimal set + * here. + */ +#define _NANO_ERR_STACK_CHK_FAIL 1 +#define _NANO_ERR_KERNEL_OOPS 2 +#define _NANO_ERR_KERNEL_PANIC 3 + +#endif /* _X86_64_ARCH_H */ diff --git a/include/device.h b/include/device.h index fac01a2354b..0032fb13126 100644 --- a/include/device.h +++ b/include/device.h @@ -237,6 +237,14 @@ struct device { struct device_config *config; const void *driver_api; void *driver_data; +#if defined(__x86_64) && __SIZEOF_POINTER__ == 4 + /* The x32 ABI hits an edge case. 
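+ * Under x32 the three pointer members above are only 4 bytes each.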
This is a 12 byte struct, + * but the x86_64 linker will pack them only in units of 8 + * bytes, leading to alignment problems when iterating over + * the link-time array. + */ + void *padding; +#endif }; void _sys_device_do_config_level(s32_t level); diff --git a/include/toolchain/common.h b/include/toolchain/common.h index 5d0d4146a61..49b80b43d7c 100644 --- a/include/toolchain/common.h +++ b/include/toolchain/common.h @@ -54,7 +54,7 @@ #ifdef _ASMLANGUAGE - #ifdef CONFIG_X86 + #if defined(CONFIG_X86) || defined(CONFIG_X86_64) #ifdef PERF_OPT #define PERFOPT_ALIGN .balign 16 diff --git a/include/toolchain/gcc.h b/include/toolchain/gcc.h index f800332e4e4..5eb27745480 100644 --- a/include/toolchain/gcc.h +++ b/include/toolchain/gcc.h @@ -347,6 +347,13 @@ A##a: ",%c0" \ "\n\t.type\t" #name ",@object" : : "n"(value)) +#elif defined(CONFIG_X86_64) + +#define GEN_ABSOLUTE_SYM(name, value) \ + __asm__(".globl\t" #name "\n\t.equ\t" #name \ + ",%0" \ + "\n\t.type\t" #name ",@object" : : "n"(value)) + #elif defined(CONFIG_NIOS2) || defined(CONFIG_RISCV32) || defined(CONFIG_XTENSA) /* No special prefixes necessary for constants in this arch AFAICT */ diff --git a/lib/libc/minimal/include/sys/types.h b/lib/libc/minimal/include/sys/types.h index 846dd313577..55a62116e21 100644 --- a/lib/libc/minimal/include/sys/types.h +++ b/lib/libc/minimal/include/sys/types.h @@ -20,8 +20,8 @@ typedef __SIZE_TYPE__ ssize_t; #if !defined(__off_t_defined) #define __off_t_defined -#ifdef __i386 -typedef long int off_t; +#if defined(__i386) || defined(__x86_64) +typedef long int off_t; /* "long" works for all of i386, X32 and true 64 bit */ #elif defined(__ARM_ARCH) typedef int off_t; #elif defined(__arc__) diff --git a/lib/libc/minimal/source/stdout/prf.c b/lib/libc/minimal/source/stdout/prf.c index bf7410d87f7..157634b315b 100644 --- a/lib/libc/minimal/source/stdout/prf.c +++ b/lib/libc/minimal/source/stdout/prf.c @@ -595,6 +595,14 @@ int _prf(int (*func)(), void *dest, char *format, va_list vargs) case 'G': /* standard platforms which supports double */ { +#ifdef CONFIG_X86_64 + /* Can't use a double here because + * we're operating in -mno-sse and + * va_arg() will expect this to be a + * register argument. 
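+ * _to_float() wants the raw bit
+ * pattern of the double (the
+ * generic path below builds it
+ * with a union), so the argument
+ * is pulled out as a plain 64-bit
+ * integer instead.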
+ */ + double_temp = va_arg(vargs, uint64_t); +#else union { double d; uint64_t i; @@ -602,6 +610,7 @@ int _prf(int (*func)(), void *dest, char *format, va_list vargs) u.d = (double) va_arg(vargs, double); double_temp = u.i; +#endif } c = _to_float(buf, double_temp, c, falt, fplus, diff --git a/samples/application_development/external_lib/sample.yaml b/samples/application_development/external_lib/sample.yaml index 57033d94c3a..21d6a854b6b 100644 --- a/samples/application_development/external_lib/sample.yaml +++ b/samples/application_development/external_lib/sample.yaml @@ -3,6 +3,7 @@ sample: tests: test: tags: external + platform_exclude: qemu_x86_64 harness: console harness_config: type: multi_line diff --git a/samples/drivers/crypto/sample.yaml b/samples/drivers/crypto/sample.yaml index 93905f53f94..25b7c194728 100644 --- a/samples/drivers/crypto/sample.yaml +++ b/samples/drivers/crypto/sample.yaml @@ -6,7 +6,7 @@ common: tags: crypto harness: console min_ram: 20 - arch_exclude: xtensa + arch_exclude: xtensa x86_64 tests: test-mbedtls: min_flash: 34 diff --git a/samples/sensor/thermometer/sample.yaml b/samples/sensor/thermometer/sample.yaml index a21d6d9a30b..c509fe8f13c 100644 --- a/samples/sensor/thermometer/sample.yaml +++ b/samples/sensor/thermometer/sample.yaml @@ -4,3 +4,4 @@ tests: test: tags: sensors harness: sensor + arch_exclude: x86_64 # No floating point on x86_64 yet \ No newline at end of file diff --git a/samples/subsys/logging/logger/sample.yaml b/samples/subsys/logging/logger/sample.yaml index 96952fd816c..27b0154866f 100644 --- a/samples/subsys/logging/logger/sample.yaml +++ b/samples/subsys/logging/logger/sample.yaml @@ -5,7 +5,7 @@ sample: tests: samples.logger: tags: logging - platform_exclude: qemu_xtensa + platform_exclude: qemu_xtensa qemu_x86_64 harness: console harness_config: type: one_line diff --git a/samples/synchronization/sample.yaml b/samples/synchronization/sample.yaml index bc83331079d..b2415e7f5bf 100644 --- a/samples/synchronization/sample.yaml +++ b/samples/synchronization/sample.yaml @@ -16,7 +16,7 @@ tests: tags: kernel synchronization extra_configs: - CONFIG_OPENOCD_SUPPORT=y - arch_exclude: posix xtensa + arch_exclude: posix xtensa x86_64 harness: console harness_config: type: multi_line diff --git a/soc/x86_64/x86_64/CMakeLists.txt b/soc/x86_64/x86_64/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/soc/x86_64/x86_64/Kconfig.defconfig b/soc/x86_64/x86_64/Kconfig.defconfig new file mode 100644 index 00000000000..5a9693d0faa --- /dev/null +++ b/soc/x86_64/x86_64/Kconfig.defconfig @@ -0,0 +1,9 @@ +if SOC_X86_64 + +config SOC + default "x86_64" + +config USE_SWITCH + default y + +endif diff --git a/soc/x86_64/x86_64/Kconfig.soc b/soc/x86_64/x86_64/Kconfig.soc new file mode 100644 index 00000000000..3c52450b2cb --- /dev/null +++ b/soc/x86_64/x86_64/Kconfig.soc @@ -0,0 +1,2 @@ +config SOC_X86_64 + bool "Generic x86_64 PC" diff --git a/soc/x86_64/x86_64/linker.ld b/soc/x86_64/x86_64/linker.ld new file mode 100644 index 00000000000..871c5dc44d7 --- /dev/null +++ b/soc/x86_64/x86_64/linker.ld @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include /* Seems not to get picked up automatically? */ + +#define _LINKER +#define _ASMLANGUAGE +#include + +/* The common-ram.ld definitions are written to a sort of oddball + * preprocessor API which we reimplement here. 
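+ * The stubs below collapse those macros into plain output-section
+ * statements that all land in the single :ram PT_LOAD segment.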
The default implementation + * is incompatible with the simpler way we handle address assignment + * and ELF segment definitions + */ +#define SORT_BY_NAME(x) SORT(x) +#define OPTIONAL +#define SECTION_DATA_PROLOGUE(name, opts, align) name opts : align +#define SECTION_PROLOGUE(name, opts, align) name opts : align +#define GROUP_DATA_LINK_IN(v, l) :ram +#define GROUP_LINK_IN(a) :ram + +PHDRS { + ram PT_LOAD; +} + +_start = 0x100000; +ENTRY(_start); + +SECTIONS { + . = 0x100000; + + text : { + KEEP(*(.xuk_stub32*)) + . = ALIGN(16); + KEEP(*(.xuk_start64*)) + + *(.text_start*) + *(.text*) + *(.gnu.linkonce.t.*) + *(.eh_frame) + *(.init) + *(.fini) + *(.eini) + KEEP(*(.openocd_dbg*)) + } :ram + +#include + + rodata : { + *(.rodata*) + *(.gnu.linkonce.r.*) + } :ram + + datas : { + *(.data*) + } :ram + +#include + + __bss_start = .; + bss (NOLOAD) : { + *(COMMON) + *(.bss*) + } :ram + __bss_end = .; + + noinit (NOLOAD) : { + *(.noinit*) + } :ram + + + /* We enable orphan section warnings, so these standard sections all + * have to be called out explicitly + */ + .comment 0 : { *(.comment) } + .debug_frame 0 : { *(.debug_frame) } + .debug_info 0 : { *(.debug_info) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_aranges 0 : { *(.debug_aranges) } + .debug_ranges 0 : { *(.debug_ranges) } + .debug_line 0 : { *(.debug_line) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .note.GNU-stack 0 : { *(.note.GNU-stack) } + .picjunk 0 : { *(.got*) *(.*plt*) *(.rela.*) } +} diff --git a/soc/x86_64/x86_64/soc.h b/soc/x86_64/x86_64/soc.h new file mode 100644 index 00000000000..94a09d408a6 --- /dev/null +++ b/soc/x86_64/x86_64/soc.h @@ -0,0 +1,8 @@ +/* + * Copyright (c) 2018 Intel Corporation + * + * SPDX-License-Identifier: Apache-2.0 + */ +/* Empty file. There is no standard API to be defined here, yet some + * test code includes it. + */ diff --git a/tests/benchmarks/sys_kernel/testcase.yaml b/tests/benchmarks/sys_kernel/testcase.yaml index 37072d36107..36d7869216f 100644 --- a/tests/benchmarks/sys_kernel/testcase.yaml +++ b/tests/benchmarks/sys_kernel/testcase.yaml @@ -1,5 +1,5 @@ tests: benchmark.kernel: - arch_exclude: nios2 riscv32 xtensa + arch_exclude: nios2 riscv32 xtensa x86_64 min_ram: 32 tags: benchmark diff --git a/tests/include/test_asm_inline_gcc.h b/tests/include/test_asm_inline_gcc.h index 340c3fa14eb..521b58e4a67 100644 --- a/tests/include/test_asm_inline_gcc.h +++ b/tests/include/test_asm_inline_gcc.h @@ -23,6 +23,12 @@ static inline void timestamp_serialize(void) : : "%eax", "%ebx", "%ecx", "%edx"); } +#elif defined(CONFIG_X86_64) +static inline void timestamp_serialize(void) +{ + __asm__ volatile("xorq %%rax,%%rax; cpuid" + ::: "rax", "rdx", "rbx", "rcx"); +} #elif defined(CONFIG_CPU_CORTEX_M) #include static inline void timestamp_serialize(void) diff --git a/tests/kernel/fatal/src/main.c b/tests/kernel/fatal/src/main.c index e9aeaa00f2b..ffef259bab5 100644 --- a/tests/kernel/fatal/src/main.c +++ b/tests/kernel/fatal/src/main.c @@ -46,14 +46,15 @@ static volatile int crash_reason; * completing the exception path; the faulting thread is never run * again. * - * On Xtensa/asm2 the handler is running in interrupt context and on - * the interrupt stack and needs to return through the interrupt exit - * code. + * On Xtensa/asm2 and x86_64 the handler is running in interrupt + * context and on the interrupt stack and needs to return through the + * interrupt exit code. 
* * In both cases the thread is guaranteed never to run again once we * return from the _SysFatalErrorHandler(). */ -#if !(defined(CONFIG_ARM) || defined(CONFIG_XTENSA_ASM2) || defined(CONFIG_ARC)) +#if !(defined(CONFIG_ARM) || defined(CONFIG_XTENSA_ASM2) \ + || defined(CONFIG_ARC) || defined(CONFIG_X86_64)) #define ERR_IS_NORETURN 1 #endif @@ -73,7 +74,7 @@ void _SysFatalErrorHandler(unsigned int reason, const NANO_ESF *pEsf) void alt_thread1(void) { -#if defined(CONFIG_X86) +#if defined(CONFIG_X86) || defined(CONFIG_X86_64) __asm__ volatile ("ud2"); #elif defined(CONFIG_NIOS2) __asm__ volatile ("trap");