arch/x86_64: New architecture added

This patch adds an x86_64 architecture and qemu_x86_64 board to Zephyr.
Only the basic architecture support needed to run 64 bit code is
added; no drivers are added, though a low-level console exists and is
wired to printk().

The support is built on top of an "x86 underkernel" (xuk) layer, which
can be built in isolation as a unit test on a Linux host.

Limitations:

+ Right now the SDK lacks an x86_64 toolchain.  The build falls back
  to the host toolchain if no cross compiler is defined; this is
  currently tested against gcc 8.2.1.

+ No x87/SSE/AVX usage is allowed.  This is a stronger limitation than
  on other architectures, where the instructions work from a single
  thread even if the context switch code doesn't preserve the FPU
  state.  We pass -mno-sse to prevent gcc from automatically
  generating SSE instructions for non-floating-point purposes, which
  has the side effect of changing the ABI.  Future work to handle the
  FPU registers will need to be combined with an "application" ABI
  distinct from the kernel one (or simply require USERSPACE).

+ Paging is enabled (it has to be in long mode), but is a 1:1 mapping
  of all memory.  No MMU/USERSPACE support yet.

+ We are building with -mno-red-zone for stack size reasons, but the
  red zone is a valuable optimization.  Enabling it requires automatic
  stack switching, which requires a TSS, which means it has to wait
  for MMU support.

+ The OS runs in 64 bit mode, but for compatibility reasons is
  compiled to the 32 bit "X32" ABI.  So while the full 64 bit
  registers and instruction set are available, C pointers are 32 bits
  long and Zephyr is constrained to run in the bottom 4G of memory.
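
For illustration only (not part of this patch): the practical effect of
the X32 ABI is that pointers and longs are 32 bits wide even though the
code runs in 64 bit long mode with the full register set.  A standalone
check like the following, built with -mx32 on a host gcc that supports
it (hypothetical example), shows the type sizes Zephyr sees:

#include <stdio.h>

/* Hypothetical host-side sanity check of the -mx32 ABI; build with
 * "gcc -mx32 check.c" on a toolchain that supports x32.
 */
int main(void)
{
	/* Expected under x32: int 4, long 4, void* 4, long long 8 */
	printf("int %u long %u ptr %u llong %u\n",
	       (unsigned)sizeof(int), (unsigned)sizeof(long),
	       (unsigned)sizeof(void *), (unsigned)sizeof(long long));
	return 0;
}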

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
Andy Ross authored 2018-08-19 12:24:48 -07:00, committed by Anas Nashif
commit b69d0da82d
55 changed files with 2890 additions and 18 deletions

@@ -0,0 +1,12 @@
set(X86_64_BASE_CFLAGS
-ffreestanding
-fno-pic
-fno-asynchronous-unwind-tables
-mno-sse
-mno-red-zone)
add_subdirectory(core)
zephyr_compile_options(${X86_64_BASE_CFLAGS} -mx32)
zephyr_link_libraries(-mx32)

arch/x86_64/Kconfig

@@ -0,0 +1,31 @@
config ARCH
default "x86_64"
config XUK_DEBUG
bool "Debug logging at lowest level"
default n
help
When true, enables debug logging from the XUK layer in very
early boot situations (including the 16 and 32 bit stub
code) on the first serial port (115200 8n1) and VGA text
console. Also wires that output stream to the printk()
function so it can be used before any console drivers are
initialized.
config XUK_APIC_TSC_SHIFT
int "Power-of-two divisor between TSC and APIC timer"
default 6
help
Configures the precision of the APIC timer as a bit shift of
the TSC frequency. High values "slow down" the tick rate of
the APIC timer and allow for longer timeouts at the expense
of precision.
config IRQ_OFFLOAD_VECTOR
int "Interrupt vector for irq_offload"
default 255
help
This is the interrupt vector to use for the self-directed
IPIs used to implement irq_offload(). Most apps will never
change this. It's configurable in case someone wants to
play with its priority.
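
As a rough illustration of the XUK_APIC_TSC_SHIFT tradeoff described in
the help text above (a sketch, not part of the patch; the 2.4 GHz TSC
rate below is an assumption): the APIC timer ticks at the TSC rate
divided by two to the power of the shift, so larger values trade
resolution for a longer maximum timeout.

/* Illustrative arithmetic only; tsc_hz is an assumed figure. */
#include <stdio.h>

int main(void)
{
	unsigned long long tsc_hz = 2400000000ULL;    /* assumed 2.4 GHz TSC */
	unsigned int shift = 6;                       /* XUK_APIC_TSC_SHIFT */
	unsigned long long apic_hz = tsc_hz >> shift; /* 37.5 MHz here */

	/* Longest one-shot timeout with the 32 bit APIC initial count */
	double max_s = (double)0xffffffffULL / (double)apic_hz;

	printf("APIC timer %llu Hz, max timeout ~%.0f s\n", apic_hz, max_s);
	return 0;
}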

@@ -0,0 +1,71 @@
zephyr_library()
zephyr_library_sources(
x86_64.c
xuk.c
xuk-stubs-copy.c # <-- generated, see below
)
set(incdir ${PROJECT_BINARY_DIR}/include/generated)
# We want to include two non-x86_64 stubs as sections/symbols in our
# link (one with 16 bit code for SMP real mode bootstrapping, the other a 32
# bit hook for OS protected mode entry). This is tedious to do with
# the linker directly, so the mechanism picked here is to have a C
# file (which really is all assembly) import them with ".incbin"
# statements. But I can't figure out how to add a dependency to a C
# file directly, so we copy the file so it can live as a separate
# dependency node we control.
#
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xuk-stubs-copy.c
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_SOURCE_DIR}/xuk-stubs.c
${CMAKE_CURRENT_BINARY_DIR}/xuk-stubs-copy.c
DEPENDS ${incdir}/xuk-stub16.bin
${incdir}/xuk-stub32.bin
)
add_custom_command(
OUTPUT ${incdir}/xuk-stub16.bin
COMMAND ${CMAKE_C_COMPILER} -m16 -Os ${X86_64_BASE_CFLAGS} -imacros ${AUTOCONF_H}
-c ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub16.c
-o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub16.o
COMMAND ${CMAKE_OBJCOPY} -O binary -j .text
${CMAKE_CURRENT_BINARY_DIR}/xuk-stub16.o
${incdir}/xuk-stub16.bin
)
add_custom_command(
OUTPUT ${incdir}/xuk-stub32.bin
COMMAND ${CMAKE_C_COMPILER} -m32 -Os ${X86_64_BASE_CFLAGS} -imacros ${AUTOCONF_H}
-c ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub32.c
-o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.o
COMMAND ${CMAKE_C_COMPILER} -m32 ${X86_64_BASE_CFLAGS}
-Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles
-T ${CMAKE_CURRENT_SOURCE_DIR}/xuk-stub32.ld
${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.o
-o ${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.elf
COMMAND ${CMAKE_OBJCOPY} -O binary
${CMAKE_CURRENT_BINARY_DIR}/xuk-stub32.elf
${incdir}/xuk-stub32.bin
)
# The zephyr.elf file generated for an x86_64 binary is a 64 bit
# binary, but Qemu requires a traditional i386 file (because the entry
# point from multiboot is in 386 protected mode). Do a relink dance
# with objcopy to convert. Note use of the same .incbin trick with
# copy, per above.
#
set(qkernel_file ${CMAKE_BINARY_DIR}/zephyr-qemu.elf)
add_custom_target(qemu_kernel_target DEPENDS ${qkernel_file})
add_custom_command(
OUTPUT ${qkernel_file}
DEPENDS zephyr_prebuilt
COMMAND ${CMAKE_OBJCOPY} -O binary ${CMAKE_BINARY_DIR}/zephyr/zephyr.elf ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.bin
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/qemuinc.c ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_C_COMPILER} -m32 -c ${CMAKE_CURRENT_BINARY_DIR}/qemuinc.c -o ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.o
COMMAND ${CMAKE_C_COMPILER} -m32 -T ${CMAKE_CURRENT_SOURCE_DIR}/xuk64.ld
-Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles
-o ${qkernel_file} ${CMAKE_CURRENT_BINARY_DIR}/zephyr-qemu.o
)

@@ -0,0 +1,60 @@
# Any linux host toolchain should work as a default
CC ?= gcc
OBJCOPY ?= objcopy
QEMU ?= qemu-system-x86_64
# No unwind tables is just to save size. No SSE is allowed because GCC
# uses it for miscellaneous optimizations that aren't related to
# floating point, and we don't want to take the traps except on
# threads that definitely need it. No red zone because it's
# incompatible with traditional stack-based interrupt entry.
CFLAGS = -Os -I../include -std=c11 -ffreestanding -fno-pic -fno-asynchronous-unwind-tables -mno-sse -mno-red-zone
LDFLAGS = -Wl,--build-id=none -nostdlib -nodefaultlibs -nostartfiles
# This works great. But note that distros ship no libgcc for the
# target, so once we start to need stuff from that we'll need to move
# to a custom cross compiler.
ARCHFLAGS = -mx32
# The default build target just links the stub files. Broader OS
# builds just care about these files. The xuk.elf target is a
# demonstration kernel.
stubs: xuk-stub32.bin xuk-stub16.bin
# First link the initial 32 bit stub, which goes at the front of our
# image.
xuk-stub32.bin: xuk-stub32.c *.h xuk-stub32.ld
$(CC) -Wall -m32 $(CFLAGS) -c xuk-stub32.c
$(CC) -m32 -T xuk-stub32.ld $(LDFLAGS) -o stub32.elf $(CFLAGS) xuk-stub32.o
$(OBJCOPY) -O binary stub32.elf $@
# This is the main OS image, starting with the 32 bit stub and
# containing all the 64 bit code.
xuk.elf64: xuk-stub32.bin xuk-stub16.bin xuk.c xuk-stubs.c demo-kernel.c *.h xuk64.ld
$(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c xuk.c
$(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c xuk-stubs.c
$(CC) $(ARCHFLAGS) -Wall $(CFLAGS) -c demo-kernel.c
$(CC) $(ARCHFLAGS) -T xuk64.ld $(LDFLAGS) -o $@ $(CFLAGS) xuk.o xuk-stubs.o demo-kernel.o
# Final step. We now have an x86_64 ELF binary, which is not a valid
# multiboot image because the multiboot entry point runs in 32 bit
# protected mode. It needs to be an i386 image, so copy out the
# segment and relink the blob one last time.
xuk.elf: xuk.elf64 xuk64.ld
$(OBJCOPY) -O binary $< xuk.bin
echo '.incbin "xuk.bin"' | as --32 -c - -o xuk32.o
$(CC) -m32 -T xuk64.ld $(LDFLAGS) -o $@ $(CFLAGS) xuk32.o
# We can rely on the bootloader to hand over a machine running in 386
# protected mode, but SMP cores start in real mode and need a tiny
# bootstrap layer of 16 bit code.
xuk-stub16.bin: xuk-stub16.c
$(CC) -m16 $(CFLAGS) -c $<
$(OBJCOPY) -O binary -j .text xuk-stub16.o $@
run: xuk.elf
$(QEMU) -serial mon:stdio -smp cpus=2 -icount shift=1 -no-reboot -no-shutdown -d unimp,pcall,guest_errors -kernel $<
clean:
rm -f *.elf *.elf64 *.o *~ *.bin *.disasm

@@ -0,0 +1,202 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "serial.h"
#include "vgacon.h"
#include "printf.h"
#include "xuk.h"
/* Tiny demonstration of the core64 code. Implements enough of an
* "OS" layer to do some simple unit testing.
*/
static void putchar(int c)
{
serial_putc(c);
vgacon_putc(c);
}
void test_timers(void)
{
/* Quickly calibrate the timers against each other. Note that
* the APIC is counting DOWN instead of up! Seems like on
* qemu, the APIC base frequency is 3.7x slower than the tsc.
* Looking at source, it seems like APIC is uniformly shifted
* down from a nominal 1Ghz reference
* (i.e. qemu_get_time_ns()), where the TSC is based on
* cpu_get_ticks() and thus pulls in wall clock time & such.
* If you specify "-icount shift=1", then they synchronize
* properly.
*/
int tsc0, apic0, tsc1, apic1;
__asm__ volatile("rdtsc" : "=a"(tsc0) : : "rdx");
apic0 = _apic.CURR_COUNT;
do {
/* Qemu misbehaves if I spam these registers. */
for (int i = 0; i < 1000; i++) {
__asm__ volatile("nop");
}
__asm__ volatile("rdtsc" : "=a"(tsc1) : : "rdx");
apic1 = _apic.CURR_COUNT;
} while ((tsc1 - tsc0) < 10000 || (apic0 - apic1) < 10000);
printf("tsc %d apic %d\n", tsc1 - tsc0, apic0 - apic1);
}
unsigned int _init_cpu_stack(int cpu)
{
return (long)alloc_page(0) + 4096;
}
void handler_timer(void *arg, int err)
{
printf("Timer expired on CPU%d\n", (int)(long)xuk_get_f_ptr());
}
void handler_f3(void *arg, int err)
{
printf("f3 handler on cpu%d arg %x, triggering INT 0xff\n",
(int)(long)xuk_get_f_ptr(), (int)(long)arg);
__asm__ volatile("int $0xff");
printf("end f3 handler\n");
}
void _unhandled_vector(int vector, int err, struct xuk_entry_frame *f)
{
(void)f;
_putchar = putchar;
printf("Unhandled vector %d (err %xh) on CPU%d\n",
vector, err, (int)(long)xuk_get_f_ptr());
}
void _isr_entry(void)
{
}
void *_isr_exit_restore_stack(void *interrupted)
{
/* Somewhat hacky test of the ISR exit modes. Two ways of
* specifying "this stack", one of which does the full spill
* and restore and one shortcuts that due to the NULL
* return
*/
if (rdtsc() & 1) {
return interrupted;
} else {
return 0;
}
}
void *switch_back_to;
void switch_back(int arg1, int arg2, int arg3)
{
printf("Switching back (%d, %d, %d) sbt %xh\n",
arg1, arg2, arg3, (int)(long)switch_back_to);
xuk_switch(switch_back_to, &switch_back_to);
}
void test_switch(void)
{
static unsigned long long stack[256];
long args[] = { 5, 4, 3 };
int eflags = 0x20; /* interrupts disabled */
long handle = xuk_setup_stack((long)(sizeof(stack) + (char *)stack),
switch_back, eflags, args, 3);
printf("Switching to %xh (stack %xh)\n",
(int)handle, (int)(long)&stack[0]);
__asm__ volatile("cli");
xuk_switch((void *)handle, &switch_back_to);
__asm__ volatile("sti");
printf("Back from switch\n");
}
void local_ipi_handler(void *arg, int err)
{
printf("local IPI handler on CPU%d\n", (int)(long)xuk_get_f_ptr());
}
/* Sends an IPI to the current CPU and validates it ran */
void test_local_ipi(void)
{
printf("Testing a local IPI on CPU%d\n", (int)(long)xuk_get_f_ptr());
_apic.ICR_HI = (struct apic_icr_hi) {};
_apic.ICR_LO = (struct apic_icr_lo) {
.delivery_mode = FIXED,
.vector = 0x90,
.shorthand = SELF,
};
}
void _cpu_start(int cpu)
{
_putchar = putchar;
printf("Entering demo kernel\n");
/* Make sure the FS/GS pointers work, then set F to store our
* CPU ID
*/
xuk_set_f_ptr(cpu, (void *)(long)(0x19283700 + cpu));
xuk_set_g_ptr(cpu, (void *)(long)(0xabacad00 + cpu));
printf("fptr %p gptr %p\n", xuk_get_f_ptr(), xuk_get_g_ptr());
xuk_set_f_ptr(cpu, (void *)(long)cpu);
/* Set up this CPU's timer */
/* FIXME: this sets up a separate vector for every CPU's
* timer, and we'll run out. They should share the vector but
* still have individually-set APIC config. Probably wants a
* "timer" API
*/
xuk_set_isr(INT_APIC_LVT_TIMER, 10, handler_timer, 0);
_apic.INIT_COUNT = 5000000;
test_timers();
if (cpu == 0) {
xuk_set_isr(0x1f3, 0, (void *)handler_f3, (void *)0x12345678);
}
__asm__ volatile("int $0xf3");
/* Fire it all up */
printf("Enabling Interrupts\n");
__asm__ volatile("sti");
printf("Interrupts are unmasked (eflags %xh), here we go...\n",
eflags());
/* Wait a teeny bit then send an IPI to CPU0, which will hit
* the unhandled_vector handler
*/
if (cpu == 1) {
int t0 = rdtsc();
while (rdtsc() - t0 < 1000000) {
}
_apic.ICR_HI = (struct apic_icr_hi) {
.destination = 0
};
_apic.ICR_LO = (struct apic_icr_lo) {
.delivery_mode = FIXED,
.vector = 66,
};
while (_apic.ICR_LO.send_pending) {
}
}
test_switch();
xuk_set_isr(XUK_INT_RAW_VECTOR(0x90), -1, local_ipi_handler, 0);
test_local_ipi();
printf("CPU%d initialized, sleeping\n", cpu);
while (1) {
__asm__ volatile("hlt");
}
}

@@ -0,0 +1,5 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/

arch/x86_64/core/printf.h

@@ -0,0 +1,135 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stdarg.h>
/* Tiny, but not-as-primitive-as-it-looks implementation of something
* like s/n/printf(). Handles %d, %x, %c, %s, %p and %% only, no
* precision specifiers or type modifiers.
*/
struct _pfr {
char *buf;
int len;
int idx;
};
/* Set this function pointer to something that generates output */
static void (*_putchar)(int c);
static void pc(struct _pfr *r, int c)
{
if (r->buf) {
if (r->idx <= r->len)
r->buf[r->idx] = c;
} else {
_putchar(c);
}
r->idx++;
}
static void prdec(struct _pfr *r, int v)
{
if (v < 0) {
pc(r, '-');
v = -v;
}
char digs[11];
int i = 10;
digs[i--] = 0;
while (v || i == 9) {
digs[i--] = '0' + (v % 10);
v /= 10;
}
while (digs[++i])
pc(r, digs[i]);
}
static void endrec(struct _pfr *r)
{
if (r->buf && r->idx < r->len)
r->buf[r->idx] = 0;
}
static int _vpf(struct _pfr *r, const char *f, va_list ap)
{
for (/**/; *f; f++) {
if (*f != '%') {
pc(r, *f);
continue;
}
switch (*(++f)) {
case '%':
pc(r, '%');
break;
case 'c':
pc(r, va_arg(ap, int));
break;
case 's': {
char *s = va_arg(ap, char *);
while (*s)
pc(r, *s++);
break;
}
case 'p':
pc(r, '0');
pc(r, 'x'); /* fall through... */
case 'x': {
int sig = 0;
unsigned int v = va_arg(ap, unsigned int);
for (int i = 7; i >= 0; i--) {
int d = (v >> (i*4)) & 0xf;
sig += !!d;
if (sig || i == 0)
pc(r, "0123456789abcdef"[d]);
}
break;
}
case 'd':
prdec(r, va_arg(ap, int));
break;
default:
pc(r, '%');
pc(r, *f);
}
}
endrec(r);
return r->idx;
}
#define CALL_VPF(rec) \
va_list ap; \
va_start(ap, f); \
int ret = _vpf(&r, f, ap); \
va_end(ap); \
return ret
static inline int snprintf(char *buf, unsigned long len, const char *f, ...)
{
struct _pfr r = { .buf = buf, .len = len };
CALL_VPF(&r);
}
static inline int sprintf(char *buf, const char *f, ...)
{
struct _pfr r = { .buf = buf, .len = 0x7fffffff };
CALL_VPF(&r);
}
static inline int printf(const char *f, ...)
{
struct _pfr r = {0};
CALL_VPF(&r);
}
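
A short usage sketch (illustration only, not part of the patch): only
%d, %x, %c, %s, %p and %% are understood, and printf() emits through
whatever function has been stored in the _putchar pointer, so a caller
has to install one first.

/* Hypothetical caller of the mini printf above; assumes serial.h from
 * this patch for the low-level output routine.
 */
#include "serial.h"
#include "printf.h"

static void con_out(int c)
{
	serial_putc(c);
}

void print_demo(void)
{
	char buf[64];

	_putchar = con_out;	/* route printf() to the serial port */
	printf("cpu %d eflags %xh msg %s\n", 1, 0x200, "ok");

	/* snprintf() returns the length of the formatted string */
	int n = snprintf(buf, sizeof(buf) - 1, "count=%d", 42);
	(void)n;
}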

@@ -0,0 +1,11 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
/* This file exists solely to include a single binary blob in a link,
* used by the qemu kernel file architecture swap code in the cmake
* configuration.
*/
__asm__(".incbin \"zephyr-qemu.bin\"");

arch/x86_64/core/serial.h

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "x86_64-hw.h"
/* Super-primitive 8250 serial output-only driver, 115200 8n1 */
#define _PORT 0x3f8
static inline void _serout(int c)
{
while (!(ioport_in8(_PORT + 5) & 0x20)) {
}
ioport_out8(_PORT, c);
}
static inline void serial_putc(int c)
{
if (c == '\n') {
_serout('\r');
}
_serout(c);
}
static inline void serial_puts(const char *s)
{
while (*s) {
serial_putc(*s++);
}
}
static inline void serial_init(void)
{
/* In fact Qemu already has most of this set up and works by
* default
*/
ioport_out8(_PORT+1, 0); /* IER = 0 */
ioport_out8(_PORT+3, 0x80); /* LCR = 8n1 + DLAB select */
ioport_out8(_PORT, 1); /* Divisor Latch low byte */
ioport_out8(_PORT+1, 0); /* Divisor Latch high byte */
ioport_out8(_PORT+3, 0x03); /* LCR = 8n1 + DLAB off */
ioport_out8(_PORT+4, 0x03); /* MCR = DTR & RTS asserted */
}

@@ -0,0 +1,85 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _SHARED_PAGE_H
#define _SHARED_PAGE_H
/* Defines a simple interface for sharing a single page of data across
* CPU modes and SMP cores where it can be easily found and relied
* upon.
*/
#include "xuk-config.h"
#include "x86_64-hw.h"
/* The shared block lives in the 5th page of memory, immediately after
* the 16k null guard region
*/
#define SHARED_ADDR 0x4000
/* Magic cookies passed to stub32 to tell it what's going on */
#define BOOT_MAGIC_MULTIBOOT 0x2badb002 /* initial handoff from bootloader */
#define BOOT_MAGIC_STUB16 0xaaf08df7 /* AP cpu initialization */
struct xuk_shared_mem {
/* Stack to be used by SMP cpus at startup. MUST BE FIRST. */
unsigned int smpinit_stack;
/* Spinlock used to serialize SMP initialization */
int smpinit_lock;
/* Byte address of next page to allocate */
unsigned int next_page;
/* Top-level page table address */
unsigned int base_cr3;
/* 64 bit GDT */
struct gdt64 gdt[3 + (2 * CONFIG_MP_NUM_CPUS)];
/* 64 bit IDT */
unsigned int idt_addr;
/* Precomputed GDT for the 16 bit stub */
unsigned int gdt16_addr;
/* Each pointer in these arrays is the base of the FS/GS
* segment for the indexed CPU.
*/
long long fs_ptrs[CONFIG_MP_NUM_CPUS];
long long gs_ptrs[CONFIG_MP_NUM_CPUS];
int num_active_cpus;
/* Current output column in the VGA console */
int vgacol;
};
#define _shared (*((struct xuk_shared_mem *)(long)SHARED_ADDR))
static inline void shared_init(void)
{
for (int i = 0; i < sizeof(_shared)/sizeof(int); i++) {
((int *)&_shared)[i] = 0;
}
_shared.next_page = 0x5000;
_shared.vgacol = 80;
}
static inline void *alloc_page(int clear)
{
int *p = (int *)(long)_shared.next_page;
_shared.next_page += 4096;
for (int i = 0; clear && i < 1024; i++) {
p[i] = 0;
}
return p;
}
#endif /* _SHARED_PAGE_H */
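
A small consumer sketch (illustration only, not part of the patch):
allocation is a simple bump of _shared.next_page in 4k steps, and
because the cursor lives in the shared page the same pool is visible to
the 32 bit stub, the 64 bit kernel and all CPUs.

/* Hypothetical user of the bump allocator above. */
#include "shared-page.h"

void page_demo(void)
{
	void *table = alloc_page(1);	/* zeroed page, e.g. a page table */
	void *scratch = alloc_page(0);	/* uninitialized page */

	/* Pages come back consecutive, 4k apart, starting at 0x5000 */
	(void)table;
	(void)scratch;
}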

arch/x86_64/core/vgacon.h

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "shared-page.h"
/* Super-primitive VGA text console output-only "terminal" driver */
static inline unsigned short *_vga_row(int row)
{
return ((unsigned short *)0xb8000) + 80 * row;
}
/* Foreground color is four bit, high to low: "intensity", red, green,
* blue. Normal text is low intensity, so 0b0111 (7) is standard.
* The high nybble is the background color.
*/
static inline void vga_put(int ch, int color, int row, int col)
{
unsigned short *rp = _vga_row(row);
rp[col] = (color << 8) | ch;
}
static inline void vgacon_putc(char c)
{
if (_shared.vgacol == 80) {
for (int r = 0; r < 24; r++) {
for (int c = 0; c < 80; c++) {
_vga_row(r)[c] = _vga_row(r+1)[c];
}
}
for (int c = 0; c < 80; c++) {
_vga_row(24)[c] = 0x9000;
}
_shared.vgacol = 0;
}
if (c == '\n') {
_shared.vgacol = 80;
} else if (c == '\r') {
_shared.vgacol = 0;
} else {
vga_put(c, 0x1f, 24, _shared.vgacol++);
}
}

@@ -0,0 +1,282 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _X86_64_HW_H
#define _X86_64_HW_H
/*
* Struct declarations and helper inlines for core x86_64 hardware
* functionality. Anything related to ioports, CR's MSR's, I/L/GDTs,
* PTEs or (IO-)APICs can be found here. Note that because this
* header is included in limited stub contexts, it should include
* declarations and inlines only: no data definitions, even extern
* ones!
*/
static inline unsigned long eflags(void)
{
int eflags;
__asm__ volatile("pushfq; pop %%rax" : "=a"(eflags));
return eflags;
}
/* PAE page table record. Note that "addr" is aligned naturally as an
* address, but of course must be masked to change only significant
* bits (which depend on whether it's storing a 4k, 2M or 1G memory
* block) so as to not clobber the bitfields (remember "negative"
* addresses must mask off the top bits too!). The natural idiom is
* to assign addr first, then write the bitfields.
*/
struct pte64 {
union {
unsigned long long addr;
struct {
unsigned long long present : 1;
unsigned long long writable : 1;
unsigned long long usermode : 1;
unsigned long long writethrough : 1;
unsigned long long uncached : 1;
unsigned long long accessed : 1;
unsigned long long dirty : 1;
unsigned long long pagesize_pat : 1;
unsigned long long global : 1;
unsigned long long _UNUSED1 : 3;
unsigned long long pat : 1;
unsigned long long _UNUSED2 : 50;
unsigned long long exdisable : 1;
};
};
};
struct gdt64 {
union {
unsigned int dwords[2];
struct {
unsigned long long limit_lo16 : 16;
unsigned long long base_lo16 : 16;
unsigned long long base_mid8 : 8;
unsigned long long accessed : 1;
unsigned long long readable : 1;
unsigned long long conforming : 1;
unsigned long long codeseg : 1;
unsigned long long notsystem : 1;
unsigned long long ring : 2;
unsigned long long present : 1;
unsigned long long limit_hi4 : 4;
unsigned long long available : 1;
unsigned long long long64 : 1;
unsigned long long default_size : 1;
unsigned long long page_granularity : 1;
unsigned long long base_hi8 : 8;
};
};
};
static inline void gdt64_set_base(struct gdt64 *g, unsigned int base)
{
g->base_lo16 = base & 0xffff;
g->base_mid8 = (base >> 16) & 0xff;
g->base_hi8 = base >> 24;
}
#define GDT_SELECTOR(seg) ((seg) << 3)
struct idt64 {
unsigned short offset_lo16;
unsigned short segment;
unsigned int ist : 3;
unsigned int _UNUSED1 : 5;
unsigned int type : 4;
unsigned int _UNUSED2 : 1;
unsigned int ring : 2;
unsigned int present : 1;
unsigned short offset_mid16;
unsigned int offset_hi32;
unsigned int _UNUSED3;
};
static inline void idt64_set_isr(struct idt64 *desc, void *isr)
{
unsigned long long addr = (unsigned long)isr;
desc->offset_lo16 = addr & 0xffff;
desc->offset_mid16 = (addr >> 16) & 0xffff;
desc->offset_hi32 = addr >> 32;
}
enum apic_delivery_mode {
FIXED = 0, LOWEST = 1, SMI = 2, NMI = 4,
INIT = 5, STARTUP = 6, EXTINT = 7,
};
struct apic_icr_lo {
unsigned int vector : 8;
enum apic_delivery_mode delivery_mode : 3;
unsigned int logical : 1;
unsigned int send_pending : 1;
unsigned int _unused : 1;
unsigned int assert : 1;
unsigned int level_trig : 1;
unsigned int _unused2 : 2;
enum { NONE, SELF, ALL, NOTSELF } shorthand : 2;
};
struct apic_icr_hi {
unsigned int _unused : 24;
unsigned int destination : 8;
};
/* Generic struct, not all fields applicable to all LVT interrupts */
struct apic_lvt {
unsigned int vector : 8;
enum apic_delivery_mode delivery_mode : 4;
unsigned int _UNUSED : 1;
unsigned int send_pending : 1;
unsigned int polarity : 1;
unsigned int remote_irr : 1;
unsigned int level_trig : 1;
unsigned int masked : 1;
enum { ONESHOT, PERIODIC, TSCDEADLINE } mode : 2;
};
/* Memory-mapped local APIC registers. Note that the registers are
* always the first dword in a 16 byte block, the other 3 being
* unused. So each line represents one of these registers, or an
* array thereof. Lots of (_u)nused fields in the layout, but the usage
* becomes pleasingly clean.
*/
struct apic_regs {
unsigned int _u1[4][2];
unsigned int ID, _u2[3];
unsigned int VER, _u3[3];
unsigned int _u4[4][4];
unsigned int TPR, _u5[3];
unsigned int APR, _u6[3];
unsigned int PPR, _u7[3];
unsigned int EOI, _u8[3];
unsigned int RRD, _u9[3];
unsigned int LDR, _u10[3];
unsigned int DFR, _u11[3];
unsigned int SPURIOUS, _u12[3];
unsigned int ISR_BITS[4][8];
unsigned int TMR_BITS[4][8];
unsigned int IRR_BITS[4][8];
unsigned int ERR_STATUS, _u13[3];
unsigned int _u14[4][6];
struct apic_lvt LVT_CMCI; unsigned int _u15[3];
struct apic_icr_lo ICR_LO, _u16[3];
struct apic_icr_hi ICR_HI, _u17[3];
struct apic_lvt LVT_TIMER; unsigned int _u18[3];
struct apic_lvt LVT_THERMAL; unsigned int _u19[3];
struct apic_lvt LVT_PERF; unsigned int _u20[3];
struct apic_lvt LVT_LINT0; unsigned int _u21[3];
struct apic_lvt LVT_LINT1; unsigned int _u22[3];
struct apic_lvt LVT_ERROR; unsigned int _u23[3];
unsigned int INIT_COUNT, _u24[3];
unsigned int CURR_COUNT, _u25[3];
unsigned int _u26[4][4];
unsigned int DIVIDE_CONF, _u27[3];
};
#define _apic (*((volatile struct apic_regs *)0xfee00000ll))
/* Crazy encoding for this, but susceptible to a formula. Returns the
* DIVIDE_CONF register value that divides the input clock by 2^n (n
* in the range 0-7).
*/
#define APIC_DIVISOR(n) (((((n) - 1) << 1) & 8)|(((n) - 1) & 3))
#define IOREGSEL (*(volatile unsigned int *)0xfec00000l)
#define IOREGWIN (*(volatile unsigned int *)0xfec00010l)
/* Assumes one IO-APIC. Note that because of the way the register API
* works, this must be spinlocked or otherwise protected against other
* CPUs (e.g. do it all on cpu0 at startup, etc...).
*/
static inline unsigned int ioapic_read(int reg)
{
IOREGSEL = reg;
return IOREGWIN;
}
static inline void ioapic_write(int reg, unsigned int val)
{
IOREGSEL = reg;
IOREGWIN = val;
}
/* IOAPIC redirection table entry */
struct ioapic_red {
union {
unsigned int regvals[2];
struct {
unsigned int vector : 8;
enum apic_delivery_mode : 3;
unsigned int logical : 1;
unsigned int send_pending : 1;
unsigned int active_low : 1;
unsigned int remote_irr : 1;
unsigned int level_triggered : 1;
unsigned int masked : 1;
unsigned int _UNUSED1 : 15;
unsigned int _UNUSED2 : 24;
unsigned int destination : 8;
};
};
};
#define GET_CR(reg) ({ unsigned int _r; \
__asm__ volatile("movl %%" reg ", %0\n\t" \
: "=r"(_r)); \
_r; })
#define SET_CR(reg, val) \
do { \
int tmp = val; \
__asm__ volatile("movl %0, %%" reg "\n\t" :: "r"(tmp)); \
} while (0)
#define SET_CR_BIT(reg, bit) SET_CR(reg, GET_CR(reg) | (1 << bit))
static inline void ioport_out8(unsigned short port, unsigned char b)
{
__asm__ volatile("outb %0, %1;\n\t" : : "a"(b), "d"(port));
}
static inline unsigned char ioport_in8(unsigned short port)
{
unsigned char ret;
__asm__ volatile("inb %1, %0;\n\t" : "=a"(ret) : "d"(port));
return ret;
}
static inline void set_msr_bit(unsigned int msr, int bit)
{
unsigned int mask = 1 << bit;
__asm__ volatile("rdmsr; or %0, %%eax; wrmsr"
:: "r"(mask), "c"(msr) : "eax", "edx");
}
static inline unsigned int get_msr(unsigned int msr)
{
unsigned int val;
__asm__ volatile("rdmsr" : "=a"(val) : "c"(msr) : "edx");
return val;
}
static inline unsigned long long rdtsc(void)
{
unsigned long long rax, rdx;
__asm__ volatile("rdtsc" : "=a"(rax), "=d"(rdx));
return rdx << 32 | rax;
}
#endif /* _X86_64_HW_H */
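
Two usage sketches for the declarations above (illustration only, not
part of the patch): the pte64 comment prescribes writing the address
field before the flag bitfields so the full-width store doesn't clobber
them, and the APIC_DIVISOR() formula can be spot-checked against the
architectural encoding (a shift of 6, i.e. divide-by-64, encodes as
0b1001).

#include "x86_64-hw.h"

/* The addr-first idiom from the pte64 comment: map one 2M page. */
static void map_2mb(struct pte64 *pde, int idx, unsigned long long phys)
{
	pde[idx].addr = phys & ~0x1fffffULL;	/* aligned address first... */
	pde[idx].present = 1;			/* ...then the flag bits */
	pde[idx].writable = 1;
	pde[idx].pagesize_pat = 1;		/* 2M page, not a table pointer */
}

/* Spot check of the divisor encoding formula */
_Static_assert(APIC_DIVISOR(6) == 9, "divide-by-64 should encode as 0b1001");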

arch/x86_64/core/x86_64.c

@@ -0,0 +1,209 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <kernel_internal.h>
#include <kernel_structs.h>
#include <tracing.h>
#include <ksched.h>
#include <irq_offload.h>
#include "xuk.h"
struct device;
struct NANO_ESF {
};
void _new_thread(struct k_thread *t, k_thread_stack_t *stack,
size_t sz, k_thread_entry_t entry,
void *p1, void *p2, void *p3,
int prio, unsigned int opts)
{
void *args[] = { entry, p1, p2, p3 };
int nargs = 4;
int eflags = 0x200;
char *base = K_THREAD_STACK_BUFFER(stack);
char *top = base + sz;
_new_thread_init(t, base, sz, prio, opts);
t->switch_handle = (void *)xuk_setup_stack((long) top,
(void *)_thread_entry,
eflags, (long *)args,
nargs);
}
void k_cpu_idle(void)
{
z_sys_trace_idle();
__asm__ volatile("sti; hlt");
}
void _unhandled_vector(int vector, int err, struct xuk_entry_frame *f)
{
/* Yes, there are five registers missing. See notes on
* xuk_entry_frame/xuk_stack_frame.
*/
printk("*** FATAL ERROR vector %d code %d\n", vector, err);
printk("*** RIP %d:0x%llx RSP %d:0x%llx RFLAGS 0x%llx\n",
(int)f->cs, f->rip, (int)f->ss, f->rsp, f->rflags);
printk("*** RAX 0x%llx RCX 0x%llx RDX 0x%llx RSI 0x%llx RDI 0x%llx\n",
f->rax, f->rcx, f->rdx, f->rsi, f->rdi);
printk("*** R8 0x%llx R9 0x%llx R10 0x%llx R11 0x%llx\n",
f->r8, f->r9, f->r10, f->r11);
_NanoFatalErrorHandler(x86_64_except_reason, NULL);
}
void _isr_entry(void)
{
_arch_curr_cpu()->nested++;
}
void *_isr_exit_restore_stack(void *interrupted)
{
bool nested = (--_arch_curr_cpu()->nested) > 0;
void *next = _get_next_switch_handle(interrupted);
return (nested || next == interrupted) ? NULL : next;
}
struct {
void (*fn)(int, void*);
void *arg;
unsigned int esp;
} cpu_init[CONFIG_MP_NUM_CPUS];
/* Called from Zephyr initialization */
void _arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
void (*fn)(int, void *), void *arg)
{
cpu_init[cpu_num].arg = arg;
cpu_init[cpu_num].esp = (int)(long)(sz + (char *)stack);
/* This is our flag to the spinning CPU. Do this last */
cpu_init[cpu_num].fn = fn;
}
#ifdef CONFIG_IRQ_OFFLOAD
static irq_offload_routine_t offload_fn;
static void *offload_arg;
static void irq_offload_handler(void *arg, int err)
{
ARG_UNUSED(arg);
ARG_UNUSED(err);
offload_fn(offload_arg);
}
void irq_offload(irq_offload_routine_t fn, void *arg)
{
offload_fn = fn;
offload_arg = arg;
__asm__ volatile("int %0" : : "i"(CONFIG_IRQ_OFFLOAD_VECTOR));
}
#endif
/* Default. Can be overridden at link time by a timer driver */
void __weak x86_apic_timer_isr(void *arg, int code)
{
ARG_UNUSED(arg);
ARG_UNUSED(code);
}
/* Called from xuk layer on actual CPU start */
void _cpu_start(int cpu)
{
xuk_set_f_ptr(cpu, &_kernel.cpus[cpu]);
/* Set up the timer ISR, but ensure the timer is disabled */
xuk_set_isr(INT_APIC_LVT_TIMER, 13, x86_apic_timer_isr, 0);
_apic.INIT_COUNT = 0;
#ifdef CONFIG_IRQ_OFFLOAD
xuk_set_isr(XUK_INT_RAW_VECTOR(CONFIG_IRQ_OFFLOAD_VECTOR),
-1, irq_offload_handler, 0);
#endif
if (cpu <= 0) {
/* The SMP CPU startup function pointers act as init
* flags. Zero them here because this code is running
* BEFORE .bss is zeroed! Should probably move that
* out of _Cstart() for this architecture...
*/
for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
cpu_init[i].fn = 0;
}
/* Enter Zephyr */
_Cstart();
} else if (cpu < CONFIG_MP_NUM_CPUS) {
/* SMP initialization. First spin, waiting for
* _arch_start_cpu() to be called from the main CPU
*/
while (!cpu_init[cpu].fn) {
}
/* Enter Zephyr, which will switch away and never return */
cpu_init[cpu].fn(0, cpu_init[cpu].arg);
}
/* Spin forever as a fallback */
while (1) {
}
}
/* Returns to the xuk layer the initial stack to use for CPU startup
* on auxiliary (non-zero) CPUs. The stack is selected by the
* non-arch Zephyr kernel and stashed by _arch_start_cpu()
*/
unsigned int _init_cpu_stack(int cpu)
{
return cpu_init[cpu].esp;
}
int _arch_irq_connect_dynamic(unsigned int irq, unsigned int priority,
void (*routine)(void *parameter), void *parameter,
u32_t flags)
{
ARG_UNUSED(flags);
__ASSERT(priority >= 2 && priority <= 15,
"APIC interrupt priority must be 2-15");
xuk_set_isr(irq, priority, (void *)routine, parameter);
return 0;
}
void _arch_irq_disable(unsigned int irq)
{
xuk_set_isr_mask(irq, 1);
}
void _arch_irq_enable(unsigned int irq)
{
xuk_set_isr_mask(irq, 0);
}
void x86_apic_set_timeout(u32_t cyc_from_now)
{
_apic.INIT_COUNT = cyc_from_now;
}
const NANO_ESF _default_esf;
int x86_64_except_reason;
void _NanoFatalErrorHandler(unsigned int reason, const NANO_ESF *esf)
{
_SysFatalErrorHandler(reason, esf);
}
/* App-overridable handler. Does nothing here */
void __weak _SysFatalErrorHandler(unsigned int reason, const NANO_ESF *esf)
{
ARG_UNUSED(reason);
ARG_UNUSED(esf);
k_thread_abort(_current);
}
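
A minimal usage sketch of the irq_offload() path wired up above
(illustration only, not part of the patch): the handler runs in
interrupt context on the calling CPU, entered through the self-directed
"int" instruction on CONFIG_IRQ_OFFLOAD_VECTOR.

#include <irq_offload.h>

/* Hypothetical test code exercising irq_offload() as implemented above. */
static volatile int offload_ran;

static void my_offload_handler(void *arg)
{
	offload_ran = (int)(long)arg;	/* runs in interrupt context */
}

void offload_demo(void)
{
	irq_offload(my_offload_handler, (void *)1);
	/* The software interrupt is taken synchronously, so the handler
	 * has already run by the time irq_offload() returns.
	 */
}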

@@ -0,0 +1,25 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _XUK_CONFIG_H
#define _XUK_CONFIG_H
/* This file defines "kconfig" variables used by the xuk layer only in
* unit test situations where we aren't pulling in the true
* autoconf.h
*/
#ifndef CONFIG_X86_64
/* #define CONFIG_XUK_DEBUG 1 */
/* The APIC timer will run 2^X times slower than the TSC. (X = 0-7) */
#define CONFIG_XUK_APIC_TSC_SHIFT 5
#define CONFIG_MP_NUM_CPUS 2
#define CONFIG_XUK_64_BIT_ABI 1
#endif /* CONFIG_X86_64 */
#endif /* _XUK_CONFIG_H */

@@ -0,0 +1,94 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "serial.h"
#include "x86_64-hw.h"
#include "shared-page.h"
/*
* 16 bit boot stub. This code gets copied into a low memory page and
* used as the bootstrap code for SMP processors, which always start
* in real mode. It is compiled with gcc's -m16 switch, which is a
* wrapper around the assembler's .code16gcc directive which cleverly
* takes 32 bit assembly and "fixes" it with appropriate address size
* prefixes to run in real mode on a 386.
*
* It is just code! We have the .text segment and NOTHING ELSE. No
* static or global variables can be used, nor const read-only data.
* Neither is the linker run, so nothing can be relocated and all
* symbolic references need to be to addresses within this file. In
* fact, any relocations that do sneak in will be left at zero at
* runtime!
*/
__asm__(" cli\n"
" xor %ax, %ax\n"
" mov %ax, %ss\n"
" mov %ax, %ds\n"
" mov $80000, %esp\n" /* FIXME: put stack someplace officiallerish */
" jmp _start16\n");
void _start16(void)
{
#ifdef XUK_DEBUG
serial_putc('1'); serial_putc('6'); serial_putc('\n');
#endif
/* First, serialize on a simple spinlock. Note there's a
* theoretical flaw here in that we are on a shared stack with the
* other CPUs and we don't *technically* know that "oldlock"
* does not get written to the (clobberable!) stack memory. But
* in practice the compiler does the right thing here and we spin
* in registers until exiting the loop, at which point we are the
* only users of the stack, and thus safe.
*/
int oldlock;
do {
__asm__ volatile("pause; mov $1, %%eax; xchg %%eax, (%1)"
: "=a"(oldlock) : "m"(_shared.smpinit_lock));
} while (oldlock);
/* Put a red banner at the top of the screen to announce our
* presence
*/
volatile unsigned short *vga = (unsigned short *)0xb8000;
for (int i = 0; i < 240; i++)
vga[i] = 0xcc20;
/* Spin again waiting on the BSP processor to give us a stack. We
* won't use it until the entry code of stub32, but we want to
* make sure it's there before we jump.
*/
while (!_shared.smpinit_stack) {
}
/* Load the GDT that CPU0 already prepared for us */
__asm__ volatile ("lgdtw (%0)\n" : : "r"(_shared.gdt16_addr));
/* Enter protected mode by setting the bottom bit of CR0 */
int cr0;
__asm__ volatile ("mov %%cr0, %0\n" : "=r"(cr0));
cr0 |= 1;
__asm__ volatile ("mov %0, %%cr0\n" : : "r"(cr0));
/* Set up data and stack segments */
short ds = GDT_SELECTOR(2);
__asm__ volatile ("mov %0, %%ds; mov %0, %%ss" : : "r"(ds));
/* Far jump to the 32 bit entry point, passing a cookie in EAX to
* tell it what we're doing
*/
int magic = BOOT_MAGIC_STUB16;
__asm__ volatile ("ljmpl $0x8,$0x100000" : : "a"(magic));
while (1) {
__asm__("hlt");
}
}
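
For readability (a hedged sketch, not how the stub is actually built):
the serialization at the top of _start16() amounts to a test-and-set
spinlock.  With gcc's __atomic builtins the equivalent logic looks like
this, where the exchange compiles down to the same XCHG instruction.

/* C-level equivalent of the xchg spinlock in _start16(), for
 * illustration only.  The real stub open-codes the xchg so nothing is
 * spilled to the (shared!) stack while spinning.
 */
static inline void smp_lock_acquire(volatile int *lock)
{
	/* Loop until the previous value was 0, meaning we took the lock */
	while (__atomic_exchange_n(lock, 1, __ATOMIC_ACQUIRE)) {
		__asm__ volatile("pause");
	}
}

static inline void smp_lock_release(volatile int *lock)
{
	/* What stub32 does with a plain store to smpinit_lock */
	__atomic_store_n(lock, 0, __ATOMIC_RELEASE);
}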

@@ -0,0 +1,297 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "xuk-config.h"
#include "shared-page.h"
#include "x86_64-hw.h"
#ifdef CONFIG_XUK_DEBUG
#include "printf.h"
#include "vgacon.h"
#include "serial.h"
#else
int printf(const char *fmt, ...)
{
return 0;
}
#endif
/* This i386 code stub is designed to link internally (i.e. it shares
* nothing with the 64 bit world) and be loaded into RAM in high
* memory (generally at 0x100000) in a single (R/W/X) block with its
* .text, .rodata, .data and .bss included. Its stack lives in the
* fifth page of memory at 0x04000-0x4fff. After finishing 64 bit
* initialization, it will JMP to the 16-byte-aligned address that
* immediately follows this block in memory (exposed by the linker as
* _start64), which should then be able to run in an environment where
* all of physical RAM is mapped, except for the bottom 16kb.
*
* Memory layout on exit:
*
* + Pages 0-3 are an unmapped NULL guard
* + Page 4: contains stack and bss for the setup code, and a GDT.
* After 64 bit setup, it's likely this will be reused.
* + Pages 5-11: the bootstrap page table
*
* Note that the initial page table makes no attempt to identify
* memory regions. Everything in the first 4G is mapped as cachable
* RAM. MMIO drivers will need to remap their memory based on PCI BAR
* regions or whatever.
*/
/* Cute trick to turn a preprocessor macro containing a number literal
* into a string immediate in gcc basic asm context
*/
#define _ASM_IMM(s) #s
#define ASM_IMM(s) "$" _ASM_IMM(s)
/* Entry point, to be linked at the very start of the image. Set a
* known-good stack (either the top of the shared page for the boot
* CPU, or one provided by stub16 on others), push the multiboot
* arguments in EAX, EBX and call into C code.
*/
__asm__(".pushsection .start32\n"
" mov $0x5000, %esp\n"
" xor %edx, %edx\n"
" cmp " ASM_IMM(BOOT_MAGIC_STUB16) ", %eax\n"
" cmove 0x4000(%edx), %esp\n"
" pushl %ebx\n"
" pushl %eax\n"
" call cstart\n"
".popsection\n");
/* The multiboot header can be anywhere in the first 4k of the file.
* This stub doesn't get that big, so we don't bother with special
* linkage.
*/
#define MULTIBOOT_MAGIC 0x1badb002
#define MULTIBOOT_FLAGS (1<<1) /* 2nd bit is "want memory map" */
const int multiboot_header[] = {
MULTIBOOT_MAGIC,
MULTIBOOT_FLAGS,
-(MULTIBOOT_MAGIC + MULTIBOOT_FLAGS), /* csum: -(magic+flags) */
};
/* Creates and returns a generic/sane page table for 64 bit startup
* (64 bit mode requires paging enabled). All of the bottom 4G
* (whether backing memory is present or not) gets a mapping with 2M
* pages, except that the bottom 2M are mapped with 4k pages and leave
* the first four pages unmapped as a NULL guard.
*
* Makes no attempt to identify non-RAM/MMIO regions, it just maps
* everything. We rely on the firmware to have set up MTRRs for us
* where needed, otherwise that will all be cacheable memory.
*/
void *init_page_tables(void)
{
/* Top level PML4E points to a single PDPTE in its first entry */
struct pte64 *pml4e = alloc_page(1);
struct pte64 *pdpte = alloc_page(1);
pml4e[0].addr = (unsigned long)pdpte;
pml4e[0].present = 1;
pml4e[0].writable = 1;
/* The PDPTE has four entries covering the first 4G of memory,
* each pointing to a PDE
*/
for (unsigned int gb = 0; gb < 4; gb++) {
struct pte64 *pde = alloc_page(0);
pdpte[gb].addr = (unsigned long)pde;
pdpte[gb].present = 1;
pdpte[gb].writable = 1;
/* Each PDE filled with 2M supervisor pages */
for (int i = 0; i < 512; i++) {
if (!(gb == 0 && i == 0)) {
pde[i].addr = (gb << 30) | (i << 21);
pde[i].present = 1;
pde[i].writable = 1;
pde[i].pagesize_pat = 1;
} else {
/* EXCEPT the very first entry of the
* first GB, which is a pointer to a
* PTE of 4k pages so that we can have
* a 16k (4-page) NULL guard unmapped.
*/
struct pte64 *pte = alloc_page(0);
pde[0].addr = (unsigned long)pte;
pde[0].present = 1;
pde[0].writable = 1;
for (int j = 0; j < 512; j++) {
if (j < 4) {
pte[j].addr = 0;
} else {
pte[j].addr = j << 12;
pte[j].present = 1;
pte[j].writable = 1;
}
}
}
}
}
/* Flush caches out of paranoia. In theory, x86 page walking
* happens downstream of the system-coherent dcache and this
* isn't needed.
*/
__asm__ volatile("wbinvd");
return pml4e;
}
#ifdef CONFIG_XUK_DEBUG
void putchar(int c)
{
serial_putc(c);
vgacon_putc(c);
}
#endif
void cstart(unsigned int magic, unsigned int arg)
{
if (magic == BOOT_MAGIC_STUB16) {
printf("SMP CPU up in 32 bit protected mode. Stack ~%xh\n",
&magic);
}
if (magic != BOOT_MAGIC_STUB16) {
shared_init();
#ifdef CONFIG_XUK_DEBUG
serial_init();
_putchar = putchar;
#endif
printf("Entering stub32 on boot cpu, magic %xh stack ~%xh\n",
magic, (int)&magic);
}
/* The multiboot memory map turns out not to be very useful.
* The basic numbers logged here are only a subset of the true
* memory map if it has holes or >4G memory, and the full map
* passed in the second argument tends to live in low memory
* and get easily clobbered by our own muckery. If we care
* about reading memory maps at runtime we probably want to be
* using BIOS e820 like Linux does.
*/
if (magic == BOOT_MAGIC_MULTIBOOT) {
printf("Hi there!\n");
printf("This is a second line!\n");
printf("And this line was generated from %s\n", "printf!");
printf("Magic: %p MBI Addr: %p\n", magic, arg);
int mem_lower = *(int *)(arg + 4);
int mem_upper = *(int *)(arg + 8);
int mmap_length = *(int *)(arg + 44);
int *mmap_addr = *(void **)(arg + 48);
printf("mem lower %d upper %d mmap_len %d mmap_addr %p\n",
mem_lower, mem_upper, mmap_length, mmap_addr);
}
/* Choose a stack pointer and CPU ID for the 64 bit code to
* use. Then if we're not the boot CPU, release the spinlock
* (taken in stub16) so the other CPUs can continue.
*/
int cpu_id = 0;
unsigned int init_stack = 0x5000;
if (magic == BOOT_MAGIC_STUB16) {
cpu_id = _shared.num_active_cpus++;
init_stack = _shared.smpinit_stack;
_shared.smpinit_stack = 0;
__asm__ volatile("movl $0, (%0)" : : "m"(_shared.smpinit_lock));
}
/* Page table goes in CR3. This is a noop until paging is
* enabled later
*/
if (magic != BOOT_MAGIC_STUB16) {
_shared.base_cr3 = (unsigned int)init_page_tables();
}
SET_CR("cr3", _shared.base_cr3);
/* Enable PAE bit (5) in CR4, required because in long mode
* we'll be using the 64 bit page entry format. Likewise a
* noop until the CPU starts loading pages.
*/
SET_CR_BIT("cr4", 5);
/* Set LME (long mode enable) in IA32_EFER. Still not a mode
* transition, simply tells the CPU that, once paging is
* enabled, we should enter long mode. At that point the LMA
* bit (10) will be set to indicate that it's active.
*/
const int MSR_IA32_EFER = 0xc0000080;
set_msr_bit(MSR_IA32_EFER, 8);
/* NOW we transition by turning paging on. The CPU will start
* page translation (which has been carefully
* identity-mapped!) and enter the 32 bit compatibility
* submode of long mode. So we're reading 64 bit page tables
* but still executing 32 bit instructions.
*/
SET_CR_BIT("cr0", 31);
printf("Hello memory mapped world!\n");
/* Now we can enter true 64 bit long mode via a far call to a
* code segment with the 64 bit flag set. Allocate a 2-entry
* GDT (entry 0 is always a "null segment" architecturally and
* can't be used) here on the stack and throw it away after
* the jump. The 64 bit OS code will need to set the
* descriptors up for itself anyway
*/
struct gdt64 cs[] = {
{ },
{
.readable = 1,
.codeseg = 1,
.notsystem = 1,
.present = 1,
.long64 = 1,
},
};
/* The limit comes first, but is 16 bits. The dummy is there
* for alignment, though docs aren't clear on whether it's
* required or not
*/
struct {
unsigned short dummy;
unsigned short limit;
unsigned int addr;
} gdtp = { .limit = sizeof(cs), .addr = (int)&cs[0], };
printf("CS descriptor 0x%x 0x%x\n", cs[1].dwords[1], cs[1].dwords[0]);
__asm__ volatile("lgdt %0" : : "m"(gdtp.limit) : "memory");
/* Finally, make a far jump into the 64 bit world. The entry
* point is a 16-byte-aligned address that immediately follows
* our stub, and is exposed by our linkage as "_start64".
*
* Indirect far jumps have a similar crazy setup to descriptor
* tables, but here the segment selector comes last so no
* alignment worries.
*
* The 64 bit entry reuses the same stack we're on, and takes
* the cpu_id in its first argument.
*/
extern int _start64;
unsigned int jmpaddr = (unsigned int) &_start64;
struct {
unsigned int addr;
unsigned short segment;
} farjmp = { .segment = GDT_SELECTOR(1), .addr = jmpaddr };
printf("Making far jump to 64 bit mode @%xh...\n", &_start64);
__asm__ volatile("mov %0, %%esp; ljmp *%1" ::
"r"(init_stack), "m"(farjmp), "D"(cpu_id)
: "memory");
}
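
For illustration (not part of the patch): the two-level ASM_IMM() macro
above works because the argument is macro-expanded before the inner #
stringifies it, so a #define'd magic number becomes a usable immediate
string in basic asm.  A standalone spot check:

/* Standalone sketch of the ASM_IMM() expansion used by the entry stub. */
#define _ASM_IMM(s) #s
#define ASM_IMM(s) "$" _ASM_IMM(s)

#define BOOT_MAGIC_STUB16 0xaaf08df7

/* ASM_IMM(BOOT_MAGIC_STUB16) expands to "$" "0xaaf08df7", which the
 * compiler concatenates into the single literal "$0xaaf08df7" -- exactly
 * the immediate syntax "cmp $0xaaf08df7, %eax" needs.
 */
_Static_assert(sizeof(ASM_IMM(BOOT_MAGIC_STUB16)) == sizeof("$0xaaf08df7"),
	       "stringified immediate");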

@@ -0,0 +1,21 @@
ENTRY(_start)
PHDRS {
stub32 PT_LOAD;
}
SECTIONS {
. = 0x100000;
_start = .;
.stub32 : {
*(.start32)
*(.text*)
*(.rodata*)
*(.data*)
*(.bss*)
*(COMMON)
} :stub32
. = ALIGN(16);
_start64 = .;
}

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
/* This "C" file exists solely to include the contents of
* separately-compiled binary stubs into the link. It's easier than
* trying to objcopy the contents into linkable object files,
* especially when combined with cmake's somewhat odd special-cased
* dependency handling (which works fine with C files, of course).
*/
/* The 32 bit stub is our entry point and goes into a separate linker
* section so it can be placed correctly
*/
__asm__(".section .xuk_stub32\n"
".incbin \"xuk-stub32.bin\"\n");
/* The 16 bit stub is the start of execution for auxiliary SMP CPUs
* (also for real mode traps if we ever want to expose that
* capability) and just lives in rodata. It has to be copied into low
* memory by the kernel once it is running.
*/
__asm__(".section .rodata\n"
".globl _xuk_stub16_start\n"
"_xuk_stub16_start:\n"
".incbin \"xuk-stub16.bin\"\n"
".globl _xuk_stub16_end\n"
"_xuk_stub16_end:\n");

arch/x86_64/core/xuk.c

@@ -0,0 +1,629 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "xuk-config.h"
#include "x86_64-hw.h"
#include "xuk.h"
#include "serial.h"
#ifdef CONFIG_XUK_DEBUG
#include "vgacon.h"
#include "printf.h"
#else
#define printf(...)
#endif
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
/* Defined at the linker level in xuk-stubs.c */
extern char _xuk_stub16_start, _xuk_stub16_end;
/* 64 bit entry point. Lives immediately after the 32 bit stub.
* Expects to have its stack already set up.
*/
__asm__(".pushsection .xuk_start64\n"
".align 16\n"
" jmp _cstart64\n"
".popsection\n");
/* Interrupt/exception entry points stored in the IDT.
*
* FIXME: the assembly below uses XCHG r/m, because I'm lazy and this
* was SO much easier than hand coding the musical chairs required to
* emulate it. But that instruction is outrageously slow (like 20+
* cycle latency on most CPUs!), and this is interrupt entry.
* Replace, once we have a test available to detect bad register
* contents
*/
extern char _isr_entry_err, _isr_entry_noerr;
__asm__(/* Exceptions that push an error code arrive here. */
".align 16\n"
"_isr_entry_err:\n"
" xchg %rdx, (%rsp)\n"
" jmp _isr_entry2\n"
/* IRQs with no error code land here, then fall through */
".align 16\n"
"_isr_entry_noerr:\n"
" push %rdx\n"
/* Arrive here with RDX already pushed to the stack below the
* interrupt frame and (if needed) populated with the error
* code from the exception. It will become the third argument
* to the C handler. Stuff the return address from the call
* in the stub table into RDI (the first argument).
*/
"_isr_entry2:\n"
" xchg %rdi, 8(%rsp)\n"
" push %rax\n"
" push %rcx\n"
" push %rsi\n"
" push %r8\n"
" push %r9\n"
" push %r10\n"
" push %r11\n"
" mov %rsp, %rsi\n" /* stack in second arg */
" call _isr_c_top\n"
/* We have pushed only the caller-save registers at this
* point. Check return value to see if we are returning back
* into the same context or if we need to do a full dump and
* restore.
*/
" test %rax, %rax\n"
" jnz _switch_bottom\n"
" pop %r11\n"
" pop %r10\n"
" pop %r9\n"
" pop %r8\n"
" pop %rsi\n"
" pop %rcx\n"
" pop %rax\n"
" pop %rdx\n"
" pop %rdi\n"
" iretq\n");
/* Top half of a context switch. Arrive here with the "CPU pushed"
* part of the exception frame (SS, RSP, RFLAGS, CS, RIP) already on
* the stack, the context pointer to which to switch stored in RAX and
* a pointer into which to store the current context in RDX (NOTE:
* this will be a pointer to a 32 bit memory location if we are in x32
* mode!). It will push the first half of the register set (the same
* caller-save registers pushed by an ISR) and then continue on to
* _switch_bottom to finish up.
*/
__asm__(".align 16\n"
".global _switch_top\n"
"_switch_top:\n"
" push %rdi\n"
" push %rdx\n"
" push %rax\n"
" push %rcx\n"
" push %rsi\n"
" push %r8\n"
" push %r9\n"
" push %r10\n"
" push %r11\n"
" mov %rsp, %r8\n"
" sub $48, %r8\n"
#ifdef CONFIG_XUK_64_BIT_ABI
" movq %r8, (%rdx)\n"
#else
" movl %r8d, (%rdx)\n"
#endif
/* Fall through... */
/* Bottom half of a switch, used by both ISR return and
* context switching. Arrive here with the exception frame
* and caller-saved registers already on the stack and the
* stack pointer to use for the restore in RAX. It will push
* the remaining registers and then restore.
*/
".align 16\n"
"_switch_bottom:\n"
" push %rbx\n"
" push %rbp\n"
" push %r12\n"
" push %r13\n"
" push %r14\n"
" push %r15\n"
" mov %rax, %rsp\n"
" pop %r15\n"
" pop %r14\n"
" pop %r13\n"
" pop %r12\n"
" pop %rbp\n"
" pop %rbx\n"
" pop %r11\n"
" pop %r10\n"
" pop %r9\n"
" pop %r8\n"
" pop %rsi\n"
" pop %rcx\n"
" pop %rax\n"
" pop %rdx\n"
" pop %rdi\n"
" iretq\n");
static unsigned int isr_stub_base;
struct vhandler {
void (*fn)(void*, int);
void *arg;
};
static struct vhandler *vector_handlers;
static void putchar(int c)
{
serial_putc(c);
#ifdef XUK_DEBUG
vgacon_putc(c);
#endif
}
long _isr_c_top(unsigned long vecret, unsigned long rsp,
unsigned long err)
{
/* The vector stubs are 8-byte-aligned, so to get the vector
* index from the return address we just shift off the bottom
* bits
*/
int vector = (vecret - isr_stub_base) >> 3;
struct vhandler *h = &vector_handlers[vector];
struct xuk_entry_frame *frame = (void *)rsp;
_isr_entry();
/* Set current priority in CR8 to the currently-serviced IRQ
* and re-enable interrupts
*/
unsigned long long cr8, cr8new = vector >> 4;
__asm__ volatile("movq %%cr8, %0;"
"movq %1, %%cr8;"
"sti"
: "=r"(cr8) : "r"(cr8new));
if (h->fn) {
h->fn(h->arg, err);
} else {
_unhandled_vector(vector, err, frame);
}
/* Mask interrupts to finish processing (they'll get restored
* in the upcoming IRET) and restore CR8
*/
__asm__ volatile("cli; movq %0, %%cr8" : : "r"(cr8));
/* Signal EOI if it's an APIC-managed interrupt */
if (vector > 0x1f) {
_apic.EOI = 0;
}
/* Subtle: for the "interrupted context pointer", we pass in
* the value our stack pointer WILL have once we finish
* spilling registers after this function returns. If this
* hook doesn't want to switch, it will return null and never
* save the value of the pointer.
*/
return (long)_isr_exit_restore_stack((void *)(rsp - 48));
}
static long choose_isr_entry(int vector)
{
/* Constructed with 1's in the vector indexes defined to
* generate an error code. Couldn't find a clean way to make
* the compiler generate this code
*/
const int mask = 0x27d00; /* 0b00100111110100000000 */
if (vector < 32 && ((1 << vector) & mask)) {
return (long)&_isr_entry_err;
} else {
return (long)&_isr_entry_noerr;
}
}
void xuk_set_isr(int interrupt, int priority,
void (*handler)(void *, int), void *arg)
{
int v = interrupt - 0x100;
/* Need to choose a vector number? Try all vectors at the
* specified priority. Clobber one if we have to.
*/
if (interrupt < 0x100 || interrupt > 0x1ff) {
for (int pi = 0; pi <= 0xf; pi++) {
v = (priority << 4) | pi;
if (!vector_handlers[v].fn) {
break;
}
}
}
/* Need to set up IO-APIC? Set it up to deliver to all CPUs
* here (another API later will probably allow for IRQ
* affinity). Do a read/write cycle to avoid clobbering
* settings like edge triggering & polarity that might have
* been set up by other platform layers. We only want to muck
* with routing.
*/
if (interrupt < 0x100) {
struct ioapic_red red;
int regidx = 0x10 + 2 * interrupt;
red.regvals[0] = ioapic_read(regidx);
red.regvals[1] = ioapic_read(regidx + 1);
red.vector = v;
red.logical = 0;
red.destination = 0xff;
red.masked = 1;
ioapic_write(regidx, red.regvals[0]);
ioapic_write(regidx + 1, red.regvals[1]);
}
/* Is it a special interrupt? */
if (interrupt == INT_APIC_LVT_TIMER) {
struct apic_lvt lvt = {
.vector = v,
.mode = ONESHOT,
};
_apic.LVT_TIMER = lvt;
}
printf("set_isr v %d\n", v);
vector_handlers[v].fn = handler;
vector_handlers[v].arg = arg;
}
/* Note: "raw vector" interrupt numbers cannot be masked, as the APIC
* doesn't have a per-vector mask bit. Only specific LVT interrupts
* (we handle timer below) and IOAPIC-generated interrupts can be
* masked on x86. In practice, this isn't a problem as that API is a
* special-purpose kind of thing. Real devices will always go through
* the supported channel.
*/
void xuk_set_isr_mask(int interrupt, int masked)
{
if (interrupt == INT_APIC_LVT_TIMER) {
struct apic_lvt lvt = _apic.LVT_TIMER;
lvt.masked = masked;
_apic.LVT_TIMER = lvt;
} else if (interrupt < 0x100) {
struct ioapic_red red;
int regidx = 0x10 + 2 * interrupt;
red.regvals[0] = ioapic_read(regidx);
red.regvals[1] = ioapic_read(regidx + 1);
red.masked = masked;
ioapic_write(regidx, red.regvals[0]);
ioapic_write(regidx + 1, red.regvals[1]);
}
}
/* Note: these base pointers live together in a big block. Eventually
* we will probably want one of them for userspace TLS, which means it
* will need to be retargeted to point somewhere within the
* application memory. But this is fine for now.
*/
static void setup_fg_segs(int cpu)
{
int fi = 3 + 2 * cpu, gi = 3 + 2 * cpu + 1;
struct gdt64 *fs = &_shared.gdt[fi];
struct gdt64 *gs = &_shared.gdt[gi];
gdt64_set_base(fs, (long)&_shared.fs_ptrs[cpu]);
gdt64_set_base(gs, (long)&_shared.gs_ptrs[cpu]);
int fsel = GDT_SELECTOR(fi), gsel = GDT_SELECTOR(gi);
__asm__("mov %0, %%fs; mov %1, %%gs" : : "r"(fsel), "r"(gsel));
}
static void init_gdt(void)
{
printf("Initializing 64 bit IDT\n");
/* Need a GDT for ourselves, not whatever the previous layer
* set up. The scheme is that segment zero is the null
* segment (required and enforced architecturally), segment
* one (selector 8) is the code segment, two (16) is a
* data/stack segment (ignored by code at runtime, but
* required to be present in the L/GDT when executing an
* IRET), and remaining segments come in pairs to provide
* FS/GS segment bases for each CPU.
*/
_shared.gdt[0] = (struct gdt64) {};
_shared.gdt[1] = (struct gdt64) {
.readable = 1,
.codeseg = 1,
.notsystem = 1,
.present = 1,
.long64 = 1,
};
_shared.gdt[2] = (struct gdt64) {
.readable = 1,
.codeseg = 0,
.notsystem = 1,
.present = 1,
.long64 = 1,
};
for (int i = 3; i < ARRAY_SIZE(_shared.gdt); i++) {
_shared.gdt[i] = (struct gdt64) {
.readable = 1,
.codeseg = 0,
.notsystem = 1,
.present = 1,
.long64 = 1,
};
}
}
static void init_idt(void)
{
printf("Initializing 64 bit IDT\n");
/* Make an IDT in the next unused page and fill in all 256
* entries
*/
struct idt64 *idt = alloc_page(0);
_shared.idt_addr = (unsigned int)(long)idt;
for (int i = 0; i < 256; i++) {
idt[i] = (struct idt64) {
.segment = GDT_SELECTOR(1),
.type = 14, /* == 64 bit interrupt gate */
.present = 1,
};
}
/* Hand-encode stubs for each vector that are a simple 5-byte
* CALL instruction to the single handler entry point. That's
* an opcode of 0xe8 followed by a 4-byte offset from the start
* of the next (!) instruction. The call is used to push a
* return address on the stack that points into the stub,
* allowing us to extract the vector index by what stub it
* points into.
*/
struct istub {
unsigned char opcode; /* 0xe8 == CALLQ */
int off;
unsigned char _unused[3];
} __attribute__((packed)) *stubtab = alloc_page(0);
isr_stub_base = (long)stubtab;
/* FIXME: on x32, the entries in this handlers table are half
* the size as a native 64 bit build, and could be packed into
* the same page as the stubs above, saving the page of low
* memory.
*/
vector_handlers = alloc_page(1);
for (int i = 0; i < 256; i++) {
struct istub *st = &stubtab[i];
st->opcode = 0xe8;
st->off = choose_isr_entry(i) - (long)st - 5;
idt64_set_isr(&idt[i], st);
}
}
static void smp_init(void)
{
/* Generate a GDT for the 16 bit stub to use when
* transitioning to 32 bit protected mode (so the poor thing
* doesn't have to do it itself). It can live right here on
* our stack.
*/
struct gdt64 gdt16[] = {
{},
{
.codeseg = 1,
.default_size = 1,
.readable = 1,
.notsystem = 1,
.present = 1,
.limit_lo16 = 0xffff,
.limit_hi4 = 0xf,
.page_granularity = 1,
},
{
.readable = 1,
.default_size = 1,
.notsystem = 1,
.present = 1,
.limit_lo16 = 0xffff,
.limit_hi4 = 0xf,
.page_granularity = 1,
},
};
struct {
short dummy;
short limit;
unsigned int addr;
} gdtp16 = { .limit = sizeof(gdt16), .addr = (long)&gdt16[0] };
_shared.gdt16_addr = (long)&gdtp16.limit;
/* FIXME: this is only used at startup, and only for a ~150
* byte chunk of code. Find a way to return it, or maybe put
* it in the low memory null guard instead?
*/
char *sipi_page = alloc_page(1);
int s16bytes = &_xuk_stub16_end - &_xuk_stub16_start;
printf("Copying %d bytes of 16 bit code into page %p\n",
s16bytes, (int)(long)sipi_page);
for (int i = 0; i < s16bytes; i++) {
sipi_page[i] = ((char *)&_xuk_stub16_start)[i];
}
/* First send an INIT interrupt to all CPUs. This resets them
* regardless of whatever they were doing and they enter a
* "wait for SIPI" state
*/
printf("Sending INIT IPI\n");
_apic.ICR_LO = (struct apic_icr_lo) {
.delivery_mode = INIT,
.shorthand = NOTSELF,
};
while (_apic.ICR_LO.send_pending) {
}
/* Now send the startup IPI (SIPI) to all CPUs. They will
* begin executing in real mode with IP=0 and CS pointing to
* the page we allocated.
*/
_shared.smpinit_lock = 0;
_shared.smpinit_stack = 0;
_shared.num_active_cpus = 1;
printf("Sending SIPI IPI\n");
_apic.ICR_LO = (struct apic_icr_lo) {
.delivery_mode = STARTUP,
.shorthand = NOTSELF,
.vector = ((long)sipi_page) >> 12,
};
while (_apic.ICR_LO.send_pending) {
}
for (int i = 1; i < CONFIG_MP_NUM_CPUS; i++) {
_shared.smpinit_stack = _init_cpu_stack(i);
printf("Granting stack @ %xh to CPU %d\n",
_shared.smpinit_stack, i);
while (_shared.num_active_cpus <= i) {
__asm__("pause");
}
}
}
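/* Note on the SIPI vector encoding above: architecturally, the 8 bit
 * vector in a startup IPI is the page number of the real mode entry
 * point, i.e. the AP begins executing at physical address
 * (vector << 12) with IP = 0.  The shift used when sending the SIPI
 * is just the inverse of that mapping:
 *
 *     int vector = ((long)sipi_page) >> 12;    // what we send
 *     long ap_entry = (long)vector << 12;      // == (long)sipi_page
 */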
void _cstart64(int cpu_id)
{
if (cpu_id == 0) {
extern char __bss_start, __bss_end;
__builtin_memset(&__bss_start, 0, &__bss_end - &__bss_start);
}
#ifdef CONFIG_XUK_DEBUG
_putchar = putchar;
#endif
printf("\n==\nHello from 64 bit C code on CPU%d (stack ~%xh)\n",
cpu_id, (int)(long)&cpu_id);
printf("sizeof(int) = %d, sizeof(long) = %d, sizeof(void*) = %d\n",
sizeof(int), sizeof(long), sizeof(void *));
if (cpu_id == 0) {
init_gdt();
}
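/* The odd-looking struct below is a 64 bit GDT pseudo-descriptor:
 * LGDT takes a 10 byte operand of a 16 bit limit followed by a 64
 * bit base, so we pass the address of the "limit" field and the
 * leading dummy words simply keep "addr" naturally aligned.  (The
 * IDT load further down uses the same trick for LIDT.)
 */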
struct {
unsigned short dummy[3];
unsigned short limit;
unsigned long long addr;
} gdtp = { .limit = sizeof(_shared.gdt), .addr = (long)_shared.gdt };
printf("Loading 64 bit GDT\n");
__asm__ volatile("lgdt %0" : : "m"(gdtp.limit));
/* Need to actually set the data & stack segments with those
* indexes. Whatever we have in those hidden registers works
* for data access *now*, but the next interrupt will push
* whatever the selector index was, and we need to know that
* our table contains the same layout!
*/
int selector = GDT_SELECTOR(2);
__asm__ volatile("mov %0, %%ds; mov %0, %%ss" : : "r"(selector));
if (cpu_id == 0) {
init_idt();
}
struct {
unsigned short dummy[3];
unsigned short limit;
unsigned long long addr;
} idtp = { .limit = 4096, .addr = _shared.idt_addr };
printf("Loading IDT lim %d addr %xh\n", idtp.limit, idtp.addr);
__asm__ volatile("lidt %0" : : "m"(idtp.limit));
/* Classic PC architecture gotcha: disable 8259 PICs before
* they fire a timer interrupt into our exception table.
* Write 1's into the interrupt masks.
*/
if (cpu_id == 0) {
printf("Disabling 8259 PICs\n");
ioport_out8(0xa1, 0xff); /* slave */
ioport_out8(0x21, 0xff); /* master */
}
/* Enable APIC. Set both the MSR bit and the "software
* enable" bit in the spurious interrupt vector register.
*/
const unsigned int IA32_APIC_BASE = 0x1b;
printf("Enabling APIC id %xh ver %xh\n", _apic.ID, _apic.VER);
set_msr_bit(IA32_APIC_BASE, 11);
_apic.SPURIOUS |= 1<<8;
_apic.LDR = cpu_id << 24;
_apic.DIVIDE_CONF = APIC_DIVISOR(CONFIG_XUK_APIC_TSC_SHIFT);
printf("Initializing FS/GS segments for local CPU%d\n", cpu_id);
setup_fg_segs(cpu_id);
if (cpu_id == 0) {
printf("Brining up auxiliary CPUs...\n");
smp_init();
}
printf("Calling _cpu_start on CPU %d\n", cpu_id);
_cpu_start(cpu_id);
}
long xuk_setup_stack(long sp, void *fn, unsigned int eflags,
long *args, int nargs)
{
long long *f = (long long *)(sp & ~7) - 20;
/* FIXME: this should extend naturally to setting up usermode
* frames too: the frame should have an SS and RSP at the top
* that specify the user stack into which to return (can be
* this same stack as long as the mapping is correct), and the
* CS should be a separate ring 3 segment.
*/
f[19] = GDT_SELECTOR(2);
f[18] = sp;
f[17] = eflags;
f[16] = GDT_SELECTOR(1);
f[15] = (long)fn;
f[14] = nargs >= 1 ? args[0] : 0; /* RDI */
f[13] = nargs >= 3 ? args[2] : 0; /* RDX */
f[12] = 0; /* RAX */
f[11] = nargs >= 4 ? args[3] : 0; /* RCX */
f[10] = nargs >= 2 ? args[1] : 0; /* RSI */
f[9] = nargs >= 5 ? args[4] : 0; /* R8 */
f[8] = nargs >= 6 ? args[5] : 0; /* R9 */
/* R10, R11, RBX, RBP, R12, R13, R14, R15 */
for (int i = 7; i >= 0; i--) {
f[i] = 0;
}
return (long)f;
}
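/* For reference: the 20 quadwords written above are laid out exactly
 * as struct xuk_stack_frame in xuk.h -- f[0..5] are the callee-saved
 * R15/R14/R13/R12/RBP/RBX block, f[6..14] the caller-saved entry
 * registers, and f[15..19] the hardware IRET frame (RIP, CS, RFLAGS,
 * RSP, SS).
 */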
int z_arch_printk_char_out(int c)
{
putchar(c);
return 0;
}

171
arch/x86_64/core/xuk.h Normal file
View file

@ -0,0 +1,171 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _XUK_H
#define _XUK_H
#include <xuk-switch.h>
#include "shared-page.h"
/*
* APIs exposed by the xuk layer to OS integration:
*/
/* Set a single CPU-specific pointer which can be retrieved (on that
* CPU!) with get_f_ptr()
*/
static inline void xuk_set_f_ptr(int cpu, void *p)
{
_shared.fs_ptrs[cpu] = (long)p;
}
/* Likewise, but "G" */
static inline void xuk_set_g_ptr(int cpu, void *p)
{
_shared.gs_ptrs[cpu] = (long)p;
}
/* Retrieves the pointer set by set_f_ptr() for the current CPU */
static inline void *xuk_get_f_ptr()
{
long long ret, off = 0;
__asm__("movq %%fs:(%1), %0" : "=r"(ret) : "r"(off));
return (void *)(long)ret;
}
/* Retrieves the pointer set by set_g_ptr() for the current CPU */
static inline void *xuk_get_g_ptr()
{
long long ret, off = 0;
__asm__("movq %%gs:(%1), %0" : "=r"(ret) : "r"(off));
return (void *)(long)ret;
}
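/* Usage sketch (illustrative, not part of this API's contract): an OS
 * will typically stash a pointer to its per-CPU state here during CPU
 * bring-up and fetch it from interrupt or scheduler code, e.g.:
 *
 *     xuk_set_f_ptr(cpu_id, &my_percpu_data[cpu_id]);   // at startup
 *     ...
 *     struct my_percpu *me = xuk_get_f_ptr();           // on that CPU
 *
 * my_percpu_data is a hypothetical array; any per-CPU object works.
 */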
/**
* @brief Sets a global handler for the specified interrupt.
*
* Interrupt numbers live in a partitioned space:
*
* + Values from 0 - 0xff are mapped to INTIx interrupts in the global
* index of IO-APIC inputs, which on many systems correspond to
* legacy IRQ0-IRQ15 interrupts at the bottom of the interrupt
* range. These handlers are not passed a meaningful value in their
* first argument, though the function pointer type declares one.
*
* + Values from 0x100 to 0x1ff are mapped to raw vectors 0x00-0xff
* and can be used for handling exceptions, for INT instructions, or
* for MSI- or IPI-directed interrupts that specify specific
* vectors.
*
* + Values outside this range may be exposed symbolically for other
* interrupts sources, for example local APIC LVT interrupts.
*
* If there is a pre-existing handler on the specified raw vector,
* this function will replace it.
*
* @param interrupt Interrupt number. See above for interpretation.
* @param priority Integer in the range 2-15. Higher-valued interrupts
* can interrupt lower ones. Ignored for raw vector
* numbers, as their priority is encoded in the top
* four bits of the vector number. A priority of zero
* is treated as "don't care" and the interrupt will
* be assigned the lowest available vector.
* @param handler Function pointer to invoke on interrupt receipt. It
* will be passed the specified argument as the first
* argument and the x86 exception error code (if any)
* in the second.
* @param arg Opaque value to pass to the handler when invoked.
*
*/
void xuk_set_isr(int interrupt, int priority,
void (*handler)(void *, int), void *arg);
#define INT_APIC_LVT_TIMER 0x200
#define XUK_INT_RAW_VECTOR(vector) ((vector)+0x100)
void xuk_set_isr_mask(int interrupt, int masked);
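/* Usage sketch (illustrative): hook the local APIC timer at priority
 * 13 and a raw self-IPI vector (priority is ignored for raw vectors,
 * as documented above):
 *
 *     xuk_set_isr(INT_APIC_LVT_TIMER, 13, my_timer_isr, NULL);
 *     xuk_set_isr(XUK_INT_RAW_VECTOR(0xff), 0, my_ipi_isr, NULL);
 *
 * my_timer_isr/my_ipi_isr are hypothetical handlers with the
 * void (*)(void *arg, int err) signature described above.
 */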
/* Stack frame on interrupt entry. Obviously they get pushed onto the
* stack in the opposite order than they appear here; the last few
* entries are the hardware frame. Note that not all registers are
* present, the ABI caller-save registers don't get pushed until after
* the handler as an optimization.
*/
struct xuk_entry_frame {
unsigned long long r11;
unsigned long long r10;
unsigned long long r9;
unsigned long long r8;
unsigned long long rsi;
unsigned long long rcx;
unsigned long long rax;
unsigned long long rdx;
unsigned long long rdi;
unsigned long long rip;
unsigned long long cs;
unsigned long long rflags;
unsigned long long rsp;
unsigned long long ss;
};
/* Full stack frame, i.e. the one used as a handle in xuk_switch().
* Once more, the registers declared here are NOT POPULATED during the
* execution of an interrupt service routine.
*/
struct xuk_stack_frame {
unsigned long long r15;
unsigned long long r14;
unsigned long long r13;
unsigned long long r12;
unsigned long long rbp;
unsigned long long rbx;
struct xuk_entry_frame entry;
};
/* Sets up a new stack. The sp argument should point to the quadword
* above (!) the allocated stack area (i.e. the frame will be pushed
* below it). The frame will be set up to enter the function in the
* specified code segment with the specified flags register. An array
* of up to 6 function arguments may also be provided. Returns a
* handle suitable for passing to xuk_switch() or for returning from
* _isr_exit_restore_stack().
*/
long xuk_setup_stack(long sp, void *fn, unsigned int eflags,
long *args, int nargs);
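/* Usage sketch (illustrative): build a context that enters
 * my_entry(arg0, arg1) with interrupts enabled (the IF bit, 0x200,
 * set in the flags argument), then switch to it:
 *
 *     long args[] = { arg0, arg1 };
 *     long handle = xuk_setup_stack((long)(stack + sizeof(stack)),
 *                                   my_entry, 0x200, args, 2);
 *     xuk_switch((void *)handle, &old_handle);
 *
 * stack, my_entry, arg0/arg1 and old_handle are hypothetical; the
 * same handle format is what _isr_exit_restore_stack() returns.
 */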
/*
* OS-defined utilities required by the xuk layer:
*/
/* Returns the address of a stack pointer in 32 bit memory to be used
* by AP processor bootstrapping and startup.
*/
unsigned int _init_cpu_stack(int cpu);
/* OS CPU startup entry point, running on the stack returned by
* init_cpu_stack()
*/
void _cpu_start(int cpu);
/* Called on receipt of an unregistered interrupt/exception. Passes
* the vector number and the CPU error code, if any.
*/
void _unhandled_vector(int vector, int err, struct xuk_entry_frame *f);
/* Called on ISR entry before nested interrupts are enabled so the OS
* can arrange bookkeeping. Really should be exposed as an inline and
* not a function call; cycles on interrupt entry are precious.
*/
void _isr_entry(void);
/* Called on ISR exit to choose a next thread to run. The argument is
* a context pointer to the thread that was interrupted.
*/
void *_isr_exit_restore_stack(void *interrupted);
#endif /* _XUK_H */

15
arch/x86_64/core/xuk64.ld Normal file
View file

@ -0,0 +1,15 @@
SECTIONS {
. = 0x100000;
.text : {
*(.xuk_stub32)
. = ALIGN(16);
*(.xuk_start64*)
*(.text*)
}
.rodata : { *(.rodata*) }
.data : { *(.data*) }
__bss_start = .;
.bss : { *(.bss*) *(COMMON) }
__bss_end = .;
}

View file

@ -0,0 +1,11 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _KERNEL_ARCH_DATA_H
#define _KERNEL_ARCH_DATA_H
struct _kernel_arch { };
#endif /* _KERNEL_ARCH_DATA_H */

View file

@ -0,0 +1,105 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _KERNEL_ARCH_FUNC_H
#define _KERNEL_ARCH_FUNC_H
#include <irq.h>
#include <xuk-switch.h>
static inline void kernel_arch_init(void)
{
/* This is a noop, we already took care of things before
* _Cstart() is entered
*/
}
static inline struct _cpu *_arch_curr_cpu(void)
{
long long ret, off = 0;
/* The struct _cpu pointer for the current CPU lives at the
* start of the FS segment
*/
__asm__("movq %%fs:(%1), %0" : "=r"(ret) : "r"(off));
return (struct _cpu *)(long)ret;
}
static inline unsigned int _arch_irq_lock(void)
{
unsigned long long key;
__asm__ volatile("pushfq; cli; popq %0" : "=r"(key));
return (int)key;
}
static inline void _arch_irq_unlock(unsigned int key)
{
if (key & 0x200) {
__asm__ volatile("sti");
}
}
static inline void arch_nop(void)
{
__asm__ volatile("nop");
}
void _arch_irq_disable(unsigned int irq);
void _arch_irq_enable(unsigned int irq);
/* Not a standard Zephyr function, but probably will be */
static inline unsigned long long _arch_k_cycle_get_64(void)
{
unsigned int hi, lo;
__asm__ volatile("rdtsc" : "=d"(hi), "=a"(lo));
return (((unsigned long long)hi) << 32) | lo;
}
static inline unsigned int _arch_k_cycle_get_32(void)
{
#ifdef CONFIG_HPET_TIMER
extern u32_t _timer_cycle_get_32(void);
return _timer_cycle_get_32();
#else
return (u32_t)_arch_k_cycle_get_64();
#endif
}
#define _is_in_isr() (_arch_curr_cpu()->nested != 0)
static inline void _arch_switch(void *switch_to, void **switched_from)
{
xuk_switch(switch_to, switched_from);
}
static inline u32_t x86_apic_scaled_tsc(void)
{
u32_t lo, hi;
u64_t tsc;
__asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
tsc = (((u64_t)hi) << 32) | lo;
return (u32_t)(tsc >> CONFIG_XUK_APIC_TSC_SHIFT);
}
void x86_apic_set_timeout(u32_t cyc_from_now);
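/* Usage sketch (illustrative): one "scaled" cycle is
 * 2^CONFIG_XUK_APIC_TSC_SHIFT TSC cycles, so a timer driver might arm
 * the APIC with something like:
 *
 *     u32_t now = x86_apic_scaled_tsc();
 *     x86_apic_set_timeout(next_deadline - now);   // cycles from now
 *
 * next_deadline is a hypothetical driver-maintained value in the same
 * scaled units.
 */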
#define _ARCH_IRQ_CONNECT(irq, pri, isr, arg, flags) \
_arch_irq_connect_dynamic(irq, pri, isr, arg, flags)
extern int x86_64_except_reason;
/* Vector 5 is the "bounds" exception which is otherwise vestigial
* (BOUND is an illegal instruction in long mode)
*/
#define _ARCH_EXCEPT(reason) do { \
x86_64_except_reason = reason; \
__asm__ volatile("int $5"); \
} while (false)
#endif /* _KERNEL_ARCH_FUNC_H */

View file

@ -0,0 +1,18 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _KERNEL_ARCH_THREAD_H
#define _KERNEL_ARCH_THREAD_H
/* Vestigial boilerplate. This must exist so it can be included in
* kernel.h to define these structs to provide types for fields in the
* Zephyr thread struct. But we don't need that for this arch.
*/
struct _caller_saved { };
struct _callee_saved { };
struct _thread_arch { };
#endif /* _KERNEL_ARCH_THREAD_H */

View file

@ -0,0 +1,5 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/

View file

@ -0,0 +1,28 @@
/*
* Copyright (c) 2018 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef _XUK_SWITCH_H
#define _XUK_SWITCH_H
/* This lives separate from the rest of the xuk API, as it has
* to be inlined into Zephyr code.
*/
static inline void xuk_switch(void *switch_to, void **switched_from)
{
/* Constructs an IRETQ interrupt frame, the final CALL pushes
* the RIP to which to return
*/
__asm__ volatile("mov %%rsp, %%rcx;"
"pushq $0x10;" /* SS */
"pushq %%rcx;" /* RSP */
"pushfq;" /* RFLAGS */
"pushq $0x08;" /* CS */
"callq _switch_top"
: : "a"(switch_to), "d"(switched_from)
: "ecx", "memory");
}
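/* Note: the hard-coded 0x08/0x10 selectors pushed above are the code
 * and data/stack segments (selectors 8 and 16) that init_gdt() in
 * xuk.c builds, so the frame constructed here has the same shape as
 * the one pushed by an interrupt or by xuk_setup_stack().
 */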
#endif /* _XUK_SWITCH_H */