The timing_info benchmark on qemu_x86 shows this is a bit faster.

Before:

START - Time Measurement
Timing results: Clock frequency: 1000 MHz
Context switch                            :    896 cycles ,    895 ns
Interrupt latency                         :    768 cycles ,    767 ns
Tick overhead                             :  14912 cycles ,  14911 ns
Thread creation                           :  18688 cycles ,  18687 ns
Thread abort (non-running)                :  49216 cycles ,  49215 ns
Thread abort (_current)                   :  55616 cycles ,  55615 ns
Thread suspend                            :  11072 cycles ,  11071 ns
Thread resume                             :  10272 cycles ,  10271 ns
Thread yield                              :  12213 cycles ,  12212 ns
Thread sleep                              :  17984 cycles ,  17983 ns
Heap malloc                               :  21702 cycles ,  21701 ns
Heap free                                 :  15176 cycles ,  15175 ns
Semaphore take with context switch        :  19168 cycles ,  19167 ns
Semaphore give with context switch        :  18400 cycles ,  18399 ns
Semaphore take without context switch     :   2208 cycles ,   2207 ns
Semaphore give without context switch     :   4704 cycles ,   4703 ns
Mutex lock                                :   1952 cycles ,   1951 ns
Mutex unlock                              :   7936 cycles ,   7935 ns
Message queue put with context switch     :  20320 cycles ,  20319 ns
Message queue put without context switch  :   5792 cycles ,   5791 ns
Message queue get with context switch     :  22112 cycles ,  22111 ns
Message queue get without context switch  :   5312 cycles ,   5311 ns
Mailbox synchronous put                   :  27936 cycles ,  27935 ns
Mailbox synchronous get                   :  23392 cycles ,  23391 ns
Mailbox asynchronous put                  :  11808 cycles ,  11807 ns
Mailbox get without context switch        :  20416 cycles ,  20415 ns
Drop to user mode                         : 643712 cycles , 643711 ns
User thread creation                      : 652096 cycles , 652095 ns
Syscall overhead                          :   2720 cycles ,   2719 ns
Validation overhead k_object init         :   4256 cycles ,   4255 ns
Validation overhead k_object permission   :   4224 cycles ,   4223 ns
Time Measurement finished

After:

START - Time Measurement
Timing results: Clock frequency: 1000 MHz
Context switch                            :    896 cycles ,    895 ns
Interrupt latency                         :    768 cycles ,    767 ns
Tick overhead                             :  14752 cycles ,  14751 ns
Thread creation                           :  18464 cycles ,  18463 ns
Thread abort (non-running)                :  48992 cycles ,  48991 ns
Thread abort (_current)                   :  55552 cycles ,  55551 ns
Thread suspend                            :  10848 cycles ,  10847 ns
Thread resume                             :  10048 cycles ,  10047 ns
Thread yield                              :  12213 cycles ,  12212 ns
Thread sleep                              :  17984 cycles ,  17983 ns
Heap malloc                               :  21702 cycles ,  21701 ns
Heap free                                 :  15176 cycles ,  15175 ns
Semaphore take with context switch        :  19104 cycles ,  19103 ns
Semaphore give with context switch        :  18368 cycles ,  18367 ns
Semaphore take without context switch     :   1984 cycles ,   1983 ns
Semaphore give without context switch     :   4480 cycles ,   4479 ns
Mutex lock                                :   1728 cycles ,   1727 ns
Mutex unlock                              :   7712 cycles ,   7711 ns
Message queue put with context switch     :  20224 cycles ,  20223 ns
Message queue put without context switch  :   5568 cycles ,   5567 ns
Message queue get with context switch     :  22016 cycles ,  22015 ns
Message queue get without context switch  :   5088 cycles ,   5087 ns
Mailbox synchronous put                   :  27840 cycles ,  27839 ns
Mailbox synchronous get                   :  23296 cycles ,  23295 ns
Mailbox asynchronous put                  :  11584 cycles ,  11583 ns
Mailbox get without context switch        :  20192 cycles ,  20191 ns
Drop to user mode                         : 643616 cycles , 643615 ns
User thread creation                      : 651872 cycles , 651871 ns
Syscall overhead                          :   2464 cycles ,   2463 ns
Validation overhead k_object init         :   4032 cycles ,   4031 ns
Validation overhead k_object permission   :   4000 cycles ,   3999 ns
Time Measurement finished

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
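For context, figures like these come from bracketing each kernel operation with cycle-counter reads; at the 1000 MHz clock reported above, one cycle corresponds to one nanosecond. Below is a minimal sketch of that idea using Zephyr's portable cycle API (k_cycle_get_32(), which on x86 is backed by the arch_k_cycle_get_32() helper in the header that follows). It is an illustration only, not the actual timing_info harness, and it assumes the k_cyc_to_ns_floor64() conversion helper from sys/time_units.h is available:

/* Illustrative sketch only; not the timing_info benchmark harness. */
#include <kernel.h>
#include <sys/printk.h>

void measure_sem_take(void)
{
	struct k_sem sem;
	uint32_t start, cycles;

	k_sem_init(&sem, 1, 1);

	start = k_cycle_get_32();          /* backed by arch_k_cycle_get_32() */
	k_sem_take(&sem, K_NO_WAIT);       /* operation under test */
	cycles = k_cycle_get_32() - start;

	printk("Semaphore take: %u cycles, %u ns\n", cycles,
	       (uint32_t)k_cyc_to_ns_floor64(cycles));
}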
/*
 * Copyright (c) 2019 Intel Corp.
 * SPDX-License-Identifier: Apache-2.0
 */

#ifndef ZEPHYR_INCLUDE_ARCH_X86_ARCH_H_
#define ZEPHYR_INCLUDE_ARCH_X86_ARCH_H_

#include <devicetree.h>

/* Changing this value will require manual changes to exception and IDT setup
 * in locore.S for intel64
 */
#define Z_X86_OOPS_VECTOR	32

#if !defined(_ASMLANGUAGE)

#include <sys/sys_io.h>
#include <zephyr/types.h>
#include <stddef.h>
#include <stdbool.h>
#include <irq.h>
#include <arch/x86/mmustructs.h>
#include <arch/x86/thread_stack.h>

#ifdef __cplusplus
extern "C" {
#endif

static ALWAYS_INLINE void arch_irq_unlock(unsigned int key)
{
	if ((key & 0x00000200U) != 0U) { /* 'IF' bit */
		__asm__ volatile ("sti" ::: "memory");
	}
}
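
/*
 * Illustrative usage note (not part of the original header): unlock only
 * re-enables interrupts when the IF flag (bit 9 of EFLAGS, mask 0x200)
 * was set in the key saved by the matching arch_irq_lock(), so nested
 * lock/unlock pairs restore the outer interrupt state correctly.
 * arch_irq_lock() itself comes from the per-variant headers included
 * further down. A minimal sketch of the pairing:
 *
 *	unsigned int key = arch_irq_lock();
 *
 *	// critical section: interrupts masked on this CPU
 *
 *	arch_irq_unlock(key);	// "sti" runs only if IF was set at lock time
 */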

static ALWAYS_INLINE void sys_out8(uint8_t data, io_port_t port)
{
	__asm__ volatile("outb %b0, %w1" :: "a"(data), "Nd"(port));
}

static ALWAYS_INLINE uint8_t sys_in8(io_port_t port)
{
	uint8_t ret;

	__asm__ volatile("inb %w1, %b0" : "=a"(ret) : "Nd"(port));

	return ret;
}

static ALWAYS_INLINE void sys_out16(uint16_t data, io_port_t port)
{
	__asm__ volatile("outw %w0, %w1" :: "a"(data), "Nd"(port));
}

static ALWAYS_INLINE uint16_t sys_in16(io_port_t port)
{
	uint16_t ret;

	__asm__ volatile("inw %w1, %w0" : "=a"(ret) : "Nd"(port));

	return ret;
}

static ALWAYS_INLINE void sys_out32(uint32_t data, io_port_t port)
{
	__asm__ volatile("outl %0, %w1" :: "a"(data), "Nd"(port));
}

static ALWAYS_INLINE uint32_t sys_in32(io_port_t port)
{
	uint32_t ret;

	__asm__ volatile("inl %w1, %0" : "=a"(ret) : "Nd"(port));

	return ret;
}
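
/*
 * Hypothetical example (illustration only): polling a 16550-style UART
 * at the conventional COM1 base port with the accessors above. The
 * register offsets are the standard 16550 layout; the device itself is
 * an assumption.
 *
 *	#define COM1_THR 0x3f8	// transmit holding register (base + 0)
 *	#define COM1_LSR 0x3fd	// line status register (base + 5)
 *
 *	static void com1_putc(uint8_t c)
 *	{
 *		while ((sys_in8(COM1_LSR) & 0x20) == 0) {
 *			// wait: bit 5 = transmit holding register empty
 *		}
 *		sys_out8(c, COM1_THR);
 *	}
 */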

static ALWAYS_INLINE void sys_write8(uint8_t data, mm_reg_t addr)
{
	__asm__ volatile("movb %0, %1"
			 :
			 : "q"(data), "m" (*(volatile uint8_t *)(uintptr_t) addr)
			 : "memory");
}

static ALWAYS_INLINE uint8_t sys_read8(mm_reg_t addr)
{
	uint8_t ret;

	__asm__ volatile("movb %1, %0"
			 : "=q"(ret)
			 : "m" (*(volatile uint8_t *)(uintptr_t) addr)
			 : "memory");

	return ret;
}

static ALWAYS_INLINE void sys_write16(uint16_t data, mm_reg_t addr)
{
	__asm__ volatile("movw %0, %1"
			 :
			 : "r"(data), "m" (*(volatile uint16_t *)(uintptr_t) addr)
			 : "memory");
}

static ALWAYS_INLINE uint16_t sys_read16(mm_reg_t addr)
{
	uint16_t ret;

	__asm__ volatile("movw %1, %0"
			 : "=r"(ret)
			 : "m" (*(volatile uint16_t *)(uintptr_t) addr)
			 : "memory");

	return ret;
}

static ALWAYS_INLINE void sys_write32(uint32_t data, mm_reg_t addr)
{
	__asm__ volatile("movl %0, %1"
			 :
			 : "r"(data), "m" (*(volatile uint32_t *)(uintptr_t) addr)
			 : "memory");
}

static ALWAYS_INLINE uint32_t sys_read32(mm_reg_t addr)
{
	uint32_t ret;

	__asm__ volatile("movl %1, %0"
			 : "=r"(ret)
			 : "m" (*(volatile uint32_t *)(uintptr_t) addr)
			 : "memory");

	return ret;
}
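
/*
 * Hypothetical example (illustration only): read-modify-write of a
 * 32-bit memory-mapped register. The volatile asm plus "memory" clobber
 * in the accessors above keeps the compiler from caching, eliding or
 * reordering device accesses. The address and bit below are made up.
 *
 *	#define DEV_CTRL	((mm_reg_t)0xfed00010UL)
 *	#define DEV_CTRL_ENABLE	0x1
 *
 *	static void dev_enable(void)
 *	{
 *		sys_write32(sys_read32(DEV_CTRL) | DEV_CTRL_ENABLE, DEV_CTRL);
 *	}
 */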

static ALWAYS_INLINE void sys_set_bit(mem_addr_t addr, unsigned int bit)
{
	__asm__ volatile("btsl %1, %0"
			 : "+m" (*(volatile uint32_t *) (addr))
			 : "Ir" (bit)
			 : "memory");
}

static ALWAYS_INLINE void sys_clear_bit(mem_addr_t addr, unsigned int bit)
{
	__asm__ volatile("btrl %1, %0"
			 : "+m" (*(volatile uint32_t *) (addr))
			 : "Ir" (bit));
}

static ALWAYS_INLINE int sys_test_bit(mem_addr_t addr, unsigned int bit)
{
	int ret;

	__asm__ volatile("btl %2, %1;"
			 "sbb %0, %0"
			 : "=r" (ret), "+m" (*(volatile uint32_t *) (addr))
			 : "Ir" (bit));

	return ret;
}

static ALWAYS_INLINE int sys_test_and_set_bit(mem_addr_t addr,
					      unsigned int bit)
{
	int ret;

	__asm__ volatile("btsl %2, %1;"
			 "sbb %0, %0"
			 : "=r" (ret), "+m" (*(volatile uint32_t *) (addr))
			 : "Ir" (bit));

	return ret;
}

static ALWAYS_INLINE int sys_test_and_clear_bit(mem_addr_t addr,
						unsigned int bit)
{
	int ret;

	__asm__ volatile("btrl %2, %1;"
			 "sbb %0, %0"
			 : "=r" (ret), "+m" (*(volatile uint32_t *) (addr))
			 : "Ir" (bit));

	return ret;
}
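
/*
 * Usage note (illustration only): these are single btsl/btrl/btl
 * instructions without a "lock" prefix, so they are atomic with respect
 * to interrupts on the local CPU but not across CPUs. The "sbb %0, %0"
 * idiom converts the carry flag (the old bit value) into 0 or -1, so
 * callers should test against zero rather than 1. A hypothetical
 * claim-once flag:
 *
 *	static uint32_t flags;	// made-up flag word
 *
 *	static bool try_claim(void)
 *	{
 *		// zero means the bit was previously clear: claim succeeded
 *		return sys_test_and_set_bit((mem_addr_t)&flags, 0) == 0;
 *	}
 */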

#define sys_bitfield_set_bit sys_set_bit
#define sys_bitfield_clear_bit sys_clear_bit
#define sys_bitfield_test_bit sys_test_bit
#define sys_bitfield_test_and_set_bit sys_test_and_set_bit
#define sys_bitfield_test_and_clear_bit sys_test_and_clear_bit

/*
 * Map of IRQ numbers to their assigned vectors. On IA32, this is generated
 * at build time and defined via the linker script. On Intel64, it's an array.
 */

extern unsigned char _irq_to_interrupt_vector[];

#define Z_IRQ_TO_INTERRUPT_VECTOR(irq) \
	((unsigned int) _irq_to_interrupt_vector[irq])


#endif /* _ASMLANGUAGE */

#ifdef __cplusplus
}
#endif

#include <drivers/interrupt_controller/sysapic.h>

#ifdef CONFIG_X86_64
#include <arch/x86/intel64/arch.h>
#else
#include <arch/x86/ia32/arch.h>
#endif

#include <arch/common/ffs.h>

#ifdef __cplusplus
extern "C" {
#endif

#ifndef _ASMLANGUAGE

extern void arch_irq_enable(unsigned int irq);
extern void arch_irq_disable(unsigned int irq);

extern uint32_t z_timer_cycle_get_32(void);

static inline uint32_t arch_k_cycle_get_32(void)
{
	return z_timer_cycle_get_32();
}

static ALWAYS_INLINE bool arch_irq_unlocked(unsigned int key)
{
	return (key & 0x200) != 0;
}

/**
 * @brief read timestamp register, 32-bits only, unserialized
 */

static ALWAYS_INLINE uint32_t z_do_read_cpu_timestamp32(void)
{
	uint32_t rv;

	__asm__ volatile("rdtsc" : "=a" (rv) : : "%edx");

	return rv;
}
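
/*
 * Usage note (illustration only): a 32-bit TSC read wraps quickly
 * (roughly every 4.3 seconds at 1 GHz), but unsigned subtraction still
 * yields a correct delta as long as less than one full wrap elapses:
 *
 *	uint32_t start = z_do_read_cpu_timestamp32();
 *	// ... short operation ...
 *	uint32_t delta = z_do_read_cpu_timestamp32() - start;	// mod 2^32
 */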

/**
 * @brief read timestamp register ensuring serialization
 */

static inline uint64_t z_tsc_read(void)
{
	union {
		struct {
			uint32_t lo;
			uint32_t hi;
		};
		uint64_t value;
	} rv;

#ifdef CONFIG_X86_64
	/*
	 * According to Intel 64 and IA-32 Architectures Software
	 * Developer’s Manual, volume 3, chapter 8.2.5, LFENCE provides
	 * a more efficient method of controlling memory ordering than
	 * the CPUID instruction. So use LFENCE here, as all 64-bit
	 * CPUs have LFENCE.
	 */
	__asm__ volatile ("lfence");
#else
	/* rdtsc & cpuid clobber the eax, ebx, ecx and edx registers */
	__asm__ volatile (/* serialize */
		"xorl %%eax,%%eax;"
		"cpuid"
		:
		:
		: "%eax", "%ebx", "%ecx", "%edx"
		);
#endif

#ifdef CONFIG_X86_64
	/*
	 * We cannot use "=A", since this would use %rax on x86_64 and
	 * return only the lower 32 bits of the TSC
	 */
	__asm__ volatile ("rdtsc" : "=a" (rv.lo), "=d" (rv.hi));
#else
	/* "=A" means that value is in eax:edx pair. */
	__asm__ volatile ("rdtsc" : "=A" (rv.value));
#endif

	return rv.value;
}
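
/*
 * Usage note (illustration only): because the read is serialized, a
 * delta taken around a short sequence is not skewed by earlier
 * instructions still in flight, a property microbenchmarks generally
 * rely on:
 *
 *	uint64_t t0 = z_tsc_read();
 *	operation_under_test();		// hypothetical
 *	uint64_t cycles = z_tsc_read() - t0;
 */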

static ALWAYS_INLINE void arch_nop(void)
{
	__asm__ volatile("nop");
}

#endif /* _ASMLANGUAGE */

#ifdef __cplusplus
}
#endif

#endif /* ZEPHYR_INCLUDE_ARCH_X86_ARCH_H_ */