This header does not expose any public APIs, so move it under kernel/include and change files including it. Signed-off-by: Anas Nashif <anas.nashif@intel.com>
154 lines
3.7 KiB
C
154 lines
3.7 KiB
C
/*
|
|
* Copyright (c) 2019 Intel Corporation
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
#include <zephyr/kernel.h>
|
|
#include <zephyr/sys/printk.h>
|
|
#include <wait_q.h>
|
|
#include <ksched.h>
|
|
|
|
/* This is a scheduler microbenchmark, designed to measure latencies
 * of specific low-level scheduling primitives independent of overhead
 * from application or API abstractions.  It works very simply: a main
 * thread creates a "partner" thread at a higher priority; the partner
 * then sleeps using z_pend_curr_irqlock().  From this initial
 * state:
 *
 * 1. The main thread calls z_unpend_first_thread()
 * 2. The main thread calls z_ready_thread()
 * 3. The main thread calls k_yield()
 *    (the kernel switches to the partner thread)
 * 4. The partner thread then runs and calls z_pend_curr_irqlock() again
 *    (the kernel switches to the main thread)
 * 5. The main thread returns from k_yield()
 *
 * It then repeats this cycle many times, reporting timestamp latencies
 * between each numbered step and for the whole cycle, and a running
 * average over all cycles run.
 */
|
|
|
|
/* Number of benchmark cycles that contribute to the running average. */
#define N_RUNS 1000
/* Number of initial warm-up cycles that are run and printed but kept
 * out of the running average.
 */
#define N_SETTLE 10
|
|
|
|
|
|
static K_THREAD_STACK_DEFINE(partner_stack, 1024);
|
|
static struct k_thread partner_thread;
|
|
|
|
_wait_q_t waitq;
|
|
|
|
/* Measurement points of one benchmark cycle, in the order they are
 * stamped.  Each value doubles as an index into stamps[].
 */
enum {
	UNPENDING = 0,             /* main: about to call z_unpend_first_thread() */
	UNPENDED_READYING = 1,     /* main: unpend done, about to call z_ready_thread() */
	READIED_YIELDING = 2,      /* main: ready done, about to call k_yield() */
	PARTNER_AWAKE_PENDING = 3, /* partner: woke up, about to pend again */
	YIELDED = 4,               /* main: returned from k_yield() */
	NUM_STAMP_STATES = 5       /* number of stamp slots, not a state itself */
};

/* Most recent timestamp recorded for each state by _stamp(). */
uint32_t stamps[NUM_STAMP_STATES];
|
|
|
|
static inline int _stamp(int state)
|
|
{
|
|
uint32_t t;
|
|
|
|
/* In theory the TSC has much lower overhead and higher
|
|
* precision. In practice it's VERY jittery in recent qemu
|
|
* versions and frankly too noisy to trust.
|
|
*/
|
|
#ifdef CONFIG_X86
|
|
__asm__ volatile("rdtsc" : "=a"(t) : : "edx");
|
|
#else
|
|
t = k_cycle_get_32();
|
|
#endif
|
|
|
|
stamps[state] = t;
|
|
return t;
|
|
}
|
|
|
|
/* Record a timestamp for state `s` via _stamp().
 *
 * Debugging alternative that also prints each stamp as it is taken:
 *
 *   #define stamp(s) printk("%s @ %d\n", #s, _stamp(s))
 */
#define stamp(s) _stamp(s)
|
|
|
|
static void partner_fn(void *arg1, void *arg2, void *arg3)
|
|
{
|
|
ARG_UNUSED(arg1);
|
|
ARG_UNUSED(arg2);
|
|
ARG_UNUSED(arg3);
|
|
|
|
printk("Running %p\n", k_current_get());
|
|
|
|
while (true) {
|
|
unsigned int key = irq_lock();
|
|
|
|
z_pend_curr_irqlock(key, &waitq, K_FOREVER);
|
|
stamp(PARTNER_AWAKE_PENDING);
|
|
}
|
|
}
|
|
|
|
int main(void)
|
|
{
|
|
z_waitq_init(&waitq);
|
|
|
|
int main_prio = k_thread_priority_get(k_current_get());
|
|
int partner_prio = main_prio - 1;
|
|
|
|
k_tid_t th = k_thread_create(&partner_thread, partner_stack,
|
|
K_THREAD_STACK_SIZEOF(partner_stack),
|
|
partner_fn, NULL, NULL, NULL,
|
|
partner_prio, 0, K_NO_WAIT);
|
|
|
|
/* Let it start running and pend */
|
|
k_sleep(K_MSEC(100));
|
|
|
|
uint64_t tot = 0U;
|
|
uint32_t runs = 0U;
|
|
|
|
for (int i = 0; i < N_RUNS + N_SETTLE; i++) {
|
|
stamp(UNPENDING);
|
|
z_unpend_first_thread(&waitq);
|
|
stamp(UNPENDED_READYING);
|
|
z_ready_thread(th);
|
|
stamp(READIED_YIELDING);
|
|
|
|
/* z_ready_thread() does not reschedule, so this is
|
|
* guaranteed to be the point where we will yield to
|
|
* the new thread, which (being higher priority) will
|
|
* run immediately, and we'll wake up synchronously as
|
|
* soon as it pends.
|
|
*/
|
|
k_yield();
|
|
stamp(YIELDED);
|
|
|
|
uint32_t avg, whole = stamps[4] - stamps[0];
|
|
|
|
if (++runs > N_SETTLE) {
|
|
/* Only compute averages after the first ~10
|
|
* runs to let performance settle, cache
|
|
* effects in the host pollute the early
|
|
* data
|
|
*/
|
|
tot += whole;
|
|
avg = tot / (runs - 10);
|
|
} else {
|
|
tot = 0U;
|
|
avg = 0U;
|
|
}
|
|
|
|
/* For reference, an unmodified HEAD on qemu_x86 with
|
|
* !USERSPACE and SCHED_DUMB and using -icount
|
|
* shift=0,sleep=off,align=off, I get results of:
|
|
*
|
|
* unpend 132 ready 257 switch 278 pend 321 tot 988 (avg 900)
|
|
*/
|
|
printk("unpend %4d ready %4d switch %4d pend %4d tot %4d (avg %4d)\n",
|
|
stamps[1] - stamps[0],
|
|
stamps[2] - stamps[1],
|
|
stamps[3] - stamps[2],
|
|
stamps[4] - stamps[3],
|
|
whole, avg);
|
|
}
|
|
printk("fin\n");
|
|
return 0;
|
|
}
|