From b0693ee41b4b8ef175050955ba61b0dd392bbda8 Mon Sep 17 00:00:00 2001
From: Daniel Leung
Date: Mon, 7 Apr 2025 09:52:57 -0700
Subject: [PATCH] xtensa: userspace: handle privileged stack in arch_cohere_stacks

This extends arch_cohere_stacks() to handle privileged stacks of
user threads when userspace is enabled.

Signed-off-by: Daniel Leung
---
 arch/xtensa/include/kernel_arch_func.h | 92 ++++++++++++++++++++++++--
 arch/xtensa/include/xtensa_asm2_s.h    | 28 ++++++++
 2 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/arch/xtensa/include/kernel_arch_func.h b/arch/xtensa/include/kernel_arch_func.h
index a7298497d29..8e0641f9d4b 100644
--- a/arch/xtensa/include/kernel_arch_func.h
+++ b/arch/xtensa/include/kernel_arch_func.h
@@ -93,6 +93,30 @@ static ALWAYS_INLINE void xtensa_cohere_stacks_cache_flush(size_t s_addr, size_t
 	}
 }
 
+/**
+ * @brief Flush and invalidate cache between two stack addresses.
+ *
+ * This flushes and invalidates the cache lines between two stack
+ * addresses, beginning with the cache line including the start
+ * address, and ending with the cache line including the end address.
+ * Note that, contrary to xtensa_cohere_stacks_cache_invd(),
+ * the last cache line will be flushed and invalidated instead
+ * of being ignored.
+ *
+ * @param s_addr Starting address of memory region to have cache manipulated.
+ * @param e_addr Ending address of memory region to have cache manipulated.
+ */
+static ALWAYS_INLINE void xtensa_cohere_stacks_cache_flush_invd(size_t s_addr, size_t e_addr)
+{
+	const size_t first = ROUND_DOWN(s_addr, XCHAL_DCACHE_LINESIZE);
+	const size_t last = ROUND_UP(e_addr, XCHAL_DCACHE_LINESIZE);
+	size_t line;
+
+	for (line = first; line < last; line += XCHAL_DCACHE_LINESIZE) {
+		__asm__ volatile("dhwbi %0, 0" :: "r"(line));
+	}
+}
+
 static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 					     void *old_switch_handle,
 					     struct k_thread *new_thread)
@@ -120,9 +144,14 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 	size_t nend = nstack + new_thread->stack_info.size;
 	size_t nsp = (size_t) new_thread->switch_handle;
 
-	int zero = 0;
+	uint32_t flush_end = 0;
 
-	__asm__ volatile("wsr %0, " ZSR_FLUSH_STR :: "r"(zero));
+#ifdef CONFIG_USERSPACE
+	/* End of old_thread privileged stack. */
+	void *o_psp_end = old_thread->arch.psp;
+#endif
+
+	__asm__ volatile("wsr %0, " ZSR_FLUSH_STR :: "r"(flush_end));
 
 	if (old_switch_handle != NULL) {
 		int32_t a0save;
@@ -175,8 +204,13 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 	 * to the stack top stashed in a special register.
 	 */
 	if (old_switch_handle != NULL) {
-		xtensa_cohere_stacks_cache_flush(osp, oend);
-		xtensa_cohere_stacks_cache_invd(ostack, osp);
+#ifdef CONFIG_USERSPACE
+		if (o_psp_end == NULL)
+#endif
+		{
+			xtensa_cohere_stacks_cache_flush(osp, oend);
+			xtensa_cohere_stacks_cache_invd(ostack, osp);
+		}
 	} else {
 		/* When in a switch, our current stack is the outbound
 		 * stack. Flush the single line containing the stack
@@ -188,12 +222,56 @@ static ALWAYS_INLINE void arch_cohere_stacks(struct k_thread *old_thread,
 		__asm__ volatile("mov %0, a1" : "=r"(osp));
 		osp -= 16;
 		xtensa_cohere_stacks_cache_flush(osp, osp + 16);
-		xtensa_cohere_stacks_cache_invd(ostack, osp);
 
-		uint32_t end = oend;
+#ifdef CONFIG_USERSPACE
+		if (o_psp_end == NULL)
+#endif
+		{
+			xtensa_cohere_stacks_cache_invd(ostack, osp);
 
-		__asm__ volatile("wsr %0, " ZSR_FLUSH_STR :: "r"(end));
+			flush_end = oend;
+		}
 	}
+
+#ifdef CONFIG_USERSPACE
+	/* User threads need a bit more processing due to having
+	 * a privileged stack for handling syscalls. The privileged
+	 * stack always immediately precedes the thread stack.
+	 *
+	 * Note that, with userspace enabled, we need to swap
+	 * page tables during context switch via function calls.
+	 * This means that the stack is being actively used,
+	 * unlike the non-userspace case mentioned above.
+	 * Therefore we need to set ZSR_FLUSH_STR to make sure
+	 * we flush the cached data in the stack.
+	 */
+	if (o_psp_end != NULL) {
+		/* Start of old_thread privileged stack.
+		 *
+		 * struct xtensa_thread_stack_header wholly contains
+		 * an array for the privileged stack, so we can use
+		 * its size to calculate where the start is.
+		 */
+		size_t o_psp_start = (size_t)o_psp_end - sizeof(struct xtensa_thread_stack_header);
+
+		if ((osp >= ostack) && (osp < oend)) {
+			/* osp in user stack. */
+			xtensa_cohere_stacks_cache_invd(o_psp_start, osp);
+
+			flush_end = oend;
+		} else if ((osp >= o_psp_start) && (osp < ostack)) {
+			/* osp in privileged stack. */
+			xtensa_cohere_stacks_cache_flush(ostack, oend);
+			xtensa_cohere_stacks_cache_invd(o_psp_start, osp);
+
+			flush_end = (size_t)old_thread->arch.psp;
+		}
+	}
+#endif /* CONFIG_USERSPACE */
+
+	flush_end = ROUND_DOWN(flush_end, XCHAL_DCACHE_LINESIZE);
+	__asm__ volatile("wsr %0, " ZSR_FLUSH_STR :: "r"(flush_end));
+
 #endif /* !CONFIG_SCHED_CPU_MASK_PIN_ONLY */
 }
 #endif
diff --git a/arch/xtensa/include/xtensa_asm2_s.h b/arch/xtensa/include/xtensa_asm2_s.h
index 8115c6f6869..5d819a3b351 100644
--- a/arch/xtensa/include/xtensa_asm2_s.h
+++ b/arch/xtensa/include/xtensa_asm2_s.h
@@ -532,6 +532,34 @@ _do_call_\@:
 	SPILL_ALL_WINDOWS
 #endif
 
+#if defined(CONFIG_KERNEL_COHERENCE) && \
+	defined(CONFIG_USERSPACE) && \
+	!defined(CONFIG_SCHED_CPU_MASK_PIN_ONLY)
+
+	/* With userspace enabled, we need to swap page tables via function
+	 * calls after returning from the syscall handler in CROSS_STACK_CALL
+	 * above. This means that the stack is being actively used, and so we
+	 * need to flush the cached data in the stack.
+	 */
+
+	movi a2, 0
+	xsr.ZSR_FLUSH a2
+	beqz a2, _excint_noflush_\@
+
+	rsr.ZSR_CPU a3
+	l32i a3, a3, \NEST_OFF
+	bnez a3, _excint_noflush_\@
+
+	mov a3, a1
+
+_excint_flushloop_\@:
+	dhwb a3, 0
+	addi a3, a3, XCHAL_DCACHE_LINESIZE
+	blt a3, a2, _excint_flushloop_\@
+
+_excint_noflush_\@:
+#endif /* CONFIG_KERNEL_COHERENCE && CONFIG_USERSPACE && !CONFIG_SCHED_CPU_MASK_PIN_ONLY */
+
 	/* Restore A1 stack pointer from "next" handle. */
 	mov a1, a6
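
For readers unfamiliar with the stack layout the patch relies on, the following is a hypothetical standalone sketch (not part of the patch and not Zephyr code) of the region selection added to arch_cohere_stacks() for user threads: the privileged stack sits immediately below the thread stack, old_thread->arch.psp points at its end, and the outbound stack pointer decides which region is invalidated immediately and where the deferred ZSR_FLUSH flush should end. PRIV_STACK_SIZE stands in for sizeof(struct xtensa_thread_stack_header), and all addresses are made-up example values.

/*
 * Hypothetical sketch only: mirrors the CONFIG_USERSPACE branch above
 * using plain integers instead of real thread and cache operations.
 */
#include <stdint.h>
#include <stdio.h>

#define PRIV_STACK_SIZE 1024u /* stand-in for the privileged stack size */

int main(void)
{
	uintptr_t ostack = 0x20001000u;      /* start of the user thread stack */
	uintptr_t oend = ostack + 4096u;     /* end of the user thread stack */
	uintptr_t psp_end = ostack;          /* old_thread->arch.psp analogue */
	uintptr_t psp_start = psp_end - PRIV_STACK_SIZE;

	/* Outbound stack pointer; try ostack - 256u to exercise the other branch. */
	uintptr_t osp = ostack + 2048u;

	if ((osp >= ostack) && (osp < oend)) {
		/* Switched out while on the user stack: invalidate everything
		 * below osp (privileged stack included) and defer flushing
		 * [osp, oend) via ZSR_FLUSH.
		 */
		printf("invalidate [%#lx, %#lx), deferred flush end = %#lx\n",
		       (unsigned long)psp_start, (unsigned long)osp,
		       (unsigned long)oend);
	} else if ((osp >= psp_start) && (osp < ostack)) {
		/* Switched out while on the privileged stack: flush the whole
		 * user stack now, invalidate below osp, and defer flushing
		 * [osp, psp_end) via ZSR_FLUSH.
		 */
		printf("flush [%#lx, %#lx), invalidate [%#lx, %#lx), deferred flush end = %#lx\n",
		       (unsigned long)ostack, (unsigned long)oend,
		       (unsigned long)psp_start, (unsigned long)osp,
		       (unsigned long)psp_end);
	}

	return 0;
}

The deferred end value corresponds to what the patch stores in ZSR_FLUSH, which the exception-exit path in xtensa_asm2_s.h later uses to flush from the live stack pointer upward, rather than flushing a stack that is still being used during the switch.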