diff --git a/include/zephyr/arch/xtensa/arch.h b/include/zephyr/arch/xtensa/arch.h
index 18b2a76f61e..06e60e8c73d 100644
--- a/include/zephyr/arch/xtensa/arch.h
+++ b/include/zephyr/arch/xtensa/arch.h
@@ -84,6 +84,142 @@ static ALWAYS_INLINE void arch_nop(void)
 }
 #endif
 
+
+#if defined(CONFIG_XTENSA_RPO_CACHE)
+#if defined(CONFIG_ARCH_HAS_COHERENCE)
+static inline bool arch_mem_coherent(void *ptr)
+{
+	size_t addr = (size_t) ptr;
+
+	return (addr >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
+}
+#endif
+
+static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
+{
+	/* The math here is all compile-time: when the two regions
+	 * differ by a power of two, we can convert between them by
+	 * setting or clearing just one bit. Otherwise it needs two
+	 * operations.
+	 */
+	uint32_t rxor = (rto ^ rfrom) << 29;
+
+	rto <<= 29;
+	if (Z_IS_POW2(rxor)) {
+		if ((rxor & rto) == 0) {
+			return addr & ~rxor;
+		} else {
+			return addr | rxor;
+		}
+	} else {
+		return (addr & ~(7U << 29)) | rto;
+	}
+}
+/**
+ * @brief Return cached pointer to a RAM address
+ *
+ * The Xtensa coherence architecture maps addressable RAM twice, in
+ * two different 512MB regions whose L1 cache settings can be
+ * controlled independently. So for any given pointer, it is possible
+ * to convert it to and from a cached version.
+ *
+ * This function takes a pointer to any addressable object (either in
+ * cacheable memory or not) and returns a pointer that can be used to
+ * refer to the same memory through the L1 data cache. Data read
+ * through the resulting pointer will reflect locally cached values on
+ * the current CPU if they exist, and writes will go first into the
+ * cache and be written back later.
+ *
+ * @see arch_xtensa_uncached_ptr()
+ *
+ * @param ptr A pointer to a valid C object
+ * @return A pointer to the same object via the L1 dcache
+ */
+static inline void __sparse_cache *arch_xtensa_cached_ptr(void *ptr)
+{
+	return (__sparse_force void __sparse_cache *)z_xtrpoflip((uint32_t) ptr,
+								 CONFIG_XTENSA_CACHED_REGION,
+								 CONFIG_XTENSA_UNCACHED_REGION);
+}
+
+/**
+ * @brief Return uncached pointer to a RAM address
+ *
+ * The Xtensa coherence architecture maps addressable RAM twice, in
+ * two different 512MB regions whose L1 cache settings can be
+ * controlled independently. So for any given pointer, it is possible
+ * to convert it to and from a cached version.
+ *
+ * This function takes a pointer to any addressable object (either in
+ * cacheable memory or not) and returns a pointer that can be used to
+ * refer to the same memory while bypassing the L1 data cache. Data
+ * in the L1 cache will not be inspected nor modified by the access.
+ *
+ * @see arch_xtensa_cached_ptr()
+ *
+ * @param ptr A pointer to a valid C object
+ * @return A pointer to the same object bypassing the L1 dcache
+ */
+static inline void *arch_xtensa_uncached_ptr(void __sparse_cache *ptr)
+{
+	return (void *)z_xtrpoflip((__sparse_force uint32_t)ptr,
+				   CONFIG_XTENSA_UNCACHED_REGION,
+				   CONFIG_XTENSA_CACHED_REGION);
+}
+
+/* Utility to generate an unrolled and optimal[1] code sequence to set
+ * the RPO TLB registers (contra the HAL cacheattr macros, which
+ * generate larger code and can't be called from C), based on the
+ * KERNEL_COHERENCE configuration in use. Selects RPO attribute "2"
+ * for regions (including MMIO registers in region zero) which want to
+ * bypass L1, "4" for the cached region which wants writeback, and
+ * "15" (invalid) elsewhere.
+ *
+ * Note that on cores that have the "translation" option set, we need
+ * to put an identity mapping in the high bits. Also per spec
+ * changing the current code region (by definition cached) requires
+ * that WITLB be followed by an ISYNC and that both instructions live
+ * in the same cache line (two 3-byte instructions fit in an 8-byte
+ * aligned region, so that's guaranteed not to cross a cache line
+ * boundary).
+ *
+ * [1] With the sole exception of gcc's infuriating insistence on
+ * emitting a precomputed literal for addr + addrincr instead of
+ * computing it with a single ADD instruction from values it already
+ * has in registers. Explicitly assigning the variables to registers
+ * via an attribute works, but then emits needless MOV instructions
+ * instead. I tell myself it's just 32 bytes of .text, but... Sigh.
+ */
+#define _REGION_ATTR(r)						\
+	((r) == 0 ? 2 :						\
+	 ((r) == CONFIG_XTENSA_CACHED_REGION ? 4 :		\
+	  ((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))
+
+#define _SET_ONE_TLB(region) do {				\
+	uint32_t attr = _REGION_ATTR(region);			\
+	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
+		attr |= addr; /* RPO with translation */	\
+	}							\
+	if (region != CONFIG_XTENSA_CACHED_REGION) {		\
+		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
+				 :: "r"(attr), "r"(addr));	\
+	} else {						\
+		__asm__ volatile("wdtlb %0, %1"			\
+				 :: "r"(attr), "r"(addr));	\
+		__asm__ volatile("j 1f; .align 8; 1:");		\
+		__asm__ volatile("witlb %0, %1; isync"		\
+				 :: "r"(attr), "r"(addr));	\
+	}							\
+	addr += addrincr;					\
+} while (0)
+
+#define ARCH_XTENSA_SET_RPO_TLB() do {				\
+	register uint32_t addr = 0, addrincr = 0x20000000;	\
+	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7);	\
+} while (0)
+
+#endif
+
 #endif /* !defined(_ASMLANGUAGE) && !defined(__ASSEMBLER__) */
 
 #endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_ARCH_H_ */
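The z_xtrpoflip() trick is easier to see with concrete numbers. Below is a small host-side C sketch of the same math, assuming for illustration a cached region of 5 and an uncached region of 4 (a cAVS-style layout; the real values come from CONFIG_XTENSA_CACHED_REGION and CONFIG_XTENSA_UNCACHED_REGION). Since 5 ^ 4 == 1, the regions differ by one bit and each conversion compiles to a single AND or OR:

/* Host-side sketch of the z_xtrpoflip() math; region numbers 5/4 are
 * illustrative stand-ins for the Kconfig symbols.
 */
#include <stdint.h>
#include <stdio.h>

#define IS_POW2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0))

static uint32_t rpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
{
	uint32_t rxor = (rto ^ rfrom) << 29;

	rto <<= 29;
	if (IS_POW2(rxor)) {
		/* Regions differ in exactly one bit: set or clear it */
		return (rxor & rto) == 0 ? (addr & ~rxor) : (addr | rxor);
	}
	/* General case: mask out all three region bits, substitute rto */
	return (addr & ~(7U << 29)) | rto;
}

int main(void)
{
	uint32_t uncached = 0x9E001234;            /* region 4 */
	uint32_t cached = rpoflip(uncached, 5, 4); /* 0xBE001234, one OR */

	printf("%08x -> %08x -> %08x\n", uncached, cached,
	       rpoflip(cached, 4, 5));             /* back again, one AND */
	return 0;
}

0x9E001234 converts to 0xBE001234 and back; only when the two regions' XOR is not a power of two does the fallback path with two operations get used.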
diff --git a/include/zephyr/arch/xtensa/cache.h b/include/zephyr/arch/xtensa/cache.h
index 991fbafacaf..8c5ecf1401f 100644
--- a/include/zephyr/arch/xtensa/cache.h
+++ b/include/zephyr/arch/xtensa/cache.h
@@ -169,141 +169,6 @@ static ALWAYS_INLINE void arch_icache_disable(void)
 
-#if defined(CONFIG_XTENSA_RPO_CACHE)
-#if defined(CONFIG_ARCH_HAS_COHERENCE)
-static inline bool arch_mem_coherent(void *ptr)
-{
-	size_t addr = (size_t) ptr;
-
-	return (addr >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
-}
-#endif
-
-static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto, uint32_t rfrom)
-{
-	/* The math here is all compile-time: when the two regions
-	 * differ by a power of two, we can convert between them by
-	 * setting or clearing just one bit. Otherwise it needs two
-	 * operations.
-	 */
-	uint32_t rxor = (rto ^ rfrom) << 29;
-
-	rto <<= 29;
-	if (Z_IS_POW2(rxor)) {
-		if ((rxor & rto) == 0) {
-			return addr & ~rxor;
-		} else {
-			return addr | rxor;
-		}
-	} else {
-		return (addr & ~(7U << 29)) | rto;
-	}
-}
-/**
- * @brief Return cached pointer to a RAM address
- *
- * The Xtensa coherence architecture maps addressable RAM twice, in
- * two different 512MB regions whose L1 cache settings can be
- * controlled independently. So for any given pointer, it is possible
- * to convert it to and from a cached version.
- *
- * This function takes a pointer to any addressable object (either in
- * cacheable memory or not) and returns a pointer that can be used to
- * refer to the same memory through the L1 data cache. Data read
- * through the resulting pointer will reflect locally cached values on
- * the current CPU if they exist, and writes will go first into the
- * cache and be written back later.
- *
- * @see arch_xtensa_uncached_ptr()
- *
- * @param ptr A pointer to a valid C object
- * @return A pointer to the same object via the L1 dcache
- */
-static inline void __sparse_cache *arch_xtensa_cached_ptr(void *ptr)
-{
-	return (__sparse_force void __sparse_cache *)z_xtrpoflip((uint32_t) ptr,
-								 CONFIG_XTENSA_CACHED_REGION,
-								 CONFIG_XTENSA_UNCACHED_REGION);
-}
-
-/**
- * @brief Return uncached pointer to a RAM address
- *
- * The Xtensa coherence architecture maps addressable RAM twice, in
- * two different 512MB regions whose L1 cache settings can be
- * controlled independently. So for any given pointer, it is possible
- * to convert it to and from a cached version.
- *
- * This function takes a pointer to any addressable object (either in
- * cacheable memory or not) and returns a pointer that can be used to
- * refer to the same memory while bypassing the L1 data cache. Data
- * in the L1 cache will not be inspected nor modified by the access.
- *
- * @see arch_xtensa_cached_ptr()
- *
- * @param ptr A pointer to a valid C object
- * @return A pointer to the same object bypassing the L1 dcache
- */
-static inline void *arch_xtensa_uncached_ptr(void __sparse_cache *ptr)
-{
-	return (void *)z_xtrpoflip((__sparse_force uint32_t)ptr,
-				   CONFIG_XTENSA_UNCACHED_REGION,
-				   CONFIG_XTENSA_CACHED_REGION);
-}
-
-/* Utility to generate an unrolled and optimal[1] code sequence to set
- * the RPO TLB registers (contra the HAL cacheattr macros, which
- * generate larger code and can't be called from C), based on the
- * KERNEL_COHERENCE configuration in use. Selects RPO attribute "2"
- * for regions (including MMIO registers in region zero) which want to
- * bypass L1, "4" for the cached region which wants writeback, and
- * "15" (invalid) elsewhere.
- *
- * Note that on cores that have the "translation" option set, we need
- * to put an identity mapping in the high bits. Also per spec
- * changing the current code region (by definition cached) requires
- * that WITLB be followed by an ISYNC and that both instructions live
- * in the same cache line (two 3-byte instructions fit in an 8-byte
- * aligned region, so that's guaranteed not to cross a cache line
- * boundary).
- *
- * [1] With the sole exception of gcc's infuriating insistence on
- * emitting a precomputed literal for addr + addrincr instead of
- * computing it with a single ADD instruction from values it already
- * has in registers. Explicitly assigning the variables to registers
- * via an attribute works, but then emits needless MOV instructions
- * instead. I tell myself it's just 32 bytes of .text, but... Sigh.
- */
-#define _REGION_ATTR(r)						\
-	((r) == 0 ? 2 :						\
-	 ((r) == CONFIG_XTENSA_CACHED_REGION ? 4 :		\
-	  ((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))
-
-#define _SET_ONE_TLB(region) do {				\
-	uint32_t attr = _REGION_ATTR(region);			\
-	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
-		attr |= addr; /* RPO with translation */	\
-	}							\
-	if (region != CONFIG_XTENSA_CACHED_REGION) {		\
-		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
-				 :: "r"(attr), "r"(addr));	\
-	} else {						\
-		__asm__ volatile("wdtlb %0, %1"			\
-				 :: "r"(attr), "r"(addr));	\
-		__asm__ volatile("j 1f; .align 8; 1:");		\
-		__asm__ volatile("witlb %0, %1; isync"		\
-				 :: "r"(attr), "r"(addr));	\
-	}							\
-	addr += addrincr;					\
-} while (0)
-
-#define ARCH_XTENSA_SET_RPO_TLB() do {				\
-	register uint32_t addr = 0, addrincr = 0x20000000;	\
-	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7);	\
-} while (0)
-
-#endif
-
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
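The attribute selection that the moved block performs reduces to a small table. Here is a host-side sketch of what ARCH_XTENSA_SET_RPO_TLB() ends up writing per 512MB region, again with the illustrative 5/4 region assignment and assuming a core where XCHAL_HAVE_XLT_CACHEATTR is set (so the identity mapping lands in the high bits of the TLB word):

/* Host-side sketch of the per-region RPO attribute table; the region
 * numbers are stand-ins for CONFIG_XTENSA_CACHED_REGION and
 * CONFIG_XTENSA_UNCACHED_REGION.
 */
#include <stdint.h>
#include <stdio.h>

#define CACHED_REGION   5 /* illustrative */
#define UNCACHED_REGION 4 /* illustrative */

/* Same selection as _REGION_ATTR(): 2 = bypass L1,
 * 4 = writeback cached, 15 = invalid.
 */
static uint32_t region_attr(uint32_t r)
{
	return r == 0 ? 2 :
	       (r == CACHED_REGION ? 4 :
		(r == UNCACHED_REGION ? 2 : 15));
}

int main(void)
{
	for (uint32_t r = 0; r < 8; r++) {
		uint32_t addr = r << 29;               /* region base */
		uint32_t attr = region_attr(r) | addr; /* identity map in
							* high bits, as on
							* XLT_CACHEATTR cores
							*/

		printf("region %u base 0x%08x -> wdtlb/witlb word 0x%08x\n",
		       r, addr, attr);
	}
	return 0;
}

Only the cached region takes the WITLB-then-ISYNC path with the 8-byte alignment jump, since rewriting the attribute of the region the code itself runs from is what the spec constrains.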
diff --git a/soc/xtensa/intel_adsp/common/boot_complete.c b/soc/xtensa/intel_adsp/common/boot_complete.c
index 241e51951c6..3c062c8c3f3 100644
--- a/soc/xtensa/intel_adsp/common/boot_complete.c
+++ b/soc/xtensa/intel_adsp/common/boot_complete.c
@@ -2,7 +2,7 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-#include
+#include
 #include
 #include
 #include
diff --git a/soc/xtensa/intel_adsp/common/include/cpu_init.h b/soc/xtensa/intel_adsp/common/include/cpu_init.h
index 8560972c6ad..277f804311c 100644
--- a/soc/xtensa/intel_adsp/common/include/cpu_init.h
+++ b/soc/xtensa/intel_adsp/common/include/cpu_init.h
@@ -5,7 +5,7 @@
 #define __INTEL_ADSP_CPU_INIT_H
 
 #include
-#include
+#include
 #include
 #include
 #include
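As for why the two intel_adsp files need only an include swap: they are consumers of the moved APIs, so the declarations just have to come from the new header. A minimal, hypothetical usage sketch, assuming a build with CONFIG_XTENSA_RPO_CACHE and CONFIG_ARCH_HAS_COHERENCE enabled (the variable and function names are illustrative, not from this patch):

#include <zephyr/kernel.h>
#include <zephyr/arch/xtensa/arch.h>

static uint32_t shared_word; /* illustrative datum shared across CPUs */

void publish_to_other_cpu(uint32_t val)
{
	/* Round-trip through the cached alias type, then take the
	 * uncached alias: the store bypasses L1, so another CPU sees
	 * it without an explicit writeback.
	 */
	void *unc = arch_xtensa_uncached_ptr(
		arch_xtensa_cached_ptr(&shared_word));

	__ASSERT_NO_MSG(arch_mem_coherent(unc));
	*(volatile uint32_t *)unc = val;
}

Because both conversion functions accept pointers into either region, composing them this way yields a coherent alias regardless of which 512MB region the symbol was linked into.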