/*
 * Copyright 2021 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */
#ifndef ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_
#define ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_

#include <xtensa/config/core-isa.h>
#include <zephyr/types.h>
#include <sys/util.h>

#ifdef __cplusplus
extern "C" {
#endif

#define Z_DCACHE_MAX (XCHAL_DCACHE_SIZE / XCHAL_DCACHE_WAYS)

#define Z_IS_POW2(x) (((x) != 0) && (((x) & ((x)-1)) == 0))

#if XCHAL_DCACHE_SIZE
BUILD_ASSERT(Z_IS_POW2(XCHAL_DCACHE_LINESIZE));
BUILD_ASSERT(Z_IS_POW2(Z_DCACHE_MAX));
#endif

static ALWAYS_INLINE void z_xtensa_cache_flush(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhwb %0, 0" :: "r"(line));
	}
#endif
}

static ALWAYS_INLINE void z_xtensa_cache_flush_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhwbi %0, 0" :: "r"(line));
	}
#endif
}

static ALWAYS_INLINE void z_xtensa_cache_inv(void *addr, size_t bytes)
{
#if XCHAL_DCACHE_SIZE
	size_t step = XCHAL_DCACHE_LINESIZE;
	size_t first = ROUND_DOWN(addr, step);
	size_t last = ROUND_UP(((long)addr) + bytes, step);
	size_t line;

	for (line = first; bytes && line < last; line += step) {
		__asm__ volatile("dhi %0, 0" :: "r"(line));
	}
#endif
}

static ALWAYS_INLINE void z_xtensa_cache_inv_all(void)
{
	z_xtensa_cache_inv(NULL, Z_DCACHE_MAX);
}

static ALWAYS_INLINE void z_xtensa_cache_flush_all(void)
{
	z_xtensa_cache_flush(NULL, Z_DCACHE_MAX);
}

static ALWAYS_INLINE void z_xtensa_cache_flush_inv_all(void)
{
	z_xtensa_cache_flush_inv(NULL, Z_DCACHE_MAX);
}

#ifdef CONFIG_ARCH_HAS_COHERENCE
static inline bool arch_mem_coherent(void *ptr)
{
	size_t addr = (size_t) ptr;

	return (addr >> 29) == CONFIG_XTENSA_UNCACHED_REGION;
}
#endif

static ALWAYS_INLINE uint32_t z_xtrpoflip(uint32_t addr, uint32_t rto,
					  uint32_t rfrom)
{
	/* The math here is all compile-time: when the two regions
	 * differ by a power of two, we can convert between them by
	 * setting or clearing just one bit.  For example, if the
	 * cached region were 5 and the uncached region 4, then
	 * rxor = (5 ^ 4) << 29 = 0x20000000, a single bit that gets
	 * ORed in or masked out.  Otherwise it needs two operations.
	 */
	uint32_t rxor = (rto ^ rfrom) << 29;

	rto <<= 29;
	if (Z_IS_POW2(rxor)) {
		if ((rxor & rto) == 0) {
			return addr & ~rxor;
		} else {
			return addr | rxor;
		}
	} else {
		return (addr & ~(7U << 29)) | rto;
	}
}

/**
 * @brief Return cached pointer to a RAM address
 *
 * The Xtensa coherence architecture maps addressable RAM twice, in
 * two different 512MB regions whose L1 cache settings can be
 * controlled independently.  So for any given pointer, it is possible
 * to convert it to and from a cached version.
 *
 * This function takes a pointer to any addressable object (whether in
 * cacheable memory or not) and returns a pointer that can be used to
 * refer to the same memory through the L1 data cache.  Data read
 * through the resulting pointer will reflect locally cached values on
 * the current CPU if they exist, and writes will go first into the
 * cache and be written back later.
 *
 * @see arch_xtensa_uncached_ptr()
 *
 * @param ptr A pointer to a valid C object
 * @return A pointer to the same object via the L1 dcache
 */
static inline void *arch_xtensa_cached_ptr(void *ptr)
{
	return (void *)z_xtrpoflip((uint32_t) ptr,
				   CONFIG_XTENSA_CACHED_REGION,
				   CONFIG_XTENSA_UNCACHED_REGION);
}

/**
 * @brief Return uncached pointer to a RAM address
 *
 * The Xtensa coherence architecture maps addressable RAM twice, in
 * two different 512MB regions whose L1 cache settings can be
 * controlled independently.  So for any given pointer, it is possible
 * to convert it to and from a cached version.
 *
 * This function takes a pointer to any addressable object (whether in
 * cacheable memory or not) and returns a pointer that can be used to
 * refer to the same memory while bypassing the L1 data cache.  Data
 * in the L1 cache will not be inspected or modified by the access.
 *
 * @see arch_xtensa_cached_ptr()
 *
 * @param ptr A pointer to a valid C object
 * @return A pointer to the same object bypassing the L1 dcache
 */
static inline void *arch_xtensa_uncached_ptr(void *ptr)
{
	return (void *)z_xtrpoflip((uint32_t) ptr,
				   CONFIG_XTENSA_UNCACHED_REGION,
				   CONFIG_XTENSA_CACHED_REGION);
}
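/* Usage sketch, not part of this header's API: a minimal producer
 * helper showing how the pointer-conversion and flush primitives
 * above are meant to compose.  The helper name and the one-word
 * protocol are illustrative assumptions, not Zephyr code.
 */
static ALWAYS_INLINE void z_xtensa_example_publish(uint32_t *shared,
						   uint32_t val)
{
	/* Write through the cached alias... */
	uint32_t *cp = (uint32_t *)arch_xtensa_cached_ptr(shared);

	*cp = val;

	/* ...then write the line back to RAM, so that a consumer on
	 * another CPU reading via arch_xtensa_uncached_ptr(shared),
	 * or one that calls z_xtensa_cache_inv() on its own alias
	 * before reading, cannot observe a stale value.
	 */
	z_xtensa_cache_flush(cp, sizeof(*cp));
}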
/* Utility to generate an unrolled and optimal[1] code sequence to set
 * the RPO TLB registers (contra the HAL cacheattr macros, which
 * generate larger code and can't be called from C), based on the
 * KERNEL_COHERENCE configuration in use.  Selects RPO attribute "2"
 * for regions (including MMIO registers in region zero) which want to
 * bypass L1, "4" for the cached region which wants writeback, and
 * "15" (invalid) elsewhere.
 *
 * Note that on cores that have the "translation" option set, we need
 * to put an identity mapping in the high bits.  Also per spec
 * changing the current code region (by definition cached) requires
 * that WITLB be followed by an ISYNC and that both instructions live
 * in the same cache line (two 3-byte instructions fit in an 8-byte
 * aligned region, so that's guaranteed not to cross a cache line
 * boundary).
 *
 * [1] With the sole exception of gcc's infuriating insistence on
 * emitting a precomputed literal for addr + addrincr instead of
 * computing it with a single ADD instruction from values it already
 * has in registers.  Explicitly assigning the variables to registers
 * via an attribute works, but then emits needless MOV instructions
 * instead.  I tell myself it's just 32 bytes of .text, but...  Sigh.
 */
#define _REGION_ATTR(r)						\
	((r) == 0 ? 2 :						\
	 ((r) == CONFIG_XTENSA_CACHED_REGION ? 4 :		\
	  ((r) == CONFIG_XTENSA_UNCACHED_REGION ? 2 : 15)))

#define _SET_ONE_TLB(region) do {				\
	uint32_t attr = _REGION_ATTR(region);			\
	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
		attr |= addr; /* RPO with translation */	\
	}							\
	if (region != CONFIG_XTENSA_CACHED_REGION) {		\
		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
				 :: "r"(attr), "r"(addr));	\
	} else {						\
		__asm__ volatile("wdtlb %0, %1"			\
				 :: "r"(attr), "r"(addr));	\
		__asm__ volatile("j 1f; .align 8; 1:");		\
		__asm__ volatile("witlb %0, %1; isync"		\
				 :: "r"(attr), "r"(addr));	\
	}							\
	addr += addrincr;					\
} while (0)

#define ARCH_XTENSA_SET_RPO_TLB() do {				\
	register uint32_t addr = 0, addrincr = 0x20000000;	\
	FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7);	\
} while (0)
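/* Usage sketch, not part of this header's API: the macro is meant to
 * run once per CPU very early in boot, before any cached data is
 * touched.  The wrapper below and its place in the boot flow are
 * illustrative assumptions, not Zephyr code.
 */
static ALWAYS_INLINE void z_xtensa_example_rpo_init(void)
{
	/* Programs all eight 512MB regions in one unrolled sequence:
	 * region zero (MMIO) and the uncached region get RPO
	 * attribute 2 (cache bypass), the cached region gets 4
	 * (writeback), and every other region gets 15 (invalid).
	 */
	ARCH_XTENSA_SET_RPO_TLB();
}

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* ZEPHYR_INCLUDE_ARCH_XTENSA_CACHE_H_ */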