lib: os: add hash function specification and implementation
Hash functions are necessary for Hashmaps (a.k.a. Hash Tables). * Create a flexible hash function interface * Implementation 1. Murmur3 * Implementation 2. djb2 * Kconfig option for system-wide 32-bit hash Signed-off-by: Chris Friedt <cfriedt@meta.com>
This commit is contained in:
parent
edb5ee1575
commit
b75df60f0b
136
include/zephyr/sys/hash_function.h
Normal file
136
include/zephyr/sys/hash_function.h
Normal file
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Meta
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#ifndef ZEPHYR_INCLUDE_SYS_HASH_FUNCTION_H_
|
||||
#define ZEPHYR_INCLUDE_SYS_HASH_FUNCTION_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <zephyr/sys/__assert.h>
|
||||
#include <zephyr/sys/util_macro.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief 32-bit Hash function interface
|
||||
*
|
||||
* Hash functions are used to map data from an arbitrarily large space to a
|
||||
* (typically smaller) fixed-size space. For a given input, a hash function will
|
||||
* consistently generate the same, semi-unique numerical value. Even for
|
||||
* marginally different data, a good hash function will distribute the entropy
|
||||
* almost evenly over all bits in the hashed value when combined with modulo
|
||||
* arithmetic over a finite-sized numeric field.
|
||||
*
|
||||
* @param str a string of input data
|
||||
* @param n the number of bytes in @p str
|
||||
*
|
||||
* @return the numeric hash associated with @p str
|
||||
*/
|
||||
typedef uint32_t (*sys_hash_func32_t)(const void *str, size_t n);
|
||||
|
||||
/**
 * @brief The naive identity hash function
 *
 * Reads the @p n bytes at @p str as one unsigned integer of that width and
 * returns its value unchanged. Widths of 1, 2, 4 and 8 bytes are accepted;
 * an 8-byte value is reduced to its low 32 bits. @p str must be aligned as
 * required for an integer of width @p n (e.g. the address of a
 * `[u]int8_t`, `[u]int16_t`, `[u]int32_t`, `[u]int64_t`, `float`, `double`
 * or `void *` object).
 *
 * Any other @p n triggers an assertion; if execution continues past it the
 * function returns 0.
 *
 * @note The identity hash function is used for testing @ref sys_hashmap.
 *
 * @param str a string of input data
 * @param n the number of bytes in @p str
 *
 * @return the numeric hash associated with @p str
 */
static inline uint32_t sys_hash32_identity(const void *str, size_t n)
{
	if (n == sizeof(uint8_t)) {
		return *(const uint8_t *)str;
	}

	if (n == sizeof(uint16_t)) {
		return *(const uint16_t *)str;
	}

	if (n == sizeof(uint32_t)) {
		return *(const uint32_t *)str;
	}

	if (n == sizeof(uint64_t)) {
		/* Only the low 32 bits of a 64-bit key survive the conversion. */
		return (uint32_t)(*(const uint64_t *)str);
	}

	/* Unsupported key width. */
	__ASSERT(false, "invalid str length %zu", n);

	return 0;
}
|
||||
|
||||
/**
|
||||
* @brief Daniel J. Bernstein's hash function
|
||||
*
|
||||
* Some notes:
|
||||
* - normally, this hash function is used on NUL-terminated strings
|
||||
* - it has been modified to support arbitrary sequences of bytes
|
||||
* - it has been modified to use XOR rather than addition
|
||||
*
|
||||
* @param str a string of input data
|
||||
* @param n the number of bytes in @p str
|
||||
*
|
||||
* @return the numeric hash associated with @p str
|
||||
*
|
||||
* @note enable with @kconfig{CONFIG_SYS_HASH_FUNC32_DJB2}
|
||||
*
|
||||
* @see https://theartincode.stanis.me/008-djb2/
|
||||
*/
|
||||
uint32_t sys_hash32_djb2(const void *str, size_t n);
|
||||
|
||||
/**
|
||||
* @brief Murmur3 hash function
|
||||
*
|
||||
* @param str a string of input data
|
||||
* @param n the number of bytes in @p str
|
||||
*
|
||||
* @return the numeric hash associated with @p str
|
||||
*
|
||||
* @note enable with @kconfig{CONFIG_SYS_HASH_FUNC32_MURMUR3}
|
||||
*
|
||||
* @see https://en.wikipedia.org/wiki/MurmurHash
|
||||
*/
|
||||
uint32_t sys_hash32_murmur3(const void *str, size_t n);
|
||||
|
||||
/**
|
||||
* @brief System default 32-bit hash function
|
||||
*
|
||||
* @param str a string of input data
|
||||
* @param n the number of bytes in @p str
|
||||
*
|
||||
* @return the numeric hash associated with @p str
|
||||
*/
|
||||
static inline uint32_t sys_hash32(const void *str, size_t n)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_SYS_HASH_FUNC32_CHOICE_IDENTITY)) {
|
||||
return sys_hash32_identity(str, n);
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_SYS_HASH_FUNC32_CHOICE_DJB2)) {
|
||||
return sys_hash32_djb2(str, n);
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_SYS_HASH_FUNC32_CHOICE_MURMUR3)) {
|
||||
return sys_hash32_murmur3(str, n);
|
||||
}
|
||||
|
||||
__ASSERT(0, "No default 32-bit hash. See CONFIG_SYS_HASH_FUNC32_CHOICE");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ZEPHYR_INCLUDE_SYS_HASH_FUNCTION_H_ */
|
||||
@ -64,6 +64,9 @@ zephyr_sources_ifdef(CONFIG_SYS_MEM_BLOCKS mem_blocks.c)
|
||||
|
||||
zephyr_sources_ifdef(CONFIG_WINSTREAM winstream.c)
|
||||
|
||||
zephyr_sources_ifdef(CONFIG_SYS_HASH_FUNC32_DJB2 hash_func32_djb2.c)
|
||||
zephyr_sources_ifdef(CONFIG_SYS_HASH_FUNC32_MURMUR3 hash_func32_murmur3.c)
|
||||
|
||||
zephyr_library_include_directories(
|
||||
${ZEPHYR_BASE}/kernel/include
|
||||
${ZEPHYR_BASE}/arch/${ARCH}/include
|
||||
|
||||
@ -176,4 +176,6 @@ rsource "Kconfig.cbprintf"
|
||||
|
||||
rsource "Kconfig.heap"
|
||||
|
||||
rsource "Kconfig.hash_func"
|
||||
|
||||
endmenu
|
||||
|
||||
46
lib/os/Kconfig.hash_func
Normal file
46
lib/os/Kconfig.hash_func
Normal file
@ -0,0 +1,46 @@
|
||||
# Copyright (c) 2022 Meta
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
menu "Hash Function Support"
|
||||
|
||||
config SYS_HASH_FUNC32
|
||||
bool "Hash function support"
|
||||
help
|
||||
Enable this option to support hash functions.
|
||||
|
||||
if SYS_HASH_FUNC32
|
||||
|
||||
config SYS_HASH_FUNC32_DJB2
|
||||
bool "Daniel J. Bernstein's hash function (djb2)"
|
||||
|
||||
config SYS_HASH_FUNC32_MURMUR3
|
||||
bool "Murmur3 hash function"
|
||||
|
||||
choice SYS_HASH_FUNC32_CHOICE
|
||||
prompt "Default system-wide 32-bit hash function"
|
||||
default SYS_HASH_FUNC32_CHOICE_MURMUR3
|
||||
help
|
||||
The default system-wide 32-bit hash function is sys_hash32().
|
||||
|
||||
config SYS_HASH_FUNC32_CHOICE_DJB2
|
||||
bool "Default 32-bit hash is djb2"
|
||||
select SYS_HASH_FUNC32_DJB2
|
||||
|
||||
config SYS_HASH_FUNC32_CHOICE_MURMUR3
|
||||
bool "Default 32-bit hash is Murmur3"
|
||||
select SYS_HASH_FUNC32_MURMUR3
|
||||
|
||||
config SYS_HASH_FUNC32_CHOICE_IDENTITY
|
||||
bool "Default 32-bit hash is the identity"
|
||||
help
|
||||
This is the naive identity hash function. It only works for strings
|
||||
either 1, 2, 4, or 8 bytes in length and so is suitable for scalar
|
||||
values such as keys in a Hashmap. It is implemented as a static
|
||||
inline function.
|
||||
|
||||
endchoice # SYS_HASH_FUNC32_CHOICE
|
||||
|
||||
endif # SYS_HASH_FUNC32
|
||||
|
||||
endmenu
|
||||
48
lib/os/hash_func32_djb2.c
Normal file
48
lib/os/hash_func32_djb2.c
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 1991, Daniel J. Bernstein
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/*
|
||||
* This software is in the PD as of approximately 2007. The story
|
||||
* behind it (and other DJB software) is quite amazing. Thanks Dan!!
|
||||
*
|
||||
* Many distributors have relicensed it under e.g. BSD, MIT, and others.
|
||||
*
|
||||
* It is not clear what the original license name is or how to declare it
|
||||
* using SPDX terms, since it explicitly has no license. I think it makes
|
||||
* sense to use the default Zephyr licensing.
|
||||
*
|
||||
* Note: this is not a cryptographically strong hash algorithm.
|
||||
*
|
||||
* For details, please see
|
||||
* https://cr.yp.to/rights.html
|
||||
* https://cr.yp.to/distributors.html
|
||||
* http://thedjbway.b0llix.net/license_free.html
|
||||
*
|
||||
* https://groups.google.com/g/comp.lang.c/c/lSKWXiuNOAk
|
||||
* https://theartincode.stanis.me/008-djb2/
|
||||
* http://www.cse.yorku.ca/~oz/hash.html
|
||||
* https://bit.ly/3IxUVvC
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <zephyr/sys/hash_function.h>
|
||||
|
||||
/*
 * XOR variant of the djb2 hash over an arbitrary byte sequence.
 *
 * The state starts at 5381; for every input byte it is multiplied by 33
 * (modulo 2^32) and then XORed with that byte. An empty input therefore
 * hashes to 5381.
 */
uint32_t sys_hash32_djb2(const void *str, size_t n)
{
	const uint8_t *bytes = str;
	/* The number 5381 is the initializer for the djb2 hash */
	uint32_t hash = 5381U;

	for (size_t i = 0; i < n; ++i) {
		/* The djb2 hash multiplier is 33 (i.e. 2^5 + 1) */
		hash = (hash * 33U) ^ bytes[i];
	}

	return hash;
}
|
||||
48
lib/os/hash_func32_murmur3.c
Normal file
48
lib/os/hash_func32_murmur3.c
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Meta
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include <stddef.h>
#include <stdint.h>
#include <string.h>
|
||||
|
||||
/*
 * Mix a single 32-bit input word before it is folded into the hash state:
 * multiply, rotate left by 15 bits, multiply again (all modulo 2^32).
 */
static inline uint32_t murmur_32_scramble(uint32_t k)
{
	k *= 0xcc9e2d51;
	k = (k << 15) | (k >> 17);
	k *= 0x1b873593;

	return k;
}

/*
 * 32-bit Murmur3 hash of the n bytes at str, with a seed of 0.
 *
 * The input is consumed as whole 32-bit words read in host byte order,
 * followed by up to three trailing bytes that are packed into one word with
 * the first trailing byte in the most significant occupied position. The
 * original input length is mixed in before the final avalanche steps.
 *
 * The parameter type is `const void *` so that the definition matches the
 * prototype in <zephyr/sys/hash_function.h>; str may have any alignment.
 *
 * Returns the hash value; an empty input hashes to 0.
 */
uint32_t sys_hash32_murmur3(const void *str, size_t n)
{
	/* Unsigned bytes: a plain (possibly signed) char would sign-extend
	 * values >= 0x80 and clobber previously accumulated tail bytes.
	 */
	const uint8_t *p = str;
	const size_t len = n;
	/* seed of 0 */
	uint32_t h = 0;
	uint32_t k;

	for (; n >= sizeof(uint32_t); n -= sizeof(uint32_t), p += sizeof(uint32_t)) {
		/* Copy instead of dereferencing a cast pointer: the input
		 * carries no alignment guarantee.
		 */
		memcpy(&k, p, sizeof(k));
		h ^= murmur_32_scramble(k);
		h = (h << 13) | (h >> 19);
		h = h * 5 + 0xe6546b64;
	}

	/* Pack the 0..3 remaining bytes into a single word. */
	for (k = 0; n != 0; --n, ++p) {
		k <<= 8;
		k |= *p;
	}

	h ^= murmur_32_scramble(k);

	/* Finalization: fold in the length, then avalanche the bits. */
	h ^= (uint32_t)len;
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;

	return h;
}
|
||||
Loading…
Reference in New Issue
Block a user