The mempool allocator implementation recursively breaks a memory block
into 4 sub-blocks until it minimally fits the requested memory size. The
size of each sub-block is rounded up to the next word boundary to
preserve word alignment on the returned memory, and this is a problem.

Let's consider max_sz = 2072 and n_max = 1. That's our level 0.

At level 1, we get one level-0 block split into 4 sub-blocks whose size
is WB_UP(2072 / 4) = 520. However 4 * 520 = 2080, so we must discard the
4th sub-block since it doesn't fit inside our 2072-byte parent block.
We're down to 3 * 520 = 1560 bytes of usable memory. Our memory usage
efficiency is now 1560 / 2072 = 75%.

At level 2, we get 3 level-1 blocks, and each of them may be split into
4 sub-blocks whose size is WB_UP(520 / 4) = 132. But 4 * 132 = 528, so
the 4th sub-block has to be discarded again. We're down to 9 * 132 =
1188 bytes of usable memory. Our memory usage efficiency is now
1188 / 2072 = 57%.

At level 3, we get 9 level-2 blocks, each split into sub-blocks of
WB_UP(132 / 4) = 36 bytes. Again 4 * 36 = 144, so the 4th sub-block is
discarded. We're down to 27 * 36 = 972 bytes of usable memory. Our
memory usage efficiency is now 972 / 2072 = 47%.

What should be done instead is to round sub-block sizes _down_, not
_up_. This way, sub-blocks still align to word boundaries, and they
always fit within their parent block since the total size can no longer
exceed the parent's size.

Using the same max_sz = 2072 would yield a memory usage efficiency of
99% at level 3, so let's demo a worst case of 2044 instead.

Level 1: 4 sub-blocks of WB_DN(2044 / 4) = 508 bytes. We're down to
4 * 508 = 2032 bytes of usable memory. Our memory usage efficiency is
now 2032 / 2044 = 99%.

Level 2: 4 * 4 sub-blocks of WB_DN(508 / 4) = 124 bytes. We're down to
16 * 124 = 1984 bytes of usable memory. Our memory usage efficiency is
now 1984 / 2044 = 97%.

Level 3: 16 * 4 sub-blocks of WB_DN(124 / 4) = 28 bytes. We're down to
64 * 28 = 1792 bytes of usable memory. Our memory usage efficiency is
now 1792 / 2044 = 88%.

Conclusion: if max_sz is a power of 2 then we get 100% efficiency at all
levels in both cases. But if not, then the rounding-up method has a far
worse degradation curve than the rounding-down method, wasting more than
50% of memory in some cases. So let's round sub-block sizes down rather
than up, and remove block_fits(), whose purpose was to identify
sub-blocks that didn't fit within their parent block and is now useless.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
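
For illustration (not part of the patch itself), the degradation curves
above can be reproduced with a small standalone C program. It is a
sketch assuming a 32-bit target where WB_UP()/WB_DN() round to 4-byte
word boundaries; show() is a hypothetical helper, not an allocator API.

#include <stdio.h>

#define WB_UP(x) (((x) + 3) & ~3UL)
#define WB_DN(x) ((x) & ~3UL)

static void show(unsigned long max_sz, int round_up)
{
	unsigned long sz = max_sz;
	unsigned long nblocks = 1;

	printf("max_sz = %lu, rounding %s:\n",
	       max_sz, round_up ? "up" : "down");
	for (int level = 1; level <= 3; level++) {
		unsigned long quarter =
			round_up ? WB_UP(sz / 4) : WB_DN(sz / 4);
		/* With round-up, the 4th sub-block may overflow its
		 * parent and must be discarded; with round-down it
		 * always fits.
		 */
		unsigned long per_parent = (4 * quarter > sz) ? 3 : 4;

		nblocks *= per_parent;
		sz = quarter;
		printf("  level %d: %lu blocks of %lu bytes, "
		       "efficiency %.0f%%\n", level, nblocks, sz,
		       100.0 * (double)(nblocks * sz) / (double)max_sz);
	}
}

int main(void)
{
	show(2072, 1);	/* round-up: 75%, 57%, 47% as computed above */
	show(2044, 0);	/* round-down: 99%, 97%, 88% as computed above */
	return 0;
}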

/*
 * Copyright (c) 2018 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <stdlib.h>
#include <zephyr.h>
#include <init.h>
#include <errno.h>
#include <sys/math_extras.h>
#include <sys/mempool.h>
#include <string.h>
#include <app_memory/app_memdomain.h>

#define LOG_LEVEL CONFIG_KERNEL_LOG_LEVEL
#include <logging/log.h>
LOG_MODULE_DECLARE(os);

#if (CONFIG_MINIMAL_LIBC_MALLOC_ARENA_SIZE > 0)
#ifdef CONFIG_USERSPACE
K_APPMEM_PARTITION_DEFINE(z_malloc_partition);
#define POOL_SECTION K_APP_DMEM_SECTION(z_malloc_partition)
#else
#define POOL_SECTION .data
#endif /* CONFIG_USERSPACE */
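
/* Define the malloc arena as a single maximum-size pool block that is
 * split on demand, with a 16-byte minimum block size and 4-byte
 * alignment.
 */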
SYS_MEM_POOL_DEFINE(z_malloc_mem_pool, NULL, 16,
		    CONFIG_MINIMAL_LIBC_MALLOC_ARENA_SIZE, 1, 4, POOL_SECTION);

void *malloc(size_t size)
{
	void *ret;

	ret = sys_mem_pool_alloc(&z_malloc_mem_pool, size);
	if (ret == NULL) {
		errno = ENOMEM;
	}

	return ret;
}
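
/* The pool must be initialized before any application code can call
 * malloc(); the SYS_INIT() hook below runs this at application init.
 */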
static int malloc_prepare(struct device *unused)
{
	ARG_UNUSED(unused);

	sys_mem_pool_init(&z_malloc_mem_pool);

	return 0;
}

SYS_INIT(malloc_prepare, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT);
#else /* No malloc arena */
void *malloc(size_t size)
{
	ARG_UNUSED(size);

	LOG_DBG("CONFIG_MINIMAL_LIBC_MALLOC_ARENA_SIZE is 0");
	errno = ENOMEM;

	return NULL;
}
#endif

void free(void *ptr)
{
	sys_mem_pool_free(ptr);
}

void *calloc(size_t nmemb, size_t size)
{
	void *ret;
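
	/* Reject nmemb * size products that overflow size_t; otherwise
	 * a wrapped-around, too-small allocation would be returned.
	 */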
	if (size_mul_overflow(nmemb, size, &size)) {
		errno = ENOMEM;
		return NULL;
	}

	ret = malloc(size);

	if (ret != NULL) {
		(void)memset(ret, 0, size);
	}

	return ret;
}

void *realloc(void *ptr, size_t requested_size)
{
	struct sys_mem_pool_block *blk;
	size_t struct_blk_size = WB_UP(sizeof(struct sys_mem_pool_block));
	size_t block_size, total_requested_size;
	void *new_ptr;

	if (ptr == NULL) {
		return malloc(requested_size);
	}

	if (requested_size == 0) {
		free(ptr);
		return NULL;
	}

	/* Stored right before the pointer passed to the user */
	blk = (struct sys_mem_pool_block *)((char *)ptr - struct_blk_size);

	/* Determine size of previously allocated block by its level.
	 * Most likely a bit larger than the original allocation
	 */
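	/* Each level quarters its parent block and rounds the result
	 * down to a word boundary (WB_DN), matching how the allocator
	 * sizes sub-blocks when splitting.
	 */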
	block_size = blk->pool->base.max_sz;
	for (int i = 1; i <= blk->level; i++) {
		block_size = WB_DN(block_size / 4);
	}

	/* We really need this much memory */
	total_requested_size = requested_size + struct_blk_size;

	if (block_size >= total_requested_size) {
		/* Existing block large enough, nothing to do */
		return ptr;
	}

	new_ptr = malloc(requested_size);
	if (new_ptr == NULL) {
		return NULL;
	}

	memcpy(new_ptr, ptr, block_size - struct_blk_size);
	free(ptr);

	return new_ptr;
}

void *reallocarray(void *ptr, size_t nmemb, size_t size)
{
	if (size_mul_overflow(nmemb, size, &size)) {
		errno = ENOMEM;
		return NULL;
	}
	return realloc(ptr, size);
}