feat(newlib): riscv: add CONFIG_LIBC_OPTIMIZED_MISALIGNED_ACCESS config option
This option replaces the ROM implementations of the following functions:
- memcpy
- memcmp
- memmove
- str[n]cpy
- str[n]cmp

The implementations linked into the firmware are better optimized for misaligned memory accesses. Measurements in CPU cycles for 4096-byte buffers:
- memcpy: 28676 -> 4128
- memcmp: 49147 -> 14259
- memmove: 33896 -> 8086
- strcpy: 32771 -> 17313
- strcmp: 32775 -> 13191
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <_ansi.h>
|
||||
#include <limits.h>
|
||||
/*
 * Taken from glibc:
 * Add the compiler optimization to inhibit loop transformation to library
 * calls.  This is used to avoid recursive calls in memset and memmove
 * default implementations.
 */
#define __inhibit_loop_to_libcall \
    __attribute__ ((__optimize__ ("-fno-tree-loop-distribute-patterns")))

/* Nonzero if X is not aligned on a "long" boundary.
 * This macro is used to skip a few bytes to find an aligned pointer.
 * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
 * to avoid small performance penalties (if they are not zero).
 *
 * The argument is parenthesised before the cast so that expressions such as
 * UNALIGNED_X(p + 1) test the address of the whole expression. */
#define UNALIGNED_X(X) ((long)(X) & (sizeof (long) - 1))

/* Evaluates to nonzero in #if when either RISC-V misaligned-access macro is
 * defined to a nonzero value; identifiers that are not defined count as 0. */
#define _HAVE_HW_MISALIGNED_ACCESS (__riscv_misaligned_fast || __riscv_misaligned_slow)

#if _HAVE_HW_MISALIGNED_ACCESS
/* Hardware performs unaligned operations with little
 * to no penalty compared to byte-to-byte copy, so the pair of pointers is
 * never reported as unaligned. */
#define UNALIGNED_X_Y(X, Y) (0)
#else /* _HAVE_HW_MISALIGNED_ACCESS */
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED_X_Y(X, Y) \
    (((long)(X) & (sizeof (long) - 1)) | ((long)(Y) & (sizeof (long) - 1)))
#endif /* _HAVE_HW_MISALIGNED_ACCESS */

/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLE_BLOCK_SIZE (sizeof (long))

/* How many bytes are copied each iteration of the 4X unrolled loop. */
#define BIG_BLOCK_SIZE (sizeof (long) << 2)

/* Nonzero if LEN is smaller than one word, i.e. only the byte loop can run. */
#define TOO_SMALL_LITTLE_BLOCK(LEN) ((LEN) < LITTLE_BLOCK_SIZE)

/* Nonzero if LEN is smaller than one 4X unrolled block. */
#define TOO_SMALL_BIG_BLOCK(LEN) ((LEN) < BIG_BLOCK_SIZE)

/* Macros for detecting endchar.
 *
 * DETECT_NULL(X) is nonzero if X (a long-sized word) contains a zero byte.
 * The constants are unsigned long so that the subtraction is performed in
 * unsigned arithmetic even when X has type (signed) long; a signed
 * subtraction could overflow.  X is evaluated twice. */
#if LONG_MAX == 2147483647L
#define DETECT_NULL(X) (((X) - 0x01010101UL) & ~(X) & 0x80808080UL)
#elif LONG_MAX == 9223372036854775807L
#define DETECT_NULL(X) (((X) - 0x0101010101010101UL) & ~(X) & 0x8080808080808080UL)
#else
#error long int is not a 32bit or 64bit type.
#endif

/* Returns nonzero if (long)X contains the byte used to fill (long)MASK.
 * Both arguments are parenthesised so that compound expressions are XORed
 * as a whole. */
#define DETECT_CHAR(X, MASK) (DETECT_NULL((X) ^ (MASK)))
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <string.h>
|
||||
#include "local.h"
|
||||
|
||||
__attribute__((optimize("-Os")))
|
||||
int
|
||||
memcmp(const void *m1,
|
||||
const void *m2,
|
||||
size_t n)
|
||||
{
|
||||
unsigned char *s1 = (unsigned char *) m1;
|
||||
unsigned char *s2 = (unsigned char *) m2;
|
||||
unsigned long *a1;
|
||||
unsigned long *a2;
|
||||
|
||||
/* If the size is too small, or either pointer is unaligned,
|
||||
then we punt to the byte compare loop. Hopefully this will
|
||||
not turn up in inner loops. */
|
||||
if (!TOO_SMALL_LITTLE_BLOCK(n) && !UNALIGNED_X_Y(s1, s2)) {
|
||||
/* Otherwise, load and compare the blocks of memory one
|
||||
word at a time. */
|
||||
a1 = (unsigned long*) s1;
|
||||
a2 = (unsigned long*) s2;
|
||||
while (!TOO_SMALL_LITTLE_BLOCK(n)) {
|
||||
if (*a1 != *a2) {
|
||||
break;
|
||||
}
|
||||
a1++;
|
||||
a2++;
|
||||
n -= LITTLE_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* check m mod LITTLE_BLOCK_SIZE remaining characters */
|
||||
|
||||
s1 = (unsigned char*)a1;
|
||||
s2 = (unsigned char*)a2;
|
||||
}
|
||||
|
||||
while (n--) {
|
||||
if (*s1 != *s2) {
|
||||
return *s1 - *s2;
|
||||
}
|
||||
s1++;
|
||||
s2++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Linker hook: intentionally empty.
 * The symbol exists only so that a reference to it forces the linker to
 * include this file.
 */
void esp_libc_include_memcmp_impl(void) {}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <_ansi.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include "local.h"
|
||||
|
||||
__attribute__((optimize("-Os")))
|
||||
void *
|
||||
__inhibit_loop_to_libcall
|
||||
memmove(void *dst_void,
|
||||
const void *src_void,
|
||||
size_t length)
|
||||
{
|
||||
char *dst = dst_void;
|
||||
const char *src = src_void;
|
||||
long *aligned_dst;
|
||||
const long *aligned_src;
|
||||
|
||||
if (src < dst && dst < src + length) {
|
||||
/* Destructive overlap...have to copy backwards */
|
||||
src += length;
|
||||
dst += length;
|
||||
|
||||
if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) {
|
||||
aligned_dst = (long*)dst;
|
||||
aligned_src = (long*)src;
|
||||
|
||||
/* Copy one long word at a time if possible. */
|
||||
while (!TOO_SMALL_LITTLE_BLOCK(length)) {
|
||||
*--aligned_dst = *--aligned_src;
|
||||
length -= LITTLE_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* Pick up any residual with a byte copier. */
|
||||
dst = (char*)aligned_dst;
|
||||
src = (char*)aligned_src;
|
||||
}
|
||||
|
||||
while (length--) {
|
||||
*--dst = *--src;
|
||||
}
|
||||
} else {
|
||||
/* Use optimizing algorithm for a non-destructive copy to closely
|
||||
match memcpy. If the size is small or either SRC or DST is unaligned,
|
||||
then punt into the byte copy loop. This should be rare. */
|
||||
if (!TOO_SMALL_LITTLE_BLOCK(length) && !UNALIGNED_X_Y(src, dst)) {
|
||||
aligned_dst = (long*)dst;
|
||||
aligned_src = (long*)src;
|
||||
|
||||
/* Copy 4X long words at a time if possible. */
|
||||
while (!TOO_SMALL_BIG_BLOCK(length)) {
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
length -= BIG_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* Copy one long word at a time if possible. */
|
||||
while (!TOO_SMALL_LITTLE_BLOCK(length)) {
|
||||
*aligned_dst++ = *aligned_src++;
|
||||
length -= LITTLE_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* Pick up any residual with a byte copier. */
|
||||
dst = (char*)aligned_dst;
|
||||
src = (char*)aligned_src;
|
||||
}
|
||||
|
||||
while (length--) {
|
||||
*dst++ = *src++;
|
||||
}
|
||||
}
|
||||
|
||||
return dst_void;
|
||||
}
|
||||
|
||||
/*
 * Linker hook: intentionally empty.
 * The symbol exists only so that a reference to it forces the linker to
 * include this file.
 */
void esp_libc_include_memmove_impl(void) {}
|
||||
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "local.h"
|
||||
|
||||
/**
 * Compare at most n characters of the strings s1 and s2.
 *
 * When UNALIGNED_X_Y() accepts the pointer pair, equal "unsigned long" words
 * are skipped first; DETECT_NULL() stops the word loop at the word holding
 * the terminator.  The first differing word and any tail are resolved by the
 * byte loop.
 *
 * NOTE(review): if UNALIGNED_X_Y() is compiled to constant 0 (hardware
 * misaligned access enabled), the word loads may start at any address, so a
 * load can extend up to sizeof(long) - 1 bytes past the terminating NUL and
 * across a word boundary.  Confirm this over-read is acceptable for every
 * memory region on the target.
 *
 * @param s1 first string
 * @param s2 second string
 * @param n  maximum number of characters to compare
 * @return 0 if the compared characters are equal (or n is 0), otherwise the
 *         difference between the first pair of differing characters, each
 *         taken as unsigned char.
 */
__attribute__((optimize("-Os")))
int
strncmp(const char *s1,
        const char *s2,
        size_t n)
{
    if (n == 0) {
        return 0;
    }

    /* If s1 or s2 are unaligned, then compare bytes. */
    if (!UNALIGNED_X_Y(s1, s2)) {
        /* If s1 and s2 are word-aligned, compare them a word at a time.
           The inputs are only read, so the const qualifier is kept. */
        const unsigned long *a1 = (const unsigned long *)s1;
        const unsigned long *a2 = (const unsigned long *)s2;

        while (n >= sizeof(long) && *a1 == *a2) {
            n -= sizeof(long);

            /* If we've run out of bytes or hit a null, return zero
               since we already know *a1 == *a2.  */
            if (n == 0 || DETECT_NULL(*a1)) {
                return 0;
            }

            a1++;
            a2++;
        }

        /* Either a difference was detected in the current word or fewer
           than sizeof(long) bytes remain, so search bytewise.  */
        s1 = (const char *)a1;
        s2 = (const char *)a2;
    }

    while (n-- > 0 && *s1 == *s2) {
        /* If we've run out of bytes or hit a null, return zero
           since we already know *s1 == *s2.  */
        if (n == 0 || *s1 == '\0') {
            return 0;
        }
        s1++;
        s2++;
    }
    return (*(const unsigned char *)s1) - (*(const unsigned char *)s2);
}
|
||||
|
||||
/*
 * Linker hook: intentionally empty.
 * The symbol exists only so that a reference to it forces the linker to
 * include this file.
 */
void esp_libc_include_strncmp_impl(void) {}
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: 1994-2009 Red Hat, Inc.
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND Apache-2.0
|
||||
*
|
||||
* SPDX-FileContributor: 2025 Espressif Systems (Shanghai) CO LTD
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include "local.h"
|
||||
|
||||
/**
 * Copy at most count characters of the string src0 into dst0.
 *
 * If src0 is shorter than count, the remainder of dst0 is filled with NUL
 * characters; if it is count characters or longer, dst0 is not
 * NUL-terminated.
 *
 * When UNALIGNED_X_Y() accepts the pointer pair and count is at least one
 * word, whole words are copied until fewer than a word remains or the next
 * source word contains a NUL byte.  The words are handled as unsigned long
 * so that the arithmetic inside DETECT_NULL() is unsigned; with a signed
 * long operand the subtraction could overflow.
 *
 * NOTE(review): if UNALIGNED_X_Y() is compiled to constant 0 (hardware
 * misaligned access enabled), the source word loads may start at any
 * address, so a load can extend up to sizeof(long) - 1 bytes past the
 * terminating NUL and across a word boundary.  Confirm this over-read is
 * acceptable for every memory region on the target.
 *
 * @param dst0  destination buffer, at least count bytes
 * @param src0  source string
 * @param count number of bytes to write to dst0
 * @return dst0
 */
__attribute__((optimize("-Os")))
char *
strncpy(char *__restrict dst0,
        const char *__restrict src0,
        size_t count)
{
    char *dst = dst0;
    const char *src = src0;

    /* If SRC and DEST is aligned and count large enough, then copy words.  */
    if (!UNALIGNED_X_Y(src, dst) && !TOO_SMALL_LITTLE_BLOCK(count)) {
        unsigned long *aligned_dst = (unsigned long *)dst;
        const unsigned long *aligned_src = (const unsigned long *)src;

        /* SRC and DEST are both "long int" aligned, try to do "long int"
           sized copies.  Stop before the word that holds the terminator so
           the byte loop below can find it.  */
        while (!TOO_SMALL_LITTLE_BLOCK(count) && !DETECT_NULL(*aligned_src)) {
            count -= sizeof(long int);
            *aligned_dst++ = *aligned_src++;
        }

        dst = (char *)aligned_dst;
        src = (const char *)aligned_src;
    }

    /* Copy bytes up to and including the terminator, or until count runs
       out.  */
    while (count > 0) {
        --count;
        if ((*dst++ = *src++) == '\0') {
            break;
        }
    }

    /* Pad whatever is left of the destination with NUL characters.  */
    while (count-- > 0) {
        *dst++ = '\0';
    }

    return dst0;
}
|
||||
|
||||
/*
 * Linker hook: intentionally empty.
 * The symbol exists only so that a reference to it forces the linker to
 * include this file.
 */
void esp_libc_include_strncpy_impl(void) {}
|
||||
Reference in New Issue
Block a user