Line data Source code
1 1 : /*
2 : * Copyright The Zephyr Project Contributors
3 : *
4 : * SPDX-License-Identifier: Apache-2.0
5 : */
6 :
7 : /**
8 : * @file
9 : * @brief UTF-8 utilities
10 : *
11 : * Misc UTF-8 utilities.
12 : */
13 :
14 : #ifndef ZEPHYR_INCLUDE_SYS_UTIL_UFT8_H_
15 : #define ZEPHYR_INCLUDE_SYS_UTIL_UFT8_H_
16 :
17 : #include <stddef.h>
18 :
19 : #ifdef __cplusplus
20 : extern "C" {
21 : #endif
22 :
23 : /**
24 : * @addtogroup sys-util
25 : * @{
26 : */
27 :
28 : /**
29 : * @brief Properly truncate a NUL-terminated UTF-8 string
30 : *
31 : * Take a NUL-terminated UTF-8 string that may have been truncated earlier by
32 : * other means (by setting the NUL terminator) and ensure that the string
33 : * ends with a properly formatted UTF-8 character (1-4 bytes).
34 : *
35 : * Example:
36 : *
37 : * @code{.c}
38 : * char test_str[] = "€€€";
39 : * char trunc_utf8[8];
40 : *
41 : * printf("Original : %s\n", test_str); // €€€
42 : * strncpy(trunc_utf8, test_str, sizeof(trunc_utf8));
43 : * trunc_utf8[sizeof(trunc_utf8) - 1] = '\0';
44 : * printf("Bad : %s\n", trunc_utf8); // €€�
45 : * utf8_trunc(trunc_utf8);
46 : * printf("Truncated: %s\n", trunc_utf8); // €€
47 : * @endcode
48 : *
49 : * @param utf8_str NUL-terminated string
50 : *
51 : * @return Pointer to the @p utf8_str
52 : */
53 1 : char *utf8_trunc(char *utf8_str);
54 :
55 : /**
56 : * @brief Copies a UTF-8 encoded string from @p src to @p dst
57 : *
58 : * The resulting @p dst will always be NUL-terminated if @p n is larger than 0,
59 : * and the @p dst string will always be properly UTF-8 truncated.
60 : *
61 : * @param dst The destination of the UTF-8 string.
62 : * @param src The source string
63 : * @param n The size of the @p dst buffer. Maximum number of bytes copied
64 : * is @p n - 1. If 0, nothing will be done and @p dst will not be
65 : * NUL-terminated.
66 : *
67 : * @return Pointer to the @p dst
68 : */
69 1 : char *utf8_lcpy(char *dst, const char *src, size_t n);
70 :
71 : /**
72 : * @brief Counts the characters in a UTF-8 encoded string @p s
73 : *
74 : * Counts the number of UTF-8 characters (code points) in a NUL-terminated string.
75 : * This function steps through each UTF-8 sequence by checking leading byte patterns.
76 : * It does not fully validate UTF-8 correctness, only counts characters.
77 : *
78 : * @param s The NUL-terminated input string
79 : *
80 : * @return Number of UTF-8 characters in @p s on success, or a negative error
81 : * code otherwise.
82 : */
83 1 : int utf8_count_chars(const char *s);
84 :
85 : #ifdef __cplusplus
86 : }
87 : #endif
88 :
89 : /**
90 : * @}
91 : */
92 :
93 : #endif /* ZEPHYR_INCLUDE_SYS_UTIL_UFT8_H_ */
|