Commit daa4604c authored by Thomas Haller

shared: add nm_utils_strsplit_set() helper

A replacement for g_strsplit_set(). While g_strsplit_set()
does (n+1) malloc and n slice allocations, this needs
roughly O(log(n)) mallocs.

Another difference from g_strsplit_set() is that this function
treats multiple delimiters as one (and thus never returns empty
words). While I can see that sometimes you may want to keep empty
words (like parsing a CSV file and preserve empty cells), we usually
use this function for splitting user input. In such cases, we want
to treat multiple delimiters as one.
parent f6a72768
......@@ -98,6 +98,67 @@ test_nm_g_slice_free_fcn (void)
/*****************************************************************************/
static void
_do_test_nm_utils_strsplit_set (const char *str, ...)
{
gs_unref_ptrarray GPtrArray *args_array = g_ptr_array_new ();
const char *const*args;
gs_free const char **words = NULL;
const char *arg;
gsize i;
va_list ap;
va_start (ap, str);
while ((arg = va_arg (ap, const char *)))
g_ptr_array_add (args_array, (gpointer) arg);
va_end (ap);
g_ptr_array_add (args_array, NULL);
args = (const char *const*) args_array->pdata;
words = nm_utils_strsplit_set (str, " \t\n");
if (!args[0]) {
g_assert (!words);
g_assert ( !str
|| NM_STRCHAR_ALL (str, ch, NM_IN_SET (ch, ' ', '\t', '\n')));
return;
}
g_assert (words);
for (i = 0; args[i] || words[i]; i++) {
g_assert (args[i]);
g_assert (words[i]);
g_assert (args[i][0]);
g_assert (NM_STRCHAR_ALL (args[i], ch, !NM_IN_SET (ch, ' ', '\t', '\n')));
g_assert_cmpstr (args[i], ==, words[i]);
}
}
#define do_test_nm_utils_strsplit_set(str, ...) \
_do_test_nm_utils_strsplit_set (str, ##__VA_ARGS__, NULL)
/* Exercises nm_utils_strsplit_set() via do_test_nm_utils_strsplit_set():
 * the first argument is the input, the remaining arguments are the words
 * the split is expected to produce (none means a NULL result is expected). */
static void
test_nm_utils_strsplit_set (void)
{
/* NULL, empty and delimiter-only inputs produce no words. */
do_test_nm_utils_strsplit_set (NULL);
do_test_nm_utils_strsplit_set ("");
do_test_nm_utils_strsplit_set ("\t");
do_test_nm_utils_strsplit_set (" \t\n");
/* Plain single- and two-word inputs. */
do_test_nm_utils_strsplit_set ("a", "a");
do_test_nm_utils_strsplit_set ("a b", "a", "b");
/* '\r' is not in the delimiter set used by the helper (" \t\n"), so it
 * stays part of the word. */
do_test_nm_utils_strsplit_set ("a\rb", "a\rb");
/* Leading and trailing delimiters are dropped. */
do_test_nm_utils_strsplit_set (" a\rb ", "a\rb");
do_test_nm_utils_strsplit_set (" a bbbd afds ere", "a", "bbbd", "afds", "ere");
/* 30 words: more than the initial pointer-array capacity of 8 used by
 * nm_utils_strsplit_set(), so the reallocation path is exercised. */
do_test_nm_utils_strsplit_set ("1 2 3 4 5 6 7 8 9 0 "
"1 2 3 4 5 6 7 8 9 0 "
"1 2 3 4 5 6 7 8 9 0",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "0");
}
/*****************************************************************************/
typedef struct {
int val;
int idx;
......@@ -6201,6 +6262,7 @@ int main (int argc, char **argv)
g_test_add_func ("/core/general/test_c_list_sort", test_c_list_sort);
g_test_add_func ("/core/general/test_dedup_multi", test_dedup_multi);
g_test_add_func ("/core/general/test_utils_str_utf8safe", test_utils_str_utf8safe);
g_test_add_func ("/core/general/test_nm_utils_strsplit_set", test_nm_utils_strsplit_set);
g_test_add_func ("/core/general/test_nm_in_set", test_nm_in_set);
g_test_add_func ("/core/general/test_nm_in_strset", test_nm_in_strset);
g_test_add_func ("/core/general/test_setting_vpn_items", test_setting_vpn_items);
......
......@@ -324,6 +324,118 @@ _nm_utils_ascii_str_to_int64 (const char *str, guint base, gint64 min, gint64 ma
/*****************************************************************************/
/**
 * nm_utils_strsplit_set:
 * @str: the string to split.
 * @delimiters: the set of delimiters. If %NULL, defaults to " \t\n",
 *   like bash's $IFS.
 *
 * This is a replacement for g_strsplit_set() which avoids allocating
 * every word separately. Instead, @str is copied once into the same
 * allocation as the returned strv array, and all words point into that
 * internal copy.
 *
 * Another difference from g_strsplit_set() is that this never returns
 * empty words. Multiple delimiters are combined and treated as one.
 *
 * The pointer array starts with room for 8 words and doubles in size
 * whenever it is full, so the number of allocations grows only
 * logarithmically with the number of words.
 *
 * Returns: %NULL if @str is %NULL or contains only delimiters.
 *   Otherwise, a %NULL terminated strv array containing non-empty
 *   words, split at the delimiter characters (delimiter characters
 *   are removed).
 *   The strings to which the result strv array points are allocated
 *   after the returned result itself. Don't free the strings themselves,
 *   but free everything with a single g_free() of the returned pointer.
 */
const char **
nm_utils_strsplit_set (const char *str, const char *delimiters)
{
	const char **ptr, **ptr0;
	gsize alloc_size, plen, i;
	gsize str_len;
	char *s0;
	char *s;
	guint8 delimiters_table[256];

	if (!str)
		return NULL;

	/* initialize lookup table for delimiter */
	if (!delimiters)
		delimiters = " \t\n";
	memset (delimiters_table, 0, sizeof (delimiters_table));
	for (i = 0; delimiters[i]; i++)
		delimiters_table[(guint8) delimiters[i]] = 1;

#define _is_delimiter(ch, delimiters_table) \
	((delimiters_table)[(guint8) (ch)] != 0)

	/* skip initial delimiters, and return if the remaining string is
	 * empty. */
	while (_is_delimiter (str[0], delimiters_table))
		str++;
	if (!str[0])
		return NULL;

	/* length including the terminating NUL */
	str_len = strlen (str) + 1;
	alloc_size = 8;

	/* we allocate the buffer larger, so to copy @str at the
	 * end of it as @s0. The "+ 1" is the slot for the terminating
	 * NULL pointer of the strv array. */
	ptr0 = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len);
	s0 = (char *) &ptr0[alloc_size + 1];
	memcpy (s0, str, str_len);

	plen = 0;
	s = s0;
	ptr = ptr0;

	while (TRUE) {
		if (plen >= alloc_size) {
			const char **ptr_old = ptr;

			/* reallocate the buffer. Note that for now the string
			 * continues to be in ptr0/s0. We fix that at the end.
			 *
			 * Grow geometrically: doubling keeps the number of
			 * reallocations logarithmic in the number of words and
			 * the total pointer copying linear. */
			alloc_size *= 2;
			ptr = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len);
			memcpy (ptr, ptr_old, sizeof (const char *) * plen);

			/* ptr0 still owns the string data in @s0, so it must
			 * stay alive until the final fix-up below. */
			if (ptr_old != ptr0)
				g_free (ptr_old);
		}

		ptr[plen++] = s;

		nm_assert (s[0] && !_is_delimiter (s[0], delimiters_table));

		/* advance to the end of the current word */
		while (TRUE) {
			s++;
			if (_is_delimiter (s[0], delimiters_table))
				break;
			if (s[0] == '\0')
				goto done;
		}

		/* terminate the word in place and skip the run of delimiters
		 * that follows it. */
		s[0] = '\0';
		s++;
		while (_is_delimiter (s[0], delimiters_table))
			s++;
		if (s[0] == '\0')
			break;
	}
done:
	ptr[plen] = NULL;

	if (ptr != ptr0) {
		/* we reallocated the buffer. We must copy over the
		 * string @s0 and adjust the pointers. */
		s = (char *) &ptr[alloc_size + 1];
		memcpy (s, s0, str_len);
		for (i = 0; i < plen; i++)
			ptr[i] = &s[ptr[i] - s0];
		g_free (ptr0);
	}

	return ptr;
}
/**
* nm_utils_strv_find_first:
* @list: the strv list to search
......
......@@ -153,6 +153,8 @@ void nm_utils_strbuf_append_str (char **buf, gsize *len, const char *str);
/*****************************************************************************/
/* Splits @str at any character of @delimiters (" \t\n" if NULL), never
 * yielding empty words. Returns NULL if @str is NULL or contains only
 * delimiters. Free the result with a single g_free(); do not free the
 * individual strings. */
const char **nm_utils_strsplit_set (const char *str, const char *delimiters);
gssize nm_utils_strv_find_first (char **list, gssize len, const char *needle);
char **_nm_utils_strv_cleanup (char **strv,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment