From 194acafd3a8ac4351a9c06a851b8cb9dd0c635b9 Mon Sep 17 00:00:00 2001 From: Evan Burkey Date: Sun, 3 Dec 2023 17:08:04 -0800 Subject: [PATCH] add string functions, cleanup vector docs --- CMakeLists.txt | 1 + docs/string.md | 40 ++++++++++++++++++++++++++++++++++++ docs/vector.md | 4 ++-- include/lfstring.h | 10 +++++++++ src/string.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ tests/tests.c | 30 +++++++++++++++++++++++++++ 6 files changed, 134 insertions(+), 2 deletions(-) create mode 100644 docs/string.md create mode 100644 include/lfstring.h create mode 100644 src/string.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 77fecb6..dcbac4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ set(SOURCES src/binarytree.c src/input.c src/math.c + src/string.c src/vector.c src/utility.c ) diff --git a/docs/string.md b/docs/string.md new file mode 100644 index 0000000..e9b4b37 --- /dev/null +++ b/docs/string.md @@ -0,0 +1,40 @@ +# string + +C string helpers + +## Functions + +### find_substrings +Finds the indexes of all locations of the given needle in the haystack. The returned array of `size_t` is allocated by +this function and memory cleanup is managed by the user. `num_substrings` is a pointer to a pre-allocated `size_t` that will +be modified to contain the number of found substrings and subsequently the size of the returned array of `size_t`. +Returns `NULL` and prints to `stderr` if there is an error. + +```c +size_t *find_substrings(const char* haystack, const char* needle, size_t *num_substrings); + +/* Usage */ +const char* haystack = "One two three two"; +const char* needle = "two"; +size_t subs_sz = 0; + +size_t *subs = find_substrings(haystack, needle, &subs_sz); +// subs: [ 4, 14 ] +// subs_sz: 2 + +free(subs); +``` + +### substr + +Extracts a substring at a specific index and length. This function returns a copy of the substring in a heap allocated +buffer that the user is responsible for freeing. 
Returns `NULL` and prints to `stderr` if there is an error. + +```c +const char* substr(const char* str, size_t idx, size_t len); + +/* Usage */ +const char *s = substr("One two three", 4, 3); +assert(strcmp(s, "two") == 0); +free(s); +``` \ No newline at end of file diff --git a/docs/vector.md b/docs/vector.md index 34c22e2..28a73a8 100644 --- a/docs/vector.md +++ b/docs/vector.md @@ -117,7 +117,7 @@ Shrinks the capacity of the vector down to the current length. Returns a non-zer int vec_shrink(Vector *vec); ``` -## vec_max +### vec_max Finds the largest value in the vector and returns a void pointer to the underlying data. Requires a comparison function to compare the data in the vector. This function must return `1` if `a > b`, `-1` if @@ -127,7 +127,7 @@ comparison function to compare the data in the vector. This function must return const void *vec_min(const Vector *vec, int(*cmp)(const void *a, const void *b)); ``` -## vec_min +### vec_min Finds the smallest value in the vector and returns a void pointer to the underlying data. Requires a comparison function to compare the data in the vector. 
/**
 * Finds the start index of every occurrence of `needle` inside `haystack`.
 *
 * Overlapping occurrences are reported (e.g. "aa" in "aaa" yields 0 and 1).
 * An empty needle matches at every position, including the terminator.
 *
 * @param haystack        NUL-terminated string to search in.
 * @param needle          NUL-terminated string to search for.
 * @param num_substrings  Out-parameter; receives the number of matches, which
 *                        is also the number of valid entries in the returned
 *                        array. Set to 0 on every error path.
 * @return Heap-allocated array of match indexes that the caller must free().
 *         When there are no matches a non-NULL, free()-able array is still
 *         returned so that NULL always means "error". Returns NULL and prints
 *         to stderr on a NULL argument or allocation failure.
 */
size_t *find_substrings(const char* haystack, const char* needle, size_t *num_substrings) {
    if (num_substrings == NULL) {
        fprintf(stderr, "NULL num_substrings passed to find_substrings\n");
        return NULL;
    }
    *num_substrings = 0;

    if (haystack == NULL || needle == NULL) {
        fprintf(stderr, "NULL string passed to find_substrings\n");
        return NULL;
    }

    const size_t sz_h = strlen(haystack);
    const size_t sz_n = strlen(needle);

    /*
     * Number of candidate start positions. Computed with an explicit guard
     * because sz_h - sz_n is unsigned and would wrap around to a huge value
     * when the needle is longer than the haystack.
     */
    const size_t positions = (sz_n <= sz_h) ? (sz_h - sz_n + 1) : 0;

    /* First pass: count the matches so the result can be sized exactly. */
    size_t count = 0;
    for (size_t i = 0; i < positions; ++i) {
        /* i + sz_n <= sz_h holds here, so memcmp never reads past the terminator. */
        if (memcmp(haystack + i, needle, sz_n) == 0) {
            ++count;
        }
    }

    /*
     * Always request at least one element: a zero-sized allocation may
     * legitimately return NULL, which callers would mistake for an error.
     * calloc also checks the count * size multiplication for overflow.
     */
    size_t *indices = calloc(count > 0 ? count : 1, sizeof *indices);
    if (indices == NULL) {
        fprintf(stderr, "Memory allocation failed in find_substrings\n");
        return NULL;
    }

    /* Second pass: record the index of each match. */
    size_t idx = 0;
    for (size_t i = 0; i < positions; ++i) {
        if (memcmp(haystack + i, needle, sz_n) == 0) {
            indices[idx++] = i;
        }
    }

    *num_substrings = count;
    return indices;
}

/**
 * Returns a heap-allocated copy of `len` characters of `str` starting at `idx`.
 *
 * @param str  NUL-terminated source string.
 * @param idx  Index of the first character to copy.
 * @param len  Number of characters to copy.
 * @return NUL-terminated copy that the caller must free(), or NULL (with a
 *         message on stderr) if `str` is NULL, the requested range does not
 *         lie entirely inside `str`, or allocation fails.
 */
const char* substr(const char* str, size_t idx, size_t len) {
    if (str == NULL) {
        fprintf(stderr, "NULL string passed to substr\n");
        return NULL;
    }

    const size_t sz_str = strlen(str);

    /*
     * Written as a subtraction so the check cannot wrap around: idx + len
     * overflows for large arguments and would let an invalid range through.
     */
    if (idx > sz_str || len > sz_str - idx) {
        fprintf(stderr, "Improper size arguments in substr\n");
        return NULL;
    }

    /* len <= sz_str here, so len + 1 cannot overflow. */
    char *copy = malloc(len + 1);
    if (copy == NULL) {
        fprintf(stderr, "Memory allocation error in substr\n");
        return NULL;
    }

    memcpy(copy, str + idx, len);
    copy[len] = '\0';

    return copy;
}
/*
 * Exercises find_substrings and substr against a fixed sentence.
 *
 * Every result is checked for NULL before it is indexed, and every heap
 * buffer returned by the library is released before the test ends.
 */
void test_string() {
    printf("\n--- STRING TEST ---\n");
    const char* haystack = "Test one two one and also maybe two but not Gabe's least favorite number, which is not one.";
    const char* needles[] = {
            "one",
            "two",
            "Gabe"
    };

    size_t sub_sz = 0;
    size_t *subs = find_substrings(haystack, needles[0], &sub_sz);
    assert(subs != NULL);
    assert(sub_sz == 3);
    assert(subs[0] == 5);
    assert(subs[1] == 13);
    assert(subs[2] == 87);

    const char *s = substr(haystack, subs[0], strlen(needles[0]));
    assert(s != NULL);
    assert(strcmp(s, needles[0]) == 0);

    /* substr returns const char*, so cast to drop the qualifier for free(). */
    free((void *)s);
    free(subs);

    subs = find_substrings(haystack, needles[1], &sub_sz);
    assert(subs != NULL);
    assert(sub_sz == 2);
    assert(subs[0] == 9);
    assert(subs[1] == 32);
    free(subs);

    subs = find_substrings(haystack, needles[2], &sub_sz);
    assert(subs != NULL);
    assert(sub_sz == 1);
    assert(subs[0] == 44);
    free(subs);

    printf("Passes all string tests\n");
}