diff --git a/CMakeLists.txt b/CMakeLists.txt
index 85b2586..93cec31 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SOURCES
     src/vector.c
     src/utility.c
     src/crypto.c
+    src/parsing.c
 )
 
 if ((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin"))
diff --git a/docs/parsing.md b/docs/parsing.md
new file mode 100644
index 0000000..bc5a176
--- /dev/null
+++ b/docs/parsing.md
@@ -0,0 +1,15 @@
+# parsing
+
+String parsing tools
+
+## Functions
+
+### simple_english_scoring
+
+Scores a string based on a simple ETAOIN SHRDLU scale: each occurrence of one of those twelve letters (either case)
+adds a weight from 12 (`e`) down to 1 (`u`), and every other character adds 0. A simple, somewhat accurate way to
+determine if a string is English. A `NULL` or empty string scores 0.
+
+```c
+int simple_english_scoring(const char *s);
+```
diff --git a/include/lfparsing.h b/include/lfparsing.h
new file mode 100644
index 0000000..0fab577
--- /dev/null
+++ b/include/lfparsing.h
@@ -0,0 +1,15 @@
+#ifndef LIBFLINT_H_PARSING
+#define LIBFLINT_H_PARSING
+
+/**
+ * Score how English-like a string is using ETAOIN SHRDLU letter weights.
+ *
+ * Each occurrence of e, t, a, o, i, n, s, h, r, d, l or u (either case) adds
+ * 12 down to 1 respectively; every other character adds 0.
+ *
+ * @param s NUL-terminated string to score; NULL is treated as empty.
+ * @return Sum of the per-character weights (0 for NULL or "").
+ */
+int simple_english_scoring(const char *s);
+
+#endif // LIBFLINT_H_PARSING
diff --git a/src/parsing.c b/src/parsing.c
new file mode 100644
index 0000000..d25e7a1
--- /dev/null
+++ b/src/parsing.c
@@ -0,0 +1,35 @@
+#include "lfparsing.h"
+
+// Weight of a single character on the ETAOIN SHRDLU scale: 12 for 'e' down to
+// 1 for 'u', case-insensitive. Any other character scores 0.
+static int ses_score_sw(char c) {
+    switch (c) {
+        case 'e': case 'E': return 12;
+        case 't': case 'T': return 11;
+        case 'a': case 'A': return 10;
+        case 'o': case 'O': return 9;
+        case 'i': case 'I': return 8;
+        case 'n': case 'N': return 7;
+        case 's': case 'S': return 6;
+        case 'h': case 'H': return 5;
+        case 'r': case 'R': return 4;
+        case 'd': case 'D': return 3;
+        case 'l': case 'L': return 2;
+        case 'u': case 'U': return 1;
+        default: return 0;
+    }
+}
+
+// Sums the ETAOIN SHRDLU weight of every character in s, up to the NUL.
+int simple_english_scoring(const char *s) {
+    // Treat a missing string like an empty one instead of dereferencing NULL.
+    if (!s) {
+        return 0;
+    }
+
+    int score = 0;
+    for (const char *c = s; *c != '\0'; ++c) {
+        score += ses_score_sw(*c);
+    }
+    return score;
+}
diff --git a/tests/tests.c b/tests/tests.c
index bcbca01..bae1c5a 100644
--- a/tests/tests.c
+++ b/tests/tests.c
@@ -11,6 +11,7 @@
 #include "lfmath.h"
 #include "lfstring.h"
 #include "lfcrypto.h"
+#include "lfparsing.h"
 
 #if defined(__APPLE__) || defined(__MACH__)
 #include "lfmacos.h"
@@ -310,6 +311,8 @@ void test_string() {
 }
 
 void test_crypto() {
+    printf("\n--- CRYPTO TEST ---\n");
+
     char *in = "BUTT";
     unsigned char *s = b64_encode(in, strlen(in));
     assert(strcmp(s, "QlVUVA==") == 0);
@@ -357,6 +360,23 @@ void test_crypto() {
     s = hex_to_str(hexsup, 4);
     assert(strcmp(s, "Sup?") == 0);
     free(s);
+
+    printf("Passes all crypto tests\n");
+}
+
+void test_parsing() {
+    printf("\n--- PARSING TEST ---\n");
+
+    const char *nonsense = "8d82jI|dms~<>s2d";
+    const char *english = "This is an English sentence!";
+    assert(simple_english_scoring(english) > simple_english_scoring(nonsense));
+
+    // Weights are case-insensitive; non-letters and degenerate inputs score 0
+    assert(simple_english_scoring("ETAOIN shrdlu") == 78);
+    assert(simple_english_scoring(NULL) == 0);
+    assert(simple_english_scoring("") == 0);
+
+    printf("Passes all parsing tests\n");
 }
 
 #if defined(__APPLE__) || defined(__MACH__)
@@ -383,6 +403,7 @@ int main() {
     test_vector();
     test_string();
     test_crypto();
+    test_parsing();
 
 #if defined(__APPLE__) || defined(__MACH__)
     test_macos();