From e6632f4c75acbf6339bffd8d99db2ae9d481a519 Mon Sep 17 00:00:00 2001 From: Aryan Naraghi Date: Sat, 9 May 2026 18:20:31 -0600 Subject: [PATCH 01/13] Adds an MVP set of functions The purpose of this change is to start the conversation on the best interface for this library. --- include/beman/str_split/todo.hpp | 71 +++++++++++++++++++++++++++- tests/beman/str_split/CMakeLists.txt | 2 +- tests/beman/str_split/todo.test.cpp | 30 ++++++++++-- 3 files changed, 98 insertions(+), 5 deletions(-) diff --git a/include/beman/str_split/todo.hpp b/include/beman/str_split/todo.hpp index a4b9c5a..40e4269 100644 --- a/include/beman/str_split/todo.hpp +++ b/include/beman/str_split/todo.hpp @@ -5,6 +5,9 @@ #include +#include +#include + #if BEMAN_STR_SPLIT_USE_MODULES() && !defined(BEMAN_STR_SPLIT_INCLUDED_FROM_INTERFACE_UNIT) import beman.str_split; @@ -13,7 +16,73 @@ import beman.str_split; namespace beman::str_split { -// TODO +//------------------------------------------------------------------------------ +// Patterns: +//------------------------------------------------------------------------------ + +// Splits by a substring. +struct split_by { + // Constructor for anything that can be converted to a `std::string_view`. + constexpr explicit split_by(std::string_view delimiter) : delimiter_(delimiter) {} + + // Constructor for the single character case. + constexpr explicit split_by(char delimiter) : delimiter_(1, delimiter) {} + + // Constructor for range of characters that are not `std::string_view` convertible. + template + requires std::same_as, char> && + (!std::convertible_to) && + (!std::same_as, split_by>) + explicit split_by(Range&& range) : delimiter_(std::ranges::begin(range), std::ranges::end(range)) {} + + // TODO(aryann): Here and below, implement a find member function that accepts the current "haystack" string and + // returns the position of the first match. We may also need to control the visibility of such function. + + private: + std::string delimiter_; +}; + +// Splits by the first matching character in a given character sequence. +struct split_by_first_of { + + private: + std::string chars_; +}; + +struct split_by_ascii_whitespace {}; + +//------------------------------------------------------------------------------ +// Split functions: +//------------------------------------------------------------------------------ + +// TODO(aryann): Consider an alternative approach where the split type is determined by the function name: +// +// * split(std::string_view): Equivalent to +// str_split(std::string_view, split_by_ascii_whitespace). +// +// * split(std::string_view, Range&&): Equivalent to +// str_split(std::string_view, split_by). +// +// * split_by_first_of(std::string_view, Range&&): Equivalent to +// str_split(std::string_view, split_by_first_of). +// +// I'm partial towards this alternative as it more closely mirrors `std::string::find` and `:find_first_of`. + +// TODO(aryann): We should structure the input similar to `split_by` by allowing both ranges and +// `std::string_view`-convertible types. + +template +constexpr Container str_split(std::string_view input, Pattern pattern) { + Container empty; + return empty; +} + +template +constexpr Container str_split(std::string_view input) { + return str_split(input, split_by_ascii_whitespace{}); +} + +// TODO(aryann): Add support for max splits. } // namespace beman::str_split diff --git a/tests/beman/str_split/CMakeLists.txt b/tests/beman/str_split/CMakeLists.txt index be75247..8a2ab2e 100644 --- a/tests/beman/str_split/CMakeLists.txt +++ b/tests/beman/str_split/CMakeLists.txt @@ -6,7 +6,7 @@ add_executable(beman.str_split.tests.todo) target_sources(beman.str_split.tests.todo PRIVATE todo.test.cpp) target_link_libraries( beman.str_split.tests.todo - PRIVATE beman::str_split GTest::gtest_main + PRIVATE beman::str_split GTest::gtest_main GTest::gmock ) if(BEMAN_EXEMPLAR_USE_MODULES) set_target_properties( diff --git a/tests/beman/str_split/todo.test.cpp b/tests/beman/str_split/todo.test.cpp index f1c0082..fa76ca1 100644 --- a/tests/beman/str_split/todo.test.cpp +++ b/tests/beman/str_split/todo.test.cpp @@ -1,10 +1,34 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include +#include + #include #include +#include #include -TEST(TodoTest, todo) { - const bool todo = true; - EXPECT_TRUE(todo); +namespace { + +using ::beman::str_split::split_by; +using ::beman::str_split::str_split; +using ::testing::ElementsAre; + +TEST(StrSplit, SplitBy) { + using Container = std::vector; + + // `std:string-view`-convertible inputs: + EXPECT_THAT(str_split("my string", split_by(" ")), ElementsAre()); + EXPECT_THAT(str_split("my string", split_by("string")), ElementsAre()); + EXPECT_THAT(str_split("my string", split_by(std::string("string"))), ElementsAre()); + EXPECT_THAT(str_split("my string", split_by(std::string_view("string"))), ElementsAre()); + + // Single char: + EXPECT_THAT(str_split("my string", split_by('s')), ElementsAre()); + + // Ranges: + EXPECT_THAT(str_split("my string", split_by(std::vector{'a', 'b', 'c'})), ElementsAre()); + EXPECT_THAT(str_split("my string", split_by(std::array{'a', 'b', 'c'})), ElementsAre()); } + +} // namespace \ No newline at end of file From b25dc17d4ff7a48a1008b81c67fd67fed488ef18 Mon Sep 17 00:00:00 2001 From: Aryan Naraghi Date: Sat, 9 May 2026 18:40:44 -0600 Subject: [PATCH 02/13] Adds more details to the concepts that govern the range constructor --- include/beman/str_split/todo.hpp | 44 +++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/include/beman/str_split/todo.hpp b/include/beman/str_split/todo.hpp index 40e4269..dda0ff1 100644 --- a/include/beman/str_split/todo.hpp +++ b/include/beman/str_split/todo.hpp @@ -16,6 +16,26 @@ import beman.str_split; namespace beman::str_split { +// TODO(aryann): Delete this file and move the contents to str_split.hpp once the we make more progress on the surface +// definition. + +//------------------------------------------------------------------------------ +// Concepts: +//------------------------------------------------------------------------------ + +// TODO(aryann): Should the concepts be placed in a private namespace? + +// A range of chars. +template +concept char_range = std::ranges::input_range && std::same_as, char>; + +// A type that cannot be converted to `std::string_view`. +template +concept not_string_view_convertible = !std::convertible_to; + +template +concept different_from = !std::same_as, Self>; + //------------------------------------------------------------------------------ // Patterns: //------------------------------------------------------------------------------ @@ -30,23 +50,31 @@ struct split_by { // Constructor for range of characters that are not `std::string_view` convertible. template - requires std::same_as, char> && - (!std::convertible_to) && - (!std::same_as, split_by>) - explicit split_by(Range&& range) : delimiter_(std::ranges::begin(range), std::ranges::end(range)) {} + requires( + // Ensures the range's value type is `char`. Notably, this rejects ranges of other values types such as + // `int` and `unsigned char`. This requirement prevents narrowing conversions. + char_range && + + // Ensures this constructor does not compete with the `std::string_view` overload. + not_string_view_convertible && + + // Ensures this constructor does not hijack copy and move construction which would fail to compile with a + // difficult-to-read wall of errors. + different_from) + constexpr explicit split_by(Range&& range) : delimiter_(std::ranges::begin(range), std::ranges::end(range)) {} // TODO(aryann): Here and below, implement a find member function that accepts the current "haystack" string and // returns the position of the first match. We may also need to control the visibility of such function. private: - std::string delimiter_; + const std::string delimiter_; }; // Splits by the first matching character in a given character sequence. struct split_by_first_of { private: - std::string chars_; + const std::string chars_; }; struct split_by_ascii_whitespace {}; @@ -84,6 +112,10 @@ constexpr Container str_split(std::string_view input) { // TODO(aryann): Add support for max splits. +// TODO(aryann): For now, the caller must pass the return type. We should decide whether to use implicit conversions to +// adapt to the user's declared return type (similar to absl's approach). If we retain the current structure, we should +// decide on a default for the common case (`std::vector`?). + } // namespace beman::str_split #endif // BEMAN_STR_SPLIT_USE_MODULES() && From 1d96659a1858388a4d8d07f7f5019bf72c6eec42 Mon Sep 17 00:00:00 2001 From: Aryan Naraghi Date: Sat, 9 May 2026 18:44:44 -0600 Subject: [PATCH 03/13] Adds missing newline --- tests/beman/str_split/todo.test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/beman/str_split/todo.test.cpp b/tests/beman/str_split/todo.test.cpp index fa76ca1..97005b4 100644 --- a/tests/beman/str_split/todo.test.cpp +++ b/tests/beman/str_split/todo.test.cpp @@ -31,4 +31,4 @@ TEST(StrSplit, SplitBy) { EXPECT_THAT(str_split("my string", split_by(std::array{'a', 'b', 'c'})), ElementsAre()); } -} // namespace \ No newline at end of file +} // namespace From 6a286decf47fdef8ff4593bc45386d12974cf732 Mon Sep 17 00:00:00 2001 From: Aryan Naraghi Date: Mon, 11 May 2026 08:32:18 -0700 Subject: [PATCH 04/13] Renames some of the files and fixes the includes --- examples/CMakeLists.txt | 2 +- examples/{todo.cpp => str_split.cpp} | 2 +- include/beman/str_split/CMakeLists.txt | 4 ++-- include/beman/str_split/str_split.hpp | 2 +- .../str_split/{todo.hpp => str_split_to.hpp} | 16 ++++++++++------ tests/beman/str_split/CMakeLists.txt | 10 +++++----- .../{todo.test.cpp => str_split_to.test.cpp} | 2 +- 7 files changed, 21 insertions(+), 17 deletions(-) rename examples/{todo.cpp => str_split.cpp} (75%) rename include/beman/str_split/{todo.hpp => str_split_to.hpp} (94%) rename tests/beman/str_split/{todo.test.cpp => str_split_to.test.cpp} (96%) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 3544044..4cb1c5f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -set(ALL_EXAMPLES todo) +set(ALL_EXAMPLES str_split) message("Examples to be built: ${ALL_EXAMPLES}") foreach(example ${ALL_EXAMPLES}) diff --git a/examples/todo.cpp b/examples/str_split.cpp similarity index 75% rename from examples/todo.cpp rename to examples/str_split.cpp index 276064d..ed193c3 100644 --- a/examples/todo.cpp +++ b/examples/str_split.cpp @@ -1,7 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include -#include +#include int main() { // TODO diff --git a/include/beman/str_split/CMakeLists.txt b/include/beman/str_split/CMakeLists.txt index a4a50c2..58e6278 100644 --- a/include/beman/str_split/CMakeLists.txt +++ b/include/beman/str_split/CMakeLists.txt @@ -9,7 +9,7 @@ if(BEMAN_STR_SPLIT_USE_MODULES) FILES config.hpp str_split.hpp - todo.hpp + str_split_to.hpp "${PROJECT_BINARY_DIR}/include/beman/str_split/config_generated.hpp" ) else() @@ -20,7 +20,7 @@ else() FILES config.hpp str_split.hpp - todo.hpp + str_split_to.hpp "${PROJECT_BINARY_DIR}/include/beman/str_split/config_generated.hpp" ) endif() diff --git a/include/beman/str_split/str_split.hpp b/include/beman/str_split/str_split.hpp index 308ad99..81110b4 100644 --- a/include/beman/str_split/str_split.hpp +++ b/include/beman/str_split/str_split.hpp @@ -11,7 +11,7 @@ import beman.str_split; #else - #include + #include #endif // BEMAN_STR_SPLIT_USE_MODULES() && // !defined(BEMAN_STR_SPLIT_INCLUDED_FROM_INTERFACE_UNIT) diff --git a/include/beman/str_split/todo.hpp b/include/beman/str_split/str_split_to.hpp similarity index 94% rename from include/beman/str_split/todo.hpp rename to include/beman/str_split/str_split_to.hpp index dda0ff1..916355c 100644 --- a/include/beman/str_split/todo.hpp +++ b/include/beman/str_split/str_split_to.hpp @@ -1,19 +1,23 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef BEMAN_STR_SPLIT_TODO_HPP -#define BEMAN_STR_SPLIT_TODO_HPP +#ifndef BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP +#define BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP #include -#include -#include - #if BEMAN_STR_SPLIT_USE_MODULES() && !defined(BEMAN_STR_SPLIT_INCLUDED_FROM_INTERFACE_UNIT) import beman.str_split; #else + #if !BEMAN_STR_SPLIT_USE_MODULES() + + #include + #include + + #endif // !BEMAN_STR_SPLIT_USE_MODULES() + namespace beman::str_split { // TODO(aryann): Delete this file and move the contents to str_split.hpp once the we make more progress on the surface @@ -121,4 +125,4 @@ constexpr Container str_split(std::string_view input) { #endif // BEMAN_STR_SPLIT_USE_MODULES() && // !defined(BEMAN_STR_SPLIT_INCLUDED_FROM_INTERFACE_UNIT) -#endif // BEMAN_STR_SPLIT_TODO_HPP +#endif // BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP diff --git a/tests/beman/str_split/CMakeLists.txt b/tests/beman/str_split/CMakeLists.txt index 8a2ab2e..72ead66 100644 --- a/tests/beman/str_split/CMakeLists.txt +++ b/tests/beman/str_split/CMakeLists.txt @@ -2,18 +2,18 @@ find_package(GTest REQUIRED) -add_executable(beman.str_split.tests.todo) -target_sources(beman.str_split.tests.todo PRIVATE todo.test.cpp) +add_executable(beman.str_split.tests.str_split_to) +target_sources(beman.str_split.tests.str_split_to PRIVATE str_split_to.test.cpp) target_link_libraries( - beman.str_split.tests.todo + beman.str_split.tests.str_split_to PRIVATE beman::str_split GTest::gtest_main GTest::gmock ) if(BEMAN_EXEMPLAR_USE_MODULES) set_target_properties( - beman.str_split.tests.todo + beman.str_split.tests.str_split_to PROPERTIES CXX_MODULE_STD ON ) endif() include(GoogleTest) -gtest_discover_tests(beman.str_split.tests.todo DISCOVERY_TIMEOUT 60) +gtest_discover_tests(beman.str_split.tests.str_split_to DISCOVERY_TIMEOUT 60) diff --git a/tests/beman/str_split/todo.test.cpp b/tests/beman/str_split/str_split_to.test.cpp similarity index 96% rename from tests/beman/str_split/todo.test.cpp rename to tests/beman/str_split/str_split_to.test.cpp index 97005b4..704a6e3 100644 --- a/tests/beman/str_split/todo.test.cpp +++ b/tests/beman/str_split/str_split_to.test.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace { From 557e286771a3f8d63007c8d72146b29da14f267e Mon Sep 17 00:00:00 2001 From: Aryan Naraghi Date: Mon, 11 May 2026 09:09:56 -0700 Subject: [PATCH 05/13] Replaces the split function with str_split_to --- examples/str_split.cpp | 15 +++++- include/beman/str_split/str_split_to.hpp | 35 +++++++------ tests/beman/str_split/str_split_to.test.cpp | 56 ++++++++++++++++----- 3 files changed, 78 insertions(+), 28 deletions(-) diff --git a/examples/str_split.cpp b/examples/str_split.cpp index ed193c3..fe94fe3 100644 --- a/examples/str_split.cpp +++ b/examples/str_split.cpp @@ -3,6 +3,19 @@ #include #include +#include +#include + +using namespace std::literals::string_view_literals; + +using ::beman::str_split::split_by_ascii_whitespace; +using ::beman::str_split::str_split_to; + int main() { - // TODO + constexpr std::string_view text = "The quick brown fox jumps over the lazy dog"; + + const std::vector parts = str_split_to(text, split_by_ascii_whitespace()); + for (std::string_view part : parts) { + std::cout << part << std::endl; + } } diff --git a/include/beman/str_split/str_split_to.hpp b/include/beman/str_split/str_split_to.hpp index 916355c..36fa500 100644 --- a/include/beman/str_split/str_split_to.hpp +++ b/include/beman/str_split/str_split_to.hpp @@ -15,14 +15,12 @@ import beman.str_split; #include #include + #include #endif // !BEMAN_STR_SPLIT_USE_MODULES() namespace beman::str_split { -// TODO(aryann): Delete this file and move the contents to str_split.hpp once the we make more progress on the surface -// definition. - //------------------------------------------------------------------------------ // Concepts: //------------------------------------------------------------------------------ @@ -103,22 +101,31 @@ struct split_by_ascii_whitespace {}; // TODO(aryann): We should structure the input similar to `split_by` by allowing both ranges and // `std::string_view`-convertible types. -template -constexpr Container str_split(std::string_view input, Pattern pattern) { - Container empty; - return empty; +template +auto str_split_to(std::basic_string_view text, Delimiter&& delimiter, OutputIt dest) -> OutputIt { + return dest; } -template -constexpr Container str_split(std::string_view input) { - return str_split(input, split_by_ascii_whitespace{}); +template +auto str_split_to(std::basic_string_view text, Delimiter&& delimiter) -> Container { + Container container; + return str_split_to(text, delimiter, container); } -// TODO(aryann): Add support for max splits. +template