-
Notifications
You must be signed in to change notification settings - Fork 0
Adds initial scaffolding for the split interface #2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
aryann
wants to merge
13
commits into
main
Choose a base branch
from
scaffolding
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
e6632f4
Adds an MVP set of functions
aryann b25dc17
Adds more details to the concepts that govern the range constructor
aryann 1d96659
Adds missing newline
aryann 6a286de
Renames some of the files and fixes the includes
aryann 557e286
Replaces the split function with str_split_to
aryann 1475000
Adds split_by_char
aryann ed47b98
Adds tests for different char types
aryann 0bfcecf
Removes some TODOs
aryann e96c545
Adds missing include
aryann bb60de3
Adds support for importing std types via modules
aryann 4223276
An attempt to fix redefinition errors in one of the gcc builds
aryann 236502b
Use textual includes for the test target since it's not setup to use …
aryann 0d0cdee
Fixes a typo and adds a missing return
aryann File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| #include <beman/str_split/config.hpp> | ||
| #include <beman/str_split/str_split.hpp> | ||
|
|
||
| #include <iostream> | ||
| #include <string_view> | ||
|
|
||
| using namespace std::literals::string_view_literals; | ||
|
|
||
| using ::beman::str_split::split_by_ascii_whitespace; | ||
| using ::beman::str_split::str_split_to; | ||
|
|
||
| int main() { | ||
| constexpr std::string_view text = "The quick brown fox jumps over the lazy dog"; | ||
|
|
||
| const std::vector<std::string_view> parts = str_split_to(text, split_by_ascii_whitespace()); | ||
| for (std::string_view part : parts) { | ||
| std::cout << part << std::endl; | ||
| } | ||
|
|
||
| return 0; | ||
| } |
This file was deleted.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| #ifndef BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP | ||
| #define BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP | ||
|
|
||
| #include <beman/str_split/config.hpp> | ||
|
|
||
| #if BEMAN_STR_SPLIT_USE_MODULES() && !defined(BEMAN_STR_SPLIT_INCLUDED_FROM_INTERFACE_UNIT) | ||
|
|
||
| import beman.str_split; | ||
|
|
||
| #else | ||
|
|
||
| #if !BEMAN_STR_SPLIT_USE_MODULES() | ||
|
|
||
#include <algorithm>
#include <concepts>
#include <iterator>
#include <ranges>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
|
|
||
| #endif // !BEMAN_STR_SPLIT_USE_MODULES() | ||
|
|
||
| namespace beman::str_split { | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Concepts: | ||
| //------------------------------------------------------------------------------ | ||
|
|
||
| // TODO(aryann): Should the concepts be placed in a private namespace? | ||
|
|
||
| // A range of chars: an input range whose value type is exactly `char`, so ranges of `wchar_t`, `char8_t`, `unsigned char`, `int`, etc. do not satisfy it. | ||
| template <typename T> | ||
| concept char_range = std::ranges::input_range<T> && std::same_as<std::ranges::range_value_t<T>, char>; | ||
|
|
||
| // A type that cannot be converted to `std::string_view`: satisfied exactly when a forwarded `T` (i.e. `T&&`) does not model `std::convertible_to<..., std::string_view>`. | ||
| template <typename T> | ||
| concept not_string_view_convertible = !std::convertible_to<T&&, std::string_view>; | ||
|
|
||
// Satisfied when `T`, with references and cv-qualifiers removed, is not the same type as `Self`.
| template <typename T, typename Self> | ||
| concept different_from = !std::same_as<std::remove_cvref_t<T>, Self>; | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Patterns: | ||
| //------------------------------------------------------------------------------ | ||
|
|
||
| // Splits by a substring. | ||
| struct split_by { | ||
| public: | ||
| // Constructor for anything that can be converted to a `std::string_view`. | ||
| constexpr explicit split_by(std::string_view delimiter) : delimiter_(delimiter) {} | ||
|
|
||
| // Constructor for range of characters that are not `std::string_view` convertible. | ||
| template <std::ranges::input_range Range> | ||
| requires( | ||
| // Ensures the range's value type is `char`. Notably, this rejects ranges of other values types such as | ||
| // `int` and `unsigned char`. This requirement prevents narrowing conversions. | ||
| char_range<Range> && | ||
|
|
||
| // Ensures this constructor does not compete with the `std::string_view` overload. | ||
| not_string_view_convertible<Range> && | ||
|
|
||
| // Ensures this constructor does not hijack copy and move construction which would fail to compile with a | ||
| // difficult-to-read wall of errors. | ||
| different_from<Range, split_by>) | ||
| constexpr explicit split_by(Range&& range) : delimiter_(std::ranges::begin(range), std::ranges::end(range)) {} | ||
|
|
||
| // TODO(aryann): Here and below, implement a find member function that accepts the current "haystack" string and | ||
| // returns the position of the first match. We may also need to control the visibility of such function. | ||
|
|
||
| private: | ||
| const std::string delimiter_; | ||
| }; | ||
|
|
||
| // Splits by the first matching character in a given character sequence. | ||
| struct split_by_first_of { | ||
| private: | ||
| const std::string chars_; | ||
| }; | ||
|
|
||
| // Splits by character. | ||
// Delimiter that holds a single `char` to split by.
struct split_by_char {
    // Explicit so that a plain `char` never silently turns into a delimiter object.
    constexpr explicit split_by_char(char delimiter) : delimiter_{delimiter} {}

  private:
    const char delimiter_;
};
|
|
||
// Stateless tag type. Judging by its name it selects splitting on ASCII whitespace; no logic is attached to it yet.
| struct split_by_ascii_whitespace {}; | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // Split functions: | ||
| //------------------------------------------------------------------------------ | ||
|
|
||
| // TODO(aryann): Consider an alternative approach where the split type is determined by the function name: | ||
| // | ||
| // * split(std::string_view): Equivalent to | ||
| // str_split(std::string_view, split_by_ascii_whitespace). | ||
| // | ||
| // * split(std::string_view, Range&&): Equivalent to | ||
| // str_split(std::string_view, split_by). | ||
| // | ||
| // * split_by_first_of(std::string_view, Range&&): Equivalent to | ||
| // str_split(std::string_view, split_by_first_of). | ||
| // | ||
|
|
||
// Output-iterator overload of `str_split_to`.
//
// Scaffolding only: `text` and `delimiter` are not inspected yet, nothing is written through `dest`, and `dest` is
// handed back unchanged.
template <class OutputIt, class CharT, class Traits, class Delimiter>
OutputIt str_split_to([[maybe_unused]] std::basic_string_view<CharT, Traits> text,
                      [[maybe_unused]] Delimiter&&                           delimiter,
                      OutputIt                                               dest) {
    return dest;
}
|
|
||
// Overload that returns the pieces in a caller-chosen container type, e.g.
// `str_split_to<std::deque<std::string_view>>(text, delimiter)`.
//
// The container is default-constructed and filled through an insert iterator by the output-iterator overload.
// Previously the container itself was passed as the "output iterator", so the returned value was only a by-value
// copy of it and the local container could never receive any elements.
template <class Container, class CharT, class Traits, class Delimiter>
auto str_split_to(std::basic_string_view<CharT, Traits> text, Delimiter&& delimiter) -> Container {
    Container container;
    str_split_to(text, std::forward<Delimiter>(delimiter), std::inserter(container, container.end()));
    return container;
}
|
|
||
// Overload that takes only the container template, e.g. `str_split_to<std::deque>(text, delimiter)`, and returns
// `Container<std::basic_string_view<CharT, Traits>>`.
//
// Delegates to the explicit-container overload. Previously it delegated to the default overload, which returns a
// `std::vector`, so any other container template failed to convert to the declared return type.
template <template <class...> class Container, class CharT, class Traits, class Delimiter>
auto str_split_to(std::basic_string_view<CharT, Traits> text, Delimiter&& delimiter)
    -> Container<std::basic_string_view<CharT, Traits> > {
    using result_type = Container<std::basic_string_view<CharT, Traits> >;
    return str_split_to<result_type>(text, std::forward<Delimiter>(delimiter));
}
|
|
||
// Default overload: the result type is a `std::vector` of views with the same character type and traits as
// `text`.
//
// Scaffolding only: `text` and `delimiter` are not inspected yet and the returned vector is always empty.
template <class CharT, class Traits, class Delimiter>
std::vector<std::basic_string_view<CharT, Traits> > str_split_to(
    [[maybe_unused]] std::basic_string_view<CharT, Traits> text, [[maybe_unused]] Delimiter&& delimiter) {
    return {};
}
|
|
||
| // TODO(aryann): Add support for max splits. | ||
|
|
||
| } // namespace beman::str_split | ||
|
|
||
| #endif // BEMAN_STR_SPLIT_USE_MODULES() && | ||
| // !defined(BEMAN_STR_SPLIT_INCLUDED_FROM_INTERFACE_UNIT) | ||
|
|
||
| #endif // BEMAN_STR_SPLIT_STR_SPLIT_TO_HPP | ||
This file was deleted.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| #include <array> | ||
| #include <deque> | ||
| #include <string_view> | ||
| #include <type_traits> | ||
| #include <vector> | ||
|
|
||
| #include <beman/str_split/config.hpp> | ||
| #include <beman/str_split/str_split_to.hpp> | ||
| #include <gmock/gmock.h> | ||
| #include <gtest/gtest.h> | ||
|
|
||
| namespace { | ||
|
|
||
| using namespace std::literals::string_view_literals; | ||
|
|
||
| using ::beman::str_split::split_by; | ||
| using ::beman::str_split::split_by_ascii_whitespace; | ||
| using ::beman::str_split::split_by_char; | ||
| using ::beman::str_split::str_split_to; | ||
| using ::testing::ElementsAre; | ||
|
|
||
// Checks that `split_by` can be constructed from string-like arguments and from ranges of `char`, and that the
// resulting delimiter is accepted by `str_split_to`. Every expectation is an empty `ElementsAre()`.
| TEST(Delimiter, SplitBy) { | ||
| // `std::string_view`-convertible inputs: | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(" ")), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by("string")), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(std::string("string"))), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(std::string_view("string"))), ElementsAre()); | ||
|
|
||
| // Ranges: | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(std::vector<char>{'a', 'b', 'c'})), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by(std::array{'a', 'b', 'c'})), ElementsAre()); | ||
| } | ||
|
|
||
// Checks that a `split_by_char` delimiter is accepted by `str_split_to`; the expected result is empty in both cases.
| TEST(Delimiter, SplitByChar) { | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by_char(' ')), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by_char('s')), ElementsAre()); | ||
| } | ||
|
|
||
// Checks that a `split_by_ascii_whitespace` delimiter is accepted by `str_split_to` for an empty and a non-empty
// input; the expected result is empty in both cases.
| TEST(Delimiter, SplitByAsciiWhitespace) { | ||
| EXPECT_THAT(str_split_to(""sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
// Checks, at compile time, that calling `str_split_to` without explicit template arguments yields
// `std::vector<std::string_view>`, and at run time that the result is empty.
| TEST(StrSplitTo, StringViewVector) { | ||
| static_assert(std::is_same_v<decltype(str_split_to("my string"sv, split_by_ascii_whitespace())), | ||
| std::vector<std::string_view>>); | ||
|
|
||
| EXPECT_THAT(str_split_to("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
// Checks that an explicitly requested container of `std::string_view` (vector or deque) is exactly the type that
// `str_split_to` returns, and that the returned container is empty.
| TEST(StrSplitTo, StringViewContainer) { | ||
| static_assert(std::is_same_v<decltype(str_split_to<std::vector<std::string_view>>("my string"sv, | ||
| split_by_ascii_whitespace())), | ||
| std::vector<std::string_view>>); | ||
| static_assert(std::is_same_v<decltype(str_split_to<std::deque<std::string_view>>("my string"sv, | ||
| split_by_ascii_whitespace())), | ||
| std::deque<std::string_view>>); | ||
|
|
||
| EXPECT_THAT(str_split_to<std::vector<std::string_view>>("my string"sv, split_by_ascii_whitespace()), | ||
| ElementsAre()); | ||
| EXPECT_THAT(str_split_to<std::deque<std::string_view>>("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
// Same as the string-view container test, but for containers whose element type is the owning `std::string`.
| TEST(StrSplitTo, StringContainer) { | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to<std::vector<std::string>>("my string"sv, split_by_ascii_whitespace())), | ||
| std::vector<std::string>>); | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to<std::deque<std::string>>("my string"sv, split_by_ascii_whitespace())), | ||
| std::deque<std::string>>); | ||
|
|
||
| EXPECT_THAT(str_split_to<std::vector<std::string>>("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to<std::deque<std::string>>("my string"sv, split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
// Checks that for wide, UTF-8, UTF-16 and UTF-32 string views the default result type is a `std::vector` of the
// matching view type, and that the returned vector is empty.
| TEST(StrSplitTo, CharTypes) { | ||
| static_assert(std::is_same_v<decltype(str_split_to(std::wstring_view(L"my string"), split_by_ascii_whitespace())), | ||
| std::vector<std::wstring_view>>); | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to(std::u8string_view(u8"my string"), split_by_ascii_whitespace())), | ||
| std::vector<std::u8string_view>>); | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to(std::u16string_view(u"my string"), split_by_ascii_whitespace())), | ||
| std::vector<std::u16string_view>>); | ||
| static_assert( | ||
| std::is_same_v<decltype(str_split_to(std::u32string_view(U"my string"), split_by_ascii_whitespace())), | ||
| std::vector<std::u32string_view>>); | ||
|
|
||
| EXPECT_THAT(str_split_to(std::wstring_view(L"my string"), split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to(std::u8string_view(u8"my string"), split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to(std::u16string_view(u"my string"), split_by_ascii_whitespace()), ElementsAre()); | ||
| EXPECT_THAT(str_split_to(std::u32string_view(U"my string"), split_by_ascii_whitespace()), ElementsAre()); | ||
| } | ||
|
|
||
| } // namespace |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we could constrain this with:
`std::constructible_from<std::string, std::from_range_t, R>`. We wouldn't need the `std::string_view` constructor in that case.