Add StringRef::trim() functions

Add three functions that trim characters from the front & end of a
`StringRef`. All functions return a new `StringRef` that references a
sub-string of the original `StringRef`.

- `trim(chars_to_remove)`: strips all characters from the start and end
  that occur in `chars_to_remove`.
- `trim(char_to_remove)`: same, but with a single character to remove.
- `trim()`: removes leading & trailing whitespace; equivalent to
  `trim(" \r\n\t")`.

Reviewed By: JacquesLucke

Differential Revision: https://developer.blender.org/D12031
This commit is contained in:
Sybren A. Stüvel
2021-07-27 17:30:33 +02:00
parent c6ba7359ae
commit 7e91a60be6
2 changed files with 80 additions and 0 deletions

View File

@@ -208,6 +208,18 @@ class StringRefBase {
constexpr int64_t find_first_not_of(char c, int64_t pos = 0) const;
constexpr int64_t find_last_not_of(StringRef chars, int64_t pos = INT64_MAX) const;
constexpr int64_t find_last_not_of(char c, int64_t pos = INT64_MAX) const;
/**
* Return a new StringRef that does not contain leading and trailing whitespace.
* Whitespace means any of: space, tab (`\t`), carriage return (`\r`) and newline (`\n`).
*/
constexpr StringRef trim() const;
/**
* Return a new StringRef that removes all the leading and trailing characters
* that occur in `characters_to_remove`.
*
* The result is a sub-string of this string. When no character remains (every
* character occurs in `characters_to_remove`), a default-constructed StringRef
* is returned.
*/
constexpr StringRef trim(StringRef characters_to_remove) const;
/**
* Return a new StringRef that removes all leading and trailing occurrences of
* the single character `character_to_remove`.
*/
constexpr StringRef trim(char character_to_remove) const;
};
/**
@@ -540,4 +552,34 @@ constexpr inline int64_t StringRefBase::find_last_not_of(char c, int64_t pos) co
return this->find_last_not_of(StringRef(&c, 1), pos);
}
/**
 * Strip leading and trailing whitespace. Whitespace is any of: space, tab,
 * carriage return, or newline.
 */
constexpr StringRef StringRefBase::trim() const
{
  const StringRef whitespace_chars(" \t\r\n");
  return this->trim(whitespace_chars);
}
/**
 * Strip every leading and trailing occurrence of `character_to_remove`.
 */
constexpr StringRef StringRefBase::trim(const char character_to_remove) const
{
  /* View the single character as a one-character string and defer to the
   * multi-character overload. */
  const StringRef single_char(&character_to_remove, 1);
  return this->trim(single_char);
}
/**
 * Produce a sub-string of this string from which every leading and trailing
 * character that occurs in `characters_to_remove` has been stripped.
 *
 * When no character survives, a default-constructed StringRef is returned.
 */
constexpr StringRef StringRefBase::trim(StringRef characters_to_remove) const
{
  const int64_t find_front = this->find_first_not_of(characters_to_remove);
  if (find_front != not_found) {
    const int64_t find_end = this->find_last_not_of(characters_to_remove);
    /* The forward search located a character to keep, so the backward search
     * must locate one as well; a string made up solely of
     * `characters_to_remove` never reaches this branch. */
    BLI_assert_msg(find_end != not_found,
                   "forward search found characters-to-not-remove, but backward search did not");
    /* Both indices are inclusive, hence the `+ 1` for the length. */
    return this->substr(find_front, find_end - find_front + 1);
  }
  /* Nothing but characters to remove (or an empty string). */
  return StringRef();
}
} // namespace blender

View File

@@ -278,6 +278,44 @@ TEST(string_ref, DropSuffixLargeN)
EXPECT_EQ(ref2, "");
}
/* Trimming with an arbitrary, caller-supplied set of characters. */
TEST(string_ref, TrimArbitrary)
{
  StringRef plain("test");
  StringRef padded(" test ");
  StringRef mixed_whitespace(" \t Urož with spaces ");
  StringRef multibyte_padded("žžžžleepyžžž");

  /* Characters are removed from both ends only, never from the middle. */
  EXPECT_EQ(plain.trim("t"), "es");
  EXPECT_EQ(plain.trim("te"), "s");
  EXPECT_EQ(plain.trim("test"), "");

  /* Nothing to strip: the outermost characters are not in the set. */
  EXPECT_EQ(padded.trim("t"), " test ");
  EXPECT_EQ(padded.trim(""), " test ");

  EXPECT_EQ(mixed_whitespace.trim(" "), "\t Urož with spaces"); /* TAB should be kept. */
  EXPECT_EQ(multibyte_padded.trim("ž"), "leepy");
}
/* The argument-less overload strips spaces, tabs, carriage returns and newlines. */
TEST(string_ref, TrimWhitespace)
{
  StringRef plain("test");
  StringRef padded(" test ");
  StringRef mixed_whitespace(" \t Urož with spaces ");
  StringRef only_whitespace(" \t \n\r \t ");

  EXPECT_EQ(plain.trim(), "test");
  EXPECT_EQ(padded.trim(), "test");
  EXPECT_EQ(mixed_whitespace.trim(), "Urož with spaces");
  /* A string consisting solely of whitespace trims down to nothing. */
  EXPECT_EQ(only_whitespace.trim(), "");
}
/* Trimming a single character passed as a `char`. */
TEST(string_ref, TrimCharacter)
{
  StringRef plain("test");
  StringRef padded(" test ");
  StringRef question("does this work?");

  EXPECT_EQ(plain.trim('t'), "es");
  /* A character that does not occur leaves the string untouched. */
  EXPECT_EQ(plain.trim('p'), "test");
  EXPECT_EQ(padded.trim(' '), "test");
  /* Trimming the NUL character must not strip anything. */
  EXPECT_EQ(question.trim('\000'), "does this work?");
}
TEST(string_ref, Substr)
{
StringRef ref("hello world");