Files
TL/include/tl/algorithm/string.h
T
jeanlemotan 8297b0b45f First
2024-07-02 18:06:33 +02:00

623 lines
16 KiB
C++

#pragma once
#include "tl/detail/prologue.h"
#include "tl/functional.h"
#include <EASTL/vector.h>
#include <tl/fixed_vector.h>
#include "tl/string.h"
#include "tl/optional.h"
namespace tl
{
namespace algorithm
{
// Controls what the split_* functions in this file do with zero-length tokens
// (e.g. the text between two adjacent delimiters).
enum class empty_token_policy
{
// Zero-length tokens are skipped and never reported.
discard,
// Zero-length tokens are reported like any other token.
keep
};
template <class DstContainer, class SrcContainer>
DstContainer to_lower_ascii_copy(const SrcContainer& str) noexcept
{
if constexpr (tl::is_same_v<DstContainer, string>)
{
if (str.empty())
return string();
const string::size_type s = str.size();
fixed_vector<char, 512> b(s);
char const* string = str.data();
for (string::size_type i = 0; i < s; i++)
b[i] = (char)ascii::tolower(string[i]);
return { b.data(), s };
}
else
{
DstContainer dst;
dst.resize(str.size());
eastl::transform(tl::begin(str), tl::end(str), tl::begin(dst), ascii::tolower);
return dst;
}}
// Convenience overload: the result has the same container type as the input.
template <class Container>
Container to_lower_ascii_copy(const Container& str) noexcept
{
    using Same = Container;
    return to_lower_ascii_copy<Same, Same>(str);
}
template <class DstContainer, class SrcContainer>
DstContainer to_upper_ascii_copy(const SrcContainer& str) noexcept
{
if constexpr (tl::is_same_v<DstContainer, string>)
{
if (str.empty())
return string();
const string::size_type s = str.size();
fixed_vector<char, 512> b(s);
char const* string = str.data();
for (string::size_type i = 0; i < s; i++)
b[i] = (char)ascii::toupper(string[i]);
return { b.data(), s };
}
else
{
DstContainer dst;
dst.resize(str.size());
eastl::transform(tl::begin(str), tl::end(str), tl::begin(dst), ascii::toupper);
return dst;
}
}
// Convenience overload: the result has the same container type as the input.
template <class Container>
Container to_upper_ascii_copy(const Container& str) noexcept
{
    using Same = Container;
    return to_upper_ascii_copy<Same, Same>(str);
}
// Rewrites every element of `str` in place with the result of ascii::tolower.
template <class Container>
void to_lower_ascii(Container& str) noexcept
{
    const auto first = tl::begin(str);
    const auto last = tl::end(str);
    // Source and destination ranges coincide, so the conversion happens in place.
    eastl::transform(first, last, first, ascii::tolower);
}
// Specialization for `string`: instead of transforming in place, a converted
// copy is built and assigned back over `str`.
template <>
inline void to_lower_ascii(string& str) noexcept
{
    str = to_lower_ascii_copy<string, string>(str);
}
// Rewrites every element of `str` in place with the result of ascii::toupper.
template <class Container>
void to_upper_ascii(Container& str) noexcept
{
    const auto first = tl::begin(str);
    const auto last = tl::end(str);
    // Source and destination ranges coincide, so the conversion happens in place.
    eastl::transform(first, last, first, ascii::toupper);
}
// Specialization for `string`: instead of transforming in place, a converted
// copy is built and assigned back over `str`.
template <>
inline void to_upper_ascii(string& str) noexcept
{
    str = to_upper_ascii_copy<string, string>(str);
}
// Splits `str` on any character of the NUL-terminated `separators` list and
// returns the tokens. The C string is first converted to a String and the work
// is forwarded to the String-delimiter overload.
template <class String>
tl::vector<String> split_on_any(const String& str, const char* separators, empty_token_policy token_policy = empty_token_policy::discard) noexcept
{
    const String separatorSet(separators);
    return split_on_any(str, separatorSet, token_policy);
}
// Splits `str` on any of the characters in `separators` and returns the tokens
// in order of appearance. `token_policy` decides whether zero-length tokens are
// part of the result.
template <class String, class Delim>
tl::vector<String> split_on_any(const String& str, const Delim& separators, empty_token_policy token_policy = empty_token_policy::discard) noexcept
{
    tl::vector<String> tokens;
    tokens.reserve(32); // up-front capacity so typical inputs do not regrow the vector

    // The callback overload does the actual splitting; this only collects its output.
    const auto collect = [&tokens](const String& token) { tokens.push_back(token); };
    split_on_any(str, separators, collect, token_policy);
    return tokens;
}
// Splits `str` at every occurrence of the complete `separator` sequence and
// returns the tokens in order of appearance. `token_policy` decides whether
// zero-length tokens are part of the result.
template <class String, class Delim>
tl::vector<String> split_on_all(const String& str, const Delim& separator, empty_token_policy token_policy = empty_token_policy::discard) noexcept
{
    tl::vector<String> tokens;
    tokens.reserve(32); // up-front capacity so typical inputs do not regrow the vector

    // The callback overload does the actual splitting; this only collects its output.
    const auto collect = [&tokens](const String& token) { tokens.push_back(token); };
    split_on_all(str, separator, collect, token_policy);
    return tokens;
}
// Callback flavour taking the delimiter list as a NUL-terminated C string.
// The list is converted to a String and forwarded to the matching overload,
// which calls `functor` once per token.
template <class String, class Func>
void split_on_any(const String& str, const char* delimiters, const Func& functor, empty_token_policy token_policy = empty_token_policy::discard) noexcept
{
    const String delimiterSet(delimiters);
    split_on_any(str, delimiterSet, functor, token_policy);
}
// Splits `str` at every occurrence of the single character `delimiter`,
// calling `functor(token)` for each token from left to right.
//
// One String object is reused for every token, so the callback must copy the
// token if it needs to keep it. Zero-length tokens (adjacent delimiters, a
// leading or trailing delimiter, or an empty input) are only reported when
// `token_policy` is empty_token_policy::keep.
template <class String, class Func>
void split_on_any(const String& str, char delimiter, const Func& functor, empty_token_policy token_policy = empty_token_policy::discard) noexcept
{
    const bool keepEmpty = token_policy == empty_token_policy::keep;
    const char* const base = str.data();
    const size_t length = str.size();

    String piece;
    // `begin <= length` (not `<`) so that the token after the last delimiter,
    // possibly empty, is still visited.
    for (size_t begin = 0; begin <= length;)
    {
        const char* const hit = static_cast<const char*>(::memchr(base + begin, delimiter, length - begin));
        const size_t stop = hit ? static_cast<size_t>(hit - base) : length;
        if (keepEmpty || begin < stop)
        {
            piece.clear();
            piece.append(base + begin, base + stop);
            functor(piece);
        }
        begin = stop + 1; // step over the delimiter
    }
}
// Splits `str` at every character that appears in `delimiters`, calling
// `functor(token)` for each token from left to right.
//
// An empty delimiter set is reported through TL_PLAIN_FAIL and nothing is
// split; a set of exactly one character is delegated to the single-character
// overload. One String object is reused for every token. Zero-length tokens
// are only reported when `token_policy` is empty_token_policy::keep.
template <class String, class Delim, class Func>
void split_on_any(const String& str, const Delim& delimiters, const Func& functor, empty_token_policy token_policy = empty_token_policy::discard) noexcept
{
    const size_t delimiterCount = delimiters.size();
    if (delimiterCount == 0)
    {
        TL_PLAIN_FAIL("'delimiters' cannot be empty");
        return;
    }
    if (delimiterCount == 1)
    {
        split_on_any(str, delimiters[0], functor, token_policy);
        return;
    }

    const bool keepEmpty = token_policy == empty_token_policy::keep;
    const char* const text = str.data();
    const char* const delimiterSet = delimiters.data();
    const size_t textSize = str.size();

    String piece;
    size_t tokenStart = 0;
    // `<=` so that the token after the last delimiter, possibly empty, is still visited.
    while (tokenStart <= textSize)
    {
        // Advance to the next character that belongs to the delimiter set.
        size_t tokenEnd = tokenStart;
        while (tokenEnd < textSize && ::memchr(delimiterSet, text[tokenEnd], delimiterCount) == nullptr)
            ++tokenEnd;

        if (keepEmpty || tokenStart < tokenEnd)
        {
            piece.clear();
            piece.append(text + tokenStart, text + tokenEnd);
            functor(piece);
        }
        tokenStart = tokenEnd + 1; // step over the delimiter
    }
}
// Splits `str` at every occurrence of the whole sequence `sep`, calling
// `functor(token)` for each token from left to right.
//
// `sep` is converted to a String first; an empty separator is reported through
// TL_PLAIN_FAIL and nothing is split. Non-empty tokens share one reused String
// object. Zero-length tokens are only reported (as a fresh empty String) when
// `token_policy` is empty_token_policy::keep.
template <class String, class Delim, class Func>
void split_on_all(const String& str, const Delim& sep, const Func& functor, empty_token_policy token_policy = empty_token_policy::discard) noexcept
{
    const String separator(sep);
    if (separator.empty())
    {
        TL_PLAIN_FAIL("'delimiters' cannot be empty");
        return;
    }

    const bool keepEmpty = token_policy == empty_token_policy::keep;
    String token;
    size_t cursor = 0;
    for (;;)
    {
        const size_t hit = str.find(separator, cursor);
        if (hit == String::npos)
            break;

        if (hit > cursor)
        {
            token.clear();
            token.append(str, cursor, hit - cursor);
            functor(token);
        }
        else if (keepEmpty)
        {
            functor(String{});
        }
        cursor = hit + separator.size(); // resume right after the separator
    }

    // Whatever follows the last separator (or the whole input if none was found).
    if (cursor < str.size())
    {
        token.clear();
        token.append(str, cursor, String::npos);
        functor(token);
    }
    else if (keepEmpty)
    {
        functor(String{});
    }
}
// Returns the first `length` characters of `text`, or all of `text` when it is
// shorter than that.
template <class String>
String left(const String& text, size_t length) noexcept
{
    const size_t available = text.size();
    const size_t count = (length < available) ? length : available;
    return text.substr(0, count);
}
// Returns the last `length` characters of `text`, or all of `text` when it is
// shorter than that.
template <class String>
String right(const String& text, size_t length) noexcept
{
    const size_t total = text.size();
    const size_t skip = (length < total) ? total - length : 0;
    return text.substr(skip);
}
// Returns true when `text` begins with `prefix` (byte-wise comparison).
// An empty prefix always matches.
template <class String>
bool starts_with(const String& text, const String& prefix) noexcept
{
    const size_t prefixSize = prefix.size();
    // A prefix longer than the text can never match; checking first also keeps
    // memcmp inside the bounds of `text`.
    return text.size() >= prefixSize && memcmp(text.data(), prefix.data(), prefixSize) == 0;
}
// Same test as starts_with, but the bytes are compared with ascii::memicmp
// instead of memcmp.
template <class String>
bool starts_with_ci(const String& text, const String& prefix) noexcept
{
    const size_t prefixSize = prefix.size();
    if (prefixSize > text.size())
        return false; // prefix longer than text: cannot match
    return ascii::memicmp(text.data(), prefix.data(), prefixSize) == 0;
}
// Returns true when `text` ends with `suffix` (byte-wise comparison).
// An empty suffix always matches.
template <class String>
bool ends_with(const String& text, const String& suffix) noexcept
{
    const size_t suffixSize = suffix.size();
    const size_t textSize = text.size();
    if (suffixSize > textSize)
        return false; // suffix longer than text: cannot match

    // Compare against the last `suffixSize` bytes of `text`.
    const char* const tail = text.data() + (textSize - suffixSize);
    return memcmp(tail, suffix.data(), suffixSize) == 0;
}
// Same test as ends_with, but the bytes are compared with ascii::memicmp
// instead of memcmp.
template <class String>
bool ends_with_ci(const String& text, const String& suffix) noexcept
{
    const size_t suffixSize = suffix.size();
    const size_t textSize = text.size();
    if (suffixSize > textSize)
        return false; // suffix longer than text: cannot match

    // Compare against the last `suffixSize` bytes of `text`.
    const char* const tail = text.data() + (textSize - suffixSize);
    return ascii::memicmp(tail, suffix.data(), suffixSize) == 0;
}
// Returns `text` without its leading `prefix`; when `text` does not start with
// `prefix` an unchanged copy is returned.
template <class String>
String remove_prefix(const String& text, const String& prefix) noexcept
{
    if (!starts_with(text, prefix))
        return text;

    const auto skip = prefix.size();
    return text.substr(skip, text.size() - skip);
}
// Returns `text` without its leading `prefix`, where the match is decided by
// starts_with_ci; when there is no match an unchanged copy is returned.
template <class String>
String remove_prefix_ci(const String& text, const String& prefix) noexcept
{
    if (!starts_with_ci(text, prefix))
        return text;

    const auto skip = prefix.size();
    return text.substr(skip, text.size() - skip);
}
// Returns `text` without its trailing `suffix`; when `text` does not end with
// `suffix` an unchanged copy is returned.
template <class String>
String remove_suffix(const String& text, const String& suffix) noexcept
{
    if (!ends_with(text, suffix))
        return text;

    const auto keptLength = text.size() - suffix.size();
    return text.substr(0, keptLength);
}
// Returns `text` without its trailing `suffix`, where the match is decided by
// ends_with_ci; when there is no match an unchanged copy is returned.
template <class String>
String remove_suffix_ci(const String& text, const String& suffix) noexcept
{
    if (!ends_with_ci(text, suffix))
        return text;

    const auto keptLength = text.size() - suffix.size();
    return text.substr(0, keptLength);
}
// Looks for `subText` inside `text` using text.find().
// Returns the offset reported by find(), or nullopt when find() yields npos.
template <class String>
eastl::optional<size_t> contains(const String& text, const String& subText) noexcept
{
    const size_t offset = text.find(subText);
    if (offset == String::npos)
        return nullopt;
    return offset;
}
// Looks for `subText` inside `text` using text.find_ci().
// Returns the offset reported by find_ci(), or nullopt when it yields npos.
template <class String>
eastl::optional<size_t> contains_ci(const String& text, const String& subText) noexcept
{
    const size_t offset = text.find_ci(subText);
    if (offset == String::npos)
        return nullopt;
    return offset;
}
// Returns `text` wrapped in `quote` on both sides: quote + text + quote.
template <class String>
String quote(const String& text, const String& quote) noexcept
{
    String wrapped(quote);
    wrapped += text;
    wrapped += quote;
    return wrapped;
}
// Strips one leading and one trailing `quote` from `text`.
// If `text` is not wrapped in `quote` on both sides, or is too short to hold
// two separate copies of it, an unchanged copy is returned.
template <class String>
String unquote(const String& text, const String& quote) noexcept
{
    const auto quoteSize = quote.size();
    const bool wrapped = starts_with(text, quote)
        && ends_with(text, quote)
        && text.size() >= quoteSize * 2; // opening and closing quote must not overlap
    if (!wrapped)
        return text;
    return text.substr(quoteSize, text.size() - quoteSize * 2);
}
// Characters treated as white space by the trim functions that take no explicit set:
// space, tab, form feed, vertical tab, line feed and carriage return.
// Declared `inline constexpr` so that every translation unit including this header
// refers to the same immutable object (a `static` pointer would give each TU its
// own, mutable copy that the templates below would then reference).
inline constexpr const char* s_defaultWhiteSpaceCStr = " \t\f\v\n\r";
// Removes from the front of `text` every character contained in `characters`.
// Returns an empty String when `text` consists solely of such characters.
template <class String>
String trim_left_any_of(const String& text, const String& characters) noexcept
{
    const typename String::size_type firstKept = text.find_first_not_of(characters);
    if (firstKept != String::npos)
        return text.substr(firstKept);
    return String();
}
// Like trim_left_any_of, but the first character to keep is located with
// text.find_first_not_of_ci(). Returns an empty String when nothing is kept.
template <class String>
String trim_left_any_of_ci(const String& text, const String& characters) noexcept
{
    const typename String::size_type firstKept = text.find_first_not_of_ci(characters);
    if (firstKept != String::npos)
        return text.substr(firstKept);
    return String();
}
template <class String>
String trim_left(const String& text) noexcept
{
static String s_defaultWhiteSpace(s_defaultWhiteSpaceCStr);
return trim_left_any_of(text, s_defaultWhiteSpace);
}
template <class String>
String trim_left_ci(const String& text) noexcept
{
static String s_defaultWhiteSpace(s_defaultWhiteSpaceCStr);
return trim_left_any_of_ci(text, s_defaultWhiteSpace);
}
// Removes from the back of `text` every character contained in `characters`.
// Returns an empty String when `text` consists solely of such characters.
template <class String>
String trim_right_any_of(const String& text, const String& characters) noexcept
{
    const typename String::size_type lastKept = text.find_last_not_of(characters);
    if (lastKept != String::npos)
        return text.substr(0, lastKept + 1); // +1: lastKept is an index, substr wants a length
    return String();
}
// Like trim_right_any_of, but the last character to keep is located with
// text.find_last_not_of_ci(). Returns an empty String when nothing is kept.
template <class String>
String trim_right_any_of_ci(const String& text, const String& characters) noexcept
{
    const typename String::size_type lastKept = text.find_last_not_of_ci(characters);
    if (lastKept != String::npos)
        return text.substr(0, lastKept + 1); // +1: lastKept is an index, substr wants a length
    return String();
}
// Removes trailing white space (the characters of s_defaultWhiteSpaceCStr) from `text`.
template <class String>
String trim_right(const String& text) noexcept
{
    // Built once per String type and reused by every later call. It is passed
    // by reference as-is; copying it into a temporary on each call would only
    // add a needless String construction.
    static const String s_defaultWhiteSpace(s_defaultWhiteSpaceCStr);
    return trim_right_any_of(text, s_defaultWhiteSpace);
}
// Removes trailing white space (the characters of s_defaultWhiteSpaceCStr) from
// `text`, going through trim_right_any_of_ci.
template <class String>
String trim_right_ci(const String& text) noexcept
{
    // Built once per String type and reused by every later call. It is passed
    // by reference as-is; copying it into a temporary on each call would only
    // add a needless String construction.
    static const String s_defaultWhiteSpace(s_defaultWhiteSpaceCStr);
    return trim_right_any_of_ci(text, s_defaultWhiteSpace);
}
// Removes white space from both ends of `text`: trim_left first, then trim_right.
template <class String>
String trim(const String& text) noexcept
{
    const String withoutLeading = trim_left(text);
    return trim_right(withoutLeading);
}
// Removes white space from both ends of `text`: trim_left_ci first, then trim_right_ci.
template <class String>
String trim_ci(const String& text) noexcept
{
    const String withoutLeading = trim_left_ci(text);
    return trim_right_ci(withoutLeading);
}
// Removes every character contained in `characters` from both ends of `text`:
// the front is trimmed first, then the back.
template <class String>
String trim_any_of(const String& text, const String& characters) noexcept
{
    const String withoutLeading = trim_left_any_of(text, characters);
    return trim_right_any_of(withoutLeading, characters);
}
// Same as trim_any_of, but built on the _ci trimming functions:
// the front is trimmed first, then the back.
template <class String>
String trim_any_of_ci(const String& text, const String& characters) noexcept
{
    const String withoutLeading = trim_left_any_of_ci(text, characters);
    return trim_right_any_of_ci(withoutLeading, characters);
}
template <class String, class RepString>
String replace(const String& text, const String& target, const function<eastl::optional<RepString>(size_t, size_t)>& functor) noexcept
{
if (target.empty())
return text;
eastl::fixed_vector<size_t, 512> positions;
size_t position = 0;
while ((position = text.find(target, position)) != String::npos)
{
positions.push_back(position);
position += target.length();
}
String result;
result.reserve(text.size());
auto start = text.begin();
for (auto it = positions.begin(); it != positions.end(); ++it)
{
const size_t position = *it;
size_t index = eastl::distance(positions.begin(), it);
eastl::optional<RepString> replacementString = functor(index, positions.size());
if (!replacementString)
continue;
auto end = text.begin() + position;
result.append(start, end); //copy up to the result
result.append(*replacementString);
start = end + target.size();
}
if (start != text.end())
result.append(start, text.end()); //copy the end run
return result;
}
template <class String, typename Fun>
String replace(const String& text, const String& target, Fun f) noexcept
{
return replace(text, target, function<eastl::optional<String>(size_t, size_t)>(f));
}
template <class Func>
size_t replace_first(const string& text, const string& target, const string& replacement, size_t startingOffset, const Func& functor) noexcept
{
if (target.empty())
{
functor(text);
return string::npos;
}
const size_t startPos = text.find(target, startingOffset);
if (startPos == string::npos)
{
functor(text);
return string::npos;
}
eastl::string result(text.eastl_str());
string res2(result.replace(startPos, target.length(), eastl::string(replacement.eastl_str())));
functor(res2);
return startPos + replacement.length();
}
namespace detail
{
// Concatenates the elements of `container`, inserting `delimiter` between
// consecutive elements, and returns the result.
// An empty container yields an empty String.
template <class Container, class String>
String join_worker(const Container& container, const String& delimiter) noexcept
{
    String dst;
    auto it = std::begin(container);
    const auto end = std::end(container);
    // Nothing to join: bail out before the first element is dereferenced.
    if (it == end)
        return dst;

    //reserve some space based on some heuristics:
    // We'll have size - 1 delimiters
    // And an average string size of 2 chars. The small choice is made to help with the small string case only where the allocator overhead is significant.
    dst.reserve((delimiter.size() + 2) * container.size());

    dst += *it;
    for (++it; it != end; ++it)
    {
        dst += delimiter;
        dst += *it;
    }
    return dst;
}
// Overload for `string` delimiters: the elements of `container` are appended
// to an eastl::string buffer with `delimiter` between consecutive elements,
// and the result is converted to `string` once at the end.
// An empty container yields an empty string.
template <class Container>
string join_worker(const Container& container, const string& delimiter) noexcept
{
    eastl::string buffer;
    auto it = std::begin(container);
    const auto end = std::end(container);
    // Nothing to join: bail out before the first element is dereferenced.
    if (it == end)
        return string(buffer);

    //reserve some space based on some heuristics:
    // We'll have size - 1 delimiters
    // And an average string size of 2 chars. The small choice is made to help with the small string case only where the allocator overhead is significant.
    buffer.reserve((delimiter.size() + 2) * container.size());

    while (true)
    {
        const string& s = *it;
        buffer.append(s.data(), s.size());
        ++it;
        if (it == end)
            break;
        buffer.append(delimiter.data(), delimiter.size());
    }
    return string(buffer);
}
// Joiner used when Delim and String are the same type once reference and const
// qualifiers are stripped (selected through enable_if): the delimiter is handed
// to join_worker as it is. Does not check for an empty container itself.
template <class Container, class Delim, class String>
std::enable_if_t<
std::is_same_v<
std::remove_const_t<std::remove_reference_t<String>>,
std::remove_const_t<std::remove_reference_t<Delim>>>,
String> join(const Container& container, const Delim& delimiter) noexcept
{
return join_worker(container, delimiter);
}
// Joiner used when Delim and String are different types once reference and
// const qualifiers are stripped (selected through enable_if): the delimiter is
// first converted to a String, then handed to join_worker. Does not check for
// an empty container itself.
template <class Container, class Delim, class String>
std::enable_if_t<
!std::is_same_v<
std::remove_const_t<std::remove_reference_t<String>>,
std::remove_const_t<std::remove_reference_t<Delim>>>,
String> join(const Container& container, const Delim& delimiter) noexcept
{
return join_worker(container, String(delimiter));
}
} //namespace detail
// Joins the elements of `container` into one String with `delimiter` between
// consecutive elements. An empty container yields a default-constructed String.
// `String` cannot be deduced from the arguments, so callers name all three
// template parameters explicitly.
template <class Container, class Delim, class String>
String join(const Container& container, const Delim& delimiter) noexcept
{
    if (!container.empty())
        return detail::join<Container, Delim, String>(container, delimiter);
    return String();
}
}
}