StringAlgorithms: Refactor cmTokenize() function

- Refactor and optimize the tokenizing loop to make it shorter and faster
- Make it write tokens to an arbitrary (templated) output iterator
- Make the separator a template parameter, with the most commonly used type (`char`) as the default
- Add a backward-compatible signature that returns `std::vector<std::string>`
- Add an alternative function `cmTokenizedView()` that returns a vector of string views (see the usage sketch below)
Alex Turbov 2024-10-25 03:10:14 +04:00
parent 9c02fad126
commit f3f70c2f90
GPG Key ID: 8BEDB7D11F95D5E3
14 changed files with 157 additions and 96 deletions
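
A minimal usage sketch of the refactored API, written against the declarations this commit adds to cmStringAlgorithms.h (see the header diff below). It is not part of the commit; the function Example() and the variable names are illustrative only.

#include <iterator>
#include <set>
#include <string>
#include <vector>

#include <cm/string_view>

#include "cmStringAlgorithms.h"

void Example()
{
  cm::string_view const input = "a;;b;c";

  // Backward-compatible call: returns std::vector<std::string>.
  // With the default cmTokenizerMode::Legacy, an input containing
  // no tokens yields a single empty string rather than an empty vector.
  std::vector<std::string> words = cmTokenize(input, ';');

  // New mode: an input containing no tokens yields an empty vector.
  std::vector<std::string> none =
    cmTokenize("", ';', cmTokenizerMode::New);

  // Zero-copy variant: returns std::vector<cm::string_view> whose
  // elements refer into `input`, so `input` must outlive the views.
  std::vector<cm::string_view> views = cmTokenizedView(input, ';');

  // Generic form: write tokens of any string-like type to an
  // arbitrary output iterator, e.g. filling a std::set directly.
  std::set<std::string> unique;
  cmTokenize<std::string>(std::inserter(unique, unique.end()), input, ';',
                          cmTokenizerMode::New);
}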

View File

@@ -50,14 +50,13 @@ void cmWIXAccessControlList::CreatePermissionElement(std::string const& entry)
user = user_and_domain;
}
std::vector<std::string> permissions = cmTokenize(permission_string, ",");
this->SourceWriter.BeginElement("Permission");
this->SourceWriter.AddAttribute("User", std::string(user));
if (!domain.empty()) {
this->SourceWriter.AddAttribute("Domain", std::string(domain));
}
for (std::string const& permission : permissions) {
for (auto permission :
cmTokenizedView(permission_string, ',', cmTokenizerMode::New)) {
this->EmitBooleanAttribute(entry, cmTrimWhitespace(permission));
}
this->SourceWriter.EndElement("Permission");

View File

@@ -980,7 +980,7 @@ bool cmCPackInnoSetupGenerator::BuildDownloadedComponentArchive(
"Problem running certutil command: " << hashCmd
<< std::endl);
}
*hash = cmTrimWhitespace(cmTokenize(hashOutput, "\n").at(1));
*hash = cmTrimWhitespace(cmTokenizedView(hashOutput, '\n').at(1));
if (hash->length() != 64) {
cmCPackLogger(cmCPackLog::LOG_WARNING,

View File

@@ -257,7 +257,7 @@ const LinkLibraryFeatureAttributeSet& GetLinkLibraryFeatureAttributes(
if (processingOption.match(1) == "LIBRARY_TYPE") {
featureAttributes.LibraryTypes.clear();
for (auto const& value :
cmTokenize(processingOption.match(2), ","_s)) {
cmTokenize(processingOption.match(2), ',')) {
if (value == "STATIC") {
featureAttributes.LibraryTypes.emplace(
cmStateEnums::STATIC_LIBRARY);
@@ -292,7 +292,8 @@ const LinkLibraryFeatureAttributeSet& GetLinkLibraryFeatureAttributes(
}
} else if (processingOption.match(1) == "OVERRIDE") {
featureAttributes.Override.clear();
auto values = cmTokenize(processingOption.match(2), ","_s);
std::vector<std::string> values =
cmTokenize(processingOption.match(2), ',');
featureAttributes.Override.insert(values.begin(), values.end());
}
} else {
@@ -668,13 +669,14 @@ cmComputeLinkDepends::cmComputeLinkDepends(const cmGeneratorTarget* target,
*linkLibraryOverride, target->GetLocalGenerator(), config, target, &dag,
target, linkLanguage);
auto overrideList = cmTokenize(overrideValue, ","_s);
std::vector<std::string> overrideList =
cmTokenize(overrideValue, ',', cmTokenizerMode::New);
if (overrideList.size() >= 2) {
auto const& feature = overrideList.front();
for_each(overrideList.cbegin() + 1, overrideList.cend(),
[this, &feature](std::string const& item) {
this->LinkLibraryOverride.emplace(item, feature);
});
std::for_each(overrideList.cbegin() + 1, overrideList.cend(),
[this, &feature](std::string const& item) {
this->LinkLibraryOverride.emplace(item, feature);
});
}
}
}

View File

@@ -565,11 +565,11 @@ std::vector<BT<std::string>>& cmGeneratorTarget::ResolveLinkerWrapper(
cmSystemTools::ParseUnixCommandLine(
value.c_str() + LINKER_SHELL.length(), linkerOptions);
} else {
linkerOptions = cmTokenize(value.substr(LINKER.length()), ",");
linkerOptions =
cmTokenize(value.substr(LINKER.length()), ',', cmTokenizerMode::New);
}
if (linkerOptions.empty() ||
(linkerOptions.size() == 1 && linkerOptions.front().empty())) {
if (linkerOptions.empty()) {
continue;
}

View File

@@ -339,12 +339,13 @@ bool cmGlobalVisualStudio10Generator::SetGeneratorToolset(
bool cmGlobalVisualStudio10Generator::ParseGeneratorToolset(
std::string const& ts, cmMakefile* mf)
{
std::vector<std::string> const fields = cmTokenize(ts, ",");
auto fi = fields.begin();
if (fi == fields.end()) {
std::vector<std::string> const fields =
cmTokenize(ts, ',', cmTokenizerMode::New);
if (fields.empty()) {
return true;
}
auto fi = fields.begin();
// The first field may be the VS platform toolset.
if (fi->find('=') == fi->npos) {
this->GeneratorToolset = *fi;

View File

@@ -128,12 +128,13 @@ bool cmGlobalVisualStudio8Generator::ParseGeneratorPlatform(
{
this->GeneratorPlatform.clear();
std::vector<std::string> const fields = cmTokenize(p, ",");
auto fi = fields.begin();
if (fi == fields.end()) {
std::vector<std::string> const fields =
cmTokenize(p, ',', cmTokenizerMode::New);
if (fields.empty()) {
return true;
}
auto fi = fields.begin();
// The first field may be the VS platform.
if (fi->find('=') == fi->npos) {
this->GeneratorPlatform = *fi;

View File

@@ -584,12 +584,13 @@ bool cmGlobalVisualStudioVersionedGenerator::ParseGeneratorInstance(
this->GeneratorInstance.clear();
this->GeneratorInstanceVersion.clear();
std::vector<std::string> const fields = cmTokenize(is, ",");
auto fi = fields.begin();
if (fi == fields.end()) {
std::vector<std::string> const fields =
cmTokenize(is, ',', cmTokenizerMode::New);
if (fields.empty()) {
return true;
}
auto fi = fields.begin();
// The first field may be the VS instance.
if (fi->find('=') == fi->npos) {
this->GeneratorInstance = *fi;

View File

@@ -355,12 +355,13 @@ bool cmGlobalXCodeGenerator::SetGeneratorToolset(std::string const& ts,
bool cmGlobalXCodeGenerator::ParseGeneratorToolset(std::string const& ts,
cmMakefile* mf)
{
std::vector<std::string> const fields = cmTokenize(ts, ",");
auto fi = fields.cbegin();
if (fi == fields.cend()) {
std::vector<std::string> const fields =
cmTokenize(ts, ',', cmTokenizerMode::New);
if (fields.empty()) {
return true;
}
auto fi = fields.cbegin();
// The first field may be the Xcode GCC_VERSION.
if (fi->find('=') == fi->npos) {
this->GeneratorToolset = *fi;
@@ -4457,7 +4458,7 @@ cmXCodeObject* cmGlobalXCodeGenerator::CreateOrGetPBXGroup(
if (it != this->TargetGroup.end()) {
tgroup = it->second;
} else {
std::vector<std::string> tgt_folders = cmTokenize(target, "/");
std::vector<std::string> const tgt_folders = cmTokenize(target, '/');
std::string curr_tgt_folder;
for (std::vector<std::string>::size_type i = 0; i < tgt_folders.size();
i++) {
@@ -4490,7 +4491,7 @@ cmXCodeObject* cmGlobalXCodeGenerator::CreateOrGetPBXGroup(
// It's a recursive folder structure, let's find the real parent group
if (sg->GetFullName() != sg->GetName()) {
std::string curr_folder = cmStrCat(target, '/');
for (auto const& folder : cmTokenize(sg->GetFullName(), "\\")) {
for (auto const& folder : cmTokenize(sg->GetFullName(), '\\')) {
curr_folder += folder;
auto const i_folder = this->GroupNameMap.find(curr_folder);
// Create new folder

View File

@@ -2387,13 +2387,9 @@ cmSourceGroup* cmMakefile::GetOrCreateSourceGroup(
cmSourceGroup* cmMakefile::GetOrCreateSourceGroup(const std::string& name)
{
std::string delimiters;
if (cmValue p = this->GetDefinition("SOURCE_GROUP_DELIMITER")) {
delimiters = *p;
} else {
delimiters = "/\\";
}
return this->GetOrCreateSourceGroup(cmTokenize(name, delimiters));
auto p = this->GetDefinition("SOURCE_GROUP_DELIMITER");
return this->GetOrCreateSourceGroup(
cmTokenize(name, p ? cm::string_view(*p) : R"(\/)"_s));
}
/**

View File

@@ -29,11 +29,6 @@ const std::string kFilesOptionName = "FILES";
const std::string kRegexOptionName = "REGULAR_EXPRESSION";
const std::string kSourceGroupOptionName = "<sg_name>";
std::vector<std::string> tokenizePath(const std::string& path)
{
return cmTokenize(path, "\\/");
}
std::set<std::string> getSourceGroupFilesPaths(
const std::string& root, const std::vector<std::string>& files)
{
@@ -95,31 +90,28 @@ bool addFilesToItsSourceGroups(const std::string& root,
cmSourceGroup* sg;
for (std::string const& sgFilesPath : sgFilesPaths) {
std::vector<std::string> tokenizedPath = cmTokenize(
prefix.empty() ? sgFilesPath : cmStrCat(prefix, '/', sgFilesPath),
R"(\/)", cmTokenizerMode::New);
std::vector<std::string> tokenizedPath;
if (!prefix.empty()) {
tokenizedPath = tokenizePath(cmStrCat(prefix, '/', sgFilesPath));
} else {
tokenizedPath = tokenizePath(sgFilesPath);
if (tokenizedPath.empty()) {
continue;
}
tokenizedPath.pop_back();
if (tokenizedPath.empty()) {
tokenizedPath.emplace_back();
}
if (!tokenizedPath.empty()) {
tokenizedPath.pop_back();
sg = makefile.GetOrCreateSourceGroup(tokenizedPath);
if (tokenizedPath.empty()) {
tokenizedPath.emplace_back();
}
sg = makefile.GetOrCreateSourceGroup(tokenizedPath);
if (!sg) {
errorMsg = "Could not create source group for file: " + sgFilesPath;
return false;
}
const std::string fullPath =
cmSystemTools::CollapseFullPath(sgFilesPath, root);
sg->AddGroupFile(fullPath);
if (!sg) {
errorMsg = "Could not create source group for file: " + sgFilesPath;
return false;
}
const std::string fullPath =
cmSystemTools::CollapseFullPath(sgFilesPath, root);
sg->AddGroupFile(fullPath);
}
return true;

View File

@@ -55,30 +55,6 @@ std::string cmEscapeQuotes(cm::string_view str)
return result;
}
std::vector<std::string> cmTokenize(cm::string_view str, cm::string_view sep)
{
std::vector<std::string> tokens;
cm::string_view::size_type tokend = 0;
do {
cm::string_view::size_type tokstart = str.find_first_not_of(sep, tokend);
if (tokstart == cm::string_view::npos) {
break; // no more tokens
}
tokend = str.find_first_of(sep, tokstart);
if (tokend == cm::string_view::npos) {
tokens.emplace_back(str.substr(tokstart));
} else {
tokens.emplace_back(str.substr(tokstart, tokend - tokstart));
}
} while (tokend != cm::string_view::npos);
if (tokens.empty()) {
tokens.emplace_back();
}
return tokens;
}
namespace {
template <std::size_t N, typename T>
inline void MakeDigits(cm::string_view& view, char (&digits)[N],

View File

@@ -24,7 +24,7 @@ using cmStringRange = cmRange<std::vector<std::string>::const_iterator>;
/** Returns length of a literal string. */
template <size_t N>
constexpr size_t cmStrLen(const char (&/*str*/)[N])
constexpr size_t cmStrLen(const char (&)[N])
{
return N - 1;
}
@@ -91,12 +91,12 @@ std::string cmJoinStrings(Range const& rng, cm::string_view separator,
}
std::string result;
result.reserve(
std::accumulate(std::begin(rng), std::end(rng),
initial.size() + (rng.size() - 1) * separator.size(),
[](std::size_t sum, const std::string& item) {
return sum + item.size();
}));
result.reserve(std::accumulate(
std::begin(rng), std::end(rng),
initial.size() + (rng.size() - 1) * separator.size(),
[](std::size_t sum, typename Range::value_type const& item) {
return sum + item.size();
}));
result.append(std::begin(initial), std::end(initial));
auto begin = std::begin(rng);
@@ -122,8 +122,81 @@ std::string cmJoin(std::vector<std::string> const& rng,
std::string cmJoin(cmStringRange const& rng, cm::string_view separator,
cm::string_view initial = {});
/** Extract tokens that are separated by any of the characters in @a sep. */
std::vector<std::string> cmTokenize(cm::string_view str, cm::string_view sep);
enum class cmTokenizerMode
{
/// The backward-compatible behavior: when no tokens are found
/// in the input text, return a single empty token in the
/// result container (vector).
Legacy,
/// The new behavior is to return an empty vector.
New
};
/**
* \brief A generic version of a tokenizer.
*
* Extract tokens from the input string separated by any
* of the characters in `sep` and assign them to the
* given output iterator.
*
* The `mode` parameter defines the behavior when no tokens
* are found in the input text.
*
*/
template <typename StringT, typename OutIt, typename Sep = char>
void cmTokenize(OutIt outIt, cm::string_view str, Sep sep,
cmTokenizerMode mode)
{
auto hasTokens = false;
// clang-format off
for (auto start = str.find_first_not_of(sep)
, end = str.find_first_of(sep, start)
; start != cm::string_view::npos
; start = str.find_first_not_of(sep, end)
, end = str.find_first_of(sep, start)
, hasTokens = true
) {
*outIt++ = StringT{ str.substr(start, end - start) };
}
// clang-format on
if (!hasTokens && mode == cmTokenizerMode::Legacy) {
*outIt = {};
}
}
/**
* \brief Extract tokens that are separated by any of the
* characters in `sep`.
*
* Backward compatible signature.
*
* \return A vector of strings.
*/
template <typename Sep = char>
std::vector<std::string> cmTokenize(
cm::string_view str, Sep sep, cmTokenizerMode mode = cmTokenizerMode::Legacy)
{
using StringType = std::string;
std::vector<StringType> tokens;
cmTokenize<StringType>(std::back_inserter(tokens), str, sep, mode);
return tokens;
}
/**
* \brief Extract tokens that are separated by any of the
* characters in `sep`.
*
* \return A vector of string views.
*/
template <typename Sep = char>
std::vector<cm::string_view> cmTokenizedView(
cm::string_view str, Sep sep, cmTokenizerMode mode = cmTokenizerMode::Legacy)
{
using StringType = cm::string_view;
std::vector<StringType> tokens;
cmTokenize<StringType>(std::back_inserter(tokens), str, sep, mode);
return tokens;
}
/** Concatenate string pieces into a single string. */
std::string cmCatViews(

View File

@@ -1131,7 +1131,7 @@ void cmake::SetArgs(const std::vector<std::string>& args)
"--debug-find-pkg", "Provide a package argument for --debug-find-pkg",
CommandArgument::Values::One, CommandArgument::RequiresSeparator::Yes,
[](std::string const& value, cmake* state) -> bool {
std::vector<std::string> find_pkgs(cmTokenize(value, ","));
std::vector<std::string> find_pkgs(cmTokenize(value, ','));
std::cout << "Running with debug output on for the 'find' commands "
"for package(s)";
for (auto const& v : find_pkgs) {
@@ -1145,7 +1145,7 @@ void cmake::SetArgs(const std::vector<std::string>& args)
"--debug-find-var", CommandArgument::Values::One,
CommandArgument::RequiresSeparator::Yes,
[](std::string const& value, cmake* state) -> bool {
std::vector<std::string> find_vars(cmTokenize(value, ","));
std::vector<std::string> find_vars(cmTokenize(value, ','));
std::cout << "Running with debug output on for the variable(s)";
for (auto const& v : find_vars) {
std::cout << ' ' << v;

View File

@@ -10,6 +10,7 @@
#include <vector>
#include <cm/string_view>
#include <cmext/string_view>
#include "cmStringAlgorithms.h"
@@ -103,7 +104,9 @@ int testStringAlgorithms(int /*unused*/, char* /*unused*/[])
{
typedef std::vector<std::string> VT;
assert_ok(cmTokenize("", ";") == VT{ "" }, "cmTokenize empty");
assert_ok(cmTokenize(";", ";") == VT{ "" }, "cmTokenize sep");
assert_ok(cmTokenize(";", ";") == VT{ "" }, "cmTokenize sep (char*)");
assert_ok(cmTokenize(";", ";"_s) == VT{ "" },
"cmTokenize sep (string_view)");
assert_ok(cmTokenize("abc", ";") == VT{ "abc" }, "cmTokenize item");
assert_ok(cmTokenize("abc;", ";") == VT{ "abc" }, "cmTokenize item sep");
assert_ok(cmTokenize(";abc", ";") == VT{ "abc" }, "cmTokenize sep item");
@@ -112,6 +115,22 @@ int testStringAlgorithms(int /*unused*/, char* /*unused*/[])
assert_ok(cmTokenize("a1;a2;a3;a4", ";") == VT{ "a1", "a2", "a3", "a4" },
"cmTokenize multiple items");
}
{
typedef std::vector<cm::string_view> VT;
assert_ok(cmTokenizedView("", ';') == VT{ "" }, "cmTokenizedView empty");
assert_ok(cmTokenizedView(";", ';') == VT{ "" }, "cmTokenizedView sep");
assert_ok(cmTokenizedView("abc", ';') == VT{ "abc" },
"cmTokenizedView item");
assert_ok(cmTokenizedView("abc;", ';') == VT{ "abc" },
"cmTokenizedView item sep");
assert_ok(cmTokenizedView(";abc", ';') == VT{ "abc" },
"cmTokenizedView sep item");
assert_ok(cmTokenizedView("abc;;efg", ';') == VT{ "abc", "efg" },
"cmTokenizedView item sep sep item");
assert_ok(cmTokenizedView("a1;a2;a3;a4", ';') ==
VT{ "a1", "a2", "a3", "a4" },
"cmTokenizedView multiple items");
}
// ----------------------------------------------------------------------
// Test cmStrCat