StringAlgorithms: Refactor cmTokenize() function

- Refactor and optimize the loop to make it shorter and faster
- Make it push elements into an arbitrary (templated) output iterator
- Make it a template on a separator type with the most used defaults
- Add a backward compatible signature to return `std::vector<std::string>`
- Add an alternative function `cmTokenizedView()` to return a vector of string views
Commit: f3f70c2f90 (parent: 9c02fad126)
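Illustrative only, not part of the patch: a minimal sketch of how call sites can use the refactored API, assuming the cmTokenize()/cmTokenizedView() overloads and the cmTokenizerMode enum added to cmStringAlgorithms.h later in this diff. Example() is a hypothetical function used purely for illustration.

    // Sketch only: exercises the API added by this patch.
    #include <iterator>
    #include <set>
    #include <string>
    #include <vector>

    #include <cm/string_view>

    #include "cmStringAlgorithms.h"

    void Example() // hypothetical, for illustration
    {
      // Backward compatible signature: returns std::vector<std::string>.
      std::vector<std::string> fields = cmTokenize("a,b,,c", ',');

      // String views into the original buffer; New mode returns an empty
      // vector when the input holds no tokens.
      std::vector<cm::string_view> views =
        cmTokenizedView("a,b,,c", ',', cmTokenizerMode::New);

      // Generic form: tokens are pushed to an arbitrary output iterator.
      std::set<std::string> unique;
      cmTokenize<std::string>(std::inserter(unique, unique.begin()),
                              "x;y;x", ';', cmTokenizerMode::New);
    }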
@@ -50,14 +50,13 @@ void cmWIXAccessControlList::CreatePermissionElement(std::string const& entry)
     user = user_and_domain;
   }
 
-  std::vector<std::string> permissions = cmTokenize(permission_string, ",");
-
   this->SourceWriter.BeginElement("Permission");
   this->SourceWriter.AddAttribute("User", std::string(user));
   if (!domain.empty()) {
     this->SourceWriter.AddAttribute("Domain", std::string(domain));
   }
-  for (std::string const& permission : permissions) {
+  for (auto permission :
+       cmTokenizedView(permission_string, ',', cmTokenizerMode::New)) {
     this->EmitBooleanAttribute(entry, cmTrimWhitespace(permission));
   }
   this->SourceWriter.EndElement("Permission");
@@ -980,7 +980,7 @@ bool cmCPackInnoSetupGenerator::BuildDownloadedComponentArchive(
                   "Problem running certutil command: " << hashCmd
                                                        << std::endl);
   }
-  *hash = cmTrimWhitespace(cmTokenize(hashOutput, "\n").at(1));
+  *hash = cmTrimWhitespace(cmTokenizedView(hashOutput, '\n').at(1));
 
   if (hash->length() != 64) {
     cmCPackLogger(cmCPackLog::LOG_WARNING,
@@ -257,7 +257,7 @@ const LinkLibraryFeatureAttributeSet& GetLinkLibraryFeatureAttributes(
       if (processingOption.match(1) == "LIBRARY_TYPE") {
         featureAttributes.LibraryTypes.clear();
         for (auto const& value :
-             cmTokenize(processingOption.match(2), ","_s)) {
+             cmTokenize(processingOption.match(2), ',')) {
           if (value == "STATIC") {
             featureAttributes.LibraryTypes.emplace(
               cmStateEnums::STATIC_LIBRARY);
@@ -292,7 +292,8 @@ const LinkLibraryFeatureAttributeSet& GetLinkLibraryFeatureAttributes(
         }
       } else if (processingOption.match(1) == "OVERRIDE") {
         featureAttributes.Override.clear();
-        auto values = cmTokenize(processingOption.match(2), ","_s);
+        std::vector<std::string> values =
+          cmTokenize(processingOption.match(2), ',');
         featureAttributes.Override.insert(values.begin(), values.end());
       }
     } else {
@@ -668,13 +669,14 @@ cmComputeLinkDepends::cmComputeLinkDepends(const cmGeneratorTarget* target,
         *linkLibraryOverride, target->GetLocalGenerator(), config, target, &dag,
         target, linkLanguage);
 
-      auto overrideList = cmTokenize(overrideValue, ","_s);
+      std::vector<std::string> overrideList =
+        cmTokenize(overrideValue, ',', cmTokenizerMode::New);
       if (overrideList.size() >= 2) {
         auto const& feature = overrideList.front();
-        for_each(overrideList.cbegin() + 1, overrideList.cend(),
-                 [this, &feature](std::string const& item) {
-                   this->LinkLibraryOverride.emplace(item, feature);
-                 });
+        std::for_each(overrideList.cbegin() + 1, overrideList.cend(),
+                      [this, &feature](std::string const& item) {
+                        this->LinkLibraryOverride.emplace(item, feature);
+                      });
       }
     }
   }
@@ -565,11 +565,11 @@ std::vector<BT<std::string>>& cmGeneratorTarget::ResolveLinkerWrapper(
       cmSystemTools::ParseUnixCommandLine(
         value.c_str() + LINKER_SHELL.length(), linkerOptions);
     } else {
-      linkerOptions = cmTokenize(value.substr(LINKER.length()), ",");
+      linkerOptions =
+        cmTokenize(value.substr(LINKER.length()), ',', cmTokenizerMode::New);
     }
 
-    if (linkerOptions.empty() ||
-        (linkerOptions.size() == 1 && linkerOptions.front().empty())) {
+    if (linkerOptions.empty()) {
       continue;
     }
 
@@ -339,12 +339,13 @@ bool cmGlobalVisualStudio10Generator::SetGeneratorToolset(
 bool cmGlobalVisualStudio10Generator::ParseGeneratorToolset(
   std::string const& ts, cmMakefile* mf)
 {
-  std::vector<std::string> const fields = cmTokenize(ts, ",");
-  auto fi = fields.begin();
-  if (fi == fields.end()) {
+  std::vector<std::string> const fields =
+    cmTokenize(ts, ',', cmTokenizerMode::New);
+  if (fields.empty()) {
     return true;
   }
 
+  auto fi = fields.begin();
   // The first field may be the VS platform toolset.
   if (fi->find('=') == fi->npos) {
     this->GeneratorToolset = *fi;
@@ -128,12 +128,13 @@ bool cmGlobalVisualStudio8Generator::ParseGeneratorPlatform(
 {
   this->GeneratorPlatform.clear();
 
-  std::vector<std::string> const fields = cmTokenize(p, ",");
-  auto fi = fields.begin();
-  if (fi == fields.end()) {
+  std::vector<std::string> const fields =
+    cmTokenize(p, ',', cmTokenizerMode::New);
+  if (fields.empty()) {
     return true;
   }
 
+  auto fi = fields.begin();
   // The first field may be the VS platform.
   if (fi->find('=') == fi->npos) {
     this->GeneratorPlatform = *fi;
@@ -584,12 +584,13 @@ bool cmGlobalVisualStudioVersionedGenerator::ParseGeneratorInstance(
   this->GeneratorInstance.clear();
   this->GeneratorInstanceVersion.clear();
 
-  std::vector<std::string> const fields = cmTokenize(is, ",");
-  auto fi = fields.begin();
-  if (fi == fields.end()) {
+  std::vector<std::string> const fields =
+    cmTokenize(is, ',', cmTokenizerMode::New);
+  if (fields.empty()) {
     return true;
   }
 
+  auto fi = fields.begin();
   // The first field may be the VS instance.
   if (fi->find('=') == fi->npos) {
     this->GeneratorInstance = *fi;
@@ -355,12 +355,13 @@ bool cmGlobalXCodeGenerator::SetGeneratorToolset(std::string const& ts,
 bool cmGlobalXCodeGenerator::ParseGeneratorToolset(std::string const& ts,
                                                    cmMakefile* mf)
 {
-  std::vector<std::string> const fields = cmTokenize(ts, ",");
-  auto fi = fields.cbegin();
-  if (fi == fields.cend()) {
+  std::vector<std::string> const fields =
+    cmTokenize(ts, ',', cmTokenizerMode::New);
+  if (fields.empty()) {
     return true;
   }
 
+  auto fi = fields.cbegin();
   // The first field may be the Xcode GCC_VERSION.
   if (fi->find('=') == fi->npos) {
     this->GeneratorToolset = *fi;
@@ -4457,7 +4458,7 @@ cmXCodeObject* cmGlobalXCodeGenerator::CreateOrGetPBXGroup(
   if (it != this->TargetGroup.end()) {
     tgroup = it->second;
   } else {
-    std::vector<std::string> tgt_folders = cmTokenize(target, "/");
+    std::vector<std::string> const tgt_folders = cmTokenize(target, '/');
     std::string curr_tgt_folder;
     for (std::vector<std::string>::size_type i = 0; i < tgt_folders.size();
          i++) {
@@ -4490,7 +4491,7 @@ cmXCodeObject* cmGlobalXCodeGenerator::CreateOrGetPBXGroup(
   // It's a recursive folder structure, let's find the real parent group
   if (sg->GetFullName() != sg->GetName()) {
     std::string curr_folder = cmStrCat(target, '/');
-    for (auto const& folder : cmTokenize(sg->GetFullName(), "\\")) {
+    for (auto const& folder : cmTokenize(sg->GetFullName(), '\\')) {
       curr_folder += folder;
       auto const i_folder = this->GroupNameMap.find(curr_folder);
       // Create new folder
@@ -2387,13 +2387,9 @@ cmSourceGroup* cmMakefile::GetOrCreateSourceGroup(
 
 cmSourceGroup* cmMakefile::GetOrCreateSourceGroup(const std::string& name)
 {
-  std::string delimiters;
-  if (cmValue p = this->GetDefinition("SOURCE_GROUP_DELIMITER")) {
-    delimiters = *p;
-  } else {
-    delimiters = "/\\";
-  }
-  return this->GetOrCreateSourceGroup(cmTokenize(name, delimiters));
+  auto p = this->GetDefinition("SOURCE_GROUP_DELIMITER");
+  return this->GetOrCreateSourceGroup(
+    cmTokenize(name, p ? cm::string_view(*p) : R"(\/)"_s));
 }
 
 /**
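As the new GetOrCreateSourceGroup() code above relies on, a string separator passed to cmTokenize() is treated as a set of split characters rather than as a multi-character delimiter, since the tokenizer uses find_first_of/find_first_not_of. A small sketch, not part of the patch; DelimiterExample() is a hypothetical function for illustration only:

    // Sketch only: a string_view separator names a set of split characters.
    #include <cassert>
    #include <string>
    #include <vector>

    #include <cmext/string_view>

    #include "cmStringAlgorithms.h"

    void DelimiterExample() // hypothetical, for illustration
    {
      // R"(\/)"_s splits on either backslash or forward slash.
      std::vector<std::string> const parts =
        cmTokenize(R"(Group\Sub/Leaf)", R"(\/)"_s);
      assert((parts == std::vector<std::string>{ "Group", "Sub", "Leaf" }));
    }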
@@ -29,11 +29,6 @@ const std::string kFilesOptionName = "FILES";
 const std::string kRegexOptionName = "REGULAR_EXPRESSION";
 const std::string kSourceGroupOptionName = "<sg_name>";
 
-std::vector<std::string> tokenizePath(const std::string& path)
-{
-  return cmTokenize(path, "\\/");
-}
-
 std::set<std::string> getSourceGroupFilesPaths(
   const std::string& root, const std::vector<std::string>& files)
 {
@@ -95,31 +90,28 @@ bool addFilesToItsSourceGroups(const std::string& root,
   cmSourceGroup* sg;
 
   for (std::string const& sgFilesPath : sgFilesPaths) {
-
-    std::vector<std::string> tokenizedPath;
-    if (!prefix.empty()) {
-      tokenizedPath = tokenizePath(cmStrCat(prefix, '/', sgFilesPath));
-    } else {
-      tokenizedPath = tokenizePath(sgFilesPath);
-    }
-
-    if (!tokenizedPath.empty()) {
-      tokenizedPath.pop_back();
-
-      if (tokenizedPath.empty()) {
-        tokenizedPath.emplace_back();
-      }
-
-      sg = makefile.GetOrCreateSourceGroup(tokenizedPath);
-
-      if (!sg) {
-        errorMsg = "Could not create source group for file: " + sgFilesPath;
-        return false;
-      }
-      const std::string fullPath =
-        cmSystemTools::CollapseFullPath(sgFilesPath, root);
-      sg->AddGroupFile(fullPath);
-    }
+    std::vector<std::string> tokenizedPath = cmTokenize(
+      prefix.empty() ? sgFilesPath : cmStrCat(prefix, '/', sgFilesPath),
+      R"(\/)", cmTokenizerMode::New);
+
+    if (tokenizedPath.empty()) {
+      continue;
+    }
+    tokenizedPath.pop_back();
+
+    if (tokenizedPath.empty()) {
+      tokenizedPath.emplace_back();
+    }
+
+    sg = makefile.GetOrCreateSourceGroup(tokenizedPath);
+
+    if (!sg) {
+      errorMsg = "Could not create source group for file: " + sgFilesPath;
+      return false;
+    }
+    const std::string fullPath =
+      cmSystemTools::CollapseFullPath(sgFilesPath, root);
+    sg->AddGroupFile(fullPath);
   }
 
   return true;
@@ -55,30 +55,6 @@ std::string cmEscapeQuotes(cm::string_view str)
   return result;
 }
 
-std::vector<std::string> cmTokenize(cm::string_view str, cm::string_view sep)
-{
-  std::vector<std::string> tokens;
-  cm::string_view::size_type tokend = 0;
-
-  do {
-    cm::string_view::size_type tokstart = str.find_first_not_of(sep, tokend);
-    if (tokstart == cm::string_view::npos) {
-      break; // no more tokens
-    }
-    tokend = str.find_first_of(sep, tokstart);
-    if (tokend == cm::string_view::npos) {
-      tokens.emplace_back(str.substr(tokstart));
-    } else {
-      tokens.emplace_back(str.substr(tokstart, tokend - tokstart));
-    }
-  } while (tokend != cm::string_view::npos);
-
-  if (tokens.empty()) {
-    tokens.emplace_back();
-  }
-  return tokens;
-}
-
 namespace {
 template <std::size_t N, typename T>
 inline void MakeDigits(cm::string_view& view, char (&digits)[N],
@@ -24,7 +24,7 @@ using cmStringRange = cmRange<std::vector<std::string>::const_iterator>;
 
 /** Returns length of a literal string. */
 template <size_t N>
-constexpr size_t cmStrLen(const char (&/*str*/)[N])
+constexpr size_t cmStrLen(const char (&)[N])
 {
   return N - 1;
 }
@@ -91,12 +91,12 @@ std::string cmJoinStrings(Range const& rng, cm::string_view separator,
   }
 
   std::string result;
-  result.reserve(
-    std::accumulate(std::begin(rng), std::end(rng),
-                    initial.size() + (rng.size() - 1) * separator.size(),
-                    [](std::size_t sum, const std::string& item) {
-                      return sum + item.size();
-                    }));
+  result.reserve(std::accumulate(
+    std::begin(rng), std::end(rng),
+    initial.size() + (rng.size() - 1) * separator.size(),
+    [](std::size_t sum, typename Range::value_type const& item) {
+      return sum + item.size();
+    }));
   result.append(std::begin(initial), std::end(initial));
 
   auto begin = std::begin(rng);
@@ -122,8 +122,81 @@ std::string cmJoin(std::vector<std::string> const& rng,
 std::string cmJoin(cmStringRange const& rng, cm::string_view separator,
                    cm::string_view initial = {});
 
-/** Extract tokens that are separated by any of the characters in @a sep. */
-std::vector<std::string> cmTokenize(cm::string_view str, cm::string_view sep);
+enum class cmTokenizerMode
+{
+  /// A backward-compatible behavior when in the case of no
+  /// tokens have found in an input text it'll return one empty
+  /// token in the result container (vector).
+  Legacy,
+  /// The new behavior is to return an empty vector.
+  New
+};
+
+/**
+ * \brief A generic version of a tokenizer.
+ *
+ * Extract tokens from the input string separated by any
+ * of the characters in `sep` and assign them to the
+ * given output iterator.
+ *
+ * The `mode` parameter defines the behavior in the case when
+ * no tokens have found in the input text.
+ *
+ */
+template <typename StringT, typename OutIt, typename Sep = char>
+void cmTokenize(OutIt outIt, cm::string_view str, Sep sep,
+                cmTokenizerMode mode)
+{
+  auto hasTokens = false;
+  // clang-format off
+  for (auto start = str.find_first_not_of(sep)
+      , end = str.find_first_of(sep, start)
+      ; start != cm::string_view::npos
+      ; start = str.find_first_not_of(sep, end)
+      , end = str.find_first_of(sep, start)
+      , hasTokens = true
+      ) {
+    *outIt++ = StringT{ str.substr(start, end - start) };
+  }
+  // clang-format on
+  if (!hasTokens && mode == cmTokenizerMode::Legacy) {
+    *outIt = {};
+  }
+}
+
+/**
+ * \brief Extract tokens that are separated by any of the
+ * characters in `sep`.
+ *
+ * Backward compatible signature.
+ *
+ * \return A vector of strings.
+ */
+template <typename Sep = char>
+std::vector<std::string> cmTokenize(
+  cm::string_view str, Sep sep, cmTokenizerMode mode = cmTokenizerMode::Legacy)
+{
+  using StringType = std::string;
+  std::vector<StringType> tokens;
+  cmTokenize<StringType>(std::back_inserter(tokens), str, sep, mode);
+  return tokens;
+}
+
+/**
+ * \brief Extract tokens that are separated by any of the
+ * characters in `sep`.
+ *
+ * \return A vector of string views.
+ */
+template <typename Sep = char>
+std::vector<cm::string_view> cmTokenizedView(
+  cm::string_view str, Sep sep, cmTokenizerMode mode = cmTokenizerMode::Legacy)
+{
+  using StringType = cm::string_view;
+  std::vector<StringType> tokens;
+  cmTokenize<StringType>(std::back_inserter(tokens), str, sep, mode);
+  return tokens;
+}
 
 /** Concatenate string pieces into a single string. */
 std::string cmCatViews(
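The practical difference between the two cmTokenizerMode values added above, shown as a sketch that is not part of the patch; ModeExample() is a hypothetical function for illustration only:

    // Sketch only: Legacy vs. New behavior when the input contains no tokens.
    #include <cassert>
    #include <string>
    #include <vector>

    #include "cmStringAlgorithms.h"

    void ModeExample() // hypothetical, for illustration
    {
      // Legacy keeps the old contract: at least one (empty) token.
      assert(cmTokenize(";;", ';') == std::vector<std::string>{ "" });

      // New yields an empty vector instead.
      assert(cmTokenize(";;", ';', cmTokenizerMode::New).empty());
    }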
@@ -1131,7 +1131,7 @@ void cmake::SetArgs(const std::vector<std::string>& args)
     "--debug-find-pkg", "Provide a package argument for --debug-find-pkg",
     CommandArgument::Values::One, CommandArgument::RequiresSeparator::Yes,
     [](std::string const& value, cmake* state) -> bool {
-      std::vector<std::string> find_pkgs(cmTokenize(value, ","));
+      std::vector<std::string> find_pkgs(cmTokenize(value, ','));
       std::cout << "Running with debug output on for the 'find' commands "
                    "for package(s)";
       for (auto const& v : find_pkgs) {
@@ -1145,7 +1145,7 @@ void cmake::SetArgs(const std::vector<std::string>& args)
     "--debug-find-var", CommandArgument::Values::One,
     CommandArgument::RequiresSeparator::Yes,
     [](std::string const& value, cmake* state) -> bool {
-      std::vector<std::string> find_vars(cmTokenize(value, ","));
+      std::vector<std::string> find_vars(cmTokenize(value, ','));
       std::cout << "Running with debug output on for the variable(s)";
       for (auto const& v : find_vars) {
         std::cout << ' ' << v;
@@ -10,6 +10,7 @@
 #include <vector>
 
+#include <cm/string_view>
 #include <cmext/string_view>
 
 #include "cmStringAlgorithms.h"
 
@@ -103,7 +104,9 @@ int testStringAlgorithms(int /*unused*/, char* /*unused*/[])
   {
     typedef std::vector<std::string> VT;
     assert_ok(cmTokenize("", ";") == VT{ "" }, "cmTokenize empty");
-    assert_ok(cmTokenize(";", ";") == VT{ "" }, "cmTokenize sep");
+    assert_ok(cmTokenize(";", ";") == VT{ "" }, "cmTokenize sep (char*)");
+    assert_ok(cmTokenize(";", ";"_s) == VT{ "" },
+              "cmTokenize sep (string_view)");
     assert_ok(cmTokenize("abc", ";") == VT{ "abc" }, "cmTokenize item");
     assert_ok(cmTokenize("abc;", ";") == VT{ "abc" }, "cmTokenize item sep");
     assert_ok(cmTokenize(";abc", ";") == VT{ "abc" }, "cmTokenize sep item");
@@ -112,6 +115,22 @@ int testStringAlgorithms(int /*unused*/, char* /*unused*/[])
     assert_ok(cmTokenize("a1;a2;a3;a4", ";") == VT{ "a1", "a2", "a3", "a4" },
               "cmTokenize multiple items");
   }
+  {
+    typedef std::vector<cm::string_view> VT;
+    assert_ok(cmTokenizedView("", ';') == VT{ "" }, "cmTokenizedView empty");
+    assert_ok(cmTokenizedView(";", ';') == VT{ "" }, "cmTokenizedView sep");
+    assert_ok(cmTokenizedView("abc", ';') == VT{ "abc" },
+              "cmTokenizedView item");
+    assert_ok(cmTokenizedView("abc;", ';') == VT{ "abc" },
+              "cmTokenizedView item sep");
+    assert_ok(cmTokenizedView(";abc", ';') == VT{ "abc" },
+              "cmTokenizedView sep item");
+    assert_ok(cmTokenizedView("abc;;efg", ';') == VT{ "abc", "efg" },
+              "cmTokenizedView item sep sep item");
+    assert_ok(cmTokenizedView("a1;a2;a3;a4", ';') ==
+                VT{ "a1", "a2", "a3", "a4" },
+              "cmTokenizedView multiple items");
+  }
 
   // ----------------------------------------------------------------------
   // Test cmStrCat