regex: Match ^ at most once in repeated searches

When doing successive matches, track the input start and current search
start positions separately to prevent the `^` anchor from matching in
the middle of the string.  Add policy CMP0186 to provide compatibility.

Issue: #26629
Fixes: #16899
This commit is contained in:
Nikita Nemkin 2025-01-26 21:55:08 +05:00
parent 90625865e1
commit 5d039f3be3
26 changed files with 301 additions and 28 deletions

View File

@ -246,6 +246,11 @@ For more information on regular expressions look under
<replace_expression> ...)
:target: TRANSFORM_REPLACE
.. versionchanged:: 4.1
The ``^`` anchor now matches only at the beginning of the input
element instead of the beginning of each repeated search.
See policy :policy:`CMP0186`.
``<SELECTOR>`` determines which elements of the list will be transformed.
Only one type of selector can be specified at a time.
When given, ``<SELECTOR>`` must be one of the following:

View File

@ -117,6 +117,11 @@ Search and Replace With Regular Expressions
two backslashes (``\\1``) are required in CMake code to get a backslash
through argument parsing.
.. versionchanged:: 4.1
The ``^`` anchor now matches only at the beginning of the input
string instead of the beginning of each repeated search.
See policy :policy:`CMP0186`.
.. _`Regex Specification`:
Regex Specification

View File

@ -532,6 +532,11 @@ List Transformations
$<LIST:TRANSFORM,list,REPLACE,regular_expression,replace_expression[,SELECTOR]>
.. versionchanged:: 4.1
The ``^`` anchor now matches only at the beginning of the input
element instead of the beginning of each repeated search.
See policy :policy:`CMP0186`.
``SELECTOR`` determines which items of the list will be transformed.
Only one type of selector can be specified at a time. When given,
``SELECTOR`` must be one of the following:

View File

@ -92,6 +92,14 @@ Supported Policies
The following policies are supported.
Policies Introduced by CMake 4.1
--------------------------------
.. toctree::
:maxdepth: 1
CMP0186: Regular expressions match ^ at most once in repeated searches. </policy/CMP0186>
Policies Introduced by CMake 4.0
--------------------------------

43
Help/policy/CMP0186.rst Normal file
View File

@ -0,0 +1,43 @@
CMP0186
-------
.. versionadded:: 4.1
Regular expressions match ``^`` at most once in repeated searches.
This policy affects commands that perform multiple regular expression
searches:
* :command:`string(REGEX MATCHALL)`
* :command:`string(REGEX REPLACE)`
* :command:`list(TRANSFORM REPLACE)`
and the generator expression :genex:`$<LIST:TRANSFORM,list,REPLACE>`.
CMake 4.0 and below match the ``^`` anchor at the start of every
successive search, leading to multiple matches:
.. code-block:: cmake
string(REGEX REPLACE "^a" "b" result "aaaa") # result="bbbb"
string(REGEX MATCHALL "^a" result "aaaa") # result="a;a;a;a"
CMake 4.1 and above prefer to match the ``^`` anchor at most once,
at the start of the input string:
.. code-block:: cmake
string(REGEX REPLACE "^a" "b" result "aaaa") # result="abbb"
string(REGEX MATCHALL "^a" result "aaaa") # result="a"
This policy provides compatibility for projects that have not been updated.
The ``OLD`` behavior for this policy is to match ``^`` multiple times,
at the start of each search. The ``NEW`` behavior for this policy is
to match ``^`` at most once, at the start of the input string.
.. |INTRODUCED_IN_CMAKE_VERSION| replace:: 4.1
.. |WARNS_OR_DOES_NOT_WARN| replace:: does *not* warn
.. include:: STANDARD_ADVICE.txt
.. include:: DEPRECATED.txt

View File

@ -0,0 +1,5 @@
regex-fixes
-----------
* Regular expressions match the ``^`` anchor at most once in repeated
searches, at the start of the input. See policy :policy:`CMP0186`.

View File

@ -1706,6 +1706,11 @@ static const struct ListNode : public cmGeneratorExpressionNode
return std::string{};
}
if (!selector) {
selector = cmList::TransformSelector::New();
}
selector->Makefile = ctx->LG->GetMakefile();
return list
.transform(descriptor->Action, arguments,
std::move(selector))

View File

@ -523,8 +523,8 @@ public:
std::string const& replace) override
{
TransformAction::Initialize(selector);
this->ReplaceHelper =
cm::make_unique<cmStringReplaceHelper>(regex, replace);
this->ReplaceHelper = cm::make_unique<cmStringReplaceHelper>(
regex, replace, selector->Makefile);
if (!this->ReplaceHelper->IsRegularExpressionValid()) {
throw transform_error(
@ -643,6 +643,11 @@ ActionDescriptorSet::iterator TransformConfigure(
}
}
// Create a selector backed by TransformNoSelector.  HandleTransformCommand
// and the ListNode generator expression call this when no selector was
// specified, so that there is always a selector object on which to set
// the Makefile pointer.
std::unique_ptr<cmList::TransformSelector> cmList::TransformSelector::New()
{
return cm::make_unique<TransformNoSelector>();
}
std::unique_ptr<cmList::TransformSelector> cmList::TransformSelector::NewAT(
std::initializer_list<index_type> indexes)
{

View File

@ -23,6 +23,7 @@
template <typename T>
class BT;
class cmMakefile;
/**
* CMake lists management
@ -893,6 +894,7 @@ public:
// cmList::TransformSelector::New<AT>({1, 2, 5, 6});
// or
// cmList::TransformSelector::New<REGEX>("^XX.*");
static std::unique_ptr<TransformSelector> New();
template <typename Type>
static std::unique_ptr<TransformSelector> New(
std::initializer_list<index_type>);
@ -907,6 +909,8 @@ public:
template <typename Type>
static std::unique_ptr<TransformSelector> New(std::string&&);
cmMakefile* Makefile = nullptr;
private:
static std::unique_ptr<TransformSelector> NewAT(
std::initializer_list<index_type> init);

View File

@ -678,6 +678,11 @@ bool HandleTransformCommand(std::vector<std::string> const& args,
return true;
}
if (!selector) {
selector = cmList::TransformSelector::New();
}
selector->Makefile = &status.GetMakefile();
list->transform(descriptor->Action, arguments, std::move(selector));
status.GetMakefile().AddDefinition(outputName, list->to_string());
return true;

View File

@ -555,7 +555,10 @@ class cmMakefile;
WARN) \
SELECT(POLICY, CMP0185, \
"FindRuby no longer provides upper-case RUBY_* variables.", 4, 0, 0, \
WARN)
WARN) \
SELECT(POLICY, CMP0186, \
"Regular expressions match ^ at most once in repeated searches.", 4, \
1, 0, WARN)
#define CM_SELECT_ID(F, A1, A2, A3, A4, A5, A6) F(A1)
#define CM_FOR_EACH_POLICY_ID(POLICY) \

View File

@ -29,6 +29,7 @@
#include "cmGeneratorExpression.h"
#include "cmMakefile.h"
#include "cmMessageType.h"
#include "cmPolicies.h"
#include "cmRange.h"
#include "cmStringAlgorithms.h"
#include "cmStringReplaceHelper.h"
@ -288,10 +289,16 @@ bool RegexMatchAll(std::vector<std::string> const& args,
// Concatenate all the last arguments together.
std::string input = cmJoin(cmMakeRange(args).advance(4), std::string());
unsigned optAnchor = 0;
if (status.GetMakefile().GetPolicyStatus(cmPolicies::CMP0186) !=
cmPolicies::NEW) {
optAnchor = cmsys::RegularExpression::BOL_AT_OFFSET;
}
// Scan through the input for all matches.
std::string output;
char const* p = input.c_str();
while (re.find(p)) {
std::string::size_type base = 0;
while (re.find(input, base, optAnchor)) {
status.GetMakefile().ClearMatches();
status.GetMakefile().StoreMatches(re);
std::string::size_type l = re.start();
@ -305,8 +312,8 @@ bool RegexMatchAll(std::vector<std::string> const& args,
if (!output.empty()) {
output += ";";
}
output += std::string(p + l, r - l);
p += r;
output += re.match();
base = r;
}
// Store the output in the provided variable.

View File

@ -7,6 +7,7 @@
#include <utility>
#include "cmMakefile.h"
#include "cmPolicies.h"
cmStringReplaceHelper::cmStringReplaceHelper(std::string const& regex,
std::string replace_expr,
@ -24,9 +25,16 @@ bool cmStringReplaceHelper::Replace(std::string const& input,
{
output.clear();
unsigned optAnchor = 0;
if (this->Makefile &&
this->Makefile->GetPolicyStatus(cmPolicies::CMP0186) !=
cmPolicies::NEW) {
optAnchor = cmsys::RegularExpression::BOL_AT_OFFSET;
}
// Scan through the input for all matches.
std::string::size_type base = 0;
while (this->RegularExpression.find(input.c_str() + base)) {
while (this->RegularExpression.find(input, base, optAnchor)) {
if (this->Makefile) {
this->Makefile->ClearMatches();
this->Makefile->StoreMatches(this->RegularExpression);
@ -35,7 +43,7 @@ bool cmStringReplaceHelper::Replace(std::string const& input,
auto r = this->RegularExpression.end();
// Concatenate the part of the input that was not matched.
output += input.substr(base, l2);
output += input.substr(base, l2 - base);
// Make sure the match had some text.
if (r - l2 == 0) {
@ -54,11 +62,8 @@ bool cmStringReplaceHelper::Replace(std::string const& input,
// Replace with part of the match.
auto n = replacement.Number;
auto start = this->RegularExpression.start(n);
auto end = this->RegularExpression.end(n);
auto len = input.length() - base;
if ((start != std::string::npos) && (end != std::string::npos) &&
(start <= len) && (end <= len)) {
output += input.substr(base + start, end - start);
if (start != std::string::npos) {
output += this->RegularExpression.match(n);
} else {
std::ostringstream error;
error << "replace expression \"" << this->ReplaceExpression
@ -71,11 +76,11 @@ bool cmStringReplaceHelper::Replace(std::string const& input,
}
// Move past the match.
base += r;
base = r;
}
// Concatenate the text after the last match.
output += input.substr(base, input.length() - base);
output += input.substr(base);
return true;
}

View File

@ -740,7 +740,7 @@ bool testTransform()
cmList list({ "ABC", "BBCB", "BCCCBC", "BCBCDD", "EBCBCEBC" });
list.transform(cmList::TransformAction::REPLACE, "^BC|BC$", "X");
if (list.to_string() != "AX;BBCB;XCCX;XXDD;EBCBCEX") {
if (list.to_string() != "AX;BBCB;XCCX;XBCDD;EBCBCEX") {
result = false;
}
}

View File

@ -60,7 +60,7 @@ function(toExpectedContentList FILE_NO CONTENT_VAR)
unset(filtered_)
foreach(part_ IN LISTS prepared_)
string(REGEX REPLACE "^/" "" part_ "${part_}")
string(REGEX REPLACE "^/+" "" part_ "${part_}")
if(part_)
list(APPEND filtered_ "${prefix_}${part_}")

View File

@ -0,0 +1,11 @@
# Expected content of generated.txt: one line per $<LIST:TRANSFORM,...,REPLACE>
# case, with `^` matched only at the start of each list element.
set(expected "
000;1001;002
x000;1001;x002
x000;x01;x002
")
# Read the file produced in the test's binary directory and compare verbatim.
file(READ "${RunCMake_TEST_BINARY_DIR}/generated.txt" generated)
if(NOT generated STREQUAL expected)
# Record both values on mismatch; presumably the RunCMake harness reports
# this variable as the failure reason -- confirm against RunCMake.cmake.
set(RunCMake_TEST_FAILED "generated:${generated}\nexpected:${expected}")
endif()

View File

@ -0,0 +1,5 @@
# Write generated.txt containing three $<LIST:TRANSFORM,...,REPLACE>
# evaluations of the list "0000;1001;0002", one per line, each using a regex
# with a `^` anchor.  The content begins and ends with a newline.
file(GENERATE OUTPUT "generated.txt" CONTENT "
$<LIST:TRANSFORM,0000;1001;0002,REPLACE,^0,>
$<LIST:TRANSFORM,0000;1001;0002,REPLACE,^(a|0),x>
$<LIST:TRANSFORM,0000;1001;0002,REPLACE,(1|^)0,x>
")

View File

@ -0,0 +1,11 @@
# Expected content of generated.txt: one line per $<LIST:TRANSFORM,...,REPLACE>
# case, with `^` matched again at the start of every repeated search.
set(expected "
;1001;2
xxxx;1001;xxx2
xxxx;xx1;xxx2
")
# Read the file produced in the test's binary directory and compare verbatim.
file(READ "${RunCMake_TEST_BINARY_DIR}/generated.txt" generated)
if(NOT generated STREQUAL expected)
# Record both values on mismatch; presumably the RunCMake harness reports
# this variable as the failure reason -- confirm against RunCMake.cmake.
set(RunCMake_TEST_FAILED "generated:${generated}\nexpected:${expected}")
endif()

View File

@ -0,0 +1 @@
include(CMP0186-NEW.cmake)

View File

@ -128,3 +128,6 @@ check_list_execution (TRANSFORM-PREPEND)
check_list_execution (TRANSFORM-REPLACE)
check_list_execution (REVERSE)
check_list_execution (SORT)
run_cmake_with_options(CMP0186-OLD -DCMAKE_POLICY_DEFAULT_CMP0186=OLD)
run_cmake_with_options(CMP0186-NEW -DCMAKE_POLICY_DEFAULT_CMP0186=NEW)

View File

@ -0,0 +1,43 @@
# Exercise list(TRANSFORM ... REPLACE) under both settings of policy CMP0186.
# Every case applies one regex to each element of `mylist` and compares the
# resulting list with the value expected for that policy setting.
set(mylist 0000 1001 0002)

# OLD: `^` matches again at the start of every repeated search.
cmake_policy(SET CMP0186 OLD)

unset(output)
list(TRANSFORM mylist REPLACE "^0" "" OUTPUT_VARIABLE output)
if (NOT output STREQUAL ";1001;2")
  message(FATAL_ERROR "TRANSFORM(REPLACE) is \"${output}\", expected is \";1001;2\"")
endif()

unset(output)
list(TRANSFORM mylist REPLACE "^(a|0)" "x" OUTPUT_VARIABLE output)
if (NOT output STREQUAL "xxxx;1001;xxx2")
  message(FATAL_ERROR "TRANSFORM(REPLACE) is \"${output}\", expected is \"xxxx;1001;xxx2\"")
endif()

unset(output)
list(TRANSFORM mylist REPLACE "(1|^)0" "x" OUTPUT_VARIABLE output)
if (NOT output STREQUAL "xxxx;xx1;xxx2")
  message(FATAL_ERROR "TRANSFORM(REPLACE) is \"${output}\", expected is \"xxxx;xx1;xxx2\"")
endif()

# NEW, same cases as above: `^` matches only at the start of each element.
cmake_policy(SET CMP0186 NEW)

unset(output)
list(TRANSFORM mylist REPLACE "^0" "" OUTPUT_VARIABLE output)
if (NOT output STREQUAL "000;1001;002")
  message(FATAL_ERROR "TRANSFORM(REPLACE) is \"${output}\", expected is \"000;1001;002\"")
endif()

unset(output)
list(TRANSFORM mylist REPLACE "^(a|0)" "x" OUTPUT_VARIABLE output)
if (NOT output STREQUAL "x000;1001;x002")
  message(FATAL_ERROR "TRANSFORM(REPLACE) is \"${output}\", expected is \"x000;1001;x002\"")
endif()

# In "1001" only the leading "10" matches; the following "0" is no longer at
# a `^` position, so the element becomes "x01".  The failure message must
# quote the same expected value that the condition tests.
unset(output)
list(TRANSFORM mylist REPLACE "(1|^)0" "x" OUTPUT_VARIABLE output)
if (NOT output STREQUAL "x000;x01;x002")
  message(FATAL_ERROR "TRANSFORM(REPLACE) is \"${output}\", expected is \"x000;x01;x002\"")
endif()

View File

@ -90,6 +90,7 @@ run_cmake(TRANSFORM-GENEX_STRIP)
run_cmake(TRANSFORM-APPEND)
run_cmake(TRANSFORM-PREPEND)
run_cmake(TRANSFORM-REPLACE)
run_cmake(CMP0186)
# argument tests
run_cmake(SORT-WrongOption)

View File

@ -0,0 +1,90 @@
# Assert that the variable called `name` currently holds `expected`.
# Returns quietly on a match; otherwise stops with a FATAL_ERROR that shows
# both the actual and the expected value.
function(check_output name expected)
  set(output "${${name}}")
  if(output STREQUAL expected)
    return()
  endif()
  message(FATAL_ERROR "\"string(REGEX)\" set ${name} to \"${output}\", expected \"${expected}\"")
endfunction()
# OLD behavior: `^` may match at the start of every successive search.
cmake_policy(SET CMP0186 OLD)
# MATCHALL: each successive search can match `^` again.
string(REGEX MATCHALL "^0" out "0000")
check_output(out "0;0;0;0")
# `^0+` consumes the whole input in a single match, so the result is the
# same under both policy settings.
string(REGEX MATCHALL "^0+" out "0000")
check_output(out "0000")
string(REGEX MATCHALL "^(0|a)" out "0000" )
check_output(out "0;0;0;0")
string(REGEX MATCHALL "^(0|a)" out "aaaa")
check_output(out "a;a;a;a")
string(REGEX MATCHALL "^(0|a)" out "a0a0")
check_output(out "a;0;a;0")
# `^` inside an alternation: the anchored branch matches twice here.
string(REGEX MATCHALL "(^|a)0" out "00a0")
check_output(out "0;0;a0")
# REPLACE: every character ends up replaced because `^` keeps matching.
string(REGEX REPLACE "^0" "" out "0000")
check_output(out "")
string(REGEX REPLACE "^0" "x" out "0000")
check_output(out "xxxx")
string(REGEX REPLACE "^0+" "x" out "0000")
check_output(out "x")
string(REGEX REPLACE "^(0|a)" "x" out "0000")
check_output(out "xxxx")
string(REGEX REPLACE "^(0|a)" "x" out "aaaa")
check_output(out "xxxx")
string(REGEX REPLACE "^(0|a)" "x" out "a0a0")
check_output(out "xxxx")
string(REGEX REPLACE "(^|a)0" "x" out "00a0")
check_output(out "xxx")
# NEW, same cases as above: `^` matches at most once, at the start of the
# input string.
cmake_policy(SET CMP0186 NEW)
# MATCHALL: only the first search can satisfy `^`.
string(REGEX MATCHALL "^0" out "0000")
check_output(out "0")
string(REGEX MATCHALL "^0+" out "0000")
check_output(out "0000")
string(REGEX MATCHALL "^(0|a)" out "0000")
check_output(out "0")
string(REGEX MATCHALL "^(0|a)" out "aaaa")
check_output(out "a")
string(REGEX MATCHALL "^(0|a)" out "a0a0")
check_output(out "a")
# The unanchored `a0` branch can still match later in the input.
string(REGEX MATCHALL "(^|a)0" out "00a0")
check_output(out "0;a0")
# REPLACE: only the match at the very start of the input is anchored.
string(REGEX REPLACE "^0" "" out "0000")
check_output(out "000")
string(REGEX REPLACE "^0" "x" out "0000")
check_output(out "x000")
string(REGEX REPLACE "^0+" "x" out "0000")
check_output(out "x")
string(REGEX REPLACE "^(0|a)" "x" out "0000")
check_output(out "x000")
string(REGEX REPLACE "^(0|a)" "x" out "aaaa")
check_output(out "xaaa")
string(REGEX REPLACE "^(0|a)" "x" out "a0a0")
check_output(out "x0a0")
string(REGEX REPLACE "(^|a)0" "x" out "00a0")
check_output(out "x0x")

View File

@ -1,12 +1,12 @@
^matches: Some::;Scope
^matches: Some::
results from: string\(REGEX MATCHALL\)
CMAKE_MATCH_0: -->Scope<--
CMAKE_MATCH_1: -->Scope<--
CMAKE_MATCH_2: --><--
CMAKE_MATCH_COUNT: -->1<--
replace: \[Some\]\[Scope\]
CMAKE_MATCH_0: -->Some::<--
CMAKE_MATCH_1: -->Some<--
CMAKE_MATCH_2: -->::<--
CMAKE_MATCH_COUNT: -->2<--
replace: \[Some\]Scope
results from: string\(REGEX REPLACE\)
CMAKE_MATCH_0: -->Scope<--
CMAKE_MATCH_1: -->Scope<--
CMAKE_MATCH_2: --><--
CMAKE_MATCH_COUNT: -->1<--$
CMAKE_MATCH_0: -->Some::<--
CMAKE_MATCH_1: -->Some<--
CMAKE_MATCH_2: -->::<--
CMAKE_MATCH_COUNT: -->2<--$

View File

@ -1,3 +1,5 @@
cmake_policy(SET CMP0186 NEW)
function (output_results msg)
message("results from: ${msg}")
message("CMAKE_MATCH_0: -->${CMAKE_MATCH_0}<--")

View File

@ -35,6 +35,7 @@ run_cmake(UuidBadType)
run_cmake(RegexClear)
run_cmake(RegexMultiMatchClear)
run_cmake(CMP0186)
run_cmake(UTF-16BE)
run_cmake(UTF-16LE)