Boost C++ Libraries

...one of the most highly regarded and expertly designed C++ library projects in the world. Herb Sutter and Andrei Alexandrescu, C++ Coding Standards

libs/regex/test/regress/test_backrefs.cpp

/*
 *
 * Copyright (c) 2004
 * John Maddock
 *
 * Use, modification and distribution are subject to the 
 * Boost Software License, Version 1.0. (See accompanying file 
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */

#include "test.hpp"

#ifdef BOOST_MSVC
#pragma warning(disable:4127)
#endif

void test_backrefs()
{
   using namespace boost::regex_constants;
   TEST_INVALID_REGEX("a(b)\\2c", perl);
#ifdef BOOST_REGEX_CXX03
   TEST_INVALID_REGEX("a(b\\1)c", perl);
#endif
   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^(.)\\1", perl, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a([bc])\\1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
   TEST_REGEX_SEARCH("a\\([bc]\\)\\1d", basic, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
#ifndef BOOST_REGEX_CXX03
   TEST_REGEX_SEARCH("(\\2two|(one))+", perl, "oneonetwo", match_default, make_array(0, 9, 3, 9, 0, 3, -2, -2));
   TEST_INVALID_REGEX("(\\3two|(one))+", perl);
#endif
   // strictly speaking this is at best ambiguous, at worst wrong, this is what most
   // re implimentations will match though.
   TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbccd", match_default, make_array(0, 6, 3, 5, 3, 4, -2, -2));
   TEST_REGEX_SEARCH("a(([bc])\\2)*d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a((b)*\\2)*d", perl, "abbbd", match_default, make_array(0, 5, 1, 4, 2, 3, -2, -2));
   TEST_REGEX_SEARCH("(ab*)[ab]*\\1", perl, "ababaaa", match_default, make_array(0, 4, 0, 2, -2, 4, 7, 4, 5, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bcd", perl, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabd", match_default, make_array(0, 4, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bc*d", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("(a)\\1bc*[ce]d", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("^(a)\\1b(c)*cd$", perl, "aabcccd", match_default, make_array(0, 7, 0, 1, 4, 5, -2, -2));
   TEST_REGEX_SEARCH("a\\(b*\\)c\\1d", basic, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a\\(b*\\)c\\1d", basic, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a\\(b*\\)c\\1d", basic, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^\\(.\\)\\1", basic, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a\\([bc]\\)\\1d", basic, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
   // strictly speaking this is at best ambiguous, at worst wrong, this is what most
   // re implimentations will match though.
   TEST_REGEX_SEARCH("a\\(\\([bc]\\)\\2\\)*d", basic, "abbccd", match_default, make_array(0, 6, 3, 5, 3, 4, -2, -2));
   TEST_REGEX_SEARCH("a\\(\\([bc]\\)\\2\\)*d", basic, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a\\(\\(b\\)*\\2\\)*d", basic, "abbbd", match_default, make_array(0, 5, 1, 4, 2, 3, -2, -2));
   TEST_REGEX_SEARCH("\\(a\\)\\1bcd", basic, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\(a\\)\\1bc*d", basic, "aabcd", match_default, make_array(0, 5, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\(a\\)\\1bc*d", basic, "aabd", match_default, make_array(0, 4, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\(a\\)\\1bc*d", basic, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("\\(a\\)\\1bc*[ce]d", basic, "aabcccd", match_default, make_array(0, 7, 0, 1, -2, -2));
   TEST_REGEX_SEARCH("^\\(a\\)\\1b\\(c\\)*cd$", basic, "aabcccd", match_default, make_array(0, 7, 0, 1, 4, 5, -2, -2));
   TEST_REGEX_SEARCH("\\(ab*\\)[ab]*\\1", basic, "ababaaa", match_default, make_array(0, 7, 0, 1, -2, -2));
   //
   // Now test the \g version:
   //
   TEST_INVALID_REGEX("a(b)\\g2c", perl);
#ifdef BOOST_REGEX_CXX03
   TEST_INVALID_REGEX("a(b\\g1)c", perl);
#endif
   TEST_INVALID_REGEX("a(b\\g0)c", perl);
   TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\g1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^(.)\\g1", perl, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a([bc])\\g1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
   TEST_INVALID_REGEX("a(b)\\g{2}c", perl);
#ifdef BOOST_REGEX_CXX03
   TEST_INVALID_REGEX("a(b\\g{1})c", perl);
#endif
   TEST_INVALID_REGEX("a(b\\g{0})c", perl);
#ifndef BOOST_REGEX_CXX03
   TEST_REGEX_SEARCH("(\\g{2}two|(one))+", perl, "oneonetwo", match_default, make_array(0, 9, 3, 9, 0, 3, -2, -2));
   TEST_INVALID_REGEX("(\\g{3}two|(one))+", perl);
#endif
   TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\g{1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^(.)\\g{1}", perl, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a([bc])\\g{1}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
   // And again but with negative indexes:
   TEST_INVALID_REGEX("a(b)\\g-2c", perl);
#ifdef BOOST_REGEX_CXX03
   TEST_INVALID_REGEX("a(b\\g-1)c", perl);
#endif
   TEST_INVALID_REGEX("a(b\\g-0)c", perl);
   TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\g-1d", perl, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^(.)\\g1", perl, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a([bc])\\g1d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
   TEST_INVALID_REGEX("a(b)\\g{-2}c", perl);
#ifdef BOOST_REGEX_CXX03
   TEST_INVALID_REGEX("a(b\\g{-1})c", perl);
#endif
   TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(b*)c\\g{-1}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^(.)\\g{-1}", perl, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a([bc])\\g{-1}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));
   
   // And again but with named subexpressions:
   TEST_REGEX_SEARCH("a(?<foo>(?<bar>(?<bb>(?<aa>b*))))c\\g{foo}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, 1, 3, 1, 3, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a(?<foo>(?<bar>(?<bb>(?<aa>b*))))c\\g{foo}d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(?<foo>(?<bar>(?<bb>(?<aa>b*))))c\\g{foo}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^(?<foo>.)\\g{foo}", perl, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(?<foo>[bc])\\g{foo}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));

   TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbbd", match_default, make_array(0, 7, 1, 3, 1, 3, 1, 3, 1, 3, -2, -2));
   TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(?'foo'(?'bar'(?'bb'(?'aa'b*))))c\\g{foo}d", perl, "abbcbbbd", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("^(?'foo'.)\\g{foo}", perl, "abc", match_default, make_array(-2, -2));
   TEST_REGEX_SEARCH("a(?'foo'[bc])\\g{foo}d", perl, "abcdabbd", match_default, make_array(4, 8, 5, 6, -2, -2));

   // Bug cases from https://github.com/boostorg/regex/issues/75
   TEST_REGEX_SEARCH("(?:(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)\\g{-1}|WORKING)", perl, "WORKING", match_default, make_array(0, 7, -2, -2));
   TEST_REGEX_SEARCH("(?:(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)(z)\\g{-1}|WORKING)", perl, "WORKING", match_default, make_array(0, 7, -2, -2));

}