...one of the most highly
regarded and expertly designed C++ library projects in the
world.
— Herb Sutter and Andrei
Alexandrescu, C++
Coding Standards
Type u32regex_iterator is in all respects the same as regex_iterator, except that, since the regular expression type is always u32regex, it takes only one template parameter (the iterator type). It also calls u32regex_search internally, allowing it to interface correctly with UTF-8, UTF-16, and UTF-32 data:
template <class BidirectionalIterator> class u32regex_iterator { // for members see regex_iterator }; typedef u32regex_iterator<const char*> utf8regex_iterator; typedef u32regex_iterator<const UChar*> utf16regex_iterator; typedef u32regex_iterator<const UChar32*> utf32regex_iterator;
In order to simplify the construction of a u32regex_iterator
from a string, there is a series of non-member helper functions called
make_u32regex_iterator:
// Non-member helpers that construct a u32regex_iterator from a string.
// Every overload returns an iterator that enumerates all occurrences of
// expression e in text s, using match flags m (match_default if omitted).

// Text supplied as a const char* string.
u32regex_iterator<const char*>
   make_u32regex_iterator(const char* s,
                          const u32regex& e,
                          regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a const wchar_t* string.
u32regex_iterator<const wchar_t*>
   make_u32regex_iterator(const wchar_t* s,
                          const u32regex& e,
                          regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a const UChar* string.
u32regex_iterator<const UChar*>
   make_u32regex_iterator(const UChar* s,
                          const u32regex& e,
                          regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as any std::basic_string; the result iterates over the
// string's const_iterator type.
template <class charT, class Traits, class Alloc>
u32regex_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator>
   make_u32regex_iterator(const std::basic_string<charT, Traits, Alloc>& s,
                          const u32regex& e,
                          regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a UnicodeString; the result iterates over const UChar*.
u32regex_iterator<const UChar*>
   make_u32regex_iterator(const UnicodeString& s,
                          const u32regex& e,
                          regex_constants::match_flag_type m = regex_constants::match_default);
Each of these overloads returns an iterator that enumerates all occurrences of expression e, in text s, using match_flags m.
Example: search for international currency symbols, along with their associated numeric value:
void enumerate_currencies(const std::string& text) { // enumerate and print all the currency symbols, along // with any associated numeric values: const char* re = "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?" "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?" "(?(1)" "|(?(2)" "[[:Cf:][:Cc:][:Z*:]]*" ")" "[[:Sc:]]" ")"; boost::u32regex r = boost::make_u32regex(re); boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j; while(i != j) { std::cout << (*i)[0] << std::endl; ++i; } }
Calling
enumerate_currencies(" $100.23 or £198.12 ");
yields the output:
$100.23
£198.12
provided, of course, that the input is encoded as UTF-8.
Type u32regex_token_iterator is in all respects the same as regex_token_iterator, except that, since the regular expression type is always u32regex, it takes only one template parameter (the iterator type). It also calls u32regex_search internally, allowing it to interface correctly with UTF-8, UTF-16, and UTF-32 data:
template <class BidirectionalIterator> class u32regex_token_iterator { // for members see regex_token_iterator }; typedef u32regex_token_iterator<const char*> utf8regex_token_iterator; typedef u32regex_token_iterator<const UChar*> utf16regex_token_iterator; typedef u32regex_token_iterator<const UChar32*> utf32regex_token_iterator;
In order to simplify the construction of a u32regex_token_iterator
from a string, there is a series of non-member helper functions called
make_u32regex_token_iterator:
// Non-member helpers that construct a u32regex_token_iterator from a string
// and a single sub-expression index. Every overload returns an iterator that
// enumerates all occurrences of marked sub-expression sub in regular
// expression e, found in text s, using match flags m (match_default if
// omitted).

// Text supplied as a const char* string.
u32regex_token_iterator<const char*>
   make_u32regex_token_iterator(
      const char* s,
      const u32regex& e,
      int sub,
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a const wchar_t* string.
u32regex_token_iterator<const wchar_t*>
   make_u32regex_token_iterator(
      const wchar_t* s,
      const u32regex& e,
      int sub,
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a const UChar* string.
u32regex_token_iterator<const UChar*>
   make_u32regex_token_iterator(
      const UChar* s,
      const u32regex& e,
      int sub,
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as any std::basic_string; the result iterates over the
// string's const_iterator type.
template <class charT, class Traits, class Alloc>
u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator>
   make_u32regex_token_iterator(
      const std::basic_string<charT, Traits, Alloc>& s,
      const u32regex& e,
      int sub,
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a UnicodeString; the result iterates over const UChar*.
u32regex_token_iterator<const UChar*>
   make_u32regex_token_iterator(
      const UnicodeString& s,
      const u32regex& e,
      int sub,
      regex_constants::match_flag_type m = regex_constants::match_default);
Each of these overloads returns an iterator that enumerates all occurrences of marked sub-expression sub in regular expression e, found in text s, using match_flags m.
// Overloads taking a fixed-size array of sub-expression indexes. Each one
// returns an iterator that enumerates one sub-expression for each entry of
// submatch in regular expression e, found in text s, using match flags m
// (match_default if omitted).
//
// The text parameter is named s in every overload so that the declarations
// agree with each other and with the accompanying description.

// Text supplied as a const char* string.
template <std::size_t N>
u32regex_token_iterator<const char*>
   make_u32regex_token_iterator(
      const char* s,
      const u32regex& e,
      const int (&submatch)[N],
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a const wchar_t* string.
template <std::size_t N>
u32regex_token_iterator<const wchar_t*>
   make_u32regex_token_iterator(
      const wchar_t* s,
      const u32regex& e,
      const int (&submatch)[N],
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a const UChar* string.
template <std::size_t N>
u32regex_token_iterator<const UChar*>
   make_u32regex_token_iterator(
      const UChar* s,
      const u32regex& e,
      const int (&submatch)[N],
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as any std::basic_string; the result iterates over the
// string's const_iterator type.
template <class charT, class Traits, class Alloc, std::size_t N>
u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator>
   make_u32regex_token_iterator(
      const std::basic_string<charT, Traits, Alloc>& s,
      const u32regex& e,
      const int (&submatch)[N],
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a UnicodeString; the result iterates over const UChar*.
template <std::size_t N>
u32regex_token_iterator<const UChar*>
   make_u32regex_token_iterator(
      const UnicodeString& s,
      const u32regex& e,
      const int (&submatch)[N],
      regex_constants::match_flag_type m = regex_constants::match_default);
Each of these overloads returns an iterator that enumerates one sub-expression for each submatch in regular expression e, found in text s, using match_flags m.
// Overloads taking a std::vector of sub-expression indexes. Each one returns
// an iterator that enumerates one sub-expression for each entry of submatch
// in regular expression e, found in text s, using match flags m
// (match_default if omitted).
//
// The text parameter is named s in every overload so that the declarations
// agree with each other and with the accompanying description.

// Text supplied as a const char* string.
u32regex_token_iterator<const char*>
   make_u32regex_token_iterator(
      const char* s,
      const u32regex& e,
      const std::vector<int>& submatch,
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a const wchar_t* string.
u32regex_token_iterator<const wchar_t*>
   make_u32regex_token_iterator(
      const wchar_t* s,
      const u32regex& e,
      const std::vector<int>& submatch,
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a const UChar* string.
u32regex_token_iterator<const UChar*>
   make_u32regex_token_iterator(
      const UChar* s,
      const u32regex& e,
      const std::vector<int>& submatch,
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as any std::basic_string; the result iterates over the
// string's const_iterator type.
template <class charT, class Traits, class Alloc>
u32regex_token_iterator<typename std::basic_string<charT, Traits, Alloc>::const_iterator>
   make_u32regex_token_iterator(
      const std::basic_string<charT, Traits, Alloc>& s,
      const u32regex& e,
      const std::vector<int>& submatch,
      regex_constants::match_flag_type m = regex_constants::match_default);

// Text supplied as a UnicodeString; the result iterates over const UChar*.
u32regex_token_iterator<const UChar*>
   make_u32regex_token_iterator(
      const UnicodeString& s,
      const u32regex& e,
      const std::vector<int>& submatch,
      regex_constants::match_flag_type m = regex_constants::match_default);
Each of these overloads returns an iterator that enumerates one sub-expression for each submatch in regular expression e, found in text s, using match_flags m.
Example: search for international currency symbols, along with their associated numeric value:
void enumerate_currencies2(const std::string& text) { // enumerate and print all the currency symbols, along // with any associated numeric values: const char* re = "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?" "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?" "(?(1)" "|(?(2)" "[[:Cf:][:Cc:][:Z*:]]*" ")" "[[:Sc:]]" ")"; boost::u32regex r = boost::make_u32regex(re); boost::u32regex_token_iterator<std::string::const_iterator> i(boost::make_u32regex_token_iterator(text, r, 1)), j; while(i != j) { std::cout << *i << std::endl; ++i; } }