Makefile.am: Add regex_scanner.{h,tcc}.
2013-08-26 Tim Shen <timshen91@gmail.com> * include/Makefile.am: Add regex_scanner.{h,tcc}. * include/Makefile.in: Regenerate. * include/bits/regex.h (match_search): Handle the `__first == __last` situation correctly. * include/bits/regex_compiler.h: Move _Scanner... * include/bits/regex_scanner.h: ...to here. New. * include/bits/regex_compiler.tcc: Move _Scanner... * include/bits/regex_scanner.tcc: ...to here, too. New. * include/bits/regex_executor.tcc: Use value instead of reference for submatch. * include/std/regex: Add regex_scanner.h * testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc: New. * testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc: New. * testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc: New. * testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc: New. * testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc: New. From-SVN: r202015
This commit is contained in:
parent
fd91cfe3e0
commit
33fbbb766c
15 changed files with 1289 additions and 790 deletions
|
@ -1,3 +1,22 @@
|
|||
2013-08-26 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/Makefile.am: Add regex_scanner.{h,tcc}.
|
||||
* include/Makefile.in: Regenerate.
|
||||
* include/bits/regex.h (match_search): Handle the `__first == __last`
|
||||
situation correctly.
|
||||
* include/bits/regex_compiler.h: Move _Scanner...
|
||||
* include/bits/regex_scanner.h: ...to here. New.
|
||||
* include/bits/regex_compiler.tcc: Move _Scanner...
|
||||
* include/bits/regex_scanner.tcc: ...to here, too. New.
|
||||
* include/bits/regex_executor.tcc: Use value instead of reference for
|
||||
submatch.
|
||||
* include/std/regex: Add regex_scanner.h.
|
||||
* testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc: New.
|
||||
* testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc: New.
|
||||
* testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc: New.
|
||||
* testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc: New.
|
||||
* testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc: New.
|
||||
|
||||
2013-08-22 Tim Shen <timshen91@gmail.com>
|
||||
|
||||
* include/bits/regex.h: Replace 8 spaces in indentation with a tab.
|
||||
|
|
|
@ -128,6 +128,8 @@ bits_headers = \
|
|||
${bits_srcdir}/regex.h \
|
||||
${bits_srcdir}/regex_constants.h \
|
||||
${bits_srcdir}/regex_error.h \
|
||||
${bits_srcdir}/regex_scanner.h \
|
||||
${bits_srcdir}/regex_scanner.tcc \
|
||||
${bits_srcdir}/regex_automaton.h \
|
||||
${bits_srcdir}/regex_automaton.tcc \
|
||||
${bits_srcdir}/regex_compiler.h \
|
||||
|
|
|
@ -395,6 +395,8 @@ bits_headers = \
|
|||
${bits_srcdir}/regex.h \
|
||||
${bits_srcdir}/regex_constants.h \
|
||||
${bits_srcdir}/regex_error.h \
|
||||
${bits_srcdir}/regex_scanner.h \
|
||||
${bits_srcdir}/regex_scanner.tcc \
|
||||
${bits_srcdir}/regex_automaton.h \
|
||||
${bits_srcdir}/regex_automaton.tcc \
|
||||
${bits_srcdir}/regex_compiler.h \
|
||||
|
|
|
@ -740,11 +740,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* @throws regex_error if @p [__first, __last) is not a valid regular
|
||||
* expression.
|
||||
*/
|
||||
template<typename _InputIterator>
|
||||
basic_regex(_InputIterator __first, _InputIterator __last,
|
||||
template<typename _FwdIter>
|
||||
basic_regex(_FwdIter __first, _FwdIter __last,
|
||||
flag_type __f = ECMAScript)
|
||||
: _M_flags(__f),
|
||||
_M_automaton(__detail::_Compiler<_InputIterator, _Ch_type, _Rx_traits>
|
||||
_M_automaton(__detail::_Compiler<_FwdIter, _Ch_type, _Rx_traits>
|
||||
(__first, __last, _M_traits, _M_flags)._M_get_nfa())
|
||||
{ }
|
||||
|
||||
|
@ -2371,7 +2371,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
{
|
||||
if (__re._M_automaton == nullptr)
|
||||
return false;
|
||||
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
|
||||
auto __cur = __first;
|
||||
// Continue when __cur == __last
|
||||
do
|
||||
{
|
||||
__detail::__get_executor(__cur, __last, __m, __re, __flags)
|
||||
->_M_search_from_first();
|
||||
|
@ -2391,10 +2393,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
return true;
|
||||
}
|
||||
}
|
||||
while (__cur++ != __last);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Searches for a regular expression within a range.
|
||||
* @param __first [IN] The start of the string to search.
|
||||
|
|
|
@ -39,197 +39,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
* @{
|
||||
*/
|
||||
|
||||
/// Matches a character range (bracket expression)
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
struct _BracketMatcher
|
||||
{
|
||||
typedef typename _TraitsT::char_class_type _CharClassT;
|
||||
typedef typename _TraitsT::string_type _StringT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
|
||||
explicit
|
||||
_BracketMatcher(bool __is_non_matching,
|
||||
const _TraitsT& __t,
|
||||
_FlagT __flags)
|
||||
: _M_is_non_matching(__is_non_matching), _M_traits(__t),
|
||||
_M_flags(__flags), _M_class_set(0)
|
||||
{ }
|
||||
|
||||
bool
|
||||
operator()(_CharT) const;
|
||||
|
||||
void
|
||||
_M_add_char(_CharT __c)
|
||||
{
|
||||
if (_M_flags & regex_constants::collate)
|
||||
if (_M_is_icase())
|
||||
_M_char_set.push_back(_M_traits.translate_nocase(__c));
|
||||
else
|
||||
_M_char_set.push_back(_M_traits.translate(__c));
|
||||
else
|
||||
_M_char_set.push_back(__c);
|
||||
}
|
||||
|
||||
void
|
||||
_M_add_collating_element(const _StringT& __s)
|
||||
{
|
||||
auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
|
||||
if (__st.empty())
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
// TODO: digraph
|
||||
_M_char_set.push_back(__st[0]);
|
||||
}
|
||||
|
||||
void
|
||||
_M_add_equivalence_class(const _StringT& __s)
|
||||
{
|
||||
_M_add_character_class(
|
||||
_M_traits.transform_primary(&*__s.begin(), &*__s.end()));
|
||||
}
|
||||
|
||||
void
|
||||
_M_add_character_class(const _StringT& __s)
|
||||
{
|
||||
auto __st = _M_traits.
|
||||
lookup_classname(&*__s.begin(), &*__s.end(), _M_is_icase());
|
||||
if (__st == 0)
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
_M_class_set |= __st;
|
||||
}
|
||||
|
||||
void
|
||||
_M_make_range(_CharT __l, _CharT __r)
|
||||
{ _M_range_set.push_back(make_pair(_M_get_str(__l), _M_get_str(__r))); }
|
||||
|
||||
bool
|
||||
_M_is_icase() const
|
||||
{ return _M_flags & regex_constants::icase; }
|
||||
|
||||
_StringT
|
||||
_M_get_str(_CharT __c) const
|
||||
{
|
||||
auto __s = _StringT(1,
|
||||
_M_is_icase()
|
||||
? _M_traits.translate_nocase(__c)
|
||||
: _M_traits.translate(__c));
|
||||
return _M_traits.transform(__s.begin(), __s.end());
|
||||
}
|
||||
|
||||
_TraitsT _M_traits;
|
||||
_FlagT _M_flags;
|
||||
bool _M_is_non_matching;
|
||||
std::vector<_CharT> _M_char_set;
|
||||
std::vector<pair<_StringT, _StringT>> _M_range_set;
|
||||
_CharClassT _M_class_set;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief struct _Scanner. Scans an input range for regex tokens.
|
||||
*
|
||||
* The %_Scanner class interprets the regular expression pattern in
|
||||
* the input range passed to its constructor as a sequence of parse
|
||||
* tokens passed to the regular expression compiler. The sequence
|
||||
* of tokens provided depends on the flag settings passed to the
|
||||
* constructor: different regular expression grammars will interpret
|
||||
* the same input pattern in syntactically different ways.
|
||||
*/
|
||||
template<typename _InputIter>
|
||||
class _Scanner
|
||||
{
|
||||
public:
|
||||
typedef unsigned int _StateT;
|
||||
typedef typename std::iterator_traits<_InputIter>::value_type _CharT;
|
||||
typedef std::basic_string<_CharT> _StringT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
typedef const std::ctype<_CharT> _CtypeT;
|
||||
|
||||
/// Token types returned from the scanner.
|
||||
enum _TokenT
|
||||
{
|
||||
_S_token_anychar,
|
||||
_S_token_backref,
|
||||
_S_token_bracket_begin,
|
||||
_S_token_bracket_inverse_begin,
|
||||
_S_token_bracket_end,
|
||||
_S_token_char_class_name,
|
||||
_S_token_closure0,
|
||||
_S_token_closure1,
|
||||
_S_token_collelem_multi,
|
||||
_S_token_collelem_single,
|
||||
_S_token_collsymbol,
|
||||
_S_token_comma,
|
||||
_S_token_dash,
|
||||
_S_token_dup_count,
|
||||
_S_token_eof,
|
||||
_S_token_equiv_class_name,
|
||||
_S_token_interval_begin,
|
||||
_S_token_interval_end,
|
||||
_S_token_line_begin,
|
||||
_S_token_line_end,
|
||||
_S_token_opt,
|
||||
_S_token_or,
|
||||
_S_token_ord_char,
|
||||
_S_token_subexpr_begin,
|
||||
_S_token_subexpr_end,
|
||||
_S_token_word_begin,
|
||||
_S_token_word_end,
|
||||
_S_token_unknown
|
||||
};
|
||||
|
||||
_Scanner(_InputIter __begin, _InputIter __end,
|
||||
_FlagT __flags, std::locale __loc)
|
||||
: _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
|
||||
_M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
|
||||
{ _M_advance(); }
|
||||
|
||||
void
|
||||
_M_advance();
|
||||
|
||||
_TokenT
|
||||
_M_token() const
|
||||
{ return _M_curToken; }
|
||||
|
||||
const _StringT&
|
||||
_M_value() const
|
||||
{ return _M_curValue; }
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
std::ostream&
|
||||
_M_print(std::ostream&);
|
||||
#endif
|
||||
|
||||
private:
|
||||
void
|
||||
_M_eat_escape();
|
||||
|
||||
void
|
||||
_M_scan_in_brace();
|
||||
|
||||
void
|
||||
_M_scan_in_bracket();
|
||||
|
||||
void
|
||||
_M_eat_charclass();
|
||||
|
||||
void
|
||||
_M_eat_equivclass();
|
||||
|
||||
void
|
||||
_M_eat_collsymbol();
|
||||
|
||||
static constexpr _StateT _S_state_in_brace = 1 << 0;
|
||||
static constexpr _StateT _S_state_in_bracket = 1 << 1;
|
||||
_InputIter _M_current;
|
||||
_InputIter _M_end;
|
||||
_FlagT _M_flags;
|
||||
_CtypeT& _M_ctype;
|
||||
_TokenT _M_curToken;
|
||||
_StringT _M_curValue;
|
||||
_StateT _M_state;
|
||||
};
|
||||
struct _BracketMatcher;
|
||||
|
||||
/// Builds an NFA from an input iterator interval.
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
class _Compiler
|
||||
{
|
||||
public:
|
||||
|
@ -237,7 +51,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
typedef _NFA<_CharT, _TraitsT> _RegexT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
|
||||
_Compiler(_InputIter __b, _InputIter __e,
|
||||
_Compiler(_FwdIter __b, _FwdIter __e,
|
||||
const _TraitsT& __traits, _FlagT __flags);
|
||||
|
||||
std::shared_ptr<_RegexT>
|
||||
|
@ -245,7 +59,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
{ return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); }
|
||||
|
||||
private:
|
||||
typedef _Scanner<_InputIter> _ScannerT;
|
||||
typedef _Scanner<_FwdIter> _ScannerT;
|
||||
typedef typename _ScannerT::_TokenT _TokenT;
|
||||
typedef _StateSeq<_CharT, _TraitsT> _StateSeqT;
|
||||
typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
|
||||
|
@ -276,7 +90,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
bool
|
||||
_M_bracket_expression();
|
||||
|
||||
bool
|
||||
void
|
||||
_M_bracket_list(_BMatcherT& __matcher);
|
||||
|
||||
bool
|
||||
|
@ -303,14 +117,111 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
int
|
||||
_M_cur_int_value(int __radix);
|
||||
|
||||
bool
|
||||
_M_try_char();
|
||||
|
||||
_CharT
|
||||
_M_get_char();
|
||||
|
||||
const _TraitsT& _M_traits;
|
||||
_ScannerT _M_scanner;
|
||||
_StringT _M_cur_value;
|
||||
_StringT _M_value;
|
||||
_RegexT _M_state_store;
|
||||
_StackT _M_stack;
|
||||
_FlagT _M_flags;
|
||||
};
|
||||
|
||||
/// Matches a character range (bracket expression)
|
||||
template<typename _CharT, typename _TraitsT>
|
||||
struct _BracketMatcher
|
||||
{
|
||||
typedef typename _TraitsT::char_class_type _CharClassT;
|
||||
typedef typename _TraitsT::string_type _StringT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
|
||||
explicit
|
||||
_BracketMatcher(bool __is_non_matching,
|
||||
const _TraitsT& __t,
|
||||
_FlagT __flags)
|
||||
: _M_is_non_matching(__is_non_matching), _M_traits(__t),
|
||||
_M_flags(__flags), _M_class_set(0)
|
||||
{ }
|
||||
|
||||
bool
|
||||
operator()(_CharT) const;
|
||||
|
||||
void
|
||||
_M_add_char(_CharT __c)
|
||||
{ _M_char_set.push_back(_M_translate(__c)); }
|
||||
|
||||
void
|
||||
_M_add_collating_element(const _StringT& __s)
|
||||
{
|
||||
auto __st = _M_traits.lookup_collatename(__s.data(),
|
||||
__s.data() + __s.size());
|
||||
if (__st.empty())
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
// TODO: digraph
|
||||
_M_char_set.push_back(__st[0]);
|
||||
}
|
||||
|
||||
void
|
||||
_M_add_equivalence_class(const _StringT& __s)
|
||||
{
|
||||
_M_add_character_class(
|
||||
_M_traits.transform_primary(__s.data(),
|
||||
__s.data() + __s.size()));
|
||||
}
|
||||
|
||||
void
|
||||
_M_add_character_class(const _StringT& __s)
|
||||
{
|
||||
auto __st = _M_traits.
|
||||
lookup_classname(__s.data(), __s.data() + __s.size(), _M_is_icase());
|
||||
if (__st == 0)
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
_M_class_set |= __st;
|
||||
}
|
||||
|
||||
void
|
||||
_M_make_range(_CharT __l, _CharT __r)
|
||||
{
|
||||
_M_range_set.push_back(
|
||||
make_pair(_M_get_str(_M_translate(__l)),
|
||||
_M_get_str(_M_translate(__r))));
|
||||
}
|
||||
|
||||
_CharT
|
||||
_M_translate(_CharT __c) const
|
||||
{
|
||||
if (_M_flags & regex_constants::collate)
|
||||
if (_M_is_icase())
|
||||
return _M_traits.translate_nocase(__c);
|
||||
else
|
||||
return _M_traits.translate(__c);
|
||||
else
|
||||
return __c;
|
||||
}
|
||||
|
||||
bool
|
||||
_M_is_icase() const
|
||||
{ return _M_flags & regex_constants::icase; }
|
||||
|
||||
_StringT
|
||||
_M_get_str(_CharT __c) const
|
||||
{
|
||||
_StringT __s(1, __c);
|
||||
return _M_traits.transform(__s.begin(), __s.end());
|
||||
}
|
||||
|
||||
_TraitsT _M_traits;
|
||||
_FlagT _M_flags;
|
||||
bool _M_is_non_matching;
|
||||
std::vector<_CharT> _M_char_set;
|
||||
std::vector<pair<_StringT, _StringT>> _M_range_set;
|
||||
_CharClassT _M_class_set;
|
||||
};
|
||||
|
||||
//@} regex-detail
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
|
|
|
@ -34,506 +34,15 @@ namespace __detail
|
|||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_advance()
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
|
||||
_CharT __c = *_M_current;
|
||||
if (_M_state & _S_state_in_bracket)
|
||||
{
|
||||
_M_scan_in_bracket();
|
||||
return;
|
||||
}
|
||||
if (_M_state & _S_state_in_brace)
|
||||
{
|
||||
_M_scan_in_brace();
|
||||
return;
|
||||
}
|
||||
#if 0
|
||||
// TODO: re-enable line anchors when _M_assertion is implemented.
|
||||
// See PR libstdc++/47724
|
||||
else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
|
||||
{
|
||||
_M_curToken = _S_token_line_begin;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('$'))
|
||||
{
|
||||
_M_curToken = _S_token_line_end;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
else if (__c == _M_ctype.widen('.'))
|
||||
{
|
||||
_M_curToken = _S_token_anychar;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('*'))
|
||||
{
|
||||
_M_curToken = _S_token_closure0;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('+'))
|
||||
{
|
||||
_M_curToken = _S_token_closure1;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('|'))
|
||||
{
|
||||
_M_curToken = _S_token_or;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('['))
|
||||
{
|
||||
if (*++_M_current == _M_ctype.widen('^'))
|
||||
{
|
||||
_M_curToken = _S_token_bracket_inverse_begin;
|
||||
++_M_current;
|
||||
}
|
||||
else
|
||||
_M_curToken = _S_token_bracket_begin;
|
||||
_M_state |= _S_state_in_bracket;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('\\'))
|
||||
{
|
||||
_M_eat_escape();
|
||||
return;
|
||||
}
|
||||
else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
if (__c == _M_ctype.widen('('))
|
||||
{
|
||||
_M_curToken = _S_token_subexpr_begin;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen(')'))
|
||||
{
|
||||
_M_curToken = _S_token_subexpr_end;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('{'))
|
||||
{
|
||||
_M_curToken = _S_token_interval_begin;
|
||||
_M_state |= _S_state_in_brace;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
++_M_current;
|
||||
}
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_scan_in_brace()
|
||||
{
|
||||
if (_M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
{
|
||||
_M_curToken = _S_token_dup_count;
|
||||
_M_curValue.assign(1, *_M_current);
|
||||
++_M_current;
|
||||
while (_M_current != _M_end
|
||||
&& _M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
{
|
||||
_M_curValue += *_M_current;
|
||||
++_M_current;
|
||||
}
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen(','))
|
||||
{
|
||||
_M_curToken = _S_token_comma;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
if (_M_flags & (regex_constants::basic | regex_constants::grep))
|
||||
{
|
||||
if (*_M_current == _M_ctype.widen('\\'))
|
||||
_M_eat_escape();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (*_M_current == _M_ctype.widen('}'))
|
||||
{
|
||||
_M_curToken = _S_token_interval_end;
|
||||
_M_state &= ~_S_state_in_brace;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_scan_in_bracket()
|
||||
{
|
||||
if (*_M_current == _M_ctype.widen('['))
|
||||
{
|
||||
++_M_current;
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
|
||||
if (*_M_current == _M_ctype.widen('.'))
|
||||
{
|
||||
_M_curToken = _S_token_collsymbol;
|
||||
_M_eat_collsymbol();
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen(':'))
|
||||
{
|
||||
_M_curToken = _S_token_char_class_name;
|
||||
_M_eat_charclass();
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen('='))
|
||||
{
|
||||
_M_curToken = _S_token_equiv_class_name;
|
||||
_M_eat_equivclass();
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen('-'))
|
||||
{
|
||||
_M_curToken = _S_token_dash;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen(']'))
|
||||
{
|
||||
_M_curToken = _S_token_bracket_end;
|
||||
_M_state &= ~_S_state_in_bracket;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
else if (*_M_current == _M_ctype.widen('\\'))
|
||||
{
|
||||
_M_eat_escape();
|
||||
return;
|
||||
}
|
||||
_M_curToken = _S_token_collelem_single;
|
||||
_M_curValue.assign(1, *_M_current);
|
||||
++_M_current;
|
||||
}
|
||||
|
||||
// TODO Complete it.
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_eat_escape()
|
||||
{
|
||||
++_M_current;
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
_CharT __c = *_M_current;
|
||||
++_M_current;
|
||||
|
||||
if (__c == _M_ctype.widen('('))
|
||||
{
|
||||
if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else
|
||||
_M_curToken = _S_token_subexpr_begin;
|
||||
}
|
||||
else if (__c == _M_ctype.widen(')'))
|
||||
{
|
||||
if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else
|
||||
_M_curToken = _S_token_subexpr_end;
|
||||
}
|
||||
else if (__c == _M_ctype.widen('{'))
|
||||
{
|
||||
if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else
|
||||
{
|
||||
_M_curToken = _S_token_interval_begin;
|
||||
_M_state |= _S_state_in_brace;
|
||||
}
|
||||
}
|
||||
else if (__c == _M_ctype.widen('}'))
|
||||
{
|
||||
if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!(_M_state && _S_state_in_brace))
|
||||
__throw_regex_error(regex_constants::error_badbrace);
|
||||
_M_state &= ~_S_state_in_brace;
|
||||
_M_curToken = _S_token_interval_end;
|
||||
}
|
||||
}
|
||||
else if (__c == _M_ctype.widen('x'))
|
||||
{
|
||||
++_M_current;
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
if (_M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
{
|
||||
_M_curValue.assign(1, *_M_current);
|
||||
++_M_current;
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_curToken = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
if (_M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
{
|
||||
_M_curValue += *_M_current;
|
||||
++_M_current;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (__c == _M_ctype.widen('^')
|
||||
|| __c == _M_ctype.widen('.')
|
||||
|| __c == _M_ctype.widen('*')
|
||||
|| __c == _M_ctype.widen('$')
|
||||
|| __c == _M_ctype.widen('\\'))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else if (_M_ctype.is(_CtypeT::digit, __c))
|
||||
{
|
||||
_M_curToken = _S_token_backref;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else if (_M_state & _S_state_in_bracket)
|
||||
{
|
||||
if (__c == _M_ctype.widen('-')
|
||||
|| __c == _M_ctype.widen('[')
|
||||
|| __c == _M_ctype.widen(']'))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, __c);
|
||||
}
|
||||
else if ((_M_flags & regex_constants::ECMAScript)
|
||||
&& __c == _M_ctype.widen('b'))
|
||||
{
|
||||
_M_curToken = _S_token_ord_char;
|
||||
_M_curValue.assign(1, _M_ctype.widen(' '));
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
}
|
||||
|
||||
// Eats a character class or throwns an exception.
|
||||
// current point to ':' delimiter on entry, char after ']' on return
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_eat_charclass()
|
||||
{
|
||||
++_M_current; // skip ':'
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
for (_M_curValue.clear();
|
||||
_M_current != _M_end && *_M_current != _M_ctype.widen(':');
|
||||
++_M_current)
|
||||
_M_curValue += *_M_current;
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
++_M_current; // skip ':'
|
||||
if (*_M_current != _M_ctype.widen(']'))
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
++_M_current; // skip ']'
|
||||
}
|
||||
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_eat_equivclass()
|
||||
{
|
||||
++_M_current; // skip '='
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
for (_M_curValue.clear();
|
||||
_M_current != _M_end && *_M_current != _M_ctype.widen('=');
|
||||
++_M_current)
|
||||
_M_curValue += *_M_current;
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
++_M_current; // skip '='
|
||||
if (*_M_current != _M_ctype.widen(']'))
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
++_M_current; // skip ']'
|
||||
}
|
||||
|
||||
|
||||
template<typename _BiIter>
|
||||
void
|
||||
_Scanner<_BiIter>::
|
||||
_M_eat_collsymbol()
|
||||
{
|
||||
++_M_current; // skip '.'
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
for (_M_curValue.clear();
|
||||
_M_current != _M_end && *_M_current != _M_ctype.widen('.');
|
||||
++_M_current)
|
||||
_M_curValue += *_M_current;
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
++_M_current; // skip '.'
|
||||
if (*_M_current != _M_ctype.widen(']'))
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
++_M_current; // skip ']'
|
||||
}
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
// Debug aid: writes a one-line description of the current token to ostr
// and returns ostr.  Tokens that carry text also print _M_curValue.
template<typename _BiIter>
  std::ostream&
  _Scanner<_BiIter>::
  _M_print(std::ostream& ostr)
  {
    switch (_M_curToken)
      {
      case _S_token_anychar:
        return ostr << "any-character\n";
      case _S_token_backref:
        return ostr << "backref\n";
      case _S_token_bracket_begin:
        return ostr << "bracket-begin\n";
      case _S_token_bracket_inverse_begin:
        return ostr << "bracket-inverse-begin\n";
      case _S_token_bracket_end:
        return ostr << "bracket-end\n";
      case _S_token_char_class_name:
        return ostr << "char-class-name \"" << _M_curValue << "\"\n";
      case _S_token_closure0:
        return ostr << "closure0\n";
      case _S_token_closure1:
        return ostr << "closure1\n";
      case _S_token_collelem_multi:
        return ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
      case _S_token_collelem_single:
        return ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
      case _S_token_collsymbol:
        return ostr << "collsymbol \"" << _M_curValue << "\"\n";
      case _S_token_comma:
        return ostr << "comma\n";
      case _S_token_dash:
        return ostr << "dash\n";
      case _S_token_dup_count:
        return ostr << "dup count: " << _M_curValue << "\n";
      case _S_token_eof:
        return ostr << "EOF\n";
      case _S_token_equiv_class_name:
        return ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
      case _S_token_interval_begin:
        return ostr << "interval begin\n";
      case _S_token_interval_end:
        return ostr << "interval end\n";
      case _S_token_line_begin:
        return ostr << "line begin\n";
      case _S_token_line_end:
        return ostr << "line end\n";
      case _S_token_opt:
        return ostr << "opt\n";
      case _S_token_or:
        return ostr << "or\n";
      case _S_token_ord_char:
        return ostr << "ordinary character: \"" << _M_value() << "\"\n";
      case _S_token_subexpr_begin:
        return ostr << "subexpr begin\n";
      case _S_token_subexpr_end:
        return ostr << "subexpr end\n";
      case _S_token_word_begin:
        return ostr << "word begin\n";
      case _S_token_word_end:
        return ostr << "word end\n";
      case _S_token_unknown:
        return ostr << "-- unknown token --\n";
      default:
        // Not a valid _TokenT value.
        _GLIBCXX_DEBUG_ASSERT(false);
      }
    return ostr;
  }
#endif
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler(_InputIter __b, _InputIter __e,
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_Compiler(_FwdIter __b, _FwdIter __e,
|
||||
const _TraitsT& __traits, _FlagT __flags)
|
||||
: _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
|
||||
_M_state_store(__flags), _M_flags(__flags)
|
||||
{
|
||||
_StateSeqT __r(_M_state_store,
|
||||
_M_state_store._M_insert_subexpr_begin());
|
||||
_M_state_store._M_insert_subexpr_begin());
|
||||
_M_disjunction();
|
||||
if (!_M_stack.empty())
|
||||
{
|
||||
|
@ -544,23 +53,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
__r._M_append(_M_state_store._M_insert_accept());
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_M_match_token(_Compiler<_InputIter, _CharT, _TraitsT>::_TokenT token)
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_match_token(_TokenT token)
|
||||
{
|
||||
if (token == _M_scanner._M_token())
|
||||
if (token == _M_scanner._M_get_token())
|
||||
{
|
||||
_M_cur_value = _M_scanner._M_value();
|
||||
_M_value = _M_scanner._M_get_value();
|
||||
_M_scanner._M_advance();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_disjunction()
|
||||
{
|
||||
this->_M_alternative();
|
||||
|
@ -573,9 +82,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
}
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_alternative()
|
||||
{
|
||||
if (this->_M_term())
|
||||
|
@ -591,9 +100,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
}
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_term()
|
||||
{
|
||||
if (this->_M_assertion())
|
||||
|
@ -606,37 +115,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
return false;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
// TODO Implement it.
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_assertion()
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_line_begin))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_line_begin);
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_line_end))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_line_end);
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_word_begin))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_word_begin);
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_word_end))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_word_end);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_quantifier()
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_closure0))
|
||||
|
@ -707,15 +197,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
}
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_atom()
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_anychar))
|
||||
{
|
||||
const static auto&
|
||||
__any_matcher = [](_CharT) -> bool
|
||||
__any_matcher = [](_CharT __ch) -> bool
|
||||
{ return true; };
|
||||
|
||||
_M_stack.push(_StateSeqT(_M_state_store,
|
||||
|
@ -723,9 +213,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
(__any_matcher)));
|
||||
return true;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_ord_char))
|
||||
if (_M_try_char())
|
||||
{
|
||||
auto __c = _M_cur_value[0];
|
||||
_CharT __c = _M_value[0];
|
||||
__detail::_Matcher<_CharT> f;
|
||||
if (_M_flags & regex_constants::icase)
|
||||
{
|
||||
|
@ -744,7 +234,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_backref))
|
||||
{
|
||||
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
|
||||
_M_stack.push(_StateSeqT(_M_state_store, _M_state_store.
|
||||
_M_insert_backref(_M_cur_int_value(10))));
|
||||
return true;
|
||||
|
@ -770,90 +259,111 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
return _M_bracket_expression();
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_bracket_expression()
|
||||
{
|
||||
bool __inverse =
|
||||
_M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
|
||||
if (!(__inverse || _M_match_token(_ScannerT::_S_token_bracket_begin)))
|
||||
bool __neg =
|
||||
_M_match_token(_ScannerT::_S_token_bracket_neg_begin);
|
||||
if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
|
||||
return false;
|
||||
_BMatcherT __matcher( __inverse, _M_traits, _M_flags);
|
||||
// special case: only if _not_ chr first after
|
||||
// '[' or '[^' or if ECMAscript
|
||||
if (!_M_bracket_list(__matcher) // list is empty
|
||||
&& !(_M_flags & regex_constants::ECMAScript))
|
||||
__throw_regex_error(regex_constants::error_brack);
|
||||
_BMatcherT __matcher(__neg, _M_traits, _M_flags);
|
||||
_M_bracket_list(__matcher);
|
||||
_M_stack.push(_StateSeqT(_M_state_store,
|
||||
_M_state_store._M_insert_matcher(__matcher)));
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
bool // list is non-empty
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_bracket_list(_BMatcherT& __matcher)
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_bracket_end))
|
||||
return false;
|
||||
return;
|
||||
_M_expression_term(__matcher);
|
||||
_M_bracket_list(__matcher);
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
void
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_expression_term(_BMatcherT& __matcher)
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_collsymbol))
|
||||
{
|
||||
__matcher._M_add_collating_element(_M_cur_value);
|
||||
__matcher._M_add_collating_element(_M_value);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
|
||||
{
|
||||
__matcher._M_add_equivalence_class(_M_cur_value);
|
||||
__matcher._M_add_equivalence_class(_M_value);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_char_class_name))
|
||||
{
|
||||
__matcher._M_add_character_class(_M_cur_value);
|
||||
__matcher._M_add_character_class(_M_value);
|
||||
return;
|
||||
}
|
||||
if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
|
||||
if (_M_try_char()) // [a
|
||||
{
|
||||
auto __ch = _M_cur_value[0];
|
||||
if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
|
||||
auto __ch = _M_value[0];
|
||||
if (_M_try_char())
|
||||
{
|
||||
// If the dash is the last character in the bracket expression,
|
||||
// it is not special.
|
||||
if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
|
||||
__matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
|
||||
else // [a-z]
|
||||
if (_M_value[0] == std::use_facet<std::ctype<_CharT>>
|
||||
(_M_traits.getloc()).widen('-')) // [a-
|
||||
{
|
||||
if (!_M_match_token(_ScannerT::_S_token_collelem_single))
|
||||
if (_M_try_char()) // [a-z]
|
||||
{
|
||||
__matcher._M_make_range(__ch, _M_value[0]);
|
||||
return;
|
||||
}
|
||||
// If the dash is the last character in the bracket
|
||||
// expression, it is not special.
|
||||
if (_M_scanner._M_get_token()
|
||||
!= _ScannerT::_S_token_bracket_end)
|
||||
__throw_regex_error(regex_constants::error_range);
|
||||
__matcher._M_make_range(__ch, _M_cur_value[0]);
|
||||
}
|
||||
__matcher._M_add_char(_M_value[0]);
|
||||
}
|
||||
else // [a]
|
||||
__matcher._M_add_char(__ch);
|
||||
__matcher._M_add_char(__ch);
|
||||
return;
|
||||
}
|
||||
__throw_regex_error(regex_constants::error_brack);
|
||||
}
|
||||
|
||||
template<typename _InputIter, typename _CharT, typename _TraitsT>
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
bool
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_try_char()
|
||||
{
|
||||
bool __is_char = false;
|
||||
if (_M_match_token(_ScannerT::_S_token_oct_num))
|
||||
{
|
||||
__is_char = true;
|
||||
_M_value.assign(1, _M_cur_int_value(8));
|
||||
}
|
||||
else if (_M_match_token(_ScannerT::_S_token_hex_num))
|
||||
{
|
||||
__is_char = true;
|
||||
_M_value.assign(1, _M_cur_int_value(16));
|
||||
}
|
||||
else if (_M_match_token(_ScannerT::_S_token_ord_char))
|
||||
__is_char = true;
|
||||
return __is_char;
|
||||
}
|
||||
|
||||
template<typename _FwdIter, typename _CharT, typename _TraitsT>
|
||||
int
|
||||
_Compiler<_InputIter, _CharT, _TraitsT>::
|
||||
_Compiler<_FwdIter, _CharT, _TraitsT>::
|
||||
_M_cur_int_value(int __radix)
|
||||
{
|
||||
int __v = 0;
|
||||
for (typename _StringT::size_type __i = 0;
|
||||
__i < _M_cur_value.length(); ++__i)
|
||||
__v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
|
||||
__i < _M_value.length(); ++__i)
|
||||
__v =__v * __radix + _M_traits.value(_M_value[__i], __radix);
|
||||
return __v;
|
||||
}
|
||||
|
||||
|
@ -861,35 +371,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
bool _BracketMatcher<_CharT, _TraitsT>::
|
||||
operator()(_CharT __ch) const
|
||||
{
|
||||
auto __oldch = __ch;
|
||||
if (_M_flags & regex_constants::collate)
|
||||
if (_M_is_icase())
|
||||
__ch = _M_traits.translate_nocase(__ch);
|
||||
else
|
||||
__ch = _M_traits.translate(__ch);
|
||||
|
||||
bool __ret = false;
|
||||
for (auto __c : _M_char_set)
|
||||
if (__c == __ch)
|
||||
{
|
||||
__ret = true;
|
||||
break;
|
||||
}
|
||||
if (!__ret && _M_traits.isctype(__oldch, _M_class_set))
|
||||
if (_M_traits.isctype(__ch, _M_class_set))
|
||||
__ret = true;
|
||||
else
|
||||
{
|
||||
_StringT __s = _M_get_str(__ch);
|
||||
for (auto& __it : _M_range_set)
|
||||
if (__it.first <= __s && __s <= __it.second)
|
||||
__ch = _M_translate(__ch);
|
||||
|
||||
for (auto __c : _M_char_set)
|
||||
if (__c == __ch)
|
||||
{
|
||||
__ret = true;
|
||||
break;
|
||||
}
|
||||
if (!__ret)
|
||||
{
|
||||
_StringT __s = _M_get_str(__ch);
|
||||
for (auto& __it : _M_range_set)
|
||||
if (__it.first <= __s && __s <= __it.second)
|
||||
{
|
||||
__ret = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (_M_is_non_matching)
|
||||
__ret = !__ret;
|
||||
return __ret;
|
||||
return !__ret;
|
||||
else
|
||||
return __ret;
|
||||
}
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
|
|
|
@ -260,7 +260,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
auto __size = __u.size();
|
||||
for (auto __i = 0; __i < __size; __i++)
|
||||
{
|
||||
auto& __uit = __u[__i], __vit = __v[__i];
|
||||
auto __uit = __u[__i], __vit = __v[__i];
|
||||
if (__uit.matched && !__vit.matched)
|
||||
return true;
|
||||
if (!__uit.matched && __vit.matched)
|
||||
|
|
194
libstdc++-v3/include/bits/regex_scanner.h
Normal file
194
libstdc++-v3/include/bits/regex_scanner.h
Normal file
|
@ -0,0 +1,194 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_scanner.h
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
/**
|
||||
* @addtogroup regex-detail
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief struct _Scanner. Scans an input range for regex tokens.
|
||||
*
|
||||
* The %_Scanner class interprets the regular expression pattern in
|
||||
* the input range passed to its constructor as a sequence of parse
|
||||
* tokens passed to the regular expression compiler. The sequence
|
||||
* of tokens provided depends on the flag settings passed to the
|
||||
* constructor: different regular expression grammars will interpret
|
||||
* the same input pattern in syntactically different ways.
|
||||
*/
|
||||
template<typename _FwdIter>
|
||||
class _Scanner
|
||||
{
|
||||
public:
|
||||
typedef typename std::iterator_traits<_FwdIter>::value_type _CharT;
|
||||
typedef std::basic_string<_CharT> _StringT;
|
||||
typedef regex_constants::syntax_option_type _FlagT;
|
||||
typedef const std::ctype<_CharT> _CtypeT;
|
||||
|
||||
/// Token types returned from the scanner.
|
||||
enum _TokenT
|
||||
{
|
||||
_S_token_anychar,
|
||||
_S_token_ord_char,
|
||||
_S_token_oct_num,
|
||||
_S_token_hex_num,
|
||||
_S_token_backref,
|
||||
_S_token_subexpr_begin,
|
||||
_S_token_subexpr_no_group_begin,
|
||||
_S_token_subexpr_lookahead_begin,
|
||||
_S_token_subexpr_neg_lookahead_begin,
|
||||
_S_token_subexpr_end,
|
||||
_S_token_bracket_begin,
|
||||
_S_token_bracket_neg_begin,
|
||||
_S_token_bracket_end,
|
||||
_S_token_interval_begin,
|
||||
_S_token_interval_end,
|
||||
_S_token_quoted_class,
|
||||
_S_token_char_class_name,
|
||||
_S_token_collsymbol,
|
||||
_S_token_equiv_class_name,
|
||||
_S_token_opt,
|
||||
_S_token_or,
|
||||
_S_token_closure0,
|
||||
_S_token_closure1,
|
||||
_S_token_line_begin,
|
||||
_S_token_line_end,
|
||||
_S_token_comma,
|
||||
_S_token_dup_count,
|
||||
_S_token_eof,
|
||||
_S_token_unknown
|
||||
};
|
||||
|
||||
_Scanner(_FwdIter __begin, _FwdIter __end,
|
||||
_FlagT __flags, std::locale __loc);
|
||||
|
||||
void
|
||||
_M_advance();
|
||||
|
||||
_TokenT
|
||||
_M_get_token() const
|
||||
{ return _M_token; }
|
||||
|
||||
const _StringT&
|
||||
_M_get_value() const
|
||||
{ return _M_value; }
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
std::ostream&
|
||||
_M_print(std::ostream&);
|
||||
#endif
|
||||
|
||||
private:
|
||||
enum _StateT
|
||||
{
|
||||
_S_state_normal,
|
||||
_S_state_in_brace,
|
||||
_S_state_in_bracket,
|
||||
};
|
||||
|
||||
void
|
||||
_M_scan_normal();
|
||||
|
||||
void
|
||||
_M_scan_in_bracket();
|
||||
|
||||
void
|
||||
_M_scan_in_brace();
|
||||
|
||||
void
|
||||
_M_eat_escape_ecma();
|
||||
|
||||
void
|
||||
_M_eat_escape_posix();
|
||||
|
||||
void
|
||||
_M_eat_escape_awk();
|
||||
|
||||
void
|
||||
_M_eat_class(char);
|
||||
|
||||
constexpr bool
|
||||
_M_is_ecma()
|
||||
{ return _M_flags & regex_constants::ECMAScript; }
|
||||
|
||||
constexpr bool
|
||||
_M_is_basic()
|
||||
{ return _M_flags & (regex_constants::basic | regex_constants::grep); }
|
||||
|
||||
constexpr bool
|
||||
_M_is_extended()
|
||||
{
|
||||
return _M_flags & (regex_constants::extended
|
||||
| regex_constants::egrep
|
||||
| regex_constants::awk);
|
||||
}
|
||||
|
||||
constexpr bool
|
||||
_M_is_grep()
|
||||
{ return _M_flags & (regex_constants::grep | regex_constants::egrep); }
|
||||
|
||||
constexpr bool
|
||||
_M_is_awk()
|
||||
{ return _M_flags & regex_constants::awk; }
|
||||
|
||||
_StateT _M_state;
|
||||
_FwdIter _M_current;
|
||||
_FwdIter _M_end;
|
||||
_FlagT _M_flags;
|
||||
_CtypeT& _M_ctype;
|
||||
_TokenT _M_token;
|
||||
_StringT _M_value;
|
||||
bool _M_at_bracket_start;
|
||||
public:
|
||||
// TODO: make them static when this file is stable.
|
||||
const std::map<char, _TokenT> _M_token_map;
|
||||
const std::map<char, char> _M_ecma_escape_map;
|
||||
const std::map<char, char> _M_awk_escape_map;
|
||||
const std::set<char> _M_ecma_spec_char;
|
||||
const std::set<char> _M_basic_spec_char;
|
||||
const std::set<char> _M_extended_spec_char;
|
||||
|
||||
const std::map<char, char>& _M_escape_map;
|
||||
const std::set<char>& _M_spec_char;
|
||||
void (_Scanner::* _M_eat_escape)();
|
||||
};
|
||||
|
||||
//@} regex-detail
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace std
|
||||
|
||||
#include <bits/regex_scanner.tcc>
|
609
libstdc++-v3/include/bits/regex_scanner.tcc
Normal file
609
libstdc++-v3/include/bits/regex_scanner.tcc
Normal file
|
@ -0,0 +1,609 @@
|
|||
// class template regex -*- C++ -*-
|
||||
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// Under Section 7 of GPL version 3, you are granted additional
|
||||
// permissions described in the GCC Runtime Library Exception, version
|
||||
// 3.1, as published by the Free Software Foundation.
|
||||
|
||||
// You should have received a copy of the GNU General Public License and
|
||||
// a copy of the GCC Runtime Library Exception along with this program;
|
||||
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
/**
|
||||
* @file bits/regex_scanner.tcc
|
||||
* This is an internal header file, included by other library headers.
|
||||
* Do not attempt to use it directly. @headername{regex}
|
||||
*/
|
||||
|
||||
// TODO make comments doxygen format
|
||||
|
||||
// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
|
||||
// and awk
|
||||
// 1) grep is basic except '\n' is treated as '|'
|
||||
// 2) egrep is extended except '\n' is treated as '|'
|
||||
// 3) awk is extended except special escaping rules, and there's no
|
||||
// back-reference.
|
||||
//
|
||||
// References:
|
||||
//
|
||||
// ECMAScript: ECMA-262 15.10
|
||||
//
|
||||
// basic, extended:
|
||||
// http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html
|
||||
//
|
||||
// awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html
|
||||
|
||||
namespace std _GLIBCXX_VISIBILITY(default)
|
||||
{
|
||||
namespace __detail
|
||||
{
|
||||
_GLIBCXX_BEGIN_NAMESPACE_VERSION
|
||||
|
||||
template<typename _FwdIter>
|
||||
_Scanner<_FwdIter>::
|
||||
_Scanner(_FwdIter __begin, _FwdIter __end,
|
||||
_FlagT __flags, std::locale __loc)
|
||||
: _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
|
||||
_M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_normal),
|
||||
_M_at_bracket_start(false),
|
||||
_M_token_map
|
||||
{
|
||||
{'^', _S_token_line_begin},
|
||||
{'$', _S_token_line_end},
|
||||
{'.', _S_token_anychar},
|
||||
{'*', _S_token_closure0},
|
||||
{'+', _S_token_closure1},
|
||||
{'?', _S_token_opt},
|
||||
{'|', _S_token_or},
|
||||
// grep and egrep
|
||||
{'\n', _S_token_or},
|
||||
},
|
||||
_M_ecma_escape_map
|
||||
{
|
||||
{'0', '\0'},
|
||||
{'b', '\b'},
|
||||
{'f', '\f'},
|
||||
{'n', '\n'},
|
||||
{'r', '\r'},
|
||||
{'t', '\t'},
|
||||
{'v', '\v'},
|
||||
},
|
||||
_M_awk_escape_map
|
||||
{
|
||||
{'"', '"'},
|
||||
{'/', '/'},
|
||||
{'\\', '\\'},
|
||||
{'a', '\a'},
|
||||
{'b', '\b'},
|
||||
{'f', '\f'},
|
||||
{'n', '\n'},
|
||||
{'r', '\r'},
|
||||
{'t', '\t'},
|
||||
{'v', '\v'},
|
||||
},
|
||||
_M_escape_map(_M_is_ecma()
|
||||
? _M_ecma_escape_map
|
||||
: _M_awk_escape_map),
|
||||
_M_ecma_spec_char
|
||||
{
|
||||
'^',
|
||||
'$',
|
||||
'\\',
|
||||
'.',
|
||||
'*',
|
||||
'+',
|
||||
'?',
|
||||
'(',
|
||||
')',
|
||||
'[',
|
||||
']',
|
||||
'{',
|
||||
'}',
|
||||
'|',
|
||||
},
|
||||
_M_basic_spec_char
|
||||
{
|
||||
'.',
|
||||
'[',
|
||||
'\\',
|
||||
'*',
|
||||
'^',
|
||||
'$',
|
||||
},
|
||||
_M_extended_spec_char
|
||||
{
|
||||
'.',
|
||||
'[',
|
||||
'\\',
|
||||
'(',
|
||||
')',
|
||||
'*',
|
||||
'+',
|
||||
'?',
|
||||
'{',
|
||||
'|',
|
||||
'^',
|
||||
'$',
|
||||
},
|
||||
_M_eat_escape(_M_is_ecma()
|
||||
? &_Scanner::_M_eat_escape_ecma
|
||||
: &_Scanner::_M_eat_escape_posix),
|
||||
_M_spec_char(_M_is_ecma()
|
||||
? _M_ecma_spec_char
|
||||
: _M_is_basic()
|
||||
? _M_basic_spec_char
|
||||
: _M_extended_spec_char)
|
||||
{ _M_advance(); }
|
||||
|
||||
template<typename _FwdIter>
|
||||
void
|
||||
_Scanner<_FwdIter>::
|
||||
_M_advance()
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
{
|
||||
_M_token = _S_token_eof;
|
||||
return;
|
||||
}
|
||||
|
||||
if (_M_state == _S_state_normal)
|
||||
_M_scan_normal();
|
||||
else if (_M_state == _S_state_in_bracket)
|
||||
_M_scan_in_bracket();
|
||||
else if (_M_state == _S_state_in_brace)
|
||||
_M_scan_in_brace();
|
||||
else
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
|
||||
// Differences between styles:
|
||||
// 1) "\(", "\)", "\{" in basic. It's not escaping.
|
||||
// 2) "(?:", "(?=", "(?!" in ECMAScript.
|
||||
template<typename _FwdIter>
|
||||
void
|
||||
_Scanner<_FwdIter>::
|
||||
_M_scan_normal()
|
||||
{
|
||||
auto __c = *_M_current++;
|
||||
|
||||
if (__c == '\\')
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
|
||||
if (!_M_is_basic()
|
||||
|| (*_M_current != '('
|
||||
&& *_M_current != ')'
|
||||
&& *_M_current != '{'))
|
||||
{
|
||||
(this->*_M_eat_escape)();
|
||||
return;
|
||||
}
|
||||
__c = *_M_current++;
|
||||
}
|
||||
if (__c == '(')
|
||||
{
|
||||
if (_M_is_ecma() && *_M_current == '?')
|
||||
{
|
||||
if (++_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_paren);
|
||||
|
||||
if (*_M_current == ':')
|
||||
{
|
||||
++_M_current;
|
||||
_M_token = _S_token_subexpr_no_group_begin;
|
||||
}
|
||||
else if (*_M_current == '=')
|
||||
{
|
||||
++_M_current;
|
||||
_M_token = _S_token_subexpr_lookahead_begin;
|
||||
}
|
||||
else if (*_M_current == '!')
|
||||
{
|
||||
++_M_current;
|
||||
_M_token = _S_token_subexpr_neg_lookahead_begin;
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_paren);
|
||||
}
|
||||
else
|
||||
_M_token = _S_token_subexpr_begin;
|
||||
}
|
||||
else if (__c == ')')
|
||||
_M_token = _S_token_subexpr_end;
|
||||
else if (__c == '[')
|
||||
{
|
||||
_M_state = _S_state_in_bracket;
|
||||
_M_at_bracket_start = true;
|
||||
if (_M_current != _M_end && *_M_current == '^')
|
||||
{
|
||||
_M_token = _S_token_bracket_neg_begin;
|
||||
++_M_current;
|
||||
}
|
||||
else
|
||||
_M_token = _S_token_bracket_begin;
|
||||
}
|
||||
else if (__c == '{')
|
||||
{
|
||||
_M_state = _S_state_in_brace;
|
||||
_M_token = _S_token_interval_begin;
|
||||
}
|
||||
else if (_M_spec_char.count(__c)
|
||||
&& __c != ']'
|
||||
&& __c != '}'
|
||||
|| (_M_is_grep() && __c == '\n'))
|
||||
_M_token = _M_token_map.at(__c);
|
||||
else
|
||||
{
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, __c);
|
||||
}
|
||||
}
|
||||
|
||||
// Differences between styles:
|
||||
// 1) different semantics of "[]" and "[^]".
|
||||
// 2) Escaping in bracket expr.
|
||||
template<typename _FwdIter>
|
||||
void
|
||||
_Scanner<_FwdIter>::
|
||||
_M_scan_in_bracket()
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_brack);
|
||||
|
||||
auto __c = *_M_current++;
|
||||
|
||||
if (__c == '[')
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_brack);
|
||||
|
||||
if (*_M_current == '.')
|
||||
{
|
||||
_M_token = _S_token_collsymbol;
|
||||
_M_eat_class(*_M_current++);
|
||||
}
|
||||
else if (*_M_current == ':')
|
||||
{
|
||||
_M_token = _S_token_char_class_name;
|
||||
_M_eat_class(*_M_current++);
|
||||
}
|
||||
else if (*_M_current == '=')
|
||||
{
|
||||
_M_token = _S_token_equiv_class_name;
|
||||
_M_eat_class(*_M_current++);
|
||||
}
|
||||
else
|
||||
{
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, __c);
|
||||
}
|
||||
}
|
||||
// In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
|
||||
// literally. So "[]]" or "[^]]" is valid regex. See the testcases
|
||||
// `*/empty_range.cc`.
|
||||
else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
|
||||
{
|
||||
_M_token = _S_token_bracket_end;
|
||||
_M_state = _S_state_normal;
|
||||
}
|
||||
// ECMAScirpt and awk permmits escaping in bracket.
|
||||
else if (__c == '\\' && (_M_is_ecma() || _M_is_awk()))
|
||||
(this->*_M_eat_escape)();
|
||||
else
|
||||
{
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, __c);
|
||||
}
|
||||
_M_at_bracket_start = false;
|
||||
}
|
||||
|
||||
// Differences between styles:
|
||||
// 1) "\}" in basic style.
|
||||
template<typename _FwdIter>
|
||||
void
|
||||
_Scanner<_FwdIter>::
|
||||
_M_scan_in_brace()
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_brace);
|
||||
|
||||
auto __c = *_M_current++;
|
||||
|
||||
if (_M_ctype.is(_CtypeT::digit, __c))
|
||||
{
|
||||
_M_token = _S_token_dup_count;
|
||||
_M_value.assign(1, __c);
|
||||
while (_M_current != _M_end
|
||||
&& _M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
_M_value += *_M_current++;
|
||||
}
|
||||
else if (__c == ',')
|
||||
_M_token = _S_token_comma;
|
||||
// basic use \}.
|
||||
else if (_M_is_basic())
|
||||
{
|
||||
if (__c == '\\' && _M_current != _M_end && *_M_current == '}')
|
||||
{
|
||||
_M_state = _S_state_normal;
|
||||
_M_token = _S_token_interval_end;
|
||||
++_M_current;
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_brace);
|
||||
}
|
||||
else if (__c == '}')
|
||||
{
|
||||
_M_state = _S_state_normal;
|
||||
_M_token = _S_token_interval_end;
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_brace);
|
||||
}
|
||||
|
||||
template<typename _FwdIter>
|
||||
void
|
||||
_Scanner<_FwdIter>::
|
||||
_M_eat_escape_ecma()
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
|
||||
auto __c = *_M_current++;
|
||||
|
||||
if (_M_escape_map.count(__c)
|
||||
&& (__c != 'b' || _M_state == _S_state_in_bracket))
|
||||
{
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, _M_escape_map.at(__c));
|
||||
}
|
||||
// N3376 28.13
|
||||
else if (__c == 'b'
|
||||
|| __c == 'B'
|
||||
|| __c == 'd'
|
||||
|| __c == 'D'
|
||||
|| __c == 's'
|
||||
|| __c == 'S'
|
||||
|| __c == 'w'
|
||||
|| __c == 'W')
|
||||
{
|
||||
_M_token = _S_token_quoted_class;
|
||||
_M_value.assign(1, __c);
|
||||
}
|
||||
else if (__c == 'c')
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, *_M_current++);
|
||||
}
|
||||
else if (__c == 'x' || __c == 'u')
|
||||
{
|
||||
_M_value.erase();
|
||||
for (int i = 0; i < (__c == 'x' ? 2 : 4); i++)
|
||||
{
|
||||
if (_M_current == _M_end
|
||||
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current))
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
_M_value += *_M_current++;
|
||||
}
|
||||
_M_token = _S_token_hex_num;
|
||||
}
|
||||
// ECMAScript recongnizes multi-digit back-references.
|
||||
else if (_M_ctype.is(_CtypeT::digit, __c))
|
||||
{
|
||||
_M_value.assign(1, __c);
|
||||
while (_M_current != _M_end
|
||||
&& _M_ctype.is(_CtypeT::digit, *_M_current))
|
||||
_M_value += *_M_current++;
|
||||
_M_token = _S_token_backref;
|
||||
}
|
||||
else
|
||||
{
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, __c);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename _FwdIter>
|
||||
void
|
||||
_Scanner<_FwdIter>::
|
||||
_M_eat_escape_posix()
|
||||
{
|
||||
if (_M_current == _M_end)
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
|
||||
auto __c = *_M_current;
|
||||
|
||||
if (_M_spec_char.count(__c))
|
||||
{
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, __c);
|
||||
}
|
||||
// We MUST judge awk before handling backrefs. There's no backref in awk.
|
||||
else if (_M_is_awk())
|
||||
{
|
||||
_M_eat_escape_awk();
|
||||
return;
|
||||
}
|
||||
else if (_M_ctype.is(_CtypeT::digit, __c) && __c != '0')
|
||||
{
|
||||
_M_token = _S_token_backref;
|
||||
_M_value.assign(1, __c);
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
++_M_current;
|
||||
}
|
||||
|
||||
template<typename _FwdIter>
|
||||
void
|
||||
_Scanner<_FwdIter>::
|
||||
_M_eat_escape_awk()
|
||||
{
|
||||
auto __c = *_M_current++;
|
||||
|
||||
if (_M_escape_map.count(__c))
|
||||
{
|
||||
_M_token = _S_token_ord_char;
|
||||
_M_value.assign(1, _M_escape_map.at(__c));
|
||||
}
|
||||
// \ddd for oct representation
|
||||
else if (_M_ctype.is(_CtypeT::digit, __c)
|
||||
&& __c != '8'
|
||||
&& __c != '9')
|
||||
{
|
||||
_M_value.assign(1, __c);
|
||||
for (int __i = 0;
|
||||
__i < 2
|
||||
&& _M_current != _M_end
|
||||
&& _M_ctype.is(_CtypeT::digit, *_M_current)
|
||||
&& *_M_current != '8'
|
||||
&& *_M_current != '9';
|
||||
__i++)
|
||||
_M_value += *_M_current++;
|
||||
_M_token = _S_token_oct_num;
|
||||
return;
|
||||
}
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_escape);
|
||||
}
|
||||
|
||||
// Eats a character class or throwns an exception.
|
||||
// __ch cound be ':', '.' or '=', _M_current is the char after ']' when
|
||||
// returning.
|
||||
template<typename _FwdIter>
|
||||
void
|
||||
_Scanner<_FwdIter>::
|
||||
_M_eat_class(char __ch)
|
||||
{
|
||||
for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
|
||||
_M_value += *_M_current++;
|
||||
if (_M_current == _M_end
|
||||
|| *_M_current++ != __ch
|
||||
|| _M_current == _M_end // skip __ch
|
||||
|| *_M_current++ != ']') // skip ']'
|
||||
if (__ch == ':')
|
||||
__throw_regex_error(regex_constants::error_ctype);
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_collate);
|
||||
}
|
||||
|
||||
#ifdef _GLIBCXX_DEBUG
|
||||
template<typename _FwdIter>
|
||||
std::ostream&
|
||||
_Scanner<_FwdIter>::
|
||||
_M_print(std::ostream& ostr)
|
||||
{
|
||||
switch (_M_token)
|
||||
{
|
||||
case _S_token_anychar:
|
||||
ostr << "any-character\n";
|
||||
break;
|
||||
case _S_token_backref:
|
||||
ostr << "backref\n";
|
||||
break;
|
||||
case _S_token_bracket_begin:
|
||||
ostr << "bracket-begin\n";
|
||||
break;
|
||||
case _S_token_bracket_neg_begin:
|
||||
ostr << "bracket-neg-begin\n";
|
||||
break;
|
||||
case _S_token_bracket_end:
|
||||
ostr << "bracket-end\n";
|
||||
break;
|
||||
case _S_token_char_class_name:
|
||||
ostr << "char-class-name \"" << _M_value << "\"\n";
|
||||
break;
|
||||
case _S_token_closure0:
|
||||
ostr << "closure0\n";
|
||||
break;
|
||||
case _S_token_closure1:
|
||||
ostr << "closure1\n";
|
||||
break;
|
||||
case _S_token_collsymbol:
|
||||
ostr << "collsymbol \"" << _M_value << "\"\n";
|
||||
break;
|
||||
case _S_token_comma:
|
||||
ostr << "comma\n";
|
||||
break;
|
||||
case _S_token_dup_count:
|
||||
ostr << "dup count: " << _M_value << "\n";
|
||||
break;
|
||||
case _S_token_eof:
|
||||
ostr << "EOF\n";
|
||||
break;
|
||||
case _S_token_equiv_class_name:
|
||||
ostr << "equiv-class-name \"" << _M_value << "\"\n";
|
||||
break;
|
||||
case _S_token_interval_begin:
|
||||
ostr << "interval begin\n";
|
||||
break;
|
||||
case _S_token_interval_end:
|
||||
ostr << "interval end\n";
|
||||
break;
|
||||
case _S_token_line_begin:
|
||||
ostr << "line begin\n";
|
||||
break;
|
||||
case _S_token_line_end:
|
||||
ostr << "line end\n";
|
||||
break;
|
||||
case _S_token_opt:
|
||||
ostr << "opt\n";
|
||||
break;
|
||||
case _S_token_or:
|
||||
ostr << "or\n";
|
||||
break;
|
||||
case _S_token_ord_char:
|
||||
ostr << "ordinary character: \"" << _M_value << "\"\n";
|
||||
break;
|
||||
case _S_token_subexpr_begin:
|
||||
ostr << "subexpr begin\n";
|
||||
break;
|
||||
case _S_token_subexpr_no_group_begin:
|
||||
ostr << "no grouping subexpr begin\n";
|
||||
break;
|
||||
case _S_token_subexpr_lookahead_begin:
|
||||
ostr << "lookahead subexpr begin\n";
|
||||
break;
|
||||
case _S_token_subexpr_neg_lookahead_begin:
|
||||
ostr << "neg lookahead subexpr begin\n";
|
||||
break;
|
||||
case _S_token_subexpr_end:
|
||||
ostr << "subexpr end\n";
|
||||
break;
|
||||
case _S_token_unknown:
|
||||
ostr << "-- unknown token --\n";
|
||||
break;
|
||||
case _S_token_oct_num:
|
||||
ostr << "oct number " << _M_value << "\n";
|
||||
break;
|
||||
case _S_token_hex_num:
|
||||
ostr << "hex number " << _M_value << "\n";
|
||||
break;
|
||||
case _S_token_quoted_class:
|
||||
ostr << "quoted class " << "\\" << _M_value << "\n";
|
||||
break;
|
||||
default:
|
||||
_GLIBCXX_DEBUG_ASSERT(false);
|
||||
}
|
||||
return ostr;
|
||||
}
|
||||
#endif
|
||||
|
||||
_GLIBCXX_END_NAMESPACE_VERSION
|
||||
} // namespace __detail
|
||||
} // namespace
|
|
@ -56,6 +56,7 @@
|
|||
|
||||
#include <bits/regex_constants.h>
|
||||
#include <bits/regex_error.h>
|
||||
#include <bits/regex_scanner.h>
|
||||
#include <bits/regex_automaton.h>
|
||||
#include <bits/regex_compiler.h>
|
||||
#include <bits/regex_executor.h>
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-08-26 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.2 regex_match
|
||||
// Tests awk escaping.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
regex("\\[", regex_constants::awk);
|
||||
VERIFY(regex_match("\"", regex("[\\\"]", regex_constants::awk)));
|
||||
VERIFY(regex_match("/", regex("/", regex_constants::awk)));
|
||||
VERIFY(regex_match("\a", regex("\\a", regex_constants::awk)));
|
||||
VERIFY(regex_match("\"", regex("\\\"", regex_constants::awk)));
|
||||
VERIFY(regex_match("5", regex("\\65", regex_constants::awk)));
|
||||
VERIFY(regex_match("53", regex("\\0653", regex_constants::awk)));
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-08-26 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.2 regex_match
|
||||
// Tests basic (BRE) empty range.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
#define FAIL(s) \
|
||||
try\
|
||||
{\
|
||||
regex re(s, regex_constants::basic);\
|
||||
VERIFY(false);\
|
||||
}\
|
||||
catch (...)\
|
||||
{\
|
||||
VERIFY(true);\
|
||||
}
|
||||
FAIL("[]");
|
||||
FAIL("[^]");
|
||||
VERIFY(regex_match("]", regex("[]]", regex_constants::basic)));
|
||||
VERIFY(!regex_match("]", regex("[^]]", regex_constants::basic)));
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-08-26 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.2 regex_match
|
||||
// Tests ECMAScript \x and \u.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
VERIFY(regex_match(":", regex("\\x3a")));
|
||||
VERIFY(regex_match(L"\u1234", wregex(L"\\u1234")));
|
||||
try
|
||||
{
|
||||
regex("\\u400x");
|
||||
VERIFY(false);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
VERIFY(true);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-08-26 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.2 regex_match
|
||||
// Tests ECMAScript empty range.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
VERIFY(!regex_match("x", regex("[]")));
|
||||
VERIFY(regex_match("x", regex("[^]")));
|
||||
VERIFY(!regex_match("]", regex("[]]")));
|
||||
VERIFY(!regex_match("]", regex("[^]]")));
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
// { dg-options "-std=gnu++11" }
|
||||
|
||||
//
|
||||
// 2013-08-26 Tim Shen <timshen91@gmail.com>
|
||||
//
|
||||
// Copyright (C) 2013 Free Software Foundation, Inc.
|
||||
//
|
||||
// This file is part of the GNU ISO C++ Library. This library is free
|
||||
// software; you can redistribute it and/or modify it under the
|
||||
// terms of the GNU General Public License as published by the
|
||||
// Free Software Foundation; either version 3, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License along
|
||||
// with this library; see the file COPYING3. If not see
|
||||
// <http://www.gnu.org/licenses/>.
|
||||
|
||||
// 28.11.3 regex_search
|
||||
// Tests ECMAScript regex_search with an empty pattern against an empty C-string target.
|
||||
|
||||
#include <regex>
|
||||
#include <testsuite_hooks.h>
|
||||
|
||||
void
|
||||
test01()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
VERIFY(std::regex_search("", std::regex("")));
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
test01();
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue