boost/spirit/home/lex/tokenize_and_parse.hpp
// Copyright (c) 2001-2011 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM)
#define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM
#if defined(_MSC_VER)
#pragma once
#endif
#include <boost/spirit/home/qi/skip_over.hpp>
#include <boost/spirit/home/qi/parse.hpp>
#include <boost/spirit/home/qi/nonterminal/grammar.hpp>
#include <boost/spirit/home/support/unused.hpp>
#include <boost/spirit/home/lex/lexer.hpp>
#include <boost/mpl/assert.hpp>
// Forward declaration of boost::phoenix::actor. It allows the
// detail::tokenize_callback() overload further below to name
// phoenix::actor<> in its signature without this header directly including
// a Phoenix header.
namespace boost { namespace phoenix
{
template <typename Expr>
struct actor;
}}
namespace boost { namespace spirit { namespace lex
{
///////////////////////////////////////////////////////////////////////////
// Import the skip_flag enumerator type from the Qi namespace so that it can
// be named without qualification inside namespace lex (it is the type of
// the post_skip parameter of tokenize_and_phrase_parse() below).
using qi::skip_flag;
///////////////////////////////////////////////////////////////////////////
//
// The tokenize_and_parse() function is one of the main Spirit API
// functions. It simplifies using a lexer as the underlying token source
// while parsing a given input sequence.
//
// The function takes a pair of iterators spanning the underlying input
// stream to parse, the lexer object (built from the token definitions)
// and a parser object (built from the parser grammar definition).
//
// The second version of this function additionally takes an attribute to
// be used as the top level data structure instance the parser should use
// to store the recognized input to.
//
// The function returns true if the parsing succeeded (the given input
// sequence has been successfully matched by the given grammar).
//
// first, last: The pair of iterators spanning the underlying input
// sequence to parse. These iterators must at least
// conform to the requirements of the std::input_iterator
// category.
// On exit the iterator 'first' will be updated to the
// position right after the last successfully matched
// token.
// lex: The lexer object (encoding the token definitions) to be
// used to convert the input sequence into a sequence of
// tokens. This token sequence is passed to the parsing
// process. The Lexer type must conform to the
// lexer interface described in the corresponding section
// of the documentation.
// xpr: The grammar object (encoding the parser grammar) to be
// used to match the token sequence generated by the lex
// object instance. The ParserExpr type must conform to
// the grammar interface described in the corresponding
// section of the documentation.
// attr: The top level attribute passed to the parser. It will
// be populated during the parsing of the input sequence.
// On exit it will hold the 'parser result' corresponding
// to the matched input sequence.
//
///////////////////////////////////////////////////////////////////////////
// Tokenizes [first, last) with 'lex' and matches the resulting token
// sequence against the Qi expression 'xpr'. No skipper and no attribute are
// used (all three trailing parse() arguments are 'unused').
//
// 'first' is taken by non-const reference and handed to lex.begin().
// Returns whatever the compiled parser's parse() call returns.
template <typename Iterator, typename Lexer, typename ParserExpr>
inline bool
tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex,
    ParserExpr const& xpr)
{
    // Reject invalid expressions as early as possible: an
    // error_invalid_expression diagnostic issued from here means that 'xpr'
    // is not a valid Spirit.Qi expression.
    BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);

    typedef typename Lexer::iterator_type token_iterator;

    // The parser consumes tokens, not characters, so it is driven through
    // the lexer's own iterator type.
    token_iterator token_pos = lex.begin(first, last);

    bool const matched = compile<qi::domain>(xpr).parse(
        token_pos, lex.end(), unused, unused, unused);
    return matched;
}
///////////////////////////////////////////////////////////////////////////
// Same as the overload without an attribute, except that 'attr' is passed
// to the parser as its top level attribute. No skipper is used.
//
// 'first' is taken by non-const reference and handed to lex.begin().
// Returns whatever the compiled parser's parse() call returns.
template <typename Iterator, typename Lexer, typename ParserExpr
  , typename Attribute>
inline bool
tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex
  , ParserExpr const& xpr, Attribute& attr)
{
    // Reject invalid expressions as early as possible: an
    // error_invalid_expression diagnostic issued from here means that 'xpr'
    // is not a valid Spirit.Qi expression.
    BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);

    typedef typename Lexer::iterator_type token_iterator;

    // The parser consumes tokens, not characters, so it is driven through
    // the lexer's own iterator type.
    token_iterator token_pos = lex.begin(first, last);

    bool const matched = compile<qi::domain>(xpr).parse(
        token_pos, lex.end(), unused, unused, attr);
    return matched;
}
///////////////////////////////////////////////////////////////////////////
//
// The tokenize_and_phrase_parse() function is one of the main Spirit API
// functions. It simplifies using a lexer as the underlying token source
// while phrase parsing a given input sequence.
//
// The function takes a pair of iterators spanning the underlying input
// stream to parse, the lexer object (built from the token definitions)
// and a parser object (built from the parser grammar definition). The
// additional skipper parameter will be used as the skip parser during
// the parsing process.
//
// The second version of this function additionally takes an attribute to
// be used as the top level data structure instance the parser should use
// to store the recognized input to.
//
// The function returns true if the parsing succeeded (the given input
// sequence has been successfully matched by the given grammar).
//
// first, last: The pair of iterators spanning the underlying input
// sequence to parse. These iterators must at least
// conform to the requirements of the std::input_iterator
// category.
// On exit the iterator 'first' will be updated to the
// position right after the last successfully matched
// token.
// lex: The lexer object (encoding the token definitions) to be
// used to convert the input sequence into a sequence of
// tokens. This token sequence is passed to the parsing
// process. The Lexer type must conform to the
// lexer interface described in the corresponding section
// of the documentation.
// xpr: The grammar object (encoding the parser grammar) to be
// used to match the token sequence generated by the lex
// object instance. The ParserExpr type must conform to
// the grammar interface described in the corresponding
// section of the documentation.
// skipper: The skip parser to be used while parsing the given
// input sequence. Note, the skip parser will have to
// act on the same token sequence as the main parser
// 'xpr'.
// post_skip: The post_skip flag controls whether the function will
// invoke an additional post skip after the main parser
// returned.
// attr: The top level attribute passed to the parser. It will
// be populated during the parsing of the input sequence.
// On exit it will hold the 'parser result' corresponding
// to the matched input sequence.
//
///////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer, typename ParserExpr
, typename Skipper>
inline bool
tokenize_and_phrase_parse(Iterator& first, Iterator last
, Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
, BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip)
{
// Report invalid expression error as early as possible.
// If you got an error_invalid_expression error message here,
// then the expression (expr) is not a valid spirit qi expression.
BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
typedef
typename spirit::result_of::compile<qi::domain, Skipper>::type
skipper_type;
skipper_type const skipper_ = compile<qi::domain>(skipper);
typename Lexer::iterator_type iter = lex.begin(first, last);
typename Lexer::iterator_type end = lex.end();
if (!compile<qi::domain>(xpr).parse(
iter, end, unused, skipper_, unused))
return false;
// do a final post-skip
if (post_skip == skip_flag::postskip)
qi::skip_over(iter, end, skipper_);
return true;
}
template <typename Iterator, typename Lexer, typename ParserExpr
, typename Skipper, typename Attribute>
inline bool
tokenize_and_phrase_parse(Iterator& first, Iterator last
, Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
, BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr)
{
// Report invalid expression error as early as possible.
// If you got an error_invalid_expression error message here,
// then the expression (expr) is not a valid spirit qi expression.
BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
typedef
typename spirit::result_of::compile<qi::domain, Skipper>::type
skipper_type;
skipper_type const skipper_ = compile<qi::domain>(skipper);
typename Lexer::iterator_type iter = lex.begin(first, last);
typename Lexer::iterator_type end = lex.end();
if (!compile<qi::domain>(xpr).parse(
iter, end, unused, skipper_, attr))
return false;
// do a final post-skip
if (post_skip == skip_flag::postskip)
qi::skip_over(iter, end, skipper_);
return true;
}
///////////////////////////////////////////////////////////////////////////
// Convenience overload: phrase-parses with an attribute and always requests
// the final post-skip (skip_flag::postskip).
template <typename Iterator, typename Lexer, typename ParserExpr
  , typename Skipper, typename Attribute>
inline bool
tokenize_and_phrase_parse(Iterator& first, Iterator last
  , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
  , Attribute& attr)
{
    // Delegate to the overload that takes the post-skip flag explicitly.
    bool const matched = tokenize_and_phrase_parse(
        first, last, lex, xpr, skipper, skip_flag::postskip, attr);
    return matched;
}
///////////////////////////////////////////////////////////////////////////
//
// The tokenize() function is one of the main Spirit API functions. It
// simplifies using a lexer to tokenize a given input sequence. Its main
// purpose is to use the lexer to tokenize all the input.
//
// The second version below discards all generated tokens afterwards.
// This is useful whenever all the needed functionality has been
// implemented directly inside the lexer semantic actions, which are being
// executed while the tokens are matched.
//
// The function takes a pair of iterators spanning the underlying input
// stream to scan, the lexer object (built from the token definitions),
// and a (optional) functor being called for each of the generated tokens.
//
// The function returns true if the scanning of the input succeeded (the
// given input sequence has been successfully matched by the given token
// definitions).
//
// first, last: The pair of iterators spanning the underlying input
// sequence to parse. These iterators must at least
// conform to the requirements of the std::input_iterator
// category.
// On exit the iterator 'first' will be updated to the
// position right after the last successfully matched
// token.
// lex: The lexer object (encoding the token definitions) to be
// used to convert the input sequence into a sequence of
// tokens. The Lexer type must conform to the
// lexer interface described in the corresponding section
// of the documentation.
// f: A functor (callable object) taking a single argument of
// the token type. If it returns a bool, returning false
// cancels the tokenization (tokenize() then returns false).
// Phoenix actors and functions returning void never cancel.
// initial_state: The name of the lexer state in which matching should
// start. The default value is zero, causing the lexer to
// start in its 'INITIAL' state.
//
///////////////////////////////////////////////////////////////////////////
namespace detail
{
// Generic case: 'f' is any callable accepting the token. Its result,
// converted to bool, is returned to the caller.
template <typename Token, typename F>
bool tokenize_callback(Token const& t, F f)
{
    bool const keep_going = f(t);
    return keep_going;
}
// Phoenix actor case: the actor is invoked with the token, its result is
// discarded, and true is always returned.
template <typename Token, typename Eval>
bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f)
{
    static_cast<void>(f(t));
    return true;
}
// Plain function returning void: the function is called with the token and
// true is always returned.
template <typename Token>
bool tokenize_callback(Token const& t, void (*f)(Token const&))
{
    (*f)(t);
    return true;
}
// Plain function returning bool: the function is called with the token and
// its result is returned unchanged.
template <typename Token>
bool tokenize_callback(Token const& t, bool (*f)(Token const&))
{
    bool const keep_going = (*f)(t);
    return keep_going;
}
}
// Tokenizes [first, last) with 'lex', starting in the lexer state named by
// 'initial_state', and passes every token to 'f' via
// detail::tokenize_callback().
//
// Returns false as soon as the callback reports false. Otherwise returns
// true if the token iterator reached lex.end(), and false if iteration
// stopped at a token for which token_is_valid() is false.
template <typename Iterator, typename Lexer, typename F>
inline bool
tokenize(Iterator& first, Iterator last, Lexer const& lex, F f
  , typename Lexer::char_type const* initial_state = 0)
{
    typedef typename Lexer::iterator_type token_iterator;

    token_iterator cur = lex.begin(first, last, initial_state);
    token_iterator stop = lex.end();

    while (cur != stop && token_is_valid(*cur))
    {
        // The callback may cancel the tokenization.
        if (!detail::tokenize_callback(*cur, f))
            return false;
        ++cur;
    }

    // Success only if the loop ran to the end of the token sequence.
    if (cur == stop)
        return true;
    return false;
}
///////////////////////////////////////////////////////////////////////////
// Tokenizes [first, last) with 'lex', starting in the lexer state named by
// 'initial_state', and discards every token produced.
//
// Returns true if the token iterator reached lex.end(), and false if
// iteration stopped at a token for which token_is_valid() is false.
template <typename Iterator, typename Lexer>
inline bool
tokenize(Iterator& first, Iterator last, Lexer const& lex
  , typename Lexer::char_type const* initial_state = 0)
{
    typedef typename Lexer::iterator_type token_iterator;

    token_iterator cur = lex.begin(first, last, initial_state);
    token_iterator stop = lex.end();

    for (/**/; cur != stop && token_is_valid(*cur); ++cur)
    {
        // Nothing to do per token: advancing the iterator is what drives
        // the lexer.
    }

    // Success only if the loop ran to the end of the token sequence.
    if (cur == stop)
        return true;
    return false;
}
}}}
#endif