Boost C++ Libraries

...one of the most highly regarded and expertly designed C++ library projects in the world. Herb Sutter and Andrei Alexandrescu, C++ Coding Standards

This is the documentation for an old version of Boost. Click here to view this page for the latest version.

boost/format/parsing.hpp

// ----------------------------------------------------------------------------
// parsing.hpp :  implementation of the parsing member functions
//                      ( parse, parse_printf_directive)
// ----------------------------------------------------------------------------

//  Copyright Samuel Krempp 2003. Use, modification, and distribution are
//  subject to the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// see http://www.boost.org/libs/format for library home page

// ----------------------------------------------------------------------------

#ifndef BOOST_FORMAT_PARSING_HPP
#define BOOST_FORMAT_PARSING_HPP


#include <limits>

#include <boost/format/format_class.hpp>
#include <boost/format/exceptions.hpp>
#include <boost/throw_exception.hpp>
#include <boost/assert.hpp>
#include <boost/config.hpp>
#include <boost/core/ignore_unused.hpp>

namespace boost {
namespace io {
namespace detail {

#if !defined(BOOST_NO_STD_LOCALE)
    // A locale facet is available: its member functions are callable on a
    // const object, so the argument is handed back unchanged.
    template<class T>
    const T& const_or_not(const T& x) {
        return x;
    }
#else
    // Without std::locale a stream object stands in for the facet, and its
    // narrow / widen member functions are not const : strip the constness.
    template<class T>
    T& const_or_not(const T& x) {
        T& writable = const_cast<T&> (x);
        return writable;
    }
#endif

    // Converts character c to a plain char through fac.narrow(), yielding
    // deflt when the facet has no narrow equivalent for c.
    template<class Ch, class Facet> inline
    char wrap_narrow(const Facet& fac, Ch c, char deflt) {
        const char narrowed = const_or_not(fac).narrow(c, deflt);
        return narrowed;
    }

    // Tells whether c is a digit. The facet's classification is used when
    // available, otherwise the C library's isdigit is the fallback.
    template<class Ch, class Facet> inline
    bool wrap_isdigit(const Facet& fac, Ch c) {
#if defined( BOOST_NO_LOCALE_ISDIGIT )
        // the facet cannot classify characters on this platform
        ignore_unused(fac);
        using namespace std;
        return isdigit(c) != 0;
#else
        return fac.is(std::ctype<Ch>::digit, c);
#endif
    }

    // Skips the leading run of digits in [beg, end).
    // Returns an iterator to the first non-digit character, or end when the
    // whole range consists of digits.
    template<class Iter, class Facet>
    Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end) {
        using namespace std;
        while(beg != end && wrap_isdigit(fac, *beg))
            ++beg;
        return beg;
    }


    // Input : [start, last) iterators range and
    //          a Facet to use its narrow / digit-classification functions
    // Effects : reads the leading run of digits and stores the decimal value
    //          they spell into res (0 when the range does not start with a
    //          digit). A value too large for Res is clamped to
    //          numeric_limits<Res>::max() instead of overflowing, which would
    //          be undefined behaviour for the signed types used by callers.
    // Returns : an iterator to the first character that was not consumed
    //          (the first non-digit, or last).
    template<class Res, class Iter, class Facet>
    Iter str2int (const Iter & start, const Iter & last, Res & res,
                 const Facet& fac)
    {
        using namespace std;
        // parenthesised to stay safe against a max() macro
        const Res res_max = (std::numeric_limits<Res>::max)();
        Iter it;
        res=0;
        for(it=start; it != last && wrap_isdigit(fac, *it); ++it ) {
            char cur_ch = wrap_narrow(fac, *it, 0); // cant fail.
            // 22.2.1.1.2.13 of the C++ standard : digits narrow to '0'..'9'
            const Res digit = static_cast<Res>(cur_ch - '0');
            if(res > res_max / 10) {
                res = res_max;          // res*10 would overflow : saturate
            }
            else {
                res *= 10;
                if(digit > res_max - res)
                    res = res_max;      // res+digit would overflow : saturate
                else
                    res += digit;
            }
        }
        return it;
    }

    // Single point of error reporting for parse_printf_directive.
    // Throws io::bad_format_string(pos, size) when the caller's exception
    // mask has bad_format_string_bit set; otherwise it is a no-op.
    inline void maybe_throw_exception(unsigned char exceptions,
                                      std::size_t pos, std::size_t size)
    {
        const bool throwing_enabled =
            (exceptions & io::bad_format_string_bit) != 0;
        if(!throwing_enabled)
            return;
        boost::throw_exception(io::bad_format_string(pos, size) );
    }


    // Parses one directive of a format string and records it in *fpar.
    //
    // Input:
    //    start, last : the text of the directive, beginning just after the
    //                  argument mark (an empty range means the mark was the
    //                  last character of the format string)
    //    fpar        : the format_item that receives the parsed parameters
    //    fac         : only used for its widen / narrow / digit
    //                  classification member functions
    //    offset      : added to the distance walked from the initial 'start'
    //                  to build the position reported in errors
    //    exceptions  : bitset telling whether to throw exceptions on errors
    //                  (forwarded to maybe_throw_exception)
    // Returns:
    //  true if parse succeeded (ignore some errors if exceptions disabled)
    //  false if it failed so bad that the directive should be printed verbatim
    //        (only happens when the text ends prematurely)
    // Effects:
    //  start is incremented so that *start is the first char after
    //     this directive
    //  *fpar is set with the parameters read in the directive
    // Throws:
    //  io::bad_format_string, through maybe_throw_exception, when an error is
    //  met and 'exceptions' has bad_format_string_bit set.
    template<class Ch, class Tr, class Alloc, class Iter, class Facet>
    bool parse_printf_directive(Iter & start, const Iter& last,
                                detail::format_item<Ch, Tr, Alloc> * fpar,
                                const Facet& fac,
                                std::size_t offset, unsigned char exceptions)
    {
        typedef typename basic_format<Ch, Tr, Alloc>::format_item_t format_item_t;

        fpar->argN_ = format_item_t::argN_no_posit;  // if no positional-directive
        bool precision_set = false;  // true once an explicit numeric precision was read
        bool in_brackets=false;      // true for the bracketed form : %|...|
        Iter start0 = start;         // remembered to compute error positions
        // size reported in errors : length of [start0, last) plus the offset
        std::size_t fstring_size = last-start0+offset;
        // state of a Microsoft size prefix being read :
        //   'I' after "I", '3' after "I3", '6' after "I6", 0 otherwise
        char mssiz = 0;

        if(start>= last) { // empty directive : this is a trailing %
                maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
                return false;
        }

        // an opening '|' selects the bracketed form, closed by a second '|'
        if(*start== const_or_not(fac).widen( '|')) {
            in_brackets=true;
            if( ++start >= last ) {
                // nothing follows the opening bracket
                maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
                return false;
            }
        }

        // the flag '0' would be picked as a digit for argument order, but here it's a flag :
        if(*start== const_or_not(fac).widen( '0'))
            goto parse_flags;

        // handle argument order (%2$d)  or possibly width specification: %2d
        if(wrap_isdigit(fac, *start)) {
            int n;
            start = str2int(start, last, n, fac);
            if( start >= last ) {
                // the text ends right after the number
                maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
                return false;
            }

            // %N% case : this is already the end of the directive
            if( *start ==  const_or_not(fac).widen( '%') ) {
                fpar->argN_ = n-1;  // directives count from 1, argN_ from 0
                ++start;
                // "%|N%" is an error, but the directive is still accepted
                if( in_brackets)
                    maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
                return true;
            }

            if ( *start== const_or_not(fac).widen( '$') ) {
                // %N$... : positional directive, flags may follow
                fpar->argN_ = n-1;
                ++start;
            }
            else {
                // non-positional directive : the number read was the width,
                // so flags and width are already behind us
                fpar->fmtstate_.width_ = n;
                fpar->argN_  = format_item_t::argN_no_posit;
                goto parse_precision;
            }
        }

      parse_flags:
        // handle flags
        while (start != last) { // as long as char is one of ' + - = _ # 0 or ' '
            switch ( wrap_narrow(fac, *start, 0)) {
                case '\'':
                    break; // no effect yet. (painful to implement)
                case '-':
                    fpar->fmtstate_.flags_ |= std::ios_base::left;
                    break;
                case '=':
                    fpar->pad_scheme_ |= format_item_t::centered;
                    break;
                case '_':
                    fpar->fmtstate_.flags_ |= std::ios_base::internal;
                    break;
                case ' ':
                    fpar->pad_scheme_ |= format_item_t::spacepad;
                    break;
                case '+':
                    fpar->fmtstate_.flags_ |= std::ios_base::showpos;
                    break;
                case '0':
                    fpar->pad_scheme_ |= format_item_t::zeropad;
                    // need to know alignment before really setting flags,
                    // so just add 'zeropad' flag for now, it will be processed later.
                    break;
                case '#':
                    fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase;
                    break;
                default:
                    // first character that is not a flag
                    goto parse_width;
            }
            ++start;
        } // loop on flag.

        // only reached when the flags ran up to the end of the text
        if( start>=last) {
            maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
            return true;
        }

      // first skip 'asterisk fields' : * or num (length)
      // (a '*' width is consumed but not stored)
      parse_width:
        if(*start == const_or_not(fac).widen( '*') )
            ++start;
        else if(start!=last && wrap_isdigit(fac, *start))
            start = str2int(start, last, fpar->fmtstate_.width_, fac);

      parse_precision:
        if( start>= last) {
            // the text ends before any conversion character
            maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
            return true;
        }
        // handle precision spec
        if (*start== const_or_not(fac).widen( '.')) {
            ++start;
            if(start != last && *start == const_or_not(fac).widen( '*') )
                ++start;  // ".*" : consumed, precision left untouched
            else if(start != last && wrap_isdigit(fac, *start)) {
                start = str2int(start, last, fpar->fmtstate_.precision_, fac);
                precision_set = true;
            }
            else
                fpar->fmtstate_.precision_ =0;  // a lone '.' means precision 0
        }

      // argument type modifiers
        while (start != last) {
            switch (wrap_narrow(fac, *start, 0)) {
                case 'h':
                case 'l':
                case 'j':
                case 'z':
                case 'L':
                    // boost::format ignores argument type modifiers as it relies on
                    // the type of the argument fed into it by operator %
                    break;

                // Note that the ptrdiff_t argument type 't' from C++11 is not honored
                // because it was already in use as the tabulation specifier in boost::format
                // case 't':

                // Microsoft extensions:
                // https://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx

                case 'w':
                    break;  // accepted and ignored, like the modifiers above
                case 'I':
                    mssiz = 'I';  // may stand alone or start "I32" / "I64"
                    break;
                case '3':
                    // '3' is only valid directly after 'I'
                    if (mssiz != 'I') {
                        maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
                        return true;
                    }
                    mssiz = '3';
                    break;
                case '2':
                    // '2' is only valid directly after "I3" ; completes "I32"
                    if (mssiz != '3') {
                        maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
                        return true;
                    }
                    mssiz = 0x00;
                    break;
                case '6':
                    // '6' is only valid directly after 'I'
                    if (mssiz != 'I') {
                        maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
                        return true;
                    }
                    mssiz = '6';
                    break;
                case '4':
                    // '4' is only valid directly after "I6" ; completes "I64"
                    if (mssiz != '6') {
                        maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
                        return true;
                    }
                    mssiz = 0x00;
                    break;
                default:
                    // a bare 'I' is a complete modifier : clear the pending state.
                    // A pending '3' or '6' is kept and rejected just below.
                    if (mssiz && mssiz == 'I') {
                        mssiz = 0;
                    }
                    goto parse_conversion_specification;
            }
            ++start;
        } // loop on argument type modifiers to pick up 'hh', 'll', and the more complex microsoft ones

      parse_conversion_specification:
        // error if the text ended, or if "I3" / "I6" was left unfinished
        if (start >= last || mssiz) {
            maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
            return true;
        }

        // bracketed directive closed without a conversion character : %|...|
        if( in_brackets && *start== const_or_not(fac).widen( '|') ) {
            ++start;
            return true;
        }

        // The default flags are "dec" and "skipws"
        // so if changing the base, need to unset basefield first

        switch (wrap_narrow(fac, *start, 0))
        {
            // Boolean
            case 'b':
                fpar->fmtstate_.flags_ |= std::ios_base::boolalpha;
                break;

            // Decimal
            case 'u':
            case 'd':
            case 'i':
                // Defaults are sufficient
                break;

            // Hex
            case 'X':
                fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
                BOOST_FALLTHROUGH;
            case 'x':
            case 'p': // pointer => set hex.
                fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
                fpar->fmtstate_.flags_ |= std::ios_base::hex;
                break;

            // Octal
            case 'o':
                fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
                fpar->fmtstate_.flags_ |= std::ios_base::oct;
                break;

            // Floating
            case 'A':
                fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
                BOOST_FALLTHROUGH;
            case 'a':
                // both fixed and scientific are set together here
                // (presumably to request hexadecimal floating-point output)
                fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
                fpar->fmtstate_.flags_ |= std::ios_base::fixed;
                fpar->fmtstate_.flags_ |= std::ios_base::scientific;
                break;
            case 'E':
                fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
                BOOST_FALLTHROUGH;
            case 'e':
                fpar->fmtstate_.flags_ |= std::ios_base::scientific;
                break;
            case 'F':
                fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
                BOOST_FALLTHROUGH;
            case 'f':
                fpar->fmtstate_.flags_ |= std::ios_base::fixed;
                break;
            case 'G':
                fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
                BOOST_FALLTHROUGH;
            case 'g':
                // default flags are correct here
                break;

            // Tabulation (a boost::format extension)
            case 'T':
                // %T is followed by the fill character to use
                ++start;
                if( start >= last) {
                    maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
                    return false;
                } else {
                    fpar->fmtstate_.fill_ = *start;
                }
                fpar->pad_scheme_ |= format_item_t::tabulation;
                fpar->argN_ = format_item_t::argN_tabulation;
                break;
            case 't':
                // same as %T with a space as fill character
                fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' ');
                fpar->pad_scheme_ |= format_item_t::tabulation;
                fpar->argN_ = format_item_t::argN_tabulation;
                break;

            // Character
            case 'C':
            case 'c':
                fpar->truncate_ = 1;  // keep a single character
                break;

            // String
            case 'S':
            case 's':
                if(precision_set) // handle truncation manually, with own parameter.
                    fpar->truncate_ = fpar->fmtstate_.precision_;
                fpar->fmtstate_.precision_ = 6; // default stream precision.
                break;

            // %n is insecure and ignored by boost::format
            case 'n' :
                fpar->argN_ = format_item_t::argN_ignored;
                break;

            default:
                // unknown conversion character : reported if exceptions are
                // enabled, otherwise consumed like a valid one
                maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
        }
        ++start;  // step over the conversion character (or the %T fill character)

        if( in_brackets ) {
            // a bracketed directive must be closed by '|'
            if( start != last && *start== const_or_not(fac).widen( '|') ) {
                ++start;
                return true;
            }
            else  maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
        }
        return true;
    }
    // -end parse_printf_directive()

    // Quick pre-scan of the format string that counts argument marks
    // (arg_mark, i.e. '%').
    // Returns : an upper bound on the number of format items in buf.
    // Throws  : io::bad_format_string when buf ends with a lone mark and
    //           'exceptions' has bad_format_string_bit set.
    template<class String, class Facet>
    int upper_bound_from_fstring(const String& buf,
                                 const typename String::value_type arg_mark,
                                 const Facet& fac,
                                 unsigned char exceptions)
    {
        using namespace boost::io;
        typedef typename String::size_type size_type;

        int item_count = 0;
        size_type pos = 0;
        for(;;) {
            pos = buf.find(arg_mark, pos);
            if( pos == String::npos )
                break;

            if( pos+1 >= buf.size() ) {
                // the mark is the very last character : must not end in ".. %"
                if(exceptions & bad_format_string_bit)
                    boost::throw_exception(bad_format_string(pos, buf.size() ));
                else {
                  ++item_count;
                  break;
                }
            }

            if( buf[pos+1] == buf[pos] ) {
                // escaped "%%" : not a directive, jump over both marks
                pos += 2;
                continue;
            }

            // step over the mark and the digits that may follow it
            ++pos;
            pos = detail::wrap_scan_notdigit(fac, buf.begin()+pos, buf.end()) - buf.begin();

            // in case of %N% directives, dont count it double (wastes allocations..) :
            if( pos < buf.size() && buf[pos] == arg_mark )
                ++pos;

            ++item_count;
        }
        return item_count;
    }
    // Appends the characters of src located at indices [beg, end) to dst.
    template<class String> inline
    void append_string(String& dst, const String& src,
                       const typename String::size_type beg,
                       const typename String::size_type end) {
        typename String::const_iterator first = src.begin()+beg;
        typename String::const_iterator past_last = src.begin()+end;
        dst.append(first, past_last);
    }

} // detail namespace
} // io namespace



// -----------------------------------------------
//  format :: parse(..)
//
//  Splits the format string 'buf' into literal pieces and directives :
//  the text before the first directive goes to prefix_, each directive is
//  parsed into an element of items_, and the text that follows a directive
//  goes to that element's appendix_. num_args_ and style_ are then updated.
//  Returns *this.

    template<class Ch, class Tr, class Alloc>
    basic_format<Ch, Tr, Alloc>& basic_format<Ch, Tr, Alloc>::
    parse (const string_type& buf) {
        using namespace std;
        typedef typename string_type::size_type str_pos_t;

#if defined(BOOST_NO_STD_LOCALE)
        //has widen and narrow even on compilers without locale
        io::basic_oaltstringstream<Ch, Tr, Alloc> fac;
#else
        const std::ctype<Ch> & fac = BOOST_USE_FACET( std::ctype<Ch>, getloc());
#endif

        const Ch arg_mark = io::detail::const_or_not(fac).widen( '%');

        // A: get an upper bound on the number of items and prepare storage for them
        int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions());
        make_or_reuse_data(num_items);

        // B: the real parsing of the format string
        bool all_positional = true;    // no non-positional directive met so far
        bool has_tabulation = false;   // at least one %t / %T directive met
        int highest_argN = -1;         // largest positional argument index met
        int slot = 0;                  // element of items_ the next directive goes to
        str_pos_t text_begin = 0;      // start of the literal text not stored yet
        str_pos_t scan = 0;            // where the search for the next mark resumes
        num_items = 0;                 // from now on : number of items really kept

        for(;;) {
            scan = buf.find(arg_mark, scan);
            if( scan == string_type::npos )
                break;

            // literal text is appended to the prefix until a first item exists,
            // then to the appendix of the latest item
            string_type & literal = (slot==0) ? prefix_ : items_[slot-1].appendix_;

            if( buf[scan+1] == buf[scan] ) {
                // escaped mark, '%%' : store the text up to and including one mark
                io::detail::append_string(literal, buf, text_begin, scan+1);
                scan += 2;
                text_begin = scan;
                continue;
            }
            BOOST_ASSERT(  static_cast<unsigned int>(slot) < items_.size() || slot==0);

            if( scan != text_begin ) {
                io::detail::append_string(literal, buf, text_begin, scan);
                text_begin = scan;
            }

            ++scan;  // step over the mark
            typename string_type::const_iterator cursor = buf.begin()+scan;
            const bool parsed = io::detail::parse_printf_directive(
                cursor, buf.end(), &items_[slot], fac, scan, exceptions());
            scan = cursor - buf.begin();
            if( !parsed )
                continue;   // text_begin is left alone : the directive will be printed verbatim
            text_begin = scan;

            format_item_t & item = items_[slot];
            item.compute_states(); // process complex options, like zeropad, into params

            const int argN = item.argN_;
            if( argN == format_item_t::argN_ignored )
                continue;   // the directive is dropped and its slot gets reused

            if( argN == format_item_t::argN_no_posit ) {
                all_positional = false;
            }
            else if( argN == format_item_t::argN_tabulation ) {
                has_tabulation = true;
            }
            else if( argN > highest_argN ) {
                highest_argN = argN;
            }
            ++num_items;
            ++slot;
        } // loop on %'s
        BOOST_ASSERT(slot == num_items);

        // store the final piece of string
        string_type & tail = (slot==0) ? prefix_ : items_[slot-1].appendix_;
        io::detail::append_string(tail, buf, text_begin, buf.size());

        if( !all_positional ) {
            // dont mix positional with non-positional directives
            if( highest_argN >= 0 && (exceptions() & io::bad_format_string_bit) )
                boost::throw_exception(
                    io::bad_format_string(static_cast<std::size_t>(highest_argN), 0));
            // when not throwing : positional arguments are processed as non-positional

            // set things like it would have been with positional directives :
            int next_argN = 0;
            for(int i=0; i< num_items; ++i) {
                if( items_[i].argN_ != format_item_t::argN_no_posit )
                    continue;
                items_[i].argN_ = next_argN;
                ++next_argN;
            }
            highest_argN = next_argN-1;
        }

        // C: set some member data :
        items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) );

        if( has_tabulation )
            style_ |= special_needs;
        num_args_ = highest_argN + 1;
        if( all_positional )
            style_ |=  ordered;
        else
            style_ &= ~ordered;
        return *this;
    }

} // namespace boost


#endif //  BOOST_FORMAT_PARSING_HPP