Boost.Locale
types.hpp
1 //
2 // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See
5 // accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
9 #define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
10 
11 #include <boost/locale/config.hpp>
12 #include <boost/cstdint.hpp>
13 #include <boost/assert.hpp>
14 #ifdef BOOST_MSVC
15 # pragma warning(push)
16 # pragma warning(disable : 4275 4251 4231 4660)
17 #endif
18 
19 
20 namespace boost {
21 
22  namespace locale {
23 
27  namespace boundary {
35 
41  word,
44  };
45 
/// Flags describing the type of the word, line or sentence boundary found
/// during boundary analysis; stored as a 32-bit unsigned bit set.
51  typedef uint32_t rule_type;
52 
57  static const rule_type
58  word_none = 0x0000F,
59  word_number = 0x000F0,
60  word_letter = 0x00F00,
61  word_kana = 0x0F000,
62  word_ideo = 0xF0000,
63  word_any = 0xFFFF0,
64  word_letters = 0xFFF00,
65  word_kana_ideo = 0xFF000,
66  word_mask = 0xFFFFF;
67 
73  static const rule_type
74  line_soft = 0x0F,
75  line_hard = 0xF0,
76  line_any = 0xFF,
77  line_mask = 0xFF;
78 
80 
86  static const rule_type
87  sentence_term = 0x0F,
88  sentence_sep = 0xF0,
90  sentence_any = 0xFF,
92  sentence_mask = 0xFF;
93 
95 
102  static const rule_type
105 
107 
112  {
113  switch(t) {
114  case character: return character_mask;
115  case word: return word_mask;
116  case sentence: return sentence_mask;
117  case line: return line_mask;
118  default: return 0;
119  }
120  }
121 
125 
126  } // boundary
127  } // locale
128 } // boost
129 
130 
131 #ifdef BOOST_MSVC
132 #pragma warning(pop)
133 #endif
134 
135 #endif
136 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
boundary_type
Definition: types.hpp:39
static const rule_type character_any
Not in use, just for consistency.
Definition: types.hpp:103
Analyse the text for positions suitable for line breaks.
Definition: types.hpp:43
static const rule_type sentence_sep
The sentence does not contain a terminator like ".", "!" but ended with a hard separator like CR...
Definition: types.hpp:89
static const rule_type word_letter
Word that contains letters, excluding kana and ideographic characters.
Definition: types.hpp:60
static const rule_type word_any
Any word including numbers; 0 is a special flag, equivalent to 15.
Definition: types.hpp:63
static const rule_type line_any
Soft or Hard line break.
Definition: types.hpp:76
uint32_t rule_type
Flags used with word boundary analysis – the type of the word, line or sentence boundary found...
Definition: types.hpp:51
static const rule_type line_hard
Hard line break: a line break is required (as per CR/LF).
Definition: types.hpp:75
static const rule_type sentence_term
The sentence was terminated with a sentence terminator like ".", "!", possibly followed by a hard separa...
Definition: types.hpp:87
static const rule_type line_mask
Select all types of line breaks.
Definition: types.hpp:77
static const rule_type word_ideo
Word that contains ideographic characters.
Definition: types.hpp:62
static const rule_type word_letters
Any word, excluding numbers but including letters, kana and ideograms.
Definition: types.hpp:64
rule_type boundary_rule(boundary_type t)
Definition: types.hpp:111
static const rule_type sentence_mask
Select all sentence breaking points.
Definition: types.hpp:92
Analyse the text for character boundaries.
Definition: types.hpp:40
static const rule_type word_number
Word that appears to be a number.
Definition: types.hpp:59
static const rule_type line_soft
Soft line break: optional but not required.
Definition: types.hpp:74
static const rule_type word_kana
Word that contains kana characters.
Definition: types.hpp:61
Analyse the text for sentence boundaries.
Definition: types.hpp:42
static const rule_type word_none
Not a word, like white space or punctuation mark.
Definition: types.hpp:58
static const rule_type character_mask
Select all character breaking points.
Definition: types.hpp:104
Analyse the text for word boundaries.
Definition: types.hpp:41
static const rule_type word_mask
Definition: types.hpp:66
static const rule_type word_kana_ideo
Word that includes kana or ideographic characters.
Definition: types.hpp:65
static const rule_type sentence_any
Either the first or the second sentence break type.
Definition: types.hpp:91