9 #ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED 10 #define BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED 13 #include <boost/nowide/utf/utf.hpp> 21 static_assert(
sizeof(std::mbstate_t) >= 2,
"mbstate_t is to small to store an UTF-16 codepoint");
24 inline void copy_uint16_t(
void* dst,
const void* src)
26 unsigned char* cdst = static_cast<unsigned char*>(dst);
27 const unsigned char* csrc = static_cast<const unsigned char*>(src);
31 inline std::uint16_t read_state(
const std::mbstate_t& src)
34 copy_uint16_t(&dst, &src);
37 inline void write_state(std::mbstate_t& dst,
const std::uint16_t src)
39 copy_uint16_t(&dst, &src);
49 template<
typename CharType,
int CharSize = sizeof(CharType)>
52 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
54 template<
typename CharType>
55 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 2> :
public std::codecvt<CharType, char, std::mbstate_t>
58 static_assert(
sizeof(CharType) >= 2,
"CharType must be able to store UTF16 code point");
60 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
62 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
65 using uchar = CharType;
67 std::codecvt_base::result do_unshift(std::mbstate_t& s,
char* from,
char* ,
char*& next)
const override 69 if(detail::read_state(s) != 0)
70 return std::codecvt_base::error;
72 return std::codecvt_base::ok;
74 int do_encoding()
const noexcept
override 78 int do_max_length()
const noexcept
override 82 bool do_always_noconv()
const noexcept
override 88 int do_length(std::mbstate_t& std_state,
const char* from,
const char* from_end,
size_t max)
const override 92 std::uint16_t state = detail::read_state(std_state);
93 const char* save_from = from;
99 while(max > 0 && from < from_end)
101 const char* prev_from = from;
112 if(BOOST_LIKELY(static_cast<size_t>(utf16_traits::width(ch)) <= max))
114 max -= utf16_traits::width(ch);
117 static_assert(utf16_traits::max_width == 2,
"Required for below");
118 std::uint16_t tmpOut[2]{};
119 utf16_traits::encode(ch, tmpOut);
124 detail::write_state(std_state, state);
125 return static_cast<int>(from - save_from);
128 std::codecvt_base::result do_in(std::mbstate_t& std_state,
130 const char* from_end,
131 const char*& from_next,
134 uchar*& to_next)
const override 136 std::codecvt_base::result r = std::codecvt_base::ok;
142 std::uint16_t state = detail::read_state(std_state);
144 if(state && to < to_end)
146 *to++ = static_cast<CharType>(state);
149 while(to < to_end && from < from_end)
151 const char* from_saved = from;
161 r = std::codecvt_base::partial;
165 if(BOOST_LIKELY(utf16_traits::width(ch) <= to_end - to))
167 to = utf16_traits::encode(ch, to);
170 static_assert(utf16_traits::max_width == 2,
"Required for below");
171 std::uint16_t tmpOut[2]{};
172 utf16_traits::encode(ch, tmpOut);
173 *to++ = static_cast<CharType>(tmpOut[0]);
180 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
181 r = std::codecvt_base::partial;
182 detail::write_state(std_state, state);
186 std::codecvt_base::result do_out(std::mbstate_t& std_state,
188 const uchar* from_end,
189 const uchar*& from_next,
192 char*& to_next)
const override 194 std::codecvt_base::result r = std::codecvt_base::ok;
199 std::uint16_t state = detail::read_state(std_state);
200 for(; to < to_end && from < from_end; ++from)
202 std::uint32_t ch = 0;
206 std::uint16_t w1 = state;
207 std::uint16_t w2 = *from;
208 if(BOOST_LIKELY(utf16_traits::is_trail(w2)))
210 ch = utf16_traits::combine_surrogate(w1, w2);
217 std::uint16_t w1 = *from;
218 if(BOOST_LIKELY(utf16_traits::is_single_codepoint(w1)))
221 }
else if(BOOST_LIKELY(utf16_traits::is_first_surrogate(w1)))
235 if(to_end - to < len)
237 r = std::codecvt_base::partial;
245 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
246 r = std::codecvt_base::partial;
247 detail::write_state(std_state, state);
252 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
254 template<
typename CharType>
255 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 4> :
public std::codecvt<CharType, char, std::mbstate_t>
258 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
260 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
263 using uchar = CharType;
265 std::codecvt_base::result
266 do_unshift(std::mbstate_t& ,
char* from,
char* ,
char*& next)
const override 269 return std::codecvt_base::noconv;
271 int do_encoding()
const noexcept
override 275 int do_max_length()
const noexcept
override 279 bool do_always_noconv()
const noexcept
override 284 int do_length(std::mbstate_t& ,
const char* from,
const char* from_end,
size_t max)
const override 286 const char* start_from = from;
288 while(max > 0 && from < from_end)
290 const char* save_from = from;
302 return static_cast<int>(from - start_from);
305 std::codecvt_base::result do_in(std::mbstate_t& ,
307 const char* from_end,
308 const char*& from_next,
311 uchar*& to_next)
const override 313 std::codecvt_base::result r = std::codecvt_base::ok;
315 while(to < to_end && from < from_end)
317 const char* from_saved = from;
326 r = std::codecvt_base::partial;
334 if(r == std::codecvt_base::ok && from != from_end)
335 r = std::codecvt_base::partial;
339 std::codecvt_base::result do_out(std::mbstate_t& ,
341 const uchar* from_end,
342 const uchar*& from_next,
345 char*& to_next)
const override 347 std::codecvt_base::result r = std::codecvt_base::ok;
348 while(to < to_end && from < from_end)
350 std::uint32_t ch = 0;
357 if(to_end - to < len)
359 r = std::codecvt_base::partial;
367 if(r == std::codecvt_base::ok && from != from_end)
368 r = std::codecvt_base::partial;
UTF Traits class - functions to convert UTF sequences to and from Unicode code points.
Definition: utf.hpp:57
static Iterator encode(code_point value, Iterator out)
Definition: utf8_codecvt.hpp:50
static const code_point illegal
Special constant that denotes an illegal code point.
Definition: utf.hpp:33
#define BOOST_NOWIDE_REPLACEMENT_CHARACTER
Definition: replacement.hpp:16
static const code_point incomplete
Special constant that denotes an incomplete code point.
Definition: utf.hpp:38
static int width(code_point value)
bool is_valid_codepoint(code_point v)
Checks whether v is a valid code point.
Definition: utf.hpp:43
static code_point decode(Iterator &p, Iterator e)