8#ifndef BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED
9#define BOOST_NOWIDE_UTF8_CODECVT_HPP_INCLUDED
12#include <boost/nowide/utf/utf.hpp>
20 static_assert(
sizeof(std::mbstate_t) >= 2,
"mbstate_t is to small to store an UTF-16 codepoint");
23 inline void copy_uint16_t(
void* dst,
const void* src)
25 unsigned char* cdst =
static_cast<unsigned char*
>(dst);
26 const unsigned char* csrc =
static_cast<const unsigned char*
>(src);
30 inline std::uint16_t read_state(
const std::mbstate_t& src)
33 copy_uint16_t(&dst, &src);
36 inline void write_state(std::mbstate_t& dst,
const std::uint16_t src)
38 copy_uint16_t(&dst, &src);
48 template<
typename CharType,
int CharSize = sizeof(CharType)>
51 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
53 template<
typename CharType>
54 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 2> :
public std::codecvt<CharType, char, std::mbstate_t>
57 static_assert(
sizeof(CharType) >= 2,
"CharType must be able to store UTF16 code point");
59 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
61 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
64 using uchar = CharType;
66 std::codecvt_base::result do_unshift(std::mbstate_t& s,
char* from,
char* ,
char*& next)
const override
68 if(detail::read_state(s) != 0)
69 return std::codecvt_base::error;
71 return std::codecvt_base::ok;
73 int do_encoding()
const noexcept override
77 int do_max_length()
const noexcept override
81 bool do_always_noconv()
const noexcept override
87 int do_length(std::mbstate_t& std_state,
const char* from,
const char* from_end,
size_t max)
const override
91 std::uint16_t state = detail::read_state(std_state);
92 const char* save_from = from;
98 while(max > 0 && from < from_end)
100 const char* prev_from = from;
111 if(BOOST_LIKELY(
static_cast<size_t>(utf16_traits::width(ch)) <= max))
113 max -= utf16_traits::width(ch);
116 static_assert(utf16_traits::max_width == 2,
"Required for below");
117 std::uint16_t tmpOut[2]{};
118 utf16_traits::encode(ch, tmpOut);
123 detail::write_state(std_state, state);
124 return static_cast<int>(from - save_from);
127 std::codecvt_base::result do_in(std::mbstate_t& std_state,
129 const char* from_end,
130 const char*& from_next,
133 uchar*& to_next)
const override
135 std::codecvt_base::result r = std::codecvt_base::ok;
141 std::uint16_t state = detail::read_state(std_state);
143 if(state && to < to_end)
145 *to++ =
static_cast<CharType
>(state);
148 while(to < to_end && from < from_end)
150 const char* from_saved = from;
160 r = std::codecvt_base::partial;
164 if(BOOST_LIKELY(utf16_traits::width(ch) <= to_end - to))
166 to = utf16_traits::encode(ch, to);
169 static_assert(utf16_traits::max_width == 2,
"Required for below");
170 std::uint16_t tmpOut[2]{};
171 utf16_traits::encode(ch, tmpOut);
172 *to++ =
static_cast<CharType
>(tmpOut[0]);
179 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
180 r = std::codecvt_base::partial;
181 detail::write_state(std_state, state);
185 std::codecvt_base::result do_out(std::mbstate_t& std_state,
187 const uchar* from_end,
188 const uchar*& from_next,
191 char*& to_next)
const override
193 std::codecvt_base::result r = std::codecvt_base::ok;
198 std::uint16_t state = detail::read_state(std_state);
199 for(; to < to_end && from < from_end; ++from)
201 std::uint32_t ch = 0;
205 std::uint16_t w1 = state;
206 std::uint16_t w2 = *from;
207 if(BOOST_LIKELY(utf16_traits::is_trail(w2)))
209 ch = utf16_traits::combine_surrogate(w1, w2);
216 std::uint16_t w1 = *from;
217 if(BOOST_LIKELY(utf16_traits::is_single_codepoint(w1)))
220 }
else if(BOOST_LIKELY(utf16_traits::is_first_surrogate(w1)))
234 if(to_end - to < len)
236 r = std::codecvt_base::partial;
244 if(r == std::codecvt_base::ok && (from != from_end || state != 0))
245 r = std::codecvt_base::partial;
246 detail::write_state(std_state, state);
251 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_BEGIN
253 template<
typename CharType>
254 class BOOST_SYMBOL_VISIBLE
utf8_codecvt<CharType, 4> :
public std::codecvt<CharType, char, std::mbstate_t>
257 utf8_codecvt(
size_t refs = 0) : std::codecvt<CharType, char, std::mbstate_t>(refs)
259 BOOST_NOWIDE_SUPPRESS_UTF_CODECVT_DEPRECATION_END
262 using uchar = CharType;
264 std::codecvt_base::result
265 do_unshift(std::mbstate_t& ,
char* from,
char* ,
char*& next)
const override
268 return std::codecvt_base::noconv;
270 int do_encoding()
const noexcept override
274 int do_max_length()
const noexcept override
278 bool do_always_noconv()
const noexcept override
283 int do_length(std::mbstate_t& ,
const char* from,
const char* from_end,
size_t max)
const override
285 const char* start_from = from;
287 while(max > 0 && from < from_end)
289 const char* save_from = from;
301 return static_cast<int>(from - start_from);
304 std::codecvt_base::result do_in(std::mbstate_t& ,
306 const char* from_end,
307 const char*& from_next,
310 uchar*& to_next)
const override
312 std::codecvt_base::result r = std::codecvt_base::ok;
314 while(to < to_end && from < from_end)
316 const char* from_saved = from;
325 r = std::codecvt_base::partial;
333 if(r == std::codecvt_base::ok && from != from_end)
334 r = std::codecvt_base::partial;
338 std::codecvt_base::result do_out(std::mbstate_t& ,
340 const uchar* from_end,
341 const uchar*& from_next,
344 char*& to_next)
const override
346 std::codecvt_base::result r = std::codecvt_base::ok;
347 while(to < to_end && from < from_end)
349 std::uint32_t ch = 0;
356 if(to_end - to < len)
358 r = std::codecvt_base::partial;
366 if(r == std::codecvt_base::ok && from != from_end)
367 r = std::codecvt_base::partial;
Definition: utf8_codecvt.hpp:49
bool is_valid_codepoint(code_point v)
Checks whether v is a valid code point.
Definition: utf.hpp:42
static const code_point incomplete
Special constant that denotes an incomplete code point.
Definition: utf.hpp:37
static const code_point illegal
Special constant that denotes an illegal code point.
Definition: utf.hpp:32
#define BOOST_NOWIDE_REPLACEMENT_CHARACTER
Definition: replacement.hpp:15
UTF Traits class - functions to convert UTF sequences to and from Unicode code points.
Definition: utf.hpp:57