DAW JSON Link
Loading...
Searching...
No Matches
daw_json_parse_std_string.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
12
17
18#include <daw/algorithms/daw_algorithm_copy.h>
19#include <daw/algorithms/daw_algorithm_copy_n.h>
20#include <daw/daw_data_end.h>
21#include <daw/daw_likely.h>
22#include <daw/daw_not_null.h>
23
24#include <cstddef>
25#include <daw/stdinc/data_access.h>
26#include <daw/stdinc/range_access.h>
27#include <type_traits>
28
29namespace daw::json {
30 inline namespace DAW_JSON_VER {
31 namespace json_details {
32 [[nodiscard]] static constexpr UInt8 to_nibble( unsigned char chr ) {
33 int const b = static_cast<int>( chr );
34 int const maskLetter = ( ( '9' - b ) >> 31 );
35 int const maskSmall = ( ( 'Z' - b ) >> 31 );
36 int const offset = '0' + ( maskLetter & int( 'A' - '0' - 10 ) ) +
37 ( maskSmall & int( 'a' - 'A' ) );
38 auto const result = static_cast<unsigned>( b - offset );
39 return to_uint8( result );
40 }
41
42 template<bool is_unchecked_input>
43 DAW_ATTRIB_NONNULL( )
44 [[nodiscard]] static constexpr UInt16
45 byte_from_nibbles( daw::not_null<char const *> &first ) {
46 auto const n0 = to_nibble( static_cast<unsigned char>( *first++ ) );
47 auto const n1 = to_nibble( static_cast<unsigned char>( *first++ ) );
48 if constexpr( is_unchecked_input ) {
49 daw_json_ensure( n0 < 16 and n1 < 16, ErrorReason::InvalidUTFEscape );
50 }
51 return to_uint16( ( n0 << 4U ) | n1 );
52 }
53
54 static constexpr char u32toC( UInt32 value ) {
55 return static_cast<char>( static_cast<unsigned char>( value ) );
56 }
57
58 template<typename ParseState>
59 [[nodiscard]] static constexpr daw::not_null<char *>
60 decode_utf16( ParseState &parse_state, daw::not_null<char *> it ) {
61 daw_json_assert_weak( parse_state.size( ) >= 5,
62 ErrorReason::UnexpectedEndOfData,
63 parse_state );
64 auto first = daw::not_null<char const *>( parse_state.first );
65 ++first;
66 UInt32 cp =
67 to_uint32(
68 byte_from_nibbles<ParseState::is_unchecked_input>( first ) )
69 << 8U;
70 cp |= byte_from_nibbles<ParseState::is_unchecked_input>( first );
71 if( cp <= 0x7FU ) {
72 *it++ = static_cast<char>( static_cast<unsigned char>( cp ) );
73 parse_state.first = first;
74 return it;
75 }
76
77 //******************************
78 if( 0xD800U <= cp and cp <= 0xDBFFU ) {
79 cp = ( cp - 0xD800U ) * 0x400U;
80 ++first;
82 ( parse_state.last - first >= 5 ) and *first == 'u',
83 ErrorReason::InvalidUTFEscape,
84 parse_state ); // Expected parse_state to start with a \\u
85 ++first;
86 auto trailing =
87 to_uint32(
88 byte_from_nibbles<ParseState::is_unchecked_input>( first ) )
89 << 8U;
90 trailing |=
91 byte_from_nibbles<ParseState::is_unchecked_input>( first );
92 trailing -= 0xDC00U;
93 cp += trailing;
94 cp += 0x10000;
95 }
96 // UTF32-> UTF8
97 if( cp >= 0x10000U ) {
98 // 4 bytes
99 char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
100 char const enc2 =
101 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
102 char const enc1 =
103 u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
104 char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
105 *it++ = enc0;
106 *it++ = enc1;
107 *it++ = enc2;
108 *it++ = enc3;
109 parse_state.first = first;
110 return it;
111 }
112 //******************************
113 if( cp >= 0x800U ) {
114 // 3 bytes
115 char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
116 char const enc1 =
117 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
118 char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
119 *it++ = enc0;
120 *it++ = enc1;
121 *it++ = enc2;
122 parse_state.first = first;
123 return it;
124 }
125 //******************************
126 // cp >= 0x80U
127 // 2 bytes
128 char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
129 char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
130 *it++ = enc0;
131 *it++ = enc1;
132 parse_state.first = first;
133 return it;
134 }
135
136 template<typename ParseState, typename Appender>
137 static constexpr void decode_utf16( ParseState &parse_state,
138 Appender &app ) {
139 auto first = daw::not_null<char const *>( parse_state.first );
140 ++first;
141 UInt32 cp =
142 to_uint32(
143 byte_from_nibbles<ParseState::is_unchecked_input>( first ) )
144 << 8U;
145 cp |= byte_from_nibbles<ParseState::is_unchecked_input>( first );
146 if( cp <= 0x7FU ) {
147 app( u32toC( cp ) );
148 parse_state.first = first;
149 return;
150 }
151 if( 0xD800U <= cp and cp <= 0xDBFFU ) {
152 cp = ( cp - 0xD800U ) * 0x400U;
153 ++first;
155 *first == 'u', ErrorReason::InvalidUTFEscape, parse_state );
156 ++first;
157 auto trailing =
158 to_uint32(
159 byte_from_nibbles<ParseState::is_unchecked_input>( first ) )
160 << 8U;
161 trailing |=
162 byte_from_nibbles<ParseState::is_unchecked_input>( first );
163 trailing -= 0xDC00U;
164 cp += trailing;
165 cp += 0x10000;
166 }
167 // UTF32-> UTF8
168 if( cp >= 0x10000U ) {
169 // 4 bytes
170 char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
171 char const enc2 =
172 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
173 char const enc1 =
174 u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
175 char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
176 app( enc0 );
177 app( enc1 );
178 app( enc2 );
179 app( enc3 );
180 parse_state.first = first;
181 return;
182 }
183 if( cp >= 0x800U ) {
184 // 3 bytes
185 char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
186 char const enc1 =
187 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
188 char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
189 app( enc0 );
190 app( enc1 );
191 app( enc2 );
192 parse_state.first = first;
193 return;
194 }
195 // cp >= 0x80U
196 // 2 bytes
197 char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
198 char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
199 app( enc0 );
200 app( enc1 );
201 parse_state.first = first;
202 }
203
204 namespace parse_tokens {
205 inline constexpr char escape_quotes[] = "\\\"";
206 }
207
208 // Fast path for parsing escaped strings to a std::string with the default
209 // appender
210 template<bool AllowHighEight, typename JsonMember, bool KnownBounds,
211 typename ParseState>
212 [[nodiscard]] constexpr auto
213 parse_string_known_stdstring( ParseState &parse_state ) {
214 using string_type = json_base_type_t<JsonMember>;
215 string_type result =
216 string_type( std::size( parse_state ) + 1,
217 '\0',
218 parse_state.template get_allocator_for<char>( ) );
219 daw::not_null<char *> it = std::data( result );
220
221 bool const has_quote = parse_state.front( ) == '"';
222 if( has_quote ) {
223 parse_state.remove_prefix( );
224 }
225
226 if( auto const first_slash =
227 static_cast<std::ptrdiff_t>( parse_state.counter ) - 1;
228 first_slash > 1 ) {
229 it = daw::algorithm::copy_n( parse_state.first,
230 it.get( ),
231 static_cast<std::size_t>( first_slash ) )
232 .output;
233 parse_state.first += first_slash;
234 }
235 DAW_CPP23_STATIC_LOCAL constexpr auto in_json_string =
236 []( auto const &r ) DAW_JSON_CPP23_STATIC_CALL_OP -> bool {
237 if constexpr( not ParseState::is_unchecked_input ) {
238 if( not DAW_LIKELY( r.has_more( ) ) ) {
239 return false;
240 }
241 }
242 return DAW_LIKELY( r.front( ) != '"' );
243 };
244
245 while( in_json_string( parse_state ) ) {
246 {
247 daw::not_null<char const *> first = parse_state.first;
248 daw::not_null<char const *> const last = parse_state.last;
249
250 if( not json_details::use_constexpr_exec_mode<
251 typename ParseState::exec_tag_t>( ) ) {
252 first =
253 mem_move_to_next_of<( ParseState::is_unchecked_input or
254 ParseState::is_zero_terminated_string ),
255 typename ParseState::exec_tag_t,
256 '"',
257 '\\'>( first, last );
258 } else {
259 daw_json_assert_weak( KnownBounds or first < last,
260 ErrorReason::UnexpectedEndOfData,
261 parse_state );
262 while( *first != '"' and *first != '\\' ) {
263 ++first;
264 daw_json_assert_weak( KnownBounds or first < last,
265 ErrorReason::UnexpectedEndOfData,
266 parse_state );
267 }
268 }
270 static_cast<std::ptrdiff_t>( result.size( ) ) -
271 std::distance( result.data( ), it.get( ) ) >=
272 std::distance( parse_state.first, first.get( ) ),
273 ErrorReason::UnexpectedEndOfData );
274 it = daw::algorithm::copy(
275 parse_state.first, first.get( ), it.get( ) );
276 parse_state.first = first;
277 }
278 if( parse_state.front( ) == '\\' ) {
279 parse_state.remove_prefix( );
280 daw_json_assert_weak( not parse_state.is_space_unchecked( ),
281 ErrorReason::InvalidUTFCodepoint,
282 parse_state );
283 switch( parse_state.front( ) ) {
284 case 'b':
285 *it++ = '\b';
286 parse_state.remove_prefix( );
287 break;
288 case 'f':
289 *it++ = '\f';
290 parse_state.remove_prefix( );
291 break;
292 case 'n':
293 *it++ = '\n';
294 parse_state.remove_prefix( );
295 break;
296 case 'r':
297 *it++ = '\r';
298 parse_state.remove_prefix( );
299 break;
300 case 't':
301 *it++ = '\t';
302 parse_state.remove_prefix( );
303 break;
304 case 'u':
305 it = decode_utf16( parse_state, it );
306 break;
307 case '/':
308 case '\\':
309 case '"':
310 *it++ = parse_state.front( );
311 parse_state.remove_prefix( );
312 break;
313 default:
314 if constexpr( not AllowHighEight ) {
316 ( not parse_state.is_space_unchecked( ) ) &
317 ( static_cast<unsigned char>( parse_state.front( ) ) <=
318 0x7FU ),
319 ErrorReason::InvalidStringHighASCII,
320 parse_state );
321 }
322 *it++ = parse_state.front( );
323 parse_state.remove_prefix( );
324 }
325 } else {
326 daw_json_assert_weak( not has_quote or
327 parse_state.is_quotes_checked( ),
328 ErrorReason::InvalidString,
329 parse_state );
330 }
331 daw_json_assert_weak( not has_quote or parse_state.has_more( ),
332 ErrorReason::UnexpectedEndOfData,
333 parse_state );
334 }
335 auto const sz = static_cast<std::size_t>(
336 std::distance( std::data( result ), it.get( ) ) );
338 std::size( result ) >= sz, ErrorReason::InvalidString, parse_state );
339 result.resize( sz );
340 if constexpr( std::is_convertible_v<string_type,
341 json_result_t<JsonMember>> ) {
342 return result;
343 } else {
344 using constructor_t = json_constructor_t<JsonMember>;
345 construct_value<json_result_t<JsonMember>, constructor_t>(
346 parse_state, std::data( result ), daw::data_end( result ) );
347 }
348 }
349 } // namespace json_details
350 } // namespace DAW_JSON_VER
351} // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Assert that Bool is true when in Checked Input mode. If false, pass the rest of the args to daw_json_error.
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false, pass the rest of the args to daw_json_error.
#define DAW_JSON_CPP23_STATIC_CALL_OP
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition version.h:20