DAW JSON Link
Loading...
Searching...
No Matches
daw_json_parse_std_string.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
12
17
18#include <daw/algorithms/daw_algorithm_copy.h>
19#include <daw/algorithms/daw_algorithm_copy_n.h>
20#include <daw/daw_data_end.h>
21#include <daw/daw_likely.h>
22#include <daw/daw_not_null.h>
23
24#include <cstddef>
25#include <daw/stdinc/data_access.h>
26#include <daw/stdinc/range_access.h>
27#include <type_traits>
28
29namespace daw::json {
30 inline namespace DAW_JSON_VER {
31 namespace json_details {
32 [[nodiscard]] static constexpr UInt8 to_nibble( unsigned char chr ) {
33 int const b = static_cast<int>( chr );
34 int const maskLetter = ( ( '9' - b ) >> 31 );
35 int const maskSmall = ( ( 'Z' - b ) >> 31 );
36 int const offset = '0' + ( maskLetter & int( 'A' - '0' - 10 ) ) +
37 ( maskSmall & int( 'a' - 'A' ) );
38 auto const result = static_cast<unsigned>( b - offset );
39 return to_uint8( result );
40 }
41
42 template<bool is_unchecked_input>
43 [[nodiscard]] static constexpr UInt16
44 byte_from_nibbles( daw::not_null<char const *> &first ) {
45 auto const n0 = to_nibble( static_cast<unsigned char>( *first++ ) );
46 auto const n1 = to_nibble( static_cast<unsigned char>( *first++ ) );
47 if constexpr( is_unchecked_input ) {
48 daw_json_ensure( n0 < 16 and n1 < 16, ErrorReason::InvalidUTFEscape );
49 }
50 return to_uint16( ( n0 << 4U ) | n1 );
51 }
52
53 static constexpr char u32toC( UInt32 value ) {
54 return static_cast<char>( static_cast<unsigned char>( value ) );
55 }
56
57 template<typename ParseState>
58 [[nodiscard]] static constexpr daw::not_null<char *>
59 decode_utf16( ParseState &parse_state, daw::not_null<char *> it ) {
60 daw_json_assert_weak( parse_state.size( ) >= 5,
61 ErrorReason::UnexpectedEndOfData,
62 parse_state );
63 auto first = daw::not_null<char const *>( parse_state.first );
64 ++first;
65 UInt32 cp =
66 to_uint32(
67 byte_from_nibbles<ParseState::is_unchecked_input>( first ) )
68 << 8U;
69 cp |= byte_from_nibbles<ParseState::is_unchecked_input>( first );
70 if( cp <= 0x7FU ) {
71 *it++ = static_cast<char>( static_cast<unsigned char>( cp ) );
72 parse_state.first = first;
73 return it;
74 }
75
76 //******************************
77 if( 0xD800U <= cp and cp <= 0xDBFFU ) {
78 cp = ( cp - 0xD800U ) * 0x400U;
79 ++first;
81 ( parse_state.last - first >= 5 ) and *first == 'u',
82 ErrorReason::InvalidUTFEscape,
83 parse_state ); // Expected parse_state to start with a \\u
84 ++first;
85 auto trailing =
86 to_uint32(
87 byte_from_nibbles<ParseState::is_unchecked_input>( first ) )
88 << 8U;
89 trailing |=
90 byte_from_nibbles<ParseState::is_unchecked_input>( first );
91 trailing -= 0xDC00U;
92 cp += trailing;
93 cp += 0x10000;
94 }
95 // UTF32-> UTF8
96 if( cp >= 0x10000U ) {
97 // 4 bytes
98 char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
99 char const enc2 =
100 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
101 char const enc1 =
102 u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
103 char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
104 *it++ = enc0;
105 *it++ = enc1;
106 *it++ = enc2;
107 *it++ = enc3;
108 parse_state.first = first;
109 return it;
110 }
111 //******************************
112 if( cp >= 0x800U ) {
113 // 3 bytes
114 char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
115 char const enc1 =
116 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
117 char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
118 *it++ = enc0;
119 *it++ = enc1;
120 *it++ = enc2;
121 parse_state.first = first;
122 return it;
123 }
124 //******************************
125 // cp >= 0x80U
126 // 2 bytes
127 char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
128 char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
129 *it++ = enc0;
130 *it++ = enc1;
131 parse_state.first = first;
132 return it;
133 }
134
135 template<typename ParseState, typename Appender>
136 static constexpr void decode_utf16( ParseState &parse_state,
137 Appender &app ) {
138 auto first = daw::not_null<char const *>( parse_state.first );
139 ++first;
140 UInt32 cp =
141 to_uint32(
142 byte_from_nibbles<ParseState::is_unchecked_input>( first ) )
143 << 8U;
144 cp |= byte_from_nibbles<ParseState::is_unchecked_input>( first );
145 if( cp <= 0x7FU ) {
146 app( u32toC( cp ) );
147 parse_state.first = first;
148 return;
149 }
150 if( 0xD800U <= cp and cp <= 0xDBFFU ) {
151 cp = ( cp - 0xD800U ) * 0x400U;
152 ++first;
154 *first == 'u', ErrorReason::InvalidUTFEscape, parse_state );
155 ++first;
156 auto trailing =
157 to_uint32(
158 byte_from_nibbles<ParseState::is_unchecked_input>( first ) )
159 << 8U;
160 trailing |=
161 byte_from_nibbles<ParseState::is_unchecked_input>( first );
162 trailing -= 0xDC00U;
163 cp += trailing;
164 cp += 0x10000;
165 }
166 // UTF32-> UTF8
167 if( cp >= 0x10000U ) {
168 // 4 bytes
169 char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
170 char const enc2 =
171 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
172 char const enc1 =
173 u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
174 char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
175 app( enc0 );
176 app( enc1 );
177 app( enc2 );
178 app( enc3 );
179 parse_state.first = first;
180 return;
181 }
182 if( cp >= 0x800U ) {
183 // 3 bytes
184 char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
185 char const enc1 =
186 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
187 char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
188 app( enc0 );
189 app( enc1 );
190 app( enc2 );
191 parse_state.first = first;
192 return;
193 }
194 // cp >= 0x80U
195 // 2 bytes
196 char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
197 char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
198 app( enc0 );
199 app( enc1 );
200 parse_state.first = first;
201 }
202
namespace parse_tokens {
  /// A backslash, a double quote and the terminating NUL
  inline constexpr char escape_quotes[] = { '\\', '"', '\0' };
} // namespace parse_tokens
206
207 // Fast path for parsing escaped strings to a std::string with the default
208 // appender
209 template<bool AllowHighEight, typename JsonMember, bool KnownBounds,
210 typename ParseState>
211 [[nodiscard]] constexpr auto
212 parse_string_known_stdstring( ParseState &parse_state ) {
213 using string_type = json_base_type_t<JsonMember>;
214 string_type result =
215 string_type( std::size( parse_state ) + 1,
216 '\0',
217 parse_state.template get_allocator_for<char>( ) );
218 daw::not_null<char *> it = std::data( result );
219
220 bool const has_quote = parse_state.front( ) == '"';
221 if( has_quote ) {
222 parse_state.remove_prefix( );
223 }
224
225 if( auto const first_slash =
226 static_cast<std::ptrdiff_t>( parse_state.counter ) - 1;
227 first_slash > 1 ) {
228 it = daw::algorithm::copy_n( parse_state.first,
229 it.get( ),
230 static_cast<std::size_t>( first_slash ) )
231 .output;
232 parse_state.first += first_slash;
233 }
234 DAW_CPP23_STATIC_LOCAL constexpr auto in_json_string =
235 []( auto const &r ) DAW_JSON_CPP23_STATIC_CALL_OP -> bool {
236 if constexpr( not ParseState::is_unchecked_input ) {
237 if( not DAW_LIKELY( r.has_more( ) ) ) {
238 return false;
239 }
240 }
241 return DAW_LIKELY( r.front( ) != '"' );
242 };
243
244 while( in_json_string( parse_state ) ) {
245 {
246 daw::not_null<char const *> first = parse_state.first;
247 daw::not_null<char const *> const last = parse_state.last;
248
249 if( not json_details::use_constexpr_exec_mode<
250 typename ParseState::exec_tag_t>( ) ) {
251 first =
252 mem_move_to_next_of<( ParseState::is_unchecked_input or
253 ParseState::is_zero_terminated_string ),
254 typename ParseState::exec_tag_t,
255 '"',
256 '\\'>( first, last );
257 } else {
258 daw_json_assert_weak( KnownBounds or first < last,
259 ErrorReason::UnexpectedEndOfData,
260 parse_state );
261 while( *first != '"' and *first != '\\' ) {
262 ++first;
263 daw_json_assert_weak( KnownBounds or first < last,
264 ErrorReason::UnexpectedEndOfData,
265 parse_state );
266 }
267 }
269 static_cast<std::ptrdiff_t>( result.size( ) ) -
270 std::distance( result.data( ), it.get( ) ) >=
271 std::distance( parse_state.first, first.get( ) ),
272 ErrorReason::UnexpectedEndOfData );
273 it = daw::algorithm::copy(
274 parse_state.first, first.get( ), it.get( ) );
275 parse_state.first = first;
276 }
277 if( parse_state.front( ) == '\\' ) {
278 parse_state.remove_prefix( );
279 daw_json_assert_weak( not parse_state.is_space_unchecked( ),
280 ErrorReason::InvalidUTFCodepoint,
281 parse_state );
282 switch( parse_state.front( ) ) {
283 case 'b':
284 *it++ = '\b';
285 parse_state.remove_prefix( );
286 break;
287 case 'f':
288 *it++ = '\f';
289 parse_state.remove_prefix( );
290 break;
291 case 'n':
292 *it++ = '\n';
293 parse_state.remove_prefix( );
294 break;
295 case 'r':
296 *it++ = '\r';
297 parse_state.remove_prefix( );
298 break;
299 case 't':
300 *it++ = '\t';
301 parse_state.remove_prefix( );
302 break;
303 case 'u':
304 it = decode_utf16( parse_state, it );
305 break;
306 case '/':
307 case '\\':
308 case '"':
309 *it++ = parse_state.front( );
310 parse_state.remove_prefix( );
311 break;
312 default:
313 if constexpr( not AllowHighEight ) {
315 ( not parse_state.is_space_unchecked( ) ) &
316 ( static_cast<unsigned char>( parse_state.front( ) ) <=
317 0x7FU ),
318 ErrorReason::InvalidStringHighASCII,
319 parse_state );
320 }
321 *it++ = parse_state.front( );
322 parse_state.remove_prefix( );
323 }
324 } else {
325 daw_json_assert_weak( not has_quote or
326 parse_state.is_quotes_checked( ),
327 ErrorReason::InvalidString,
328 parse_state );
329 }
330 daw_json_assert_weak( not has_quote or parse_state.has_more( ),
331 ErrorReason::UnexpectedEndOfData,
332 parse_state );
333 }
334 auto const sz = static_cast<std::size_t>(
335 std::distance( std::data( result ), it.get( ) ) );
337 std::size( result ) >= sz, ErrorReason::InvalidString, parse_state );
338 result.resize( sz );
339 if constexpr( std::is_convertible_v<string_type,
340 json_result_t<JsonMember>> ) {
341 return result;
342 } else {
343 using constructor_t = json_constructor_t<JsonMember>;
344 construct_value<json_result_t<JsonMember>, constructor_t>(
345 parse_state, std::data( result ), daw::data_end( result ) );
346 }
347 }
348 } // namespace json_details
349 } // namespace DAW_JSON_VER
350} // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Assert that Bool is true when in Checked Input mode. If false, pass the rest of the args to daw_json_error.
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false pass rest of args to daw_json_error.
#define DAW_JSON_CPP23_STATIC_CALL_OP
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition version.h:20