DAW JSON Link
Loading...
Searching...
No Matches
daw_json_parse_std_string.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
11#include "version.h"
12
14#include "daw_json_assert.h"
17
18#include <daw/algorithms/daw_algorithm_copy.h>
19#include <daw/algorithms/daw_algorithm_copy_n.h>
20#include <daw/daw_data_end.h>
21#include <daw/daw_likely.h>
22
23#include <cstddef>
24#include <daw/stdinc/data_access.h>
25#include <daw/stdinc/range_access.h>
26#include <type_traits>
27
28namespace daw::json {
29 inline namespace DAW_JSON_VER {
30 namespace json_details {
31 [[nodiscard]] static inline constexpr UInt8
32 to_nibble( unsigned char chr ) {
33 int const b = static_cast<int>( chr );
34 int const maskLetter = ( ( '9' - b ) >> 31 );
35 int const maskSmall = ( ( 'Z' - b ) >> 31 );
36 int const offset = '0' + ( maskLetter & int( 'A' - '0' - 10 ) ) +
37 ( maskSmall & int( 'a' - 'A' ) );
38 auto const result = static_cast<unsigned>( b - offset );
39 return to_uint8( result );
40 }
41
42 template<bool is_unchecked_input>
43 DAW_ATTRIB_NONNULL( )
44 [[nodiscard]] static inline constexpr UInt16
45 byte_from_nibbles( char const *&first ) {
46 auto const n0 = to_nibble( static_cast<unsigned char>( *first++ ) );
47 auto const n1 = to_nibble( static_cast<unsigned char>( *first++ ) );
48 if constexpr( is_unchecked_input ) {
49 daw_json_ensure( n0 < 16 and n1 < 16, ErrorReason::InvalidUTFEscape );
50 }
51 return to_uint16( ( n0 << 4U ) | n1 );
52 }
53
54 static constexpr char u32toC( UInt32 value ) {
55 return static_cast<char>( static_cast<unsigned char>( value ) );
56 }
57
58 template<typename ParseState>
59 DAW_ATTRIB_NONNULL( )
60 DAW_ATTRIB_RET_NONNULL [[nodiscard]] static constexpr char *decode_utf16(
61 ParseState &parse_state, char *it ) {
62 constexpr bool is_unchecked_input = ParseState::is_unchecked_input;
63 daw_json_assert_weak( parse_state.size( ) >= 5,
64 ErrorReason::UnexpectedEndOfData, parse_state );
65 char const *first = parse_state.first;
66 ++first;
67 UInt32 cp = to_uint32( byte_from_nibbles<is_unchecked_input>( first ) )
68 << 8U;
69 cp |= byte_from_nibbles<is_unchecked_input>( first );
70 if( cp <= 0x7FU ) {
71 *it++ = static_cast<char>( static_cast<unsigned char>( cp ) );
72 parse_state.first = first;
73 return it;
74 }
75
76 //******************************
77 if( 0xD800U <= cp and cp <= 0xDBFFU ) {
78 cp = ( cp - 0xD800U ) * 0x400U;
79 ++first;
81 ( parse_state.last - first >= 5 ) and *first == 'u',
82 ErrorReason::InvalidUTFEscape,
83 parse_state ); // Expected parse_state to start with a \\u
84 ++first;
85 auto trailing =
86 to_uint32( byte_from_nibbles<is_unchecked_input>( first ) ) << 8U;
87 trailing |= byte_from_nibbles<is_unchecked_input>( first );
88 trailing -= 0xDC00U;
89 cp += trailing;
90 cp += 0x10000;
91 }
92 // UTF32-> UTF8
93 if( cp >= 0x10000U ) {
94 // 4 bytes
95 char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
96 char const enc2 =
97 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
98 char const enc1 =
99 u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
100 char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
101 *it++ = enc0;
102 *it++ = enc1;
103 *it++ = enc2;
104 *it++ = enc3;
105 parse_state.first = first;
106 return it;
107 }
108 //******************************
109 if( cp >= 0x800U ) {
110 // 3 bytes
111 char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
112 char const enc1 =
113 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
114 char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
115 *it++ = enc0;
116 *it++ = enc1;
117 *it++ = enc2;
118 parse_state.first = first;
119 return it;
120 }
121 //******************************
122 // cp >= 0x80U
123 // 2 bytes
124 char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
125 char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
126 *it++ = enc0;
127 *it++ = enc1;
128 parse_state.first = first;
129 return it;
130 }
131
132 template<typename ParseState, typename Appender>
133 static constexpr void decode_utf16( ParseState &parse_state,
134 Appender &app ) {
135 constexpr bool is_unchecked_input = ParseState::is_unchecked_input;
136 char const *first = parse_state.first;
137 ++first;
138 UInt32 cp = to_uint32( byte_from_nibbles<is_unchecked_input>( first ) )
139 << 8U;
140 cp |= byte_from_nibbles<is_unchecked_input>( first );
141 if( cp <= 0x7FU ) {
142 app( u32toC( cp ) );
143 parse_state.first = first;
144 return;
145 }
146 if( 0xD800U <= cp and cp <= 0xDBFFU ) {
147 cp = ( cp - 0xD800U ) * 0x400U;
148 ++first;
149 daw_json_assert_weak( *first == 'u', ErrorReason::InvalidUTFEscape,
150 parse_state );
151 ++first;
152 auto trailing =
153 to_uint32( byte_from_nibbles<is_unchecked_input>( first ) ) << 8U;
154 trailing |= byte_from_nibbles<is_unchecked_input>( first );
155 trailing -= 0xDC00U;
156 cp += trailing;
157 cp += 0x10000;
158 }
159 // UTF32-> UTF8
160 if( cp >= 0x10000U ) {
161 // 4 bytes
162 char const enc3 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
163 char const enc2 =
164 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
165 char const enc1 =
166 u32toC( ( ( cp >> 12U ) & 0b0011'1111U ) | 0b1000'0000U );
167 char const enc0 = u32toC( ( cp >> 18U ) | 0b1111'0000U );
168 app( enc0 );
169 app( enc1 );
170 app( enc2 );
171 app( enc3 );
172 parse_state.first = first;
173 return;
174 }
175 if( cp >= 0x800U ) {
176 // 3 bytes
177 char const enc2 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
178 char const enc1 =
179 u32toC( ( ( cp >> 6U ) & 0b0011'1111U ) | 0b1000'0000U );
180 char const enc0 = u32toC( ( cp >> 12U ) | 0b1110'0000U );
181 app( enc0 );
182 app( enc1 );
183 app( enc2 );
184 parse_state.first = first;
185 return;
186 }
187 // cp >= 0x80U
188 // 2 bytes
189 char const enc1 = u32toC( ( cp & 0b0011'1111U ) | 0b1000'0000U );
190 char const enc0 = u32toC( ( cp >> 6U ) | 0b1100'0000U );
191 app( enc0 );
192 app( enc1 );
193 parse_state.first = first;
194 }
195
namespace parse_tokens {
  // Backslash, double quote, and the terminating NUL.
  inline constexpr char const escape_quotes[] = { '\\', '"', '\0' };
} // namespace parse_tokens
199
200 // Fast path for parsing escaped strings to a std::string with the default
201 // appender
// Parses the string at the front of parse_state into a string_type
// (json_base_type_t<JsonMember>), translating backslash escapes while
// copying. The buffer is created with std::size( parse_state ) + 1 bytes and
// resized down to the number of bytes written before it is returned.
//
// NOTE(review): this listing appears to have dropped source lines - the
// embedded numbering skips 239, 257 and 302 - leaving an unterminated
// `if constexpr(` condition and two argument lists with no call in front of
// them. Restore those lines from the repository before building.
// NOTE(review): the final else branch calls construct_value without
// returning its result - confirm whether a `return` is missing.
202 template<bool AllowHighEight, typename JsonMember, bool KnownBounds,
203 typename ParseState>
204 [[nodiscard]] static constexpr auto
205 parse_string_known_stdstring( ParseState &parse_state ) {
206 using string_type = json_base_type_t<JsonMember>;
207 string_type result =
208 string_type( std::size( parse_state ) + 1, '\0',
209 parse_state.template get_allocator_for<char>( ) );
// Write cursor into result's storage.
210 char *it = std::data( result );
211
// Drop an opening quote when present; has_quote is consulted again by the
// checks at the bottom of the loop.
212 bool const has_quote = parse_state.front( ) == '"';
213 if( has_quote ) {
214 parse_state.remove_prefix( );
215 }
216
// Bulk-copy the first ( parse_state.counter - 1 ) bytes when that value is
// greater than 1.
// NOTE(review): presumably counter records where the first escape was found
// by an earlier pass - confirm against the code that sets it.
217 if( auto const first_slash =
218 static_cast<std::ptrdiff_t>( parse_state.counter ) - 1;
219 first_slash > 1 ) {
220 it = daw::algorithm::copy_n( parse_state.first, it,
221 static_cast<std::size_t>( first_slash ) )
222 .output;
223 parse_state.first += first_slash;
224 }
// Loop condition: keep going until the front character is a double quote;
// when is_unchecked_input is false it also requires has_more( ).
225 constexpr auto pred =
226 []( auto const &r ) DAW_JSON_CPP23_STATIC_CALL_OP {
227 if constexpr( ParseState::is_unchecked_input ) {
228 return DAW_LIKELY( r.front( ) != '"' );
229 } else {
230 return DAW_LIKELY( r.has_more( ) ) and ( r.front( ) != '"' );
231 }
232 };
233
234 while( pred( parse_state ) ) {
// Step 1: find the next quote or backslash and copy the run before it.
235 {
236 char const *first = parse_state.first;
237 char const *const last = parse_state.last;
// NOTE(review): the rest of this condition (original line 239) is missing
// from the listing.
238 if constexpr( std::is_same_v<typename ParseState::exec_tag_t,
240
// Character-by-character scan.
241 daw_json_assert_weak( KnownBounds or first < last,
242 ErrorReason::UnexpectedEndOfData,
243 parse_state );
244 while( *first != '"' and *first != '\\' ) {
245 ++first;
246 daw_json_assert_weak( KnownBounds or first < last,
247 ErrorReason::UnexpectedEndOfData,
248 parse_state );
249 }
250 } else {
// Otherwise delegate the search to mem_move_to_next_of.
251 first =
252 mem_move_to_next_of<( ParseState::is_unchecked_input or
253 ParseState::is_zero_terminated_string ),
254 '"', '\\'>( ParseState::exec_tag, first,
255 last );
256 }
// Space remaining in result must cover the run about to be copied.
// NOTE(review): the call these arguments belong to (original line 257) is
// missing from the listing.
258 static_cast<std::ptrdiff_t>( result.size( ) ) -
259 std::distance( result.data( ), it ) >=
260 std::distance( parse_state.first, first ),
261 ErrorReason::UnexpectedEndOfData );
262 it = daw::algorithm::copy( parse_state.first, first, it );
263 parse_state.first = first;
264 }
// Step 2: translate an escape sequence, if that is what stopped the scan.
265 if( parse_state.front( ) == '\\' ) {
266 parse_state.remove_prefix( );
267 daw_json_assert_weak( not parse_state.is_space_unchecked( ),
268 ErrorReason::InvalidUTFCodepoint,
269 parse_state );
270 switch( parse_state.front( ) ) {
271 case 'b':
272 *it++ = '\b';
273 parse_state.remove_prefix( );
274 break;
275 case 'f':
276 *it++ = '\f';
277 parse_state.remove_prefix( );
278 break;
279 case 'n':
280 *it++ = '\n';
281 parse_state.remove_prefix( );
282 break;
283 case 'r':
284 *it++ = '\r';
285 parse_state.remove_prefix( );
286 break;
287 case 't':
288 *it++ = '\t';
289 parse_state.remove_prefix( );
290 break;
291 case 'u':
// decode_utf16 advances parse_state.first itself, so no remove_prefix here.
292 it = decode_utf16( parse_state, it );
293 break;
294 case '/':
295 case '\\':
296 case '"':
// These escapes stand for the character itself.
297 *it++ = parse_state.front( );
298 parse_state.remove_prefix( );
299 break;
300 default:
// Any other character after the backslash is copied through unchanged.
301 if constexpr( not AllowHighEight ) {
// NOTE(review): the call these arguments belong to (original line 302) is
// missing from the listing.
303 ( not parse_state.is_space_unchecked( ) ) &
304 ( static_cast<unsigned char>( parse_state.front( ) ) <=
305 0x7FU ),
306 ErrorReason::InvalidStringHighASCII, parse_state );
307 }
308 *it++ = parse_state.front( );
309 parse_state.remove_prefix( );
310 }
311 } else {
312 daw_json_assert_weak( not has_quote or
313 parse_state.is_quotes_checked( ),
314 ErrorReason::InvalidString, parse_state );
315 }
316 daw_json_assert_weak( not has_quote or parse_state.has_more( ),
317 ErrorReason::UnexpectedEndOfData, parse_state );
318 }
// Trim the buffer to the number of bytes actually written.
319 auto const sz =
320 static_cast<std::size_t>( std::distance( std::data( result ), it ) );
321 daw_json_assert_weak( std::size( result ) >= sz,
322 ErrorReason::InvalidString, parse_state );
323 result.resize( sz );
// Return the string directly when it converts to the member's result type;
// otherwise hand its character range to the member's constructor.
324 if constexpr( std::is_convertible_v<string_type,
325 json_result_t<JsonMember>> ) {
326 return result;
327 } else {
328 using constructor_t = json_constructor_t<JsonMember>;
329 construct_value<json_result_t<JsonMember>, constructor_t>(
330 parse_state, std::data( result ), daw::data_end( result ) );
331 }
332 }
333 } // namespace json_details
334 } // namespace DAW_JSON_VER
335} // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Assert that Bool is true when in Checked Input mode. If false, pass the rest of the args to daw_json_error.
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false, pass the rest of the args to daw_json_error.
#define DAW_JSON_CPP23_STATIC_CALL_OP
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition version.h:20