DAW JSON Link
daw_json_parse_string_quote.h
Go to the documentation of this file.
1 // Copyright (c) Darrell Wright
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 // Official repository: https://github.com/beached/daw_json_link
7 //
8 
9 #pragma once
10 
11 #include "version.h"
12 
13 #include "daw_json_assert.h"
15 
16 #include <daw/daw_traits.h>
17 #include <daw/daw_uint_buffer.h>
18 
19 #include <cstddef>
20 #include <type_traits>
21 
22 namespace daw::json {
23  inline namespace DAW_JSON_VER {
24  namespace json_details::string_quote {
25  template<std::size_t N, char c>
26  inline constexpr UInt8 test_at_byte( UInt64 b ) {
27  auto const lhs = b & ( 0xFF_u64 << ( N * 8U ) );
28  using rhs = daw::constant<to_uint64( static_cast<unsigned char>( c ) )
29  << ( N * 8U )>;
30  return to_uint8( not( lhs - rhs::value ) );
31  }
32 
33  template<std::size_t N, char c>
34  DAW_ATTRIB_INLINE constexpr UInt8 test_at_byte( UInt32 b ) {
35  auto const lhs = b & ( 0xFF_u32 << ( N * 8U ) );
36  using rhs = daw::constant<to_uint32( static_cast<unsigned char>( c ) )
37  << ( N * 8U )>;
38  return to_uint8( not( lhs - rhs::value ) );
39  }
40 
41  template<typename CharT>
42  DAW_ATTRIB_NONNULL( )
43  inline constexpr void skip_to_first8( CharT *&first, CharT *const last ) {
44  bool keep_going = last - first >= 8;
45  while( keep_going ) {
46  auto buff = daw::to_uint64_buffer( first );
47  auto const q7 = test_at_byte<7U, '"'>( buff );
48  auto const q6 = test_at_byte<6U, '"'>( buff );
49  auto const q5 = test_at_byte<5U, '"'>( buff );
50  auto const q4 = test_at_byte<4U, '"'>( buff );
51  auto const q3 = test_at_byte<3U, '"'>( buff );
52  auto const q2 = test_at_byte<2U, '"'>( buff );
53  auto const q1 = test_at_byte<1U, '"'>( buff );
54  auto const q0 = test_at_byte<0U, '"'>( buff );
55  auto const s7 = test_at_byte<7U, '\\'>( buff );
56  auto const s6 = test_at_byte<6U, '\\'>( buff );
57  auto const s5 = test_at_byte<5U, '\\'>( buff );
58  auto const s4 = test_at_byte<4U, '\\'>( buff );
59  auto const s3 = test_at_byte<3U, '\\'>( buff );
60  auto const s2 = test_at_byte<2U, '\\'>( buff );
61  auto const s1 = test_at_byte<1U, '\\'>( buff );
62  auto const s0 = test_at_byte<0U, '\\'>( buff );
63 
64  keep_going = not( q0 | q1 | q2 | q3 | q4 | q5 | q6 | q7 | s0 | s1 |
65  s2 | s3 | s4 | s5 | s6 | s7 );
66  keep_going = keep_going & static_cast<bool>( last - first >= 16 );
67  first += static_cast<int>( keep_going ) * 8;
68  }
69  first -= *( first - 1 ) == '\\' ? 1 : 0;
70  }
71 
72  template<typename CharT>
73  DAW_ATTRIB_NONNULL( )
74  inline constexpr void skip_to_first4( CharT *&first, CharT *const last ) {
75  bool keep_going = last - first >= 4;
76  while( keep_going ) {
77  // Need to look for escapes as this is fast path
78  auto buff = daw::to_uint32_buffer( first );
79  auto const q3 = test_at_byte<3U, '"'>( buff );
80  auto const q2 = test_at_byte<2U, '"'>( buff );
81  auto const q1 = test_at_byte<1U, '"'>( buff );
82  auto const q0 = test_at_byte<0U, '"'>( buff );
83  auto const s3 = test_at_byte<3U, '\\'>( buff );
84  auto const s2 = test_at_byte<2U, '\\'>( buff );
85  auto const s1 = test_at_byte<1U, '\\'>( buff );
86  auto const s0 = test_at_byte<0U, '\\'>( buff );
87  keep_going = not( q0 | q1 | q2 | q3 | s0 | s1 | s2 | s3 );
88  keep_going = keep_going & static_cast<bool>( last - first >= 8 );
89  first += static_cast<int>( keep_going ) * 4;
90  }
91  first -= *( first - 1 ) == '\\' ? 1 : 0;
92  }
93 
94  namespace string_quote_parser {
95  template<typename ParseState>
96  [[nodiscard]] static constexpr std::size_t
97  parse_nq_uncheck( ParseState &parse_state ) {
98  using CharT = typename ParseState::CharT;
99  std::ptrdiff_t need_slow_path = -1;
100  CharT *first = parse_state.first;
101  CharT *const last = parse_state.last;
102  // This is a logic error to happen.
103  // daw_json_assert_weak( first != '"', "Unexpected quote", parse_state
104  // );
105  if constexpr( daw::traits::not_same_v<typename ParseState::exec_tag_t,
106  constexpr_exec_tag> ) {
107  first = mem_skip_until_end_of_string<true>(
108  ParseState::exec_tag, first, last, need_slow_path );
109  } else {
110 
111  {
112  auto const sz = last - first;
113  if( sz >= 8 ) {
114  skip_to_first8( first, last );
115  } else if( sz >= 4 ) {
116  skip_to_first4( first, last );
117  }
118  }
119  while( *first != '"' ) {
120  while( []( char c ) DAW_JSON_CPP23_STATIC_CALL_OP {
121  return ( c != '"' ) & ( c != '\\' );
122  }( *first ) ) {
123  ++first;
124  }
125  if( *first == '\\' ) {
126  if( need_slow_path < 0 ) {
127  need_slow_path = first - parse_state.first;
128  }
129  first += 2;
130  } else {
131  break;
132  }
133  }
134  }
135  parse_state.first = first;
136  return static_cast<std::size_t>( need_slow_path );
137  }
138 
139  template<typename ParseState>
140  [[nodiscard]] static constexpr std::size_t
141  parse_nq_check( ParseState &parse_state ) {
142 
143  using CharT = typename ParseState::CharT;
144  std::ptrdiff_t need_slow_path = -1;
145  CharT *first = parse_state.first;
146  CharT *const last = parse_state.class_last;
147  if constexpr( daw::traits::not_same_v<typename ParseState::exec_tag_t,
148  constexpr_exec_tag> ) {
149  first = mem_skip_until_end_of_string<false>(
150  ParseState::exec_tag, first, last, need_slow_path );
151  } else {
152  if constexpr( not ParseState::exclude_special_escapes ) {
153  if( CharT *const l = parse_state.last; l - first >= 8 ) {
154  skip_to_first8( first, l );
155  } else if( last - first >= 4 ) {
156  skip_to_first4( first, l );
157  }
158  }
159  if constexpr( ParseState::is_zero_terminated_string ) {
160  if constexpr( ParseState::exclude_special_escapes ) {
161  while( *first != '\0' ) {
162  char c = *first;
163  daw_json_ensure( static_cast<unsigned char>( c ) >= 0x20U,
164  ErrorReason::InvalidString, parse_state );
165  if( c == '\\' ) {
166  daw_json_ensure( last - first > 1,
167  ErrorReason::InvalidString, parse_state );
168  if( need_slow_path < 0 ) {
169  need_slow_path = first - parse_state.first;
170  }
171  ++first;
172  c = *first;
173  switch( c ) {
174  case '"':
175  case '\\':
176  case '/':
177  case 'b':
178  case 'f':
179  case 'n':
180  case 'r':
181  case 't':
182  case 'u':
183  break;
184  default:
185  daw_json_error( ErrorReason::InvalidString, parse_state );
186  }
187  } else if( c == '"' ) {
188  break;
189  }
190  ++first;
191  }
192  } else {
193  while( ( *first != 0 ) & ( *first != '"' ) ) {
194  while( ( *first != 0 ) & ( *first != '"' ) &
195  ( *first != '\\' ) ) {
196  ++first;
197  }
198 
199  if( ( ( *first != 0 ) & ( *first == '\\' ) ) ) {
200  if( need_slow_path < 0 ) {
201  need_slow_path = first - parse_state.first;
202  }
203  first += 2;
204  } else {
205  break;
206  }
207  }
208  }
209  } else {
210  if constexpr( ParseState::exclude_special_escapes ) {
211  while( first < last ) {
212  char c = *first;
213  daw_json_ensure( static_cast<unsigned char>( c ) >= 0x20U,
214  ErrorReason::InvalidString, parse_state );
215  if( c == '\\' ) {
216  daw_json_ensure( last - first > 1,
217  ErrorReason::InvalidString, parse_state );
218  if( need_slow_path < 0 ) {
219  need_slow_path = first - parse_state.first;
220  }
221  ++first;
222  c = *first;
223  switch( c ) {
224  case '"':
225  case '\\':
226  case '/':
227  case 'b':
228  case 'f':
229  case 'n':
230  case 'r':
231  case 't':
232  case 'u':
233  break;
234  default:
235  daw_json_error( ErrorReason::InvalidString, parse_state );
236  }
237  } else if( c == '"' ) {
238  break;
239  }
240  ++first;
241  }
242  } else {
243  while( first < last and *first != '"' ) {
244  while( first < last and
245  ( ( *first != '"' ) & ( *first != '\\' ) ) ) {
246  ++first;
247  }
248 
249  if( first < last and *first == '\\' ) {
250  if( need_slow_path < 0 ) {
251  need_slow_path = first - parse_state.first;
252  }
253  first += 2;
254  } else {
255  break;
256  }
257  }
258  }
259  }
260  }
261  if constexpr( ParseState::is_zero_terminated_string ) {
262  daw_json_assert_weak( *first == '"', ErrorReason::InvalidString,
263  parse_state );
264  } else {
265  daw_json_assert_weak( first < last and *first == '"',
266  ErrorReason::InvalidString, parse_state );
267  }
268  parse_state.first = first;
269  return static_cast<std::size_t>( need_slow_path );
270  }
271 
272  template<typename ParseState>
273  [[nodiscard]] DAW_ATTRIB_FLATTEN static constexpr std::size_t
274  parse_nq( ParseState &parse_state ) {
275  if constexpr( ParseState::is_unchecked_input ) {
276  return parse_nq_uncheck( parse_state );
277  } else {
278  return parse_nq_check( parse_state );
279  }
280  }
281  } // namespace string_quote_parser
282  } // namespace json_details::string_quote
283  } // namespace DAW_JSON_VER
284 } // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Assert that Bool is true when in Checked Input mode. If false, pass the rest of the args to daw_json_error.
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false pass rest of args to daw_json_error.
#define DAW_JSON_CPP23_STATIC_CALL_OP
This is in addition to the parse policy. Always do a full name match instead of sometimes relying on ...
DAW_ATTRIB_NOINLINE void daw_json_error(ErrorReason reason)
std::bool_constant< is_zero_terminated_string_v< T > > is_zero_terminated_string
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition: version.h:25