DAW JSON Link
daw_not_const_ex_functions.h
Go to the documentation of this file.
1 // Copyright (c) Darrell Wright
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 // Official repository: https://github.com/beached/daw_json_link
7 //
8 
9 #pragma once
10 
11 #include "version.h"
12 
13 #include "daw_json_assert.h"
14 #include "daw_json_exec_modes.h"
15 
16 #include <daw/daw_attributes.h>
17 #include <daw/daw_cpp_feature_check.h>
18 #include <daw/daw_cxmath.h>
19 #include <daw/daw_do_n.h>
20 #include <daw/daw_likely.h>
21 #include <daw/daw_uint_buffer.h>
22 #include <daw/daw_unreachable.h>
23 
24 #if defined( DAW_ALLOW_SSE42 )
25 #include <emmintrin.h>
26 #include <nmmintrin.h>
27 #include <smmintrin.h>
28 #include <tmmintrin.h>
29 #include <wmmintrin.h>
30 #include <xmmintrin.h>
31 #if defined( DAW_HAS_MSVC_LIKE )
32 #include <intrin.h>
33 #endif
34 #endif
35 
36 #include <cstddef>
37 #include <cstring>
38 
39 namespace daw::json {
40  inline namespace DAW_JSON_VER {
41  namespace json_details {
42  DAW_ATTRIB_NONNULL( )
43  DAW_ATTRIB_INLINE
44  constexpr bool is_escaped( char const *ptr, char const *min_ptr ) {
45  if( *( ptr - 1 ) != '\\' ) {
46  return false;
47  }
48  if( ( ptr - min_ptr ) < 2 ) {
49  return false;
50  }
51  return *( ptr - 2 ) != '\\';
52  }
53 
54 #if defined( DAW_ALLOW_SSE42 )
/// 256-entry membership table indexed by character value.
struct key_table_t {
  alignas( 16 ) bool values[256] = { };

  /// @return true when `idx` was registered as a key. The index goes through
  /// `unsigned char` so that negative `char` values address the upper half
  /// of the table instead of reading out of bounds.
  constexpr bool operator[]( char idx ) const {
    return values[static_cast<unsigned char>( idx )];
  }
};

/// Compile-time membership table holding `true` for every character in
/// `keys` and `false` everywhere else.
template<char... keys>
static constexpr inline key_table_t key_table = [] {
  auto result = key_table_t{ };
  // A comma fold is well-formed for an empty pack and does not depend on
  // bool-to-int promotion the way a fold over `|` does
  ( (void)( result.values[static_cast<unsigned char>( keys )] = true ),
    ... );
  return result;
}( );
70 #endif
71 
72  inline std::ptrdiff_t find_lsb_set( runtime_exec_tag, UInt32 value ) {
73 #if DAW_HAS_BUILTIN( __builtin_ffs )
74  return __builtin_ffs( static_cast<int>( value ) ) - 1;
75 #elif defined( DAW_HAS_MSVC_LIKE )
76  unsigned long index;
77  _BitScanForward( &index, static_cast<int>( value ) );
78  return static_cast<std::ptrdiff_t>( index );
79 #else
80  std::ptrdiff_t result = 0;
81  if( value == 0 ) {
82  return -1;
83  }
84  while( ( value & 1 ) == 0 ) {
85  value >>= 1;
86  ++result;
87  }
88  return result;
89 #endif
90  }
91 
92 #if defined( DAW_ALLOW_SSE42 )
93  DAW_ATTRIB_INLINE __m128i
94  set_reverse( char c0, char c1 = 0, char c2 = 0, char c3 = 0, char c4 = 0,
95  char c5 = 0, char c6 = 0, char c7 = 0, char c8 = 0,
96  char c9 = 0, char c10 = 0, char c11 = 0, char c12 = 0,
97  char c13 = 0, char c14 = 0, char c15 = 0 ) {
98  return _mm_set_epi8( c15, c14, c13, c12, c11, c10, c9, c8, c7, c6, c5,
99  c4, c3, c2, c1, c0 );
100  }
101 
102  DAW_ATTRIB_INLINE __m128i uload16_char_data( sse42_exec_tag,
103  char const *ptr ) {
104  return _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) );
105  }
106 
107  DAW_ATTRIB_INLINE __m128i load16_char_data( sse42_exec_tag,
108  char const *ptr ) {
109  return _mm_load_si128( reinterpret_cast<__m128i const *>( ptr ) );
110  }
111 
112  template<char k>
113  DAW_ATTRIB_INLINE UInt32 mem_find_eq( sse42_exec_tag, __m128i block ) {
114  __m128i const keys = _mm_set1_epi8( k );
115  __m128i const found = _mm_cmpeq_epi8( block, keys );
116  return to_uint32( _mm_movemask_epi8( found ) );
117  }
118 
119  template<unsigned char k>
120  DAW_ATTRIB_INLINE UInt32 mem_find_gt( sse42_exec_tag, __m128i block ) {
121  static __m128i const keys = _mm_set1_epi8( k );
122  __m128i const found = _mm_cmpgt_epi8( block, keys );
123  return to_uint32( _mm_movemask_epi8( found ) );
124  }
125 
126  template<bool is_unchecked_input, char... keys, typename CharT>
127  DAW_ATTRIB_INLINE CharT *mem_move_to_next_of( sse42_exec_tag tag,
128  CharT *first,
129  CharT *const last ) {
130 
131  while( last - first >= 16 ) {
132  auto const val0 = uload16_char_data( tag, first );
133  auto const key_positions = ( mem_find_eq<keys>( tag, val0 ) | ... );
134  if( key_positions != 0 ) {
135  return first + find_lsb_set( tag, key_positions );
136  }
137  first += 16;
138  }
139  __m128i val1{ };
140  auto const max_pos = last - first;
141  memcpy( &val1, first, static_cast<std::size_t>( max_pos ) );
142  auto const key_positions = ( mem_find_eq<keys>( tag, val1 ) | ... );
143  if( key_positions != 0 ) {
144  auto const offset = find_lsb_set( tag, key_positions );
145  if( offset >= max_pos ) {
146  return last;
147  }
148  return first + offset;
149  }
150  return last;
151  }
152 
153  template<bool is_unchecked_input, char... keys, typename CharT>
154  DAW_ATTRIB_INLINE CharT *
155  mem_move_to_next_not_of( sse42_exec_tag tag, CharT *first, CharT *last ) {
156  using keys_len = daw::constant<static_cast<int>( sizeof...( keys ) )>;
157  using compare_mode = daw::constant<static_cast<int>(
158  _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_NEGATIVE_POLARITY )>;
159  static_assert( keys_len::value <= 16 );
160 
161  __m128i const a = set_reverse( keys... );
162 
163  while( last - first >= 16 ) {
164  auto const b = uload16_char_data( tag, first );
165  int const result =
166  _mm_cmpestri( a, keys_len::value, b, 16, compare_mode::value );
167  first += result;
168  if( result < 16 ) {
169  return first;
170  }
171  }
172  __m128i b{ };
173  auto const max_pos = last - first;
174  int const result =
175  _mm_cmpestri( a, keys_len::vlaue, b, 16, compare_mode::value );
176  if( result < max_pos ) {
177  return first + result;
178  }
179  return last;
180  }
181 
182  template<typename U32>
183  DAW_ATTRIB_INLINE bool add_overflow( U32 value1, U32 value2,
184  U32 &result ) {
185  static_assert( sizeof( U32 ) <= sizeof( unsigned long long ) );
186  static_assert( sizeof( U32 ) == 4 );
187 #if defined( DAW_JSON_HAS_BUILTIN_UADD )
188  if constexpr( sizeof( unsigned ) == sizeof( U32 ) ) {
189  return __builtin_uadd_overflow(
190  static_cast<unsigned>( value1 ), static_cast<unsigned>( value2 ),
191  reinterpret_cast<unsigned *>( &result ) );
192  } else if constexpr( sizeof( unsigned long ) == sizeof( U32 ) ) {
193  return __builtin_uaddl_overflow(
194  static_cast<unsigned long>( value1 ),
195  static_cast<unsigned long>( value2 ),
196  reinterpret_cast<unsigned long *>( &result ) );
197  } else {
198  return __builtin_uaddll_overflow(
199  static_cast<unsigned long long>( value1 ),
200  static_cast<unsigned long long>( value2 ),
201  reinterpret_cast<unsigned long long *>( &result ) );
202  }
203 #else
204  return _addcarry_u32( 0, static_cast<std::uint32_t>( value1 ),
205  static_cast<std::uint32_t>( value2 ),
206  reinterpret_cast<std::uint32_t *>( &result ) );
207 #endif
208  }
209 
210  // Adapted from
211  // https://github.com/simdjson/simdjson/blob/master/src/generic/stage1/json_string_scanner.h#L79
212  DAW_ATTRIB_INLINE constexpr UInt32
213  find_escaped_branchless( constexpr_exec_tag, UInt32 &prev_escaped,
214  UInt32 backslashes ) {
215  backslashes &= ~prev_escaped;
216  UInt32 follow_escape = ( backslashes << 1 ) | prev_escaped;
217  using even_bits = daw::constant<0x5555'5555_u32>;
218 
219  UInt32 const odd_seq_start =
220  backslashes & ( ~even_bits::value ) & ( ~follow_escape );
221  UInt32 seq_start_on_even_bits = 0_u32;
222  prev_escaped = [&] {
223  auto r = odd_seq_start + backslashes;
224  seq_start_on_even_bits = 0x0000'FFFF_u32 & r;
225  r >>= 16U;
226  return r;
227  }( );
228  UInt32 invert_mask = seq_start_on_even_bits << 1U;
229 
230  return ( even_bits::value ^ invert_mask ) & follow_escape;
231  }
232 
233  DAW_ATTRIB_INLINE UInt32 prefix_xor( sse42_exec_tag, UInt32 bitmask ) {
234  __m128i const all_ones = _mm_set1_epi8( '\xFF' );
235  __m128i const result = _mm_clmulepi64_si128(
236  _mm_set_epi32( 0, 0, 0, static_cast<std::int32_t>( bitmask ) ),
237  all_ones, 0 );
238  return to_uint32( _mm_cvtsi128_si32( result ) );
239  }
240 
241  template<bool is_unchecked_input, typename CharT>
242  DAW_ATTRIB_NONNULL( )
243  DAW_ATTRIB_RET_NONNULL
244  inline CharT *mem_skip_until_end_of_string( simd_exec_tag tag,
245  CharT *first,
246  CharT *const last ) {
247  UInt32 prev_escapes = 0_u32;
248  while( last - first >= 16 ) {
249  auto const val0 = uload16_char_data( tag, first );
250  UInt32 const backslashes = mem_find_eq<'\\'>( tag, val0 );
251  UInt32 const escaped =
252  find_escaped_branchless( tag, prev_escapes, backslashes );
253  UInt32 const quotes = mem_find_eq<'"'>( tag, val0 ) & ( ~escaped );
254  UInt32 const in_string = prefix_xor( tag, quotes );
255  if( in_string != 0 ) {
256  first += find_lsb_set( tag, in_string );
257  return first;
258  }
259  first += 16;
260  }
261  if constexpr( is_unchecked_input ) {
262  while( *first != '"' ) {
263  while( not key_table<'"', '\\'>[*first] ) {
264  ++first;
265  }
266  if( *first == '"' ) {
267  return first;
268  }
269  first += 2;
270  }
271  } else {
272  while( DAW_LIKELY( first < last ) and *first != '"' ) {
273  while( DAW_LIKELY( first < last ) and
274  not key_table<'"', '\\'>[*first] ) {
275  ++first;
276  }
277  if( first >= last ) {
278  return last;
279  }
280  if( *first == '"' ) {
281  return first;
282  }
283  first += 2;
284  }
285  }
286  return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
287  : last;
288  }
289 
290  template<bool is_unchecked_input, typename CharT>
291  DAW_ATTRIB_NONNULL( )
292  DAW_ATTRIB_RET_NONNULL inline CharT *mem_skip_until_end_of_string(
293  simd_exec_tag tag, CharT *first, CharT *const last,
294  std::ptrdiff_t &first_escape ) {
295  CharT *const first_first = first;
296  UInt32 prev_escapes = 0_u32;
297  while( last - first >= 16 ) {
298  auto const val0 = uload16_char_data( tag, first );
299  UInt32 const backslashes = mem_find_eq<'\\'>( tag, val0 );
300  if( ( backslashes != 0 ) & ( first_escape < 0 ) ) {
301  first_escape = find_lsb_set( tag, backslashes );
302  }
303  UInt32 const escaped =
304  find_escaped_branchless( tag, prev_escapes, backslashes );
305  UInt32 const quotes = mem_find_eq<'"'>( tag, val0 ) & ( ~escaped );
306  UInt32 const in_string = prefix_xor( tag, quotes );
307  if( in_string != 0 ) {
308  first += find_lsb_set( tag, in_string );
309  return first;
310  }
311  first += 16;
312  }
313  if constexpr( is_unchecked_input ) {
314  while( *first != '"' ) {
315  while( not key_table<'"', '\\'>[*first] ) {
316  ++first;
317  }
318  if( *first == '"' ) {
319  return first;
320  }
321  if( first_escape < 0 ) {
322  first_escape = first_first - first;
323  }
324  first += 2;
325  }
326  } else {
327  while( DAW_LIKELY( first < last ) and *first != '"' ) {
328  while( DAW_LIKELY( first < last ) and
329  not key_table<'"', '\\'>[*first] ) {
330  ++first;
331  }
332  if( first >= last ) {
333  return last;
334  }
335  if( *first == '"' ) {
336  return first;
337  }
338  if( first_escape < 0 ) {
339  first_escape = first_first - first;
340  }
341  first += 2;
342  }
343  }
344  return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
345  : last;
346  }
347 
348 #endif
349  template<bool is_unchecked_input, char... keys, typename CharT>
350  DAW_ATTRIB_NONNULL( )
351  DAW_ATTRIB_RET_NONNULL DAW_ATTRIB_INLINE CharT *mem_move_to_next_of(
352  runtime_exec_tag, CharT *first, CharT *last ) {
353  if constexpr( sizeof...( keys ) == 1 ) {
354  char const key[]{ keys... };
355  auto *ptr = reinterpret_cast<CharT *>( std::memchr(
356  first, key[0], static_cast<std::size_t>( last - first ) ) );
357  if( ptr == nullptr ) {
358  ptr = last;
359  }
360  return ptr;
361  } else {
362  constexpr auto eq = []( char l, char r )
364  return l == r;
365  };
366  while( is_unchecked_input or first < last ) {
367  char const c = *first;
368  if( nsc_or( eq( c, keys )... ) ) {
369  return first;
370  }
371  ++first;
372  }
373  return first;
374  }
375  }
376 
377  template<bool is_unchecked_input, typename CharT>
378  DAW_ATTRIB_NONNULL( )
379  DAW_ATTRIB_RET_NONNULL DAW_ATTRIB_INLINE CharT *mem_skip_string(
380  runtime_exec_tag const &tag, CharT *first, CharT *const last ) {
381  return mem_move_to_next_of<is_unchecked_input, '"', '\\'>( tag, first,
382  last );
383  }
384 
385  template<bool is_unchecked_input, typename CharT>
386  DAW_ATTRIB_NONNULL( )
387  DAW_ATTRIB_RET_NONNULL DAW_ATTRIB_INLINE
388  CharT *mem_skip_until_end_of_string( runtime_exec_tag tag, CharT *first,
389  CharT *const last ) {
390  if constexpr( not is_unchecked_input ) {
391  daw_json_ensure( first < last, ErrorReason::UnexpectedEndOfData );
392  }
393  first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first,
394  last );
395  while( is_unchecked_input or first < last ) {
396  switch( *first ) {
397  case '"':
398  return first;
399  case '\\':
400  ++first;
401  break;
402  }
403  ++first;
404  first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>(
405  tag, first, last );
406  }
407  return first;
408  }
409 
410  template<bool is_unchecked_input, typename CharT>
411  DAW_ATTRIB_NONNULL( )
412  DAW_ATTRIB_RET_NONNULL DAW_ATTRIB_INLINE
413  constexpr CharT *mem_skip_until_end_of_string( constexpr_exec_tag,
414  CharT *first,
415  CharT *const last ) {
416  if( first == last ) {
417  return first;
418  }
419  using char_t = std::remove_const_t<CharT>;
420  // Check if the last valid char is a '\'. If not we can skip a check in
421  // the loop on escaped things
422  if( is_unchecked_input or
423  DAW_LIKELY( *( last - 1 ) != char_t{ '\\' } ) ) {
424  while( is_unchecked_input or DAW_UNLIKELY( first < last ) ) {
425  char const c = *first;
426  if( c == char_t{ '"' } ) {
427  break;
428  }
429  if( c == char_t{ '\\' } ) {
430  // We know that the last \ character is not the last character in
431  // range
432  first += 2;
433  } else {
434  ++first;
435  }
436  }
437  } else {
438  while( is_unchecked_input or DAW_UNLIKELY( first < last ) ) {
439  char const c = *first;
440  if( c == char_t{ '"' } ) {
441  break;
442  }
443  if( c == char_t{ '\\' } ) {
444  if( DAW_LIKELY( first + 1 < last ) ) {
445  first += 2;
446  } else {
447  first = last;
448  break;
449  }
450  } else {
451  ++first;
452  }
453  }
454  }
455  return first;
456  }
457 
458  template<bool is_unchecked_input, typename CharT>
459  DAW_ATTRIB_NONNULL( )
460  DAW_ATTRIB_RET_NONNULL DAW_ATTRIB_INLINE
461  CharT *mem_skip_until_end_of_string( runtime_exec_tag tag, CharT *first,
462  CharT *const last,
463  std::ptrdiff_t &first_escape ) {
464  CharT *const first_first = first;
465  if constexpr( not is_unchecked_input ) {
466  daw_json_ensure( first < last, ErrorReason::UnexpectedEndOfData );
467  }
468  first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>( tag, first,
469  last );
470  while( is_unchecked_input or first < last ) {
471  switch( *first ) {
472  case '"':
473  return first;
474  case '\\':
475  if( first_escape < 0 ) {
476  first_escape = first_first - first;
477  }
478  if constexpr( is_unchecked_input ) {
479  ++first;
480  } else {
481  first += static_cast<int>( static_cast<bool>( last - first ) );
482  }
483  break;
484  }
485  ++first;
486  first = mem_move_to_next_of<is_unchecked_input, '\\', '"'>(
487  tag, first, last );
488  }
489  return first;
490  }
491  } // namespace json_details
492  } // namespace DAW_JSON_VER
493 } // namespace daw::json
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If it is false, pass the rest of the arguments to daw_json_error.
#define DAW_JSON_CPP23_STATIC_CALL_OP
This is in addition to the parse policy. Always do a full name match instead of sometimes relying on ...
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition: version.h:25