DAW JSON Link
Loading...
Searching...
No Matches
daw_not_const_ex_functions.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
12
15
16#include <daw/daw_attributes.h>
17#include <daw/daw_cpp_feature_check.h>
18#include <daw/daw_cxmath.h>
19#include <daw/daw_do_n.h>
20#include <daw/daw_likely.h>
21#include <daw/daw_logic.h>
22#include <daw/daw_not_null.h>
23#include <daw/daw_uint_buffer.h>
24#include <daw/daw_unreachable.h>
25
26#if defined( DAW_ALLOW_SSE42 )
27#include <emmintrin.h>
28#include <nmmintrin.h>
29#include <smmintrin.h>
30#include <tmmintrin.h>
31#include <wmmintrin.h>
32#include <xmmintrin.h>
33#if defined( DAW_HAS_MSVC_LIKE )
34#include <intrin.h>
35#endif
36#endif
37
38#include <cstddef>
39#include <cstring>
40#include <type_traits>
41
42namespace daw::json {
43 inline namespace DAW_JSON_VER {
44 namespace json_details {
45 DAW_ATTRIB_INLINE
46 constexpr bool is_escaped( daw::not_null<char const *> ptr,
47 daw::not_null<char const *> min_ptr ) {
48 if( *( ptr - 1 ) != '\\' ) {
49 return false;
50 }
51 if( ( ptr - min_ptr ) < 2 ) {
52 return false;
53 }
54 return *( ptr - 2 ) != '\\';
55 }
56
57#if defined( DAW_ALLOW_SSE42 )
/// Lookup table with one boolean flag per possible `char` value.
struct key_table_t {
  /// Flag storage, indexed by the character converted to unsigned char.
  alignas( 16 ) bool values[256] = { };

  /// Return the flag stored for `idx`. The index is converted to
  /// `unsigned char` first so that negative `char` values select entries
  /// 128..255 rather than indexing before the array.
  constexpr bool operator[]( char idx ) const {
    auto const slot = static_cast<unsigned char>( idx );
    return values[slot];
  }
};
65
66 template<char... keys>
67 static constexpr inline key_table_t key_table =
68 [] DAW_CPP23_STATIC_CALL_OP {
69 auto result = key_table_t{ };
70 (void)( daw::nsc_or(
71 ( result.values[static_cast<unsigned char>( keys )] = true )... ) );
72 return result;
73 }( );
74
75 constexpr std::ptrdiff_t find_lsb_set( UInt32 value ) {
76#if DAW_HAS_BUILTIN( __builtin_ffs )
77 return __builtin_ffs( static_cast<int>( value ) ) - 1;
78#elif defined( DAW_HAS_MSVC_LIKE )
79 if( not DAW_IS_CONSTANT_EVALUATED( ) ) {
80 unsigned long index;
81 _BitScanForward( &index, static_cast<int>( value ) );
82 return static_cast<std::ptrdiff_t>( index );
83 }
84#else
85 std::ptrdiff_t result = 0;
86 if( value == 0 ) {
87 return -1;
88 }
89 while( ( value & 1 ) == 0 ) {
90 value >>= 1;
91 ++result;
92 }
93 return result;
94#endif
95 }
96
97 DAW_ATTRIB_INLINE
98 __m128i set_reverse( char c0, char c1 = 0, char c2 = 0, char c3 = 0,
99 char c4 = 0, char c5 = 0, char c6 = 0, char c7 = 0,
100 char c8 = 0, char c9 = 0, char c10 = 0, char c11 = 0,
101 char c12 = 0, char c13 = 0, char c14 = 0,
102 char c15 = 0 ) {
103 return _mm_set_epi8( c15,
104 c14,
105 c13,
106 c12,
107 c11,
108 c10,
109 c9,
110 c8,
111 c7,
112 c6,
113 c5,
114 c4,
115 c3,
116 c2,
117 c1,
118 c0 );
119 }
120
121 DAW_ATTRIB_INLINE __m128i
122 uload16_char_data_simd( daw::not_null<char const *> ptr ) {
123 return _mm_loadu_si128(
124 reinterpret_cast<__m128i const *>( ptr.get( ) ) );
125 }
126
127 DAW_ATTRIB_INLINE __m128i
128 load16_char_data_simd( daw::not_null<char const *> ptr ) {
129 return _mm_load_si128(
130 reinterpret_cast<__m128i const *>( ptr.get( ) ) );
131 }
132
133 template<char k>
134 DAW_ATTRIB_INLINE UInt32 mem_find_eq_simd( __m128i block ) {
135 __m128i const keys = _mm_set1_epi8( k );
136 __m128i const found = _mm_cmpeq_epi8( block, keys );
137 return to_uint32( _mm_movemask_epi8( found ) );
138 }
139
140 template<bool is_unchecked_input, char... keys>
141 DAW_ATTRIB_INLINE daw::not_null<char const *>
142 mem_move_to_next_of_sse42( daw::not_null<char const *> first,
143 daw::not_null<char const *> const last ) {
144
145 while( last - first >= 16 ) {
146 auto const val0 = uload16_char_data_simd( first );
147 auto const key_positions = ( mem_find_eq_simd<keys>( val0 ) | ... );
148 if( key_positions != 0 ) {
149 return first + find_lsb_set( key_positions );
150 }
151 first += 16;
152 }
153 auto val1 = __m128i{ };
154 auto const max_pos = last - first;
155 std::memcpy( &val1, first, static_cast<std::size_t>( max_pos ) );
156 auto const key_positions = ( mem_find_eq_simd<keys>( val1 ) | ... );
157 if( key_positions != 0 ) {
158 auto const offset = find_lsb_set( key_positions );
159 if( offset >= max_pos ) {
160 return last;
161 }
162 return first + offset;
163 }
164 return last;
165 }
166
167 // Adapted from
168 // https://github.com/simdjson/simdjson/blob/master/src/generic/stage1/json_string_scanner.h#L79
169 DAW_ATTRIB_INLINE constexpr UInt32
170 find_escaped_branchless( UInt32 &prev_escaped, UInt32 backslashes ) {
171 backslashes &= ~prev_escaped;
172 UInt32 follow_escape = ( backslashes << 1 ) | prev_escaped;
173 using even_bits = daw::constant<0x5555'5555_u32>;
174
175 UInt32 const odd_seq_start =
176 backslashes & ( ~even_bits::value ) & ( ~follow_escape );
177 UInt32 seq_start_on_even_bits = 0_u32;
178 prev_escaped = [&] {
179 auto r = odd_seq_start + backslashes;
180 seq_start_on_even_bits = 0x0000'FFFF_u32 & r;
181 r >>= 16U;
182 return r;
183 }( );
184 UInt32 invert_mask = seq_start_on_even_bits << 1U;
185
186 return ( even_bits::value ^ invert_mask ) & follow_escape;
187 }
188
189 DAW_ATTRIB_INLINE UInt32 prefix_xor_simd( UInt32 bitmask ) {
190 __m128i const all_ones = _mm_set1_epi8( '\xFF' );
191 __m128i const result = _mm_clmulepi64_si128(
192 _mm_set_epi32( 0, 0, 0, static_cast<std::int32_t>( bitmask ) ),
193 all_ones,
194 0 );
195 return to_uint32( _mm_cvtsi128_si32( result ) );
196 }
197
198 template<bool is_unchecked_input>
199 inline daw::not_null<char const *> mem_skip_until_end_of_string_simd(
200 daw::not_null<char const *> first,
201 daw::not_null<char const *> const last ) {
202 UInt32 prev_escapes = 0_u32;
203 while( last - first >= 16 ) {
204 auto const val0 = uload16_char_data_simd( first );
205 UInt32 const backslashes = mem_find_eq_simd<'\\'>( val0 );
206 UInt32 const escaped =
207 find_escaped_branchless( prev_escapes, backslashes );
208 UInt32 const quotes = mem_find_eq_simd<'"'>( val0 ) & ( ~escaped );
209 UInt32 const in_string = prefix_xor_simd( quotes );
210 if( in_string != 0 ) {
211 first += find_lsb_set( in_string );
212 return first;
213 }
214 first += 16;
215 }
216 if constexpr( is_unchecked_input ) {
217 while( *first != '"' ) {
218 while( not key_table<'"', '\\'>[*first] ) {
219 ++first;
220 }
221 if( *first == '"' ) {
222 return first;
223 }
224 first += 2;
225 }
226 } else {
227 while( DAW_LIKELY( first < last ) and *first != '"' ) {
228 while( DAW_LIKELY( first < last ) and
229 not key_table<'"', '\\'>[*first] ) {
230 ++first;
231 }
232 if( first >= last ) {
233 return last;
234 }
235 if( *first == '"' ) {
236 return first;
237 }
238 first += 2;
239 }
240 }
241 return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
242 : last;
243 }
244
245 template<bool is_unchecked_input>
246 inline daw::not_null<char const *>
247 mem_skip_until_end_of_string_simd( daw::not_null<char const *> first,
248 daw::not_null<char const *> const last,
249 std::ptrdiff_t &first_escape ) {
250 auto const first_first = first;
251 UInt32 prev_escapes = 0_u32;
252 while( last - first >= 16 ) {
253 auto const val0 = uload16_char_data_simd( first );
254 UInt32 const backslashes = mem_find_eq_simd<'\\'>( val0 );
255 if( ( backslashes != 0 ) & ( first_escape < 0 ) ) {
256 first_escape = find_lsb_set( backslashes );
257 }
258 UInt32 const escaped =
259 find_escaped_branchless( prev_escapes, backslashes );
260 UInt32 const quotes = mem_find_eq_simd<'"'>( val0 ) & ( ~escaped );
261 UInt32 const in_string = prefix_xor_simd( quotes );
262 if( in_string != 0 ) {
263 first += find_lsb_set( in_string );
264 return first;
265 }
266 first += 16;
267 }
268 if constexpr( is_unchecked_input ) {
269 while( *first != '"' ) {
270 while( not key_table<'"', '\\'>[*first] ) {
271 ++first;
272 }
273 if( *first == '"' ) {
274 return first;
275 }
276 if( first_escape < 0 ) {
277 first_escape = first - first_first;
278 }
279 first += 2;
280 }
281 } else {
282 while( DAW_LIKELY( first < last ) and *first != '"' ) {
283 while( DAW_LIKELY( first < last ) and
284 not key_table<'"', '\\'>[*first] ) {
285 ++first;
286 }
287 if( first >= last ) {
288 return last;
289 }
290 if( *first == '"' ) {
291 return first;
292 }
293 if( first_escape < 0 ) {
294 first_escape = first - first_first;
295 }
296 first += 2;
297 }
298 }
299 return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
300 : last;
301 }
302
303#endif
304 template<bool is_unchecked_input, char... keys>
305 DAW_ATTRIB_INLINE daw::not_null<char const *>
306 mem_move_to_next_of_runtime( daw::not_null<char const *> first,
307 daw::not_null<char const *> last ) {
308 if constexpr( sizeof...( keys ) == 1 ) {
309 char const key[]{ keys... };
310 char const *ptr = static_cast<char const *>( std::memchr(
311 first, key[0], static_cast<std::size_t>( last - first ) ) );
312 if( ptr == nullptr ) {
313 ptr = last;
314 }
315 return ptr;
316 } else {
317 DAW_CPP23_STATIC_LOCAL constexpr auto eq =
318 []( char l, char r ) DAW_JSON_CPP23_STATIC_CALL_OP {
319 return l == r;
320 };
321 while( is_unchecked_input or first < last ) {
322 char const c = *first;
323 if( nsc_or( eq( c, keys )... ) ) {
324 return first;
325 }
326 ++first;
327 }
328 return first;
329 }
330 }
331 template<bool is_unchecked_input, typename ExecTag, char... keys>
332 DAW_ATTRIB_FLATTEN daw::not_null<char const *>
333 mem_move_to_next_of( daw::not_null<char const *> first,
334 daw::not_null<char const *> last ) {
335
336#if defined( DAW_ALLOW_SSE42 )
337 if( not std::is_same_v<runtime_exec_tag, ExecTag> ) {
338 return mem_move_to_next_of_sse42<is_unchecked_input, keys...>( first,
339 last );
340 }
341#endif
342 return mem_move_to_next_of_runtime<is_unchecked_input, keys...>( first,
343 last );
344 }
345
346 template<bool is_unchecked_input>
347 DAW_ATTRIB_INLINE daw::not_null<char const *>
348 mem_skip_until_end_of_string_runtime(
349 daw::not_null<char const *> first,
350 daw::not_null<char const *> const last ) {
351 if constexpr( not is_unchecked_input ) {
352 daw_json_ensure( first < last, ErrorReason::UnexpectedEndOfData );
353 }
354 first =
355 mem_move_to_next_of<is_unchecked_input, runtime_exec_tag, '\\', '"'>(
356 first, last );
357 while( is_unchecked_input or first < last ) {
358 switch( *first ) {
359 case '"':
360 return first;
361 case '\\':
362 ++first;
363 break;
364 }
365 ++first;
366 first = mem_move_to_next_of<is_unchecked_input,
368 '\\',
369 '"'>( first, last );
370 }
371 return first;
372 }
373
374 template<bool is_unchecked_input>
375 DAW_ATTRIB_INLINE constexpr daw::not_null<char const *>
376 mem_skip_until_end_of_string_constexpr(
377 daw::not_null<char const *> first,
378 daw::not_null<char const *> const last ) {
379 if( first == last ) {
380 return first;
381 }
382 using char_t = std::remove_const_t<char const>;
383 // Check if the last valid char is a '\'. If not we can skip a check
384 // in the loop on escaped things
385 if( is_unchecked_input or
386 DAW_LIKELY( *( last - 1 ) != char_t{ '\\' } ) ) {
387 while( is_unchecked_input or DAW_UNLIKELY( first < last ) ) {
388 char const c = *first;
389 if( c == char_t{ '"' } ) {
390 break;
391 }
392 if( c == char_t{ '\\' } ) {
393 // We know that the last \ character is not the last character
394 // in range
395 first += 2;
396 } else {
397 ++first;
398 }
399 }
400 } else {
401 while( is_unchecked_input or DAW_UNLIKELY( first < last ) ) {
402 char const c = *first;
403 if( c == char_t{ '"' } ) {
404 break;
405 }
406 if( c == char_t{ '\\' } ) {
407 if( DAW_LIKELY( first + 1 < last ) ) {
408 first += 2;
409 } else {
410 first = last;
411 break;
412 }
413 } else {
414 ++first;
415 }
416 }
417 }
418 return first;
419 }
420
421 template<bool is_unchecked_input, typename ExecTag>
422 DAW_ATTRIB_INLINE constexpr daw::not_null<char const *>
423 mem_skip_until_end_of_string( daw::not_null<char const *> first,
424 daw::not_null<char const *> const last ) {
425 if( use_constexpr_exec_mode<ExecTag>( ) ) {
426 return mem_skip_until_end_of_string_constexpr<is_unchecked_input>(
427 first, last );
428 }
429#if defined( DAW_ALLOW_SSE42 )
430 if( not std::is_same_v<runtime_exec_tag, ExecTag> ) {
431 return mem_skip_until_end_of_string_simd<is_unchecked_input>( first,
432 last );
433 }
434#endif
435 return mem_skip_until_end_of_string_runtime<is_unchecked_input>( first,
436 last );
437 }
438
439 template<bool is_unchecked_input>
440 DAW_ATTRIB_INLINE daw::not_null<char const *>
441 mem_skip_until_end_of_string_runtime(
442 daw::not_null<char const *> first,
443 daw::not_null<char const *> const last, std::ptrdiff_t &first_escape ) {
444 auto first_first = first;
445 if constexpr( not is_unchecked_input ) {
446 daw_json_ensure( first < last, ErrorReason::UnexpectedEndOfData );
447 }
448 first =
449 mem_move_to_next_of<is_unchecked_input, runtime_exec_tag, '\\', '"'>(
450 first, last );
451 while( is_unchecked_input or first < last ) {
452 switch( *first ) {
453 case '"':
454 return first;
455 case '\\':
456 if( first_escape < 0 ) {
457 first_escape = first - first_first;
458 }
459 if constexpr( is_unchecked_input ) {
460 ++first;
461 } else {
462 first += static_cast<int>( static_cast<bool>( last - first ) );
463 }
464 break;
465 }
466 ++first;
467 first = mem_move_to_next_of<is_unchecked_input,
469 '\\',
470 '"'>( first, last );
471 }
472 return first;
473 }
474
475 template<bool is_unchecked_input, typename ExecTag>
476 DAW_ATTRIB_FLATINLINE daw::not_null<char const *>
477 mem_skip_until_end_of_string( daw::not_null<char const *> first,
478 daw::not_null<char const *> const last,
479 std::ptrdiff_t &first_escape ) {
480#if defined( DAW_ALLOW_SSE42 )
481 if( not std::is_same_v<runtime_exec_tag, ExecTag> ) {
482 return mem_skip_until_end_of_string_simd<is_unchecked_input>(
483 first, last, first_escape );
484 }
485#endif
486 return mem_skip_until_end_of_string_runtime<is_unchecked_input>(
487 first, last, first_escape );
488 }
489 } // namespace json_details
490 } // namespace DAW_JSON_VER
491} // namespace daw::json
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false pass rest of args to daw_json_error.
#define DAW_JSON_CPP23_STATIC_CALL_OP
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition version.h:20