44 namespace json_details {
// Decides whether the character at `ptr` is escaped by inspecting the
// characters immediately before it.
//   ptr      position under test; ptr - 1 is read by the first check
//   min_ptr  lower bound used to decide whether ptr - 2 may be read
46 constexpr bool is_escaped( daw::not_null<char const *> ptr,
47 daw::not_null<char const *> min_ptr ) {
// Branch taken when the preceding character is not a backslash.
48 if( *( ptr - 1 ) !=
'\\' ) {
// Guard for the ptr - 2 read below: fewer than two characters lie between
// min_ptr and ptr.
51 if( ( ptr - min_ptr ) < 2 ) {
// The preceding character is a backslash; the result now depends on whether
// the character before that one is a backslash as well.
54 return *( ptr - 2 ) !=
'\\';
57#if defined( DAW_ALLOW_SSE42 )
// Lookup table with one flag per possible unsigned char value; all entries
// start out false.
59 alignas( 16 )
bool values[256] = { };
// Returns the flag stored for `idx`. The cast to unsigned char keeps the
// index within 0..255 even where plain char is signed.
61 constexpr bool operator[](
char idx )
const {
62 return values[
static_cast<unsigned char>( idx )];
// Variable template holding a key_table_t initialised from a lambda: the
// table starts empty and the pack expansion below sets values[k] to true for
// every character k in `keys`.
66 template<
char... keys>
67 static constexpr inline key_table_t key_table =
68 [] DAW_CPP23_STATIC_CALL_OP {
69 auto result = key_table_t{ };
// As in key_table_t::operator[], each key is cast to unsigned char before
// being used as an index.
71 ( result.values[
static_cast<unsigned char>( keys )] =
true )... ) );
// Returns the zero-based index of the lowest set bit in `value`, selecting an
// implementation according to what the compiler provides.
75 constexpr std::ptrdiff_t find_lsb_set( UInt32 value ) {
76#if DAW_HAS_BUILTIN( __builtin_ffs )
// __builtin_ffs reports one plus the index of the lowest set bit (and 0 when
// no bit is set), so subtracting 1 gives the index; a zero input yields -1
// on this path.
77 return __builtin_ffs(
static_cast<int>( value ) ) - 1;
78#elif defined( DAW_HAS_MSVC_LIKE )
// The intrinsic is only used outside constant evaluation.
79 if( not DAW_IS_CONSTANT_EVALUATED( ) ) {
// NOTE(review): Microsoft documents the mask parameter of _BitScanForward as
// unsigned long; the int produced by this cast is converted again at the
// call. Confirm the cast is intentional.
81 _BitScanForward( &index,
static_cast<int>( value ) );
82 return static_cast<std::ptrdiff_t
>( index );
// Fallback: the loop runs for as long as the lowest bit of `value` is clear.
// NOTE(review): presumably expects value != 0; confirm against callers.
85 std::ptrdiff_t result = 0;
89 while( ( value & 1 ) == 0 ) {
// Builds a __m128i from individual chars with _mm_set_epi8. Only c0 is
// mandatory; each of the other parameters listed here defaults to 0.
98 __m128i set_reverse(
char c0,
char c1 = 0,
char c2 = 0,
char c3 = 0,
99 char c4 = 0,
char c5 = 0,
char c6 = 0,
char c7 = 0,
100 char c8 = 0,
char c9 = 0,
char c10 = 0,
char c11 = 0,
101 char c12 = 0,
char c13 = 0,
char c14 = 0,
// c15 is passed first. _mm_set_epi8 takes its most significant byte first, so
// this presumably places c0 in the lowest byte of the result.
// TODO confirm the order of the remaining arguments.
103 return _mm_set_epi8( c15,
// Loads 16 bytes starting at `ptr` into an __m128i using _mm_loadu_si128,
// the load intrinsic that places no alignment requirement on the address.
121 DAW_ATTRIB_INLINE __m128i
122 uload16_char_data_simd( daw::not_null<char const *> ptr ) {
123 return _mm_loadu_si128(
124 reinterpret_cast<__m128i
const *
>( ptr.get( ) ) );
// Loads 16 bytes starting at `ptr` into an __m128i using _mm_load_si128.
// That intrinsic requires the address to be 16-byte aligned, so callers must
// pass an aligned pointer.
127 DAW_ATTRIB_INLINE __m128i
128 load16_char_data_simd( daw::not_null<char const *> ptr ) {
129 return _mm_load_si128(
130 reinterpret_cast<__m128i
const *
>( ptr.get( ) ) );
// Returns a bitmask of the bytes in `block` that equal `k`: bit i of the
// result is set when byte i of the block matches. Callers supply `k` as a
// template argument, e.g. mem_find_eq_simd<'\\'>( ... ).
134 DAW_ATTRIB_INLINE UInt32 mem_find_eq_simd( __m128i block ) {
// Broadcast the key to all 16 byte lanes.
135 __m128i
const keys = _mm_set1_epi8( k );
// Matching lanes become 0xFF, all other lanes 0x00.
136 __m128i
const found = _mm_cmpeq_epi8( block, keys );
// Collapse the top bit of every lane into one bit per byte.
137 return to_uint32( _mm_movemask_epi8( found ) );
// Searches [first, last) for the first byte equal to any of `keys`, testing
// sixteen bytes per step with SSE byte compares.
140 template<
bool is_unchecked_input,
char... keys>
141 DAW_ATTRIB_INLINE daw::not_null<char const *>
142 mem_move_to_next_of_sse42( daw::not_null<char const *> first,
143 daw::not_null<char const *>
const last ) {
// Whole 16-byte blocks are read with an unaligned load.
145 while( last - first >= 16 ) {
146 auto const val0 = uload16_char_data_simd( first );
// OR the per-key match masks together: a set bit marks a byte equal to at
// least one key.
147 auto const key_positions = ( mem_find_eq_simd<keys>( val0 ) | ... );
148 if( key_positions != 0 ) {
// The lowest set bit is the earliest matching byte within this block.
149 return first + find_lsb_set( key_positions );
// Remainder: the max_pos bytes that are left are copied into a
// zero-initialised register instead of loading 16 bytes from `first`.
153 auto val1 = __m128i{ };
154 auto const max_pos = last - first;
155 std::memcpy( &val1, first,
static_cast<std::size_t
>( max_pos ) );
156 auto const key_positions = ( mem_find_eq_simd<keys>( val1 ) | ... );
157 if( key_positions != 0 ) {
158 auto const offset = find_lsb_set( key_positions );
// An offset at or beyond max_pos refers to the zero padding of val1 rather
// than to input bytes.
159 if( offset >= max_pos ) {
162 return first + offset;
// From a bitmask of backslash positions, computes a bitmask of the positions
// that are escaped, using only bitwise operations and one addition.
//   prev_escaped  taken by non-const reference; read here as the "first
//                 position is already escaped" carry from earlier input.
//                 NOTE(review): presumably also updated for the next call;
//                 confirm.
//   backslashes   one bit per input byte, set where the byte is a backslash
169 DAW_ATTRIB_INLINE
constexpr UInt32
170 find_escaped_branchless( UInt32 &prev_escaped, UInt32 backslashes ) {
// A backslash that is itself escaped by the carry does not start an escape.
171 backslashes &= ~prev_escaped;
// Positions directly after a backslash, plus the carried-in position.
172 UInt32 follow_escape = ( backslashes << 1 ) | prev_escaped;
// 0x5555'5555 has every even-numbered bit set.
173 using even_bits = daw::constant<0x5555'5555_u32>;
// Backslashes at odd positions that do not directly follow another backslash.
175 UInt32
const odd_seq_start =
176 backslashes & ( ~even_bits::value ) & ( ~follow_escape );
177 UInt32 seq_start_on_even_bits = 0_u32;
// The addition carries through each run of consecutive backslashes that
// begins at one of the odd starting positions.
179 auto r = odd_seq_start + backslashes;
// Only the low 16 bits are kept; presumably one bit per byte of a 16-byte
// block. TODO confirm.
180 seq_start_on_even_bits = 0x0000'FFFF_u32 & r;
184 UInt32 invert_mask = seq_start_on_even_bits << 1U;
// Flip the even-bit pattern where invert_mask is set and restrict the result
// to positions that follow a backslash.
186 return ( even_bits::value ^ invert_mask ) & follow_escape;
// Runs `bitmask` through a carry-less multiplication (_mm_clmulepi64_si128)
// and returns the low 32 bits of the product.
// NOTE(review): going by the name and the all_ones constant, the result is
// presumably the running XOR of the input bits; confirm against the multiply's
// remaining operands.
189 DAW_ATTRIB_INLINE UInt32 prefix_xor_simd( UInt32 bitmask ) {
190 __m128i
const all_ones = _mm_set1_epi8(
'\xFF' );
191 __m128i
const result = _mm_clmulepi64_si128(
// bitmask occupies the lowest 32-bit lane; the upper three lanes are zero.
192 _mm_set_epi32( 0, 0, 0,
static_cast<std::int32_t
>( bitmask ) ),
// Extract the lowest 32 bits of the product.
195 return to_uint32( _mm_cvtsi128_si32( result ) );
// Advances `first` through [first, last) towards the closing, unescaped
// double quote of a string. Whole 16-byte blocks are examined with SSE
// compares; the rest is scanned one character at a time.
//   is_unchecked_input  when true, the scalar loops omit the first < last
//                       bound checks
198 template<
bool is_unchecked_input>
199 inline daw::not_null<char const *> mem_skip_until_end_of_string_simd(
200 daw::not_null<char const *> first,
201 daw::not_null<char const *>
const last ) {
// Escape carry handed to find_escaped_branchless for each block.
202 UInt32 prev_escapes = 0_u32;
203 while( last - first >= 16 ) {
204 auto const val0 = uload16_char_data_simd( first );
205 UInt32
const backslashes = mem_find_eq_simd<'\\'>( val0 );
206 UInt32
const escaped =
207 find_escaped_branchless( prev_escapes, backslashes );
// Quote positions with the escaped ones masked out.
208 UInt32
const quotes = mem_find_eq_simd<
'"'>( val0 ) & ( ~escaped );
209 UInt32
const in_string = prefix_xor_simd( quotes );
// A non-zero mask means this block holds an unescaped quote; move to the
// position of the mask's lowest set bit.
210 if( in_string != 0 ) {
211 first += find_lsb_set( in_string );
// Scalar scan without bound checks.
216 if constexpr( is_unchecked_input ) {
217 while( *first !=
'"' ) {
// Skip characters that are neither a quote nor a backslash.
218 while( not key_table<
'"',
'\\'>[*first] ) {
221 if( *first ==
'"' ) {
// Scalar scan where every dereference is guarded by first < last.
227 while( DAW_LIKELY( first < last ) and *first !=
'"' ) {
228 while( DAW_LIKELY( first < last ) and
229 not key_table<
'"',
'\\'>[*first] ) {
232 if( first >= last ) {
235 if( *first ==
'"' ) {
// For checked input the result depends on whether `first` is still inside
// the range.
241 return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
// Advances `first` through [first, last) towards the closing, unescaped
// double quote of a string, and records where the first backslash was seen.
//   is_unchecked_input  when true, the scalar loops omit the first < last
//                       bound checks
//   first_escape        in/out; only written while it is negative, so a
//                       negative value acts as "not recorded yet"
245 template<
bool is_unchecked_input>
246 inline daw::not_null<char const *>
247 mem_skip_until_end_of_string_simd( daw::not_null<char const *> first,
248 daw::not_null<char const *>
const last,
249 std::ptrdiff_t &first_escape ) {
// Start of the scan, kept so offsets can be measured from it.
250 auto const first_first = first;
251 UInt32 prev_escapes = 0_u32;
252 while( last - first >= 16 ) {
253 auto const val0 = uload16_char_data_simd( first );
254 UInt32
const backslashes = mem_find_eq_simd<'\\'>( val0 );
// Bitwise & on two bools: both sides are always evaluated, no short-circuit.
255 if( ( backslashes != 0 ) & ( first_escape < 0 ) ) {
// NOTE(review): this stores the backslash offset within the current 16-byte
// block, whereas the scalar loops below store first - first_first (an offset
// from the start of the scan). The two agree only while first == first_first.
// Confirm which meaning callers rely on.
256 first_escape = find_lsb_set( backslashes );
258 UInt32
const escaped =
259 find_escaped_branchless( prev_escapes, backslashes );
// Quote positions with the escaped ones masked out.
260 UInt32
const quotes = mem_find_eq_simd<
'"'>( val0 ) & ( ~escaped );
261 UInt32
const in_string = prefix_xor_simd( quotes );
// A non-zero mask means this block holds an unescaped quote; move to the
// position of the mask's lowest set bit.
262 if( in_string != 0 ) {
263 first += find_lsb_set( in_string );
// Scalar scan without bound checks.
268 if constexpr( is_unchecked_input ) {
269 while( *first !=
'"' ) {
// Skip characters that are neither a quote nor a backslash.
270 while( not key_table<
'"',
'\\'>[*first] ) {
273 if( *first ==
'"' ) {
// Record the offset from the start of the scan if none has been stored yet;
// presumably reached when *first is a backslash. TODO confirm.
276 if( first_escape < 0 ) {
277 first_escape = first - first_first;
// Scalar scan where every dereference is guarded by first < last.
282 while( DAW_LIKELY( first < last ) and *first !=
'"' ) {
283 while( DAW_LIKELY( first < last ) and
284 not key_table<
'"',
'\\'>[*first] ) {
287 if( first >= last ) {
290 if( *first ==
'"' ) {
293 if( first_escape < 0 ) {
294 first_escape = first - first_first;
// For checked input the result depends on whether `first` is still inside
// the range.
299 return ( is_unchecked_input or DAW_LIKELY( first < last ) ) ? first
// Scalar search of [first, last) for the first character equal to one of
// `keys`.
//   is_unchecked_input  when true, the multi-key loop does not test
//                       first < last
304 template<
bool is_unchecked_input,
char... keys>
305 DAW_ATTRIB_INLINE daw::not_null<char const *>
306 mem_move_to_next_of_runtime( daw::not_null<char const *> first,
307 daw::not_null<char const *> last ) {
// Exactly one key: delegate to std::memchr over the last - first bytes.
308 if constexpr(
sizeof...( keys ) == 1 ) {
309 char const key[]{ keys... };
310 char const *ptr =
static_cast<char const *
>( std::memchr(
311 first, key[0],
static_cast<std::size_t
>( last - first ) ) );
// std::memchr returns a null pointer when the byte does not occur.
312 if( ptr ==
nullptr ) {
// Several keys: each character is compared against every key.
317 DAW_CPP23_STATIC_LOCAL
constexpr auto eq =
// The bound test is short-circuited away when is_unchecked_input is true.
321 while( is_unchecked_input or first < last ) {
322 char const c = *first;
323 if( nsc_or( eq( c, keys )... ) ) {
// Dispatches the key search. When DAW_ALLOW_SSE42 is defined and ExecTag is
// not runtime_exec_tag, the SSE4.2 implementation is called; otherwise the
// scalar runtime implementation is used.
331 template<
bool is_unchecked_input,
typename ExecTag,
char... keys>
332 DAW_ATTRIB_FLATTEN daw::not_null<char const *>
333 mem_move_to_next_of( daw::not_null<char const *> first,
334 daw::not_null<char const *> last ) {
336#if defined( DAW_ALLOW_SSE42 )
// NOTE(review): the condition is a compile-time constant, yet a plain `if` is
// used rather than `if constexpr`.
337 if( not std::is_same_v<runtime_exec_tag, ExecTag> ) {
338 return mem_move_to_next_of_sse42<is_unchecked_input, keys...>( first,
342 return mem_move_to_next_of_runtime<is_unchecked_input, keys...>( first,
// Non-SIMD runtime scan towards the end of a string; `first` is advanced by
// repeated calls to mem_move_to_next_of.
//   is_unchecked_input  when true, the loop does not test first < last
346 template<
bool is_unchecked_input>
347 DAW_ATTRIB_INLINE daw::not_null<char const *>
348 mem_skip_until_end_of_string_runtime(
349 daw::not_null<char const *> first,
350 daw::not_null<char const *>
const last ) {
// Extra work that is compiled only for checked input.
351 if constexpr( not is_unchecked_input ) {
// The bound test is short-circuited away when is_unchecked_input is true.
357 while( is_unchecked_input or first < last ) {
// Jump ahead to the next character of interest.
366 first = mem_move_to_next_of<is_unchecked_input,
// constexpr-friendly scan of [first, last) towards the end of a string,
// inspecting one character at a time for a double quote or a backslash.
//   is_unchecked_input  when true, the loops do not test first < last
374 template<
bool is_unchecked_input>
375 DAW_ATTRIB_INLINE
constexpr daw::not_null<char const *>
376 mem_skip_until_end_of_string_constexpr(
377 daw::not_null<char const *> first,
378 daw::not_null<char const *>
const last ) {
// Empty range: handled before anything is dereferenced.
379 if( first == last ) {
// remove_const_t<char const> is plain char.
382 using char_t = std::remove_const_t<char const>;
// First loop: taken for unchecked input, or when the last character of the
// range is not a backslash. Presumably in that case a backslash inside the
// range always has a following character within it. TODO confirm.
385 if( is_unchecked_input or
386 DAW_LIKELY( *( last - 1 ) != char_t{
'\\' } ) ) {
// NOTE(review): the loop-continuation test is wrapped in DAW_UNLIKELY here,
// while the scalar loops of the SIMD variants in this file wrap the same test
// in DAW_LIKELY. Confirm which hint is intended.
387 while( is_unchecked_input or DAW_UNLIKELY( first < last ) ) {
388 char const c = *first;
389 if( c == char_t{
'"' } ) {
392 if( c == char_t{
'\\' } ) {
// Second loop: the same scan, with an additional bound check after a
// backslash.
401 while( is_unchecked_input or DAW_UNLIKELY( first < last ) ) {
402 char const c = *first;
403 if( c == char_t{
'"' } ) {
406 if( c == char_t{
'\\' } ) {
// Make sure a character follows the backslash inside the range.
407 if( DAW_LIKELY( first + 1 < last ) ) {
// Dispatches the end-of-string scan. The constexpr implementation is used
// when use_constexpr_exec_mode<ExecTag>( ) holds. Otherwise, with
// DAW_ALLOW_SSE42 defined and ExecTag other than runtime_exec_tag, the SIMD
// implementation is used. In all remaining cases the scalar runtime
// implementation runs.
421 template<
bool is_unchecked_input,
typename ExecTag>
422 DAW_ATTRIB_INLINE
constexpr daw::not_null<char const *>
423 mem_skip_until_end_of_string( daw::not_null<char const *> first,
424 daw::not_null<char const *>
const last ) {
425 if( use_constexpr_exec_mode<ExecTag>( ) ) {
426 return mem_skip_until_end_of_string_constexpr<is_unchecked_input>(
429#if defined( DAW_ALLOW_SSE42 )
430 if( not std::is_same_v<runtime_exec_tag, ExecTag> ) {
431 return mem_skip_until_end_of_string_simd<is_unchecked_input>( first,
435 return mem_skip_until_end_of_string_runtime<is_unchecked_input>( first,
// Non-SIMD runtime scan towards the end of a string that also records where
// the first escape was encountered.
//   is_unchecked_input  when true, the loop does not test first < last
//   first_escape        in/out; only written while it is negative, and then
//                       set to an offset measured from the start of the scan
439 template<
bool is_unchecked_input>
440 DAW_ATTRIB_INLINE daw::not_null<char const *>
441 mem_skip_until_end_of_string_runtime(
442 daw::not_null<char const *> first,
443 daw::not_null<char const *>
const last, std::ptrdiff_t &first_escape ) {
// Start of the scan, kept so the escape offset can be measured from it.
444 auto first_first = first;
// Extra work that is compiled only for checked input.
445 if constexpr( not is_unchecked_input ) {
// The bound test is short-circuited away when is_unchecked_input is true.
451 while( is_unchecked_input or first < last ) {
456 if( first_escape < 0 ) {
457 first_escape = first - first_first;
459 if constexpr( is_unchecked_input ) {
// Advances by one when first != last and by zero otherwise, without a branch.
462 first +=
static_cast<int>(
static_cast<bool>( last - first ) );
// Jump ahead to the next character of interest.
467 first = mem_move_to_next_of<is_unchecked_input,
// Dispatches the end-of-string scan that also reports the first escape
// position through `first_escape`. With DAW_ALLOW_SSE42 defined and ExecTag
// other than runtime_exec_tag, the SIMD implementation is used; otherwise the
// scalar runtime implementation runs. All three arguments are forwarded
// unchanged.
475 template<
bool is_unchecked_input,
typename ExecTag>
476 DAW_ATTRIB_FLATINLINE daw::not_null<char const *>
477 mem_skip_until_end_of_string( daw::not_null<char const *> first,
478 daw::not_null<char const *>
const last,
479 std::ptrdiff_t &first_escape ) {
480#if defined( DAW_ALLOW_SSE42 )
481 if( not std::is_same_v<runtime_exec_tag, ExecTag> ) {
482 return mem_skip_until_end_of_string_simd<is_unchecked_input>(
483 first, last, first_escape );
486 return mem_skip_until_end_of_string_runtime<is_unchecked_input>(
487 first, last, first_escape );