DAW JSON Link
Loading...
Searching...
No Matches
daw_json_parse_unsigned_int.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
11#include "version.h"
12
13#include "daw_json_assert.h"
14#include "daw_json_exec_modes.h"
18
19#include <daw/daw_arith_traits.h>
20#include <daw/daw_construct_a.h>
21#include <daw/daw_cxmath.h>
22#include <daw/daw_remove_cvref.h>
23#include <daw/daw_uint_buffer.h>
24
25#include <cstddef>
26#include <daw/stdinc/data_access.h>
27#include <limits>
28
29#if defined( DAW_ALLOW_SSE42 )
30#include <emmintrin.h>
31#include <smmintrin.h>
32#include <tmmintrin.h>
33#include <xmmintrin.h>
34#if defined( DAW_HAS_MSVC_LIKE )
35#include <intrin.h>
36#endif
37#endif
38
39namespace daw::json {
40 inline namespace DAW_JSON_VER {
41 namespace json_details {
42 template<typename Signed, typename Unsigned>
43 DAW_ATTRIB_INLINE constexpr Signed to_signed( Unsigned &&u,
44 Signed sign ) {
45 if( sign <= Signed{ 0 } ) {
46 return static_cast<Signed>( -u );
47 }
48 return static_cast<Signed>( u );
49 }
50
51 [[nodiscard]]
52 DAW_ATTRIB_NONNULL( ) inline constexpr bool is_made_of_eight_digits_cx(
53 char const *ptr ) {
54 // The copy to local buffer is to get the compiler to treat it like a
55 // reinterpret_cast
56
57 std::byte const buff[8]{
58 static_cast<std::byte>( ptr[0] ), static_cast<std::byte>( ptr[1] ),
59 static_cast<std::byte>( ptr[2] ), static_cast<std::byte>( ptr[3] ),
60 static_cast<std::byte>( ptr[4] ), static_cast<std::byte>( ptr[5] ),
61 static_cast<std::byte>( ptr[6] ), static_cast<std::byte>( ptr[7] ) };
62
63 UInt64 val = UInt64( );
64 for( std::size_t n = 0; n < 8; ++n ) {
65 val |= to_uint64( buff[n] ) << ( 8 * n );
66 }
67 return ( ( ( val & 0xF0F0'F0F0'F0F0'F0F0_u64 ) |
68 ( ( ( val + 0x0606'0606'0606'0606_u64 ) &
69 0xF0F0'F0F0'F0F0'F0F0_u64 ) >>
70 4U ) ) == 0x3333'3333'3333'3333_u64 );
71 }
72
73 template<options::JsonRangeCheck RangeCheck, typename Unsigned,
74 typename MaxArithUnsigned>
75 using max_unsigned_t = daw::conditional_t<
76 daw::is_integral_v<Unsigned> or std::is_enum_v<Unsigned>,
77 daw::conditional_t<( sizeof( Unsigned ) > sizeof( MaxArithUnsigned ) ),
78 Unsigned, MaxArithUnsigned>,
79 Unsigned>;
80
81 // Constexpr'ified version from
82 // https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
83 DAW_ATTRIB_NONNULL( )
84 inline constexpr UInt64 parse_8_digits( char const *const str ) {
85 auto const chunk = daw::to_uint64_buffer( str );
86 // 1-byte mask trick (works on 4 pairs of single digits)
87 auto const lower_digits =
88 ( chunk & 0x0F'00'0F'00'0F'00'0F'00_u64 ) >> 8U;
89 auto const upper_digits =
90 ( chunk & 0x00'0F'00'0F'00'0F'00'0F_u64 ) * 10U;
91 auto const chunk2 = lower_digits + upper_digits;
92
93 // 2-byte mask trick (works on 2 pairs of two digits)
94 auto const lower_digits2 =
95 ( chunk2 & 0x00'FF'00'00'00'FF'00'00_u64 ) >> 16U;
96 auto const upper_digits2 =
97 ( chunk2 & 0x00'00'00'FF'00'00'00'FF_u64 ) * 100U;
98 auto const chunk3 = lower_digits2 + upper_digits2;
99
100 // 4-byte mask trick (works on pair of four digits)
101 auto const lower_digits3 =
102 ( chunk3 & 0x00'00'FF'FF'00'00'00'00_u64 ) >> 32U;
103 auto const upper_digits3 =
104 ( chunk3 & 0x00'00'00'00'00'00'FF'FF_u64 ) * 10000U;
105 auto const chunk4 = lower_digits3 + upper_digits3;
106
107 return chunk4 & 0xFFFF'FFFF_u64;
108 }
109
110 static_assert( parse_8_digits( "12345678" ) == 1234'5678_u64,
111 "8 digit parser does not work on this platform" );
112
113 DAW_ATTRIB_NONNULL( )
114 inline constexpr UInt64 parse_16_digits( char const *const str ) {
115 auto const upper = parse_8_digits( str );
116 auto const lower = parse_8_digits( str + 8 );
117 return upper * 100'000'000_u64 + lower;
118 }
119
120 static_assert( parse_16_digits( "1234567890123456" ) ==
121 1234567890123456_u64,
122 "16 digit parser does not work on this platform" );
123
// Trait with the interface of daw::make_unsigned that additionally accepts
// bool. For every type other than bool the primary template simply inherits
// from daw::make_unsigned<T>.
124 template<typename T>
125 struct make_unsigned_with_bool : daw::make_unsigned<T> {};
126
// bool maps to itself.
// NOTE(review): presumably daw::make_unsigned cannot be used with bool,
// which is why this specialization exists - confirm.
127 template<>
128 struct make_unsigned_with_bool<bool> {
129 using type = bool;
130 };
131
132 template<typename Integer, typename T, typename ParseState>
133 [[nodiscard]] constexpr Integer
134 narrow_cast( T value, ParseState const &parse_state ) {
135 if constexpr( std::is_signed_v<T> ) {
136 if constexpr( std::is_signed_v<Integer> ) {
137 if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
138 return value;
139 } else if( value <= static_cast<T>(
140 ( std::numeric_limits<Integer>::max )( ) ) ) {
141 return static_cast<Integer>( value );
142 } else {
143 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
144 }
145 } else if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
146 if( value >= 0 ) {
147 return value;
148 }
149 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
150 } else {
151 if( value >= 0 and
152 value <=
153 static_cast<T>( ( std::numeric_limits<Integer>::max )( ) ) ) {
154 return value;
155 }
156 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
157 }
158 } else if constexpr( std::is_signed_v<Integer> ) {
159 if constexpr( sizeof( T ) < sizeof( Integer ) ) {
160 return static_cast<Integer>( value );
161 } else {
162 if( value >
163 static_cast<T>( ( std::numeric_limits<Integer>::max )( ) ) ) {
164 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
165 }
166 return static_cast<Integer>( value );
167 }
168 } else if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
169 return static_cast<Integer>( value );
170 } else {
171 if( value <=
172 static_cast<T>( ( std::numeric_limits<Integer>::max )( ) ) ) {
173 return static_cast<Integer>( value );
174 }
175 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
176 }
177 }
178
// Shorthand for the nested `type` of make_unsigned_with_bool<T>.
179 template<typename T>
180 using make_unsigned_with_bool_t =
181 typename make_unsigned_with_bool<T>::type;
182
183 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
184 typename ParseState>
185 [[nodiscard]] static constexpr Unsigned
186 unsigned_parser_known( constexpr_exec_tag const &,
187 ParseState &parse_state ) {
188 using CharT = typename ParseState::CharT;
189 // We know how many digits are in the number
190 using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
191 using uresult_t =
192 max_unsigned_t<RangeChecked, make_unsigned_with_bool_t<Unsigned>,
193 UInt64>;
194 static_assert(
195 not static_cast<bool>( RangeChecked ) or
196 std::is_same_v<uresult_t, UInt64>,
197 "Range checking is only supported for std integral types" );
198
199 CharT *first = parse_state.first;
200 CharT *const last = parse_state.last;
201 uresult_t result = uresult_t( );
202
203 while( last - first >= 16 ) {
204 result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
205 result += static_cast<uresult_t>( parse_16_digits( first ) );
206 first += 16;
207 }
208 if( last - first >= 8 ) {
209 result *= static_cast<uresult_t>( 100'000'000ULL );
210 result += static_cast<uresult_t>( parse_8_digits( first ) );
211 first += 8;
212 }
213 if constexpr( ParseState::is_zero_terminated_string ) {
214 auto dig = parse_digit( *first );
215 while( dig < 10U ) {
216 result *= 10U;
217 result += dig;
218 ++first;
219 dig = parse_digit( *first );
220 }
221 } else {
222 while( first < last ) {
223 result *= 10U;
224 result += parse_digit( *first );
225 ++first;
226 }
227 }
228 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
229 auto const count = ( daw::numeric_limits<result_t>::digits10 + 1U ) -
230 std::size( parse_state );
232 ( ( result <= static_cast<uresult_t>(
233 ( daw::numeric_limits<result_t>::max )( ) ) ) &
234 ( count >= 0 ) ),
235 ErrorReason::NumberOutOfRange, parse_state );
236 }
237 parse_state.first = first;
238 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
239 return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
240 } else {
241 return daw::construct_a<Unsigned>(
242 narrow_cast<Unsigned>( result, parse_state ) );
243 }
244 }
245
246 //**************************
247 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
248 typename ParseState>
249 [[nodiscard]] static constexpr Unsigned
250 unsigned_parser_not_known( constexpr_exec_tag const &,
251 ParseState &parse_state ) {
252 using CharT = typename ParseState::CharT;
253 // We do not know how long the string is
254 using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
255 using uresult_t =
256 max_unsigned_t<RangeChecked, make_unsigned_with_bool_t<Unsigned>,
257 UInt64>;
258 static_assert(
259 not static_cast<bool>( RangeChecked ) or
260 std::is_same_v<uresult_t, UInt64>,
261 "Range checking is only supported for std integral types" );
262 daw_json_assert_weak( parse_state.has_more( ),
263 ErrorReason::UnexpectedEndOfData, parse_state );
264 CharT *first = parse_state.first;
265 CharT *const orig_first = first;
266 (void)orig_first; // only used inside if constexpr and gcc9 warns
267 CharT *const last = parse_state.last;
268 uresult_t result = uresult_t( );
269 bool has_eight =
270 last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
271 if( has_eight & ( last - first >= 16 ) ) {
272 bool has_sixteen = is_made_of_eight_digits_cx( first + 8 );
273 while( has_sixteen ) {
274 result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
275 result += static_cast<uresult_t>( parse_16_digits( first ) );
276 first += 16;
277 has_eight =
278 last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
279 has_sixteen =
280 has_eight and
281 ( last - first >= 16 ? is_made_of_eight_digits_cx( first + 8 )
282 : false );
283 }
284 }
285 if( has_eight ) {
286 result *= static_cast<uresult_t>( 100'000'000ULL );
287 result += static_cast<uresult_t>( parse_8_digits( first ) );
288 first += 8;
289 }
290 if constexpr( ParseState::is_zero_terminated_string ) {
291 auto dig = parse_digit( *first );
292 while( dig < 10U ) {
293 result *= 10U;
294 result += dig;
295 ++first;
296 dig = parse_digit( *first );
297 }
298 } else {
299 while( first < last ) {
300 auto const dig = parse_digit( *first );
301 if( dig >= 10U ) {
302 break;
303 }
304 result *= 10U;
305 result += dig;
306 ++first;
307 }
308 }
309
310 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
311 auto const count = static_cast<std::ptrdiff_t>(
312 daw::numeric_limits<result_t>::digits10 + 1 ) -
313 ( first - orig_first );
314 daw_json_ensure( count >= 0, ErrorReason::NumberOutOfRange,
315 parse_state );
316 }
317
318 parse_state.first = first;
319 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
320 return daw::construct_a<Unsigned>(
321 static_cast<Unsigned>( static_cast<result_t>( result ) ) );
322 } else {
323 return daw::construct_a<Unsigned>(
324 narrow_cast<Unsigned>( result, parse_state ) );
325 }
326 }
327
328#if false and defined( DAW_ALLOW_SSE42 )
329 /*
330 // Adapted from
331 //
332 //
333 https://github.com/lemire/simdjson/blob/102262c7abe64b517a36a6049b39d95f58bf4aea/src/haswell/numberparsing.h
334 DAW_ATTRIB_NONNULL( )
335 inline UInt64 parse_eight_digits_unrolled( char const *ptr ) {
336 // this actually computes *16* values so we are being wasteful.
337 static __m128i const ascii0 = _mm_set1_epi8( '0' );
338
339 static __m128i const mul_1_10 =
340 _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
341 );
342
343 static __m128i const mul_1_100 =
344 _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
345
346 static __m128i const mul_1_10000 =
347 _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
348
349 __m128i const input = _mm_sub_epi8(
350 _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
351 __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
352 __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
353 __m128i const t3 = _mm_packus_epi32( t2, t2 );
354 __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
355 return to_uint64( static_cast<std::uint32_t>( _mm_cvtsi128_si32(
356 t4 ) ) ); // only captures the sum of the first 8 digits, drop the
357 rest
358 }
359
360 inline UInt64 parse_sixteen_digits_unrolled( char const *ptr ) {
361 static __m128i const ascii0 = _mm_set1_epi8( '0' );
362
363 static __m128i const mul_1_10 =
364 _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
365 );
366
367 static __m128i const mul_1_100 =
368 _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
369
370 static __m128i const mul_1_10000 =
371 _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
372
373 __m128i const input = _mm_sub_epi8(
374 _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
375 __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
376 __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
377 __m128i const t3 = _mm_packus_epi32( t2, t2 );
378 __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
379 return to_uint64( _mm_cvtsi128_si64( t4 ) );
380 }
381
382 [[nodiscard]] inline bool
383 is_made_of_eight_digits_fast( char const *ptr ) {
384 UInt64 val;
385 memcpy( &val, ptr, sizeof( std::uint64_t ) );
386 return ( ( ( val & 0xF0F0F0F0F0F0F0F0_u64 ) |
387 ( ( ( val + 0x0606060606060606_u64 ) & 0xF0F0F0F0F0F0F0F0_u64
388 )
389 >> 4_u64 ) ) == 0x3333333333333333_u64 );
390 }
391
392 template<typename Unsigned, options::JsonRangeCheck RangeChecked, bool, typename
393 ParseState>
394 [[nodiscard]] inline Unsigned
395 unsigned_parser( sse42_exec_tag const &, ParseState &parse_state ) {
396 daw_json_assert_weak( parse_state.has_more( ),
397 ErrorReason::UnexpectedEndOfData, parse_state
398 ); using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
399 result_t result = result_t( ); CharT *first = parse_state.first; CharT
400 *const last = parse_state.last; CharT *const orig_first =
401 first;
402 {
403 auto sz = last - first;
404 while( ( sz >= 8 ) & is_made_of_eight_digits_fast( first ) ) {
405 if( ( sz < 16 ) | ( not is_made_of_eight_digits_fast( first + 8 ) )
406 ) { result *= 100'000'000_u64; result += static_cast<result_t>(
407 parse_eight_digits_unrolled( first ) ); first += 8; break;
408 }
409 result *= 10'000'000'000'000'000_u64;
410 result +=
411 static_cast<result_t>( parse_sixteen_digits_unrolled( first ) );
412 sz -= 16;
413 first += 16;
414 }
415 }
416
417 auto dig = parse_digit( *first );
418 while( dig < 10U ) {
419 result *= 10U;
420 result += dig;
421 ++first;
422 dig = parse_digit( *first );
423 }
424 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
425 auto const count =
426 static_cast<intmax_t>( daw::numeric_limits<Unsigned>::digits10 + 1 )
427 - ( first - orig_first ); daw_json_ensure( (count >= 0) & (result <=
428 static_cast<result_t>( (daw::numeric_limits<Unsigned>::max)( ) )),
429 ErrorReason::NumberOutOfRange,
430 parse_state
431 );
432 }
433 parse_state.first = first;
434 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
435 return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
436 } else {
437 return daw::construct_a<Unsigned>( narrow_cast<Unsigned>( result, parse_state ) );
438 }
439 }
440 */
441#endif
442
443 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
444 bool KnownBounds, typename ParseState>
445 [[nodiscard]] DAW_ATTRIB_INLINE static constexpr Unsigned
446 unsigned_parser( constexpr_exec_tag const &tag,
447 ParseState &parse_state ) {
448 if constexpr( KnownBounds ) {
449 return unsigned_parser_known<Unsigned, RangeChecked>( tag,
450 parse_state );
451 } else {
452 return unsigned_parser_not_known<Unsigned, RangeChecked>(
453 tag, parse_state );
454 }
455 }
456 } // namespace json_details
457 } // namespace DAW_JSON_VER
458} // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Assert that Bool is true when in Checked Input mode. If false, pass the rest of the args to daw_json_error.
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false, pass the rest of the args to daw_json_error.
DAW_ATTRIB_NOINLINE void daw_json_error(ErrorReason reason)
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition version.h:20