DAW JSON Link
Loading...
Searching...
No Matches
daw_json_parse_unsigned_int.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
12
18
19#include <daw/daw_arith_traits.h>
20#include <daw/daw_construct_a.h>
21#include <daw/daw_cxmath.h>
22#include <daw/daw_not_null.h>
23#include <daw/daw_remove_cvref.h>
24#include <daw/daw_restrict.h>
25#include <daw/daw_uint_buffer.h>
26
27#include <cstddef>
28#include <daw/stdinc/data_access.h>
29#include <limits>
30
31#if defined( DAW_ALLOW_SSE42 )
32#include <emmintrin.h>
33#include <smmintrin.h>
34#include <tmmintrin.h>
35#include <xmmintrin.h>
36#if defined( DAW_HAS_MSVC_LIKE )
37#include <intrin.h>
38#endif
39#endif
40
41namespace daw::json {
42 inline namespace DAW_JSON_VER {
43 namespace json_details {
44 template<typename Signed, typename Unsigned>
45 DAW_ATTRIB_INLINE constexpr Signed to_signed( Unsigned &&u,
46 Signed sign ) {
47 if( sign <= Signed{ 0 } ) {
48 return static_cast<Signed>( -u );
49 }
50 return static_cast<Signed>( u );
51 }
52
53 [[nodiscard]]
54 constexpr bool
55 is_made_of_eight_digits_cx( daw::not_null<char const *> const ptr ) {
56 // The copy to local buffer is to get the compiler to treat it like a
57 // reinterpret_cast
58
59 std::byte const buff[8]{ static_cast<std::byte>( ptr[0] ),
60 static_cast<std::byte>( ptr[1] ),
61 static_cast<std::byte>( ptr[2] ),
62 static_cast<std::byte>( ptr[3] ),
63 static_cast<std::byte>( ptr[4] ),
64 static_cast<std::byte>( ptr[5] ),
65 static_cast<std::byte>( ptr[6] ),
66 static_cast<std::byte>( ptr[7] ) };
67
68 auto val = UInt64( );
69 for( std::size_t n = 0; n < 8; ++n ) {
70 val |= to_uint64( buff[n] ) << ( 8 * n );
71 }
72 return ( ( ( val & 0xF0F0'F0F0'F0F0'F0F0_u64 ) |
73 ( ( ( val + 0x0606'0606'0606'0606_u64 ) &
74 0xF0F0'F0F0'F0F0'F0F0_u64 ) >>
75 4U ) ) == 0x3333'3333'3333'3333_u64 );
76 }
77
78 template<options::JsonRangeCheck RangeCheck, typename Unsigned,
79 typename MaxArithUnsigned>
80 using max_unsigned_t = daw::conditional_t<
81 daw::is_integral_v<Unsigned> or std::is_enum_v<Unsigned>,
82 daw::conditional_t<( sizeof( Unsigned ) > sizeof( MaxArithUnsigned ) ),
83 Unsigned, MaxArithUnsigned>,
84 Unsigned>;
85
86 // Constexpr'ified version from
87 // https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
88 constexpr UInt64 parse_8_digits( daw::not_null<char const *> const str ) {
89 auto const chunk = daw::to_uint64_buffer( str.get( ) );
90 // 1-byte mask trick (works on 4 pairs of single digits)
91 auto const lower_digits =
92 ( chunk & 0x0F'00'0F'00'0F'00'0F'00_u64 ) >> 8U;
93 auto const upper_digits =
94 ( chunk & 0x00'0F'00'0F'00'0F'00'0F_u64 ) * 10U;
95 auto const chunk2 = lower_digits + upper_digits;
96
97 // 2-byte mask trick (works on 2 pairs of two digits)
98 auto const lower_digits2 =
99 ( chunk2 & 0x00'FF'00'00'00'FF'00'00_u64 ) >> 16U;
100 auto const upper_digits2 =
101 ( chunk2 & 0x00'00'00'FF'00'00'00'FF_u64 ) * 100U;
102 auto const chunk3 = lower_digits2 + upper_digits2;
103
104 // 4-byte mask trick (works on pair of four digits)
105 auto const lower_digits3 =
106 ( chunk3 & 0x00'00'FF'FF'00'00'00'00_u64 ) >> 32U;
107 auto const upper_digits3 =
108 ( chunk3 & 0x00'00'00'00'00'00'FF'FF_u64 ) * 10000U;
109 auto const chunk4 = lower_digits3 + upper_digits3;
110
111 return chunk4 & 0xFFFF'FFFF_u64;
112 }
113
114 static_assert( parse_8_digits( "12345678" ) == 1234'5678_u64,
115 "8 digit parser does not work on this platform" );
116
117 constexpr UInt64
118 parse_16_digits( daw::not_null<char const *> const str ) {
119 auto const upper = parse_8_digits( str );
120 auto const lower = parse_8_digits( str + 8 );
121 return upper * 100'000'000_u64 + lower;
122 }
123
124 static_assert( parse_16_digits( "1234567890123456" ) ==
125 1234567890123456_u64,
126 "16 digit parser does not work on this platform" );
127
128 template<typename T>
129 struct make_unsigned_with_bool : daw::make_unsigned<T> {};
130
131 template<>
132 struct make_unsigned_with_bool<bool> {
133 using type = bool;
134 };
135
136 template<typename Integer, typename T, typename ParseState>
137 [[nodiscard]] constexpr Integer
138 narrow_cast( T value, ParseState const &parse_state ) {
139 if constexpr( std::is_signed_v<T> ) {
140 if constexpr( std::is_signed_v<Integer> ) {
141 if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
142 return value;
143 } else if( value <= static_cast<T>( daw::max_value<Integer> ) ) {
144 return static_cast<Integer>( value );
145 } else {
146 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
147 }
148 } else if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
149 if( value >= 0 ) {
150 return value;
151 }
152 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
153 } else {
154 if( value >= 0 and
155 value <= static_cast<T>( daw::max_value<Integer> ) ) {
156 return value;
157 }
158 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
159 }
160 } else if constexpr( std::is_signed_v<Integer> ) {
161 if constexpr( sizeof( T ) < sizeof( Integer ) ) {
162 return static_cast<Integer>( value );
163 } else {
164 if( value > static_cast<T>( daw::max_value<Integer> ) ) {
165 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
166 }
167 return static_cast<Integer>( value );
168 }
169 } else if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
170 return static_cast<Integer>( value );
171 } else {
172 if( value <= static_cast<T>( daw::max_value<Integer> ) ) {
173 return static_cast<Integer>( value );
174 }
175 daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
176 }
177 }
178
179 template<typename T>
180 using make_unsigned_with_bool_t =
181 typename make_unsigned_with_bool<T>::type;
182
183 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
184 typename ParseState>
185 [[nodiscard]] static constexpr Unsigned
186 unsigned_parser_known( ParseState &parse_state ) {
187 // We know how many digits are in the number
188 using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
189 using uresult_t = max_unsigned_t<RangeChecked,
190 make_unsigned_with_bool_t<Unsigned>,
191 UInt64>;
192 static_assert(
193 not static_cast<bool>( RangeChecked ) or
194 std::is_same_v<uresult_t, UInt64>,
195 "Range checking is only supported for std integral types" );
196
197 daw::not_null<char const *> first = parse_state.first;
198 daw::not_null<char const *> const last = parse_state.last;
199 uresult_t result = uresult_t( );
200
201 while( last - first >= 16 ) {
202 result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
203 result += static_cast<uresult_t>( parse_16_digits( first ) );
204 first += 16;
205 }
206 if( last - first >= 8 ) {
207 result *= static_cast<uresult_t>( 100'000'000ULL );
208 result += static_cast<uresult_t>( parse_8_digits( first ) );
209 first += 8;
210 }
211 if constexpr( ParseState::is_zero_terminated_string ) {
212 auto dig = parse_digit( *first );
213 while( dig < 10U ) {
214 result *= 10U;
215 result += dig;
216 ++first;
217 dig = parse_digit( *first );
218 }
219 } else {
220 while( first < last ) {
221 result *= 10U;
222 result += parse_digit( *first );
223 ++first;
224 }
225 }
226 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
227 auto const count = ( daw::numeric_limits<result_t>::digits10 + 1U ) -
228 std::size( parse_state );
230 ( ( result <= static_cast<uresult_t>(
231 ( daw::numeric_limits<result_t>::max )( ) ) ) &
232 ( count >= 0 ) ),
233 ErrorReason::NumberOutOfRange,
234 parse_state );
235 }
236 parse_state.first = first;
237 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
238 return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
239 } else {
240 return daw::construct_a<Unsigned>(
241 narrow_cast<Unsigned>( result, parse_state ) );
242 }
243 }
244
245 //**************************
246 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
247 typename ParseState>
248 [[nodiscard]] static constexpr Unsigned
249 unsigned_parser_not_known( ParseState &parse_state ) {
250 // We do not know how long the string is
251 using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
252 using uresult_t = max_unsigned_t<RangeChecked,
253 make_unsigned_with_bool_t<Unsigned>,
254 UInt64>;
255 static_assert(
256 not static_cast<bool>( RangeChecked ) or
257 std::is_same_v<uresult_t, UInt64>,
258 "Range checking is only supported for std integral types" );
259 daw_json_assert_weak( parse_state.has_more( ),
260 ErrorReason::UnexpectedEndOfData,
261 parse_state );
262 daw::not_null<char const *> first = parse_state.first;
263 auto const orig_first = first;
264 (void)orig_first; // only used inside if constexpr and gcc9 warns
265 daw::not_null<char const *> const last = parse_state.last;
266 uresult_t result = uresult_t( );
267 bool has_eight =
268 last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
269 if( has_eight & ( last - first >= 16 ) ) {
270 bool has_sixteen = is_made_of_eight_digits_cx( first + 8 );
271 while( has_sixteen ) {
272 result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
273 result += static_cast<uresult_t>( parse_16_digits( first ) );
274 first += 16;
275 has_eight =
276 last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
277 has_sixteen =
278 has_eight and
279 ( last - first >= 16 ? is_made_of_eight_digits_cx( first + 8 )
280 : false );
281 }
282 }
283 if( has_eight ) {
284 result *= static_cast<uresult_t>( 100'000'000ULL );
285 result += static_cast<uresult_t>( parse_8_digits( first ) );
286 first += 8;
287 }
288 if constexpr( ParseState::is_zero_terminated_string ) {
289 auto dig = parse_digit( *first );
290 while( dig < 10U ) {
291 result *= 10U;
292 result += dig;
293 ++first;
294 dig = parse_digit( *first );
295 }
296 } else {
297 while( first < last ) {
298 auto const dig = parse_digit( *first );
299 if( dig >= 10U ) {
300 break;
301 }
302 result *= 10U;
303 result += dig;
304 ++first;
305 }
306 }
307
308 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
309 auto const count = static_cast<std::ptrdiff_t>(
310 daw::numeric_limits<result_t>::digits10 + 1 ) -
311 ( first - orig_first );
313 count >= 0, ErrorReason::NumberOutOfRange, parse_state );
314 }
315
316 parse_state.first = first;
317 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
318 return daw::construct_a<Unsigned>(
319 static_cast<Unsigned>( static_cast<result_t>( result ) ) );
320 } else {
321 return daw::construct_a<Unsigned>(
322 narrow_cast<Unsigned>( result, parse_state ) );
323 }
324 }
325
326#if false and defined( DAW_ALLOW_SSE42 )
327 /*
328 // Adapted from
329 //
330 //
331 https://github.com/lemire/simdjson/blob/102262c7abe64b517a36a6049b39d95f58bf4aea/src/haswell/numberparsing.h
332 DAW_ATTRIB_NONNULL( )
333 inline UInt64 parse_eight_digits_unrolled( char const *ptr ) {
334 // this actually computes *16* values so we are being wasteful.
335 static __m128i const ascii0 = _mm_set1_epi8( '0' );
336
337 static __m128i const mul_1_10 =
338 _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
339 );
340
341 static __m128i const mul_1_100 =
342 _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
343
344 static __m128i const mul_1_10000 =
345 _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
346
347 __m128i const input = _mm_sub_epi8(
348 _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
349 __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
350 __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
351 __m128i const t3 = _mm_packus_epi32( t2, t2 );
352 __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
353 return to_uint64( static_cast<std::uint32_t>( _mm_cvtsi128_si32(
354 t4 ) ) ); // only captures the sum of the first 8 digits, drop the
355 rest
356 }
357
358 inline UInt64 parse_sixteen_digits_unrolled( char const *ptr ) {
359 static __m128i const ascii0 = _mm_set1_epi8( '0' );
360
361 static __m128i const mul_1_10 =
362 _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
363 );
364
365 static __m128i const mul_1_100 =
366 _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
367
368 static __m128i const mul_1_10000 =
369 _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
370
371 __m128i const input = _mm_sub_epi8(
372 _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
373 __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
374 __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
375 __m128i const t3 = _mm_packus_epi32( t2, t2 );
376 __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
377 return to_uint64( _mm_cvtsi128_si64( t4 ) );
378 }
379
380 [[nodiscard]] inline bool
381 is_made_of_eight_digits_fast( char const *ptr ) {
382 UInt64 val;
383 memcpy( &val, ptr, sizeof( std::uint64_t ) );
384 return ( ( ( val & 0xF0F0F0F0F0F0F0F0_u64 ) |
385 ( ( ( val + 0x0606060606060606_u64 ) & 0xF0F0F0F0F0F0F0F0_u64
386 )
387 >> 4_u64 ) ) == 0x3333333333333333_u64 );
388 }
389
390 template<typename Unsigned, options::JsonRangeCheck RangeChecked, bool, typename
391 ParseState>
392 [[nodiscard]] inline Unsigned
393 unsigned_parser( sse42_exec_tag const &, ParseState &parse_state ) {
394 daw_json_assert_weak( parse_state.has_more( ),
395 ErrorRange::UnexpectedEndOfData, parse_state
396 ); using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
397 result_t result = result_t( ); char const *first = parse_state.first; char const
398 *const last = parse_state.last; char const *const orig_first =
399 first;
400 {
401 auto sz = last - first;
402 while( ( sz >= 8 ) & is_made_of_eight_digits_fast( first ) ) {
403 if( ( sz < 16 ) | ( not is_made_of_eight_digits_fast( first + 8 ) )
404 ) { result *= 100'000'000_u64; result += static_cast<result_t>(
405 parse_eight_digits_unrolled( first ) ); first += 8; break;
406 }
407 result *= 10'000'000'000'000'000_u64;
408 result +=
409 static_cast<result_t>( parse_sixteen_digits_unrolled( first ) );
410 sz -= 16;
411 first += 16;
412 }
413 }
414
415 auto dig = parse_digit( *first );
416 while( dig < 10U ) {
417 result *= 10U;
418 result += dig;
419 ++first;
420 dig = parse_digit( *first );
421 }
422 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
423 auto const count =
424 static_cast<intmax_t>( daw::numeric_limits<Unsigned>::digits10 + 1 )
425 - ( first - orig_first ); daw_json_ensure( (count >= 0) & (result <=
426 static_cast<result_t>( (daw::numeric_limits<Unsigned>::max)( ) )),
427 ErrorReason::NumberOutOfRange,
428 parse_state
429 );
430 }
431 parse_state.first = first;
432 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
433 return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
434 } else {
435 return daw::construct_a<Unsigned>( narrow_cast<Unsigned>( result, parse_state ) );
436 }
437 }
438 */
439#endif
440
441 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
442 bool KnownBounds, typename ParseState>
443 [[nodiscard]] DAW_ATTRIB_INLINE static constexpr Unsigned
444 unsigned_parser( ParseState &parse_state ) {
445 if constexpr( KnownBounds ) {
446 return unsigned_parser_known<Unsigned, RangeChecked>( parse_state );
447 } else {
448 return unsigned_parser_not_known<Unsigned, RangeChecked>(
449 parse_state );
450 }
451 }
452 } // namespace json_details
453 } // namespace DAW_JSON_VER
454} // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Assert that Bool is true when in Checked Input mode. If false, pass the rest of the args to daw_json_error.
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false, pass the rest of the args to daw_json_error.
DAW_ATTRIB_NOINLINE void daw_json_error(ErrorReason reason)
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition version.h:20