DAW JSON Link
Loading...
Searching...
No Matches
daw_json_parse_unsigned_int.h
Go to the documentation of this file.
1// Copyright (c) Darrell Wright
2//
3// Distributed under the Boost Software License, Version 1.0. (See accompanying
4// file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5//
6// Official repository: https://github.com/beached/daw_json_link
7//
8
9#pragma once
10
12
18
19#include <daw/daw_arith_traits.h>
20#include <daw/daw_construct_a.h>
21#include <daw/daw_cxmath.h>
22#include <daw/daw_not_null.h>
23#include <daw/daw_remove_cvref.h>
24#include <daw/daw_restrict.h>
25#include <daw/daw_uint_buffer.h>
26
27#include <cstddef>
28#include <daw/stdinc/data_access.h>
29#include <limits>
30
31#if defined( DAW_ALLOW_SSE42 )
32#include <emmintrin.h>
33#include <smmintrin.h>
34#include <tmmintrin.h>
35#include <xmmintrin.h>
36#if defined( DAW_HAS_MSVC_LIKE )
37#include <intrin.h>
38#endif
39#endif
40
41namespace daw::json {
42 inline namespace DAW_JSON_VER {
43 namespace json_details {
44 template<typename Signed, typename Unsigned>
45 DAW_ATTRIB_INLINE constexpr Signed to_signed( Unsigned &&u,
46 Signed sign ) {
47 if( sign <= Signed{ 0 } ) {
48 return static_cast<Signed>( -u );
49 }
50 return static_cast<Signed>( u );
51 }
52
53 [[nodiscard]]
54 constexpr bool
55 is_made_of_eight_digits_cx( daw::not_null<char const *> const ptr ) {
56 // The copy to local buffer is to get the compiler to treat it like a
57 // reinterpret_cast
58
59 std::byte const buff[8]{ static_cast<std::byte>( ptr[0] ),
60 static_cast<std::byte>( ptr[1] ),
61 static_cast<std::byte>( ptr[2] ),
62 static_cast<std::byte>( ptr[3] ),
63 static_cast<std::byte>( ptr[4] ),
64 static_cast<std::byte>( ptr[5] ),
65 static_cast<std::byte>( ptr[6] ),
66 static_cast<std::byte>( ptr[7] ) };
67
68 auto val = UInt64( );
69 for( std::size_t n = 0; n < 8; ++n ) {
70 val |= to_uint64( buff[n] ) << ( 8 * n );
71 }
72 return ( ( ( val & 0xF0F0'F0F0'F0F0'F0F0_u64 ) |
73 ( ( ( val + 0x0606'0606'0606'0606_u64 ) &
74 0xF0F0'F0F0'F0F0'F0F0_u64 ) >>
75 4U ) ) == 0x3333'3333'3333'3333_u64 );
76 }
77
78 template<options::JsonRangeCheck RangeCheck, typename Unsigned,
79 typename MaxArithUnsigned>
80 using max_unsigned_t = daw::conditional_t<
81 daw::is_integral_v<Unsigned> or std::is_enum_v<Unsigned>,
82 daw::conditional_t<( sizeof( Unsigned ) > sizeof( MaxArithUnsigned ) ),
83 Unsigned, MaxArithUnsigned>,
84 Unsigned>;
85
86 // Constexpr'ified version from
87 // https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
88 constexpr UInt64 parse_8_digits( daw::not_null<char const *> const str ) {
89 auto const chunk = daw::to_uint64_buffer( str.get( ) );
90 // 1-byte mask trick (works on 4 pairs of single digits)
91 auto const lower_digits =
92 ( chunk & 0x0F'00'0F'00'0F'00'0F'00_u64 ) >> 8U;
93 auto const upper_digits =
94 ( chunk & 0x00'0F'00'0F'00'0F'00'0F_u64 ) * 10U;
95 auto const chunk2 = lower_digits + upper_digits;
96
97 // 2-byte mask trick (works on 2 pairs of two digits)
98 auto const lower_digits2 =
99 ( chunk2 & 0x00'FF'00'00'00'FF'00'00_u64 ) >> 16U;
100 auto const upper_digits2 =
101 ( chunk2 & 0x00'00'00'FF'00'00'00'FF_u64 ) * 100U;
102 auto const chunk3 = lower_digits2 + upper_digits2;
103
104 // 4-byte mask trick (works on pair of four digits)
105 auto const lower_digits3 =
106 ( chunk3 & 0x00'00'FF'FF'00'00'00'00_u64 ) >> 32U;
107 auto const upper_digits3 =
108 ( chunk3 & 0x00'00'00'00'00'00'FF'FF_u64 ) * 10000U;
109 auto const chunk4 = lower_digits3 + upper_digits3;
110
111 return chunk4 & 0xFFFF'FFFF_u64;
112 }
113
114 static_assert( parse_8_digits( "12345678" ) == 1234'5678_u64,
115 "8 digit parser does not work on this platform" );
116
117 constexpr UInt64
118 parse_16_digits( daw::not_null<char const *> const str ) {
119 auto const upper = parse_8_digits( str );
120 auto const lower = parse_8_digits( str + 8 );
121 return upper * 100'000'000_u64 + lower;
122 }
123
124 static_assert( parse_16_digits( "1234567890123456" ) ==
125 1234567890123456_u64,
126 "16 digit parser does not work on this platform" );
127
128 template<typename T>
129 struct make_unsigned_with_bool : daw::make_unsigned<T> {};
130
131 template<>
132 struct make_unsigned_with_bool<bool> {
133 using type = bool;
134 };
135
136 template<typename Integer, typename T, typename ParseState>
137 [[nodiscard]] constexpr Integer
138 narrow_cast( T value, ParseState const &parse_state ) {
139 if constexpr( std::is_signed_v<T> ) {
140 if constexpr( std::is_signed_v<Integer> ) {
141 if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
142 return value;
143 } else if( value <= static_cast<T>( daw::max_value<Integer> ) ) {
144 return static_cast<Integer>( value );
145 } else {
147 true, ErrorReason::NumberOutOfRange, parse_state );
148 }
149 } else if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
150 if( value >= 0 ) {
151 return value;
152 }
153 daw_json_error( true, ErrorReason::NumberOutOfRange, parse_state );
154 } else {
155 if( value >= 0 and
156 value <= static_cast<T>( daw::max_value<Integer> ) ) {
157 return value;
158 }
159 daw_json_error( true, ErrorReason::NumberOutOfRange, parse_state );
160 }
161 } else if constexpr( std::is_signed_v<Integer> ) {
162 if constexpr( sizeof( T ) < sizeof( Integer ) ) {
163 return static_cast<Integer>( value );
164 } else {
165 if( value > static_cast<T>( daw::max_value<Integer> ) ) {
167 true, ErrorReason::NumberOutOfRange, parse_state );
168 }
169 return static_cast<Integer>( value );
170 }
171 } else if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
172 return static_cast<Integer>( value );
173 } else {
174 if( value <= static_cast<T>( daw::max_value<Integer> ) ) {
175 return static_cast<Integer>( value );
176 }
177 daw_json_error( true, ErrorReason::NumberOutOfRange, parse_state );
178 }
179 }
180
181 template<typename T>
182 using make_unsigned_with_bool_t =
183 typename make_unsigned_with_bool<T>::type;
184
185 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
186 typename ParseState>
187 [[nodiscard]] static constexpr Unsigned
188 unsigned_parser_known( ParseState &parse_state ) {
189 // We know how many digits are in the number
190 using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
191 using uresult_t = max_unsigned_t<RangeChecked,
192 make_unsigned_with_bool_t<Unsigned>,
193 UInt64>;
194 static_assert(
195 not static_cast<bool>( RangeChecked ) or
196 std::is_same_v<uresult_t, UInt64>,
197 "Range checking is only supported for std integral types" );
198
199 daw::not_null<char const *> first = parse_state.first;
200 daw::not_null<char const *> const last = parse_state.last;
201 uresult_t result = uresult_t( );
202
203 while( last - first >= 16 ) {
204 result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
205 result += static_cast<uresult_t>( parse_16_digits( first ) );
206 first += 16;
207 }
208 if( last - first >= 8 ) {
209 result *= static_cast<uresult_t>( 100'000'000ULL );
210 result += static_cast<uresult_t>( parse_8_digits( first ) );
211 first += 8;
212 }
213 if constexpr( ParseState::is_zero_terminated_string ) {
214 auto dig = parse_digit( *first );
215 while( dig < 10U ) {
216 result *= 10U;
217 result += dig;
218 ++first;
219 dig = parse_digit( *first );
220 }
221 } else {
222 while( first < last ) {
223 result *= 10U;
224 result += parse_digit( *first );
225 ++first;
226 }
227 }
228 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
229 auto const count = ( daw::numeric_limits<result_t>::digits10 + 1U ) -
230 std::size( parse_state );
232 ( ( result <= static_cast<uresult_t>(
233 ( daw::numeric_limits<result_t>::max )( ) ) ) &
234 ( count >= 0 ) ),
235 ErrorReason::NumberOutOfRange,
236 parse_state );
237 }
238 parse_state.first = first;
239 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
240 return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
241 } else {
242 return daw::construct_a<Unsigned>(
243 narrow_cast<Unsigned>( result, parse_state ) );
244 }
245 }
246
247 //**************************
248 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
249 typename ParseState>
250 [[nodiscard]] static constexpr Unsigned
251 unsigned_parser_not_known( ParseState &parse_state ) {
252 // We do not know how long the string is
253 using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
254 using uresult_t = max_unsigned_t<RangeChecked,
255 make_unsigned_with_bool_t<Unsigned>,
256 UInt64>;
257 static_assert(
258 not static_cast<bool>( RangeChecked ) or
259 std::is_same_v<uresult_t, UInt64>,
260 "Range checking is only supported for std integral types" );
261 daw_json_assert_weak( parse_state.has_more( ),
262 ErrorReason::UnexpectedEndOfData,
263 parse_state );
264 daw::not_null<char const *> first = parse_state.first;
265 auto const orig_first = first;
266 (void)orig_first; // only used inside if constexpr and gcc9 warns
267 daw::not_null<char const *> const last = parse_state.last;
268 uresult_t result = uresult_t( );
269 bool has_eight =
270 last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
271 if( has_eight & ( last - first >= 16 ) ) {
272 bool has_sixteen = is_made_of_eight_digits_cx( first + 8 );
273 while( has_sixteen ) {
274 result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
275 result += static_cast<uresult_t>( parse_16_digits( first ) );
276 first += 16;
277 has_eight =
278 last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
279 has_sixteen =
280 has_eight and
281 ( last - first >= 16 ? is_made_of_eight_digits_cx( first + 8 )
282 : false );
283 }
284 }
285 if( has_eight ) {
286 result *= static_cast<uresult_t>( 100'000'000ULL );
287 result += static_cast<uresult_t>( parse_8_digits( first ) );
288 first += 8;
289 }
290 if constexpr( ParseState::is_zero_terminated_string ) {
291 auto dig = parse_digit( *first );
292 while( dig < 10U ) {
293 result *= 10U;
294 result += dig;
295 ++first;
296 dig = parse_digit( *first );
297 }
298 } else {
299 while( first < last ) {
300 auto const dig = parse_digit( *first );
301 if( dig >= 10U ) {
302 break;
303 }
304 result *= 10U;
305 result += dig;
306 ++first;
307 }
308 }
309
310 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
311 auto const count = static_cast<std::ptrdiff_t>(
312 daw::numeric_limits<result_t>::digits10 + 1 ) -
313 ( first - orig_first );
315 count >= 0, ErrorReason::NumberOutOfRange, parse_state );
316 }
317
318 parse_state.first = first;
319 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
320 return daw::construct_a<Unsigned>(
321 static_cast<Unsigned>( static_cast<result_t>( result ) ) );
322 } else {
323 return daw::construct_a<Unsigned>(
324 narrow_cast<Unsigned>( result, parse_state ) );
325 }
326 }
327
328#if false and defined( DAW_ALLOW_SSE42 )
329 /*
330 // Adapted from
331 //
332 //
333 https://github.com/lemire/simdjson/blob/102262c7abe64b517a36a6049b39d95f58bf4aea/src/haswell/numberparsing.h
334 inline UInt64 parse_eight_digits_unrolled( daw::not_null<char const *> ptr ) {
335 // this actually computes *16* values so we are being wasteful.
336 static __m128i const ascii0 = _mm_set1_epi8( '0' );
337
338 static __m128i const mul_1_10 =
339 _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
340 );
341
342 static __m128i const mul_1_100 =
343 _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
344
345 static __m128i const mul_1_10000 =
346 _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
347
348 __m128i const input = _mm_sub_epi8(
349 _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
350 __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
351 __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
352 __m128i const t3 = _mm_packus_epi32( t2, t2 );
353 __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
354 return to_uint64( static_cast<std::uint32_t>( _mm_cvtsi128_si32(
355 t4 ) ) ); // only captures the sum of the first 8 digits, drop the
356 rest
357 }
358
359 inline UInt64 parse_sixteen_digits_unrolled( char const *ptr ) {
360 static __m128i const ascii0 = _mm_set1_epi8( '0' );
361
362 static __m128i const mul_1_10 =
363 _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
364 );
365
366 static __m128i const mul_1_100 =
367 _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
368
369 static __m128i const mul_1_10000 =
370 _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
371
372 __m128i const input = _mm_sub_epi8(
373 _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
374 __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
375 __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
376 __m128i const t3 = _mm_packus_epi32( t2, t2 );
377 __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
378 return to_uint64( _mm_cvtsi128_si64( t4 ) );
379 }
380
381 [[nodiscard]] inline bool
382 is_made_of_eight_digits_fast( char const *ptr ) {
383 UInt64 val;
384 memcpy( &val, ptr, sizeof( std::uint64_t ) );
385 return ( ( ( val & 0xF0F0F0F0F0F0F0F0_u64 ) |
386 ( ( ( val + 0x0606060606060606_u64 ) & 0xF0F0F0F0F0F0F0F0_u64
387 )
388 >> 4_u64 ) ) == 0x3333333333333333_u64 );
389 }
390
391 template<typename Unsigned, options::JsonRangeCheck RangeChecked, bool, typename
392 ParseState>
393 [[nodiscard]] inline Unsigned
394 unsigned_parser( sse42_exec_tag const &, ParseState &parse_state ) {
395 daw_json_assert_weak( parse_state.has_more( ),
396 ErrorReason::UnexpectedEndOfData, parse_state
397 ); using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
398 result_t result = result_t( ); char const *first = parse_state.first; char const
399 *const last = parse_state.last; char const *const orig_first =
400 first;
401 {
402 auto sz = last - first;
403 while( ( sz >= 8 ) & is_made_of_eight_digits_fast( first ) ) {
404 if( ( sz < 16 ) | ( not is_made_of_eight_digits_fast( first + 8 ) )
405 ) { result *= 100'000'000_u64; result += static_cast<result_t>(
406 parse_eight_digits_unrolled( first ) ); first += 8; break;
407 }
408 result *= 10'000'000'000'000'000_u64;
409 result +=
410 static_cast<result_t>( parse_sixteen_digits_unrolled( first ) );
411 sz -= 16;
412 first += 16;
413 }
414 }
415
416 auto dig = parse_digit( *first );
417 while( dig < 10U ) {
418 result *= 10U;
419 result += dig;
420 ++first;
421 dig = parse_digit( *first );
422 }
423 if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
424 auto const count =
425 static_cast<intmax_t>( daw::numeric_limits<Unsigned>::digits10 + 1 )
426 - ( first - orig_first ); daw_json_ensure( (count >= 0) & (result <=
427 static_cast<result_t>( (daw::numeric_limits<Unsigned>::max)( ) )),
428 ErrorReason::NumberOutOfRange,
429 parse_state
430 );
431 }
432 parse_state.first = first;
433 if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
434 return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
435 } else {
436 return daw::construct_a<Unsigned>( narrow_cast<Unsigned>( result, parse_state ) );
437 }
438 }
439 */
440#endif
441
442 template<typename Unsigned, options::JsonRangeCheck RangeChecked,
443 bool KnownBounds, typename ParseState>
444 [[nodiscard]] DAW_ATTRIB_INLINE static constexpr Unsigned
445 unsigned_parser( ParseState &parse_state ) {
446 if constexpr( KnownBounds ) {
447 return unsigned_parser_known<Unsigned, RangeChecked>( parse_state );
448 } else {
449 return unsigned_parser_not_known<Unsigned, RangeChecked>(
450 parse_state );
451 }
452 }
453 } // namespace json_details
454 } // namespace DAW_JSON_VER
455} // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Assert that Bool is true when in Checked Input mode. If false, pass the rest of the args to daw_json_error.
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false, pass the rest of the args to daw_json_error.
DAW_ATTRIB_NOINLINE void daw_json_error(bool b, ErrorReason reason)
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition version.h:20