DAW JSON Link
daw_json_parse_unsigned_int.h
Go to the documentation of this file.
1 // Copyright (c) Darrell Wright
2 //
3 // Distributed under the Boost Software License, Version 1.0. (See accompanying
4 // file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
5 //
6 // Official repository: https://github.com/beached/daw_json_link
7 //
8 
9 #pragma once
10 
11 #include "version.h"
12 
13 #include "daw_json_assert.h"
14 #include "daw_json_exec_modes.h"
15 #include "daw_json_parse_digit.h"
17 #include "daw_json_type_options.h"
18 
19 #include <daw/daw_arith_traits.h>
20 #include <daw/daw_construct_a.h>
21 #include <daw/daw_cxmath.h>
22 #include <daw/daw_remove_cvref.h>
23 #include <daw/daw_uint_buffer.h>
24 
25 #include <cstddef>
26 #include <daw/stdinc/data_access.h>
27 #include <limits>
28 
29 #if defined( DAW_ALLOW_SSE42 )
30 #include <emmintrin.h>
31 #include <smmintrin.h>
32 #include <tmmintrin.h>
33 #include <xmmintrin.h>
34 #if defined( DAW_HAS_MSVC_LIKE )
35 #include <intrin.h>
36 #endif
37 #endif
38 
39 namespace daw::json {
40  inline namespace DAW_JSON_VER {
41  namespace json_details {
42  template<typename Signed, typename Unsigned>
43  DAW_ATTRIB_INLINE constexpr Signed to_signed( Unsigned &&u,
44  Signed sign ) {
45  if( sign <= Signed{ 0 } ) {
46  return static_cast<Signed>( -u );
47  }
48  return static_cast<Signed>( u );
49  }
50 
51  [[nodiscard]]
52  DAW_ATTRIB_NONNULL( ) inline constexpr bool is_made_of_eight_digits_cx(
53  char const *ptr ) {
54  // The copy to local buffer is to get the compiler to treat it like a
55  // reinterpret_cast
56 
57  std::byte const buff[8]{
58  static_cast<std::byte>( ptr[0] ), static_cast<std::byte>( ptr[1] ),
59  static_cast<std::byte>( ptr[2] ), static_cast<std::byte>( ptr[3] ),
60  static_cast<std::byte>( ptr[4] ), static_cast<std::byte>( ptr[5] ),
61  static_cast<std::byte>( ptr[6] ), static_cast<std::byte>( ptr[7] ) };
62 
63  UInt64 val = UInt64( );
64  for( std::size_t n = 0; n < 8; ++n ) {
65  val |= to_uint64( buff[n] ) << ( 8 * n );
66  }
67  return ( ( ( val & 0xF0F0'F0F0'F0F0'F0F0_u64 ) |
68  ( ( ( val + 0x0606'0606'0606'0606_u64 ) &
69  0xF0F0'F0F0'F0F0'F0F0_u64 ) >>
70  4U ) ) == 0x3333'3333'3333'3333_u64 );
71  }
72 
73  template<options::JsonRangeCheck RangeCheck, typename Unsigned,
74  typename MaxArithUnsigned>
75  using max_unsigned_t = daw::conditional_t<
76  daw::is_integral_v<Unsigned> or std::is_enum_v<Unsigned>,
77  daw::conditional_t<( sizeof( Unsigned ) > sizeof( MaxArithUnsigned ) ),
78  Unsigned, MaxArithUnsigned>,
79  Unsigned>;
80 
81  // Constexpr'ified version from
82  // https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html
83  DAW_ATTRIB_NONNULL( )
84  inline constexpr UInt64 parse_8_digits( char const *const str ) {
85  auto const chunk = daw::to_uint64_buffer( str );
86  // 1-byte mask trick (works on 4 pairs of single digits)
87  auto const lower_digits =
88  ( chunk & 0x0F'00'0F'00'0F'00'0F'00_u64 ) >> 8U;
89  auto const upper_digits =
90  ( chunk & 0x00'0F'00'0F'00'0F'00'0F_u64 ) * 10U;
91  auto const chunk2 = lower_digits + upper_digits;
92 
93  // 2-byte mask trick (works on 2 pairs of two digits)
94  auto const lower_digits2 =
95  ( chunk2 & 0x00'FF'00'00'00'FF'00'00_u64 ) >> 16U;
96  auto const upper_digits2 =
97  ( chunk2 & 0x00'00'00'FF'00'00'00'FF_u64 ) * 100U;
98  auto const chunk3 = lower_digits2 + upper_digits2;
99 
100  // 4-byte mask trick (works on pair of four digits)
101  auto const lower_digits3 =
102  ( chunk3 & 0x00'00'FF'FF'00'00'00'00_u64 ) >> 32U;
103  auto const upper_digits3 =
104  ( chunk3 & 0x00'00'00'00'00'00'FF'FF_u64 ) * 10000U;
105  auto const chunk4 = lower_digits3 + upper_digits3;
106 
107  return chunk4 & 0xFFFF'FFFF_u64;
108  }
109 
110  static_assert( parse_8_digits( "12345678" ) == 1234'5678_u64,
111  "8 digit parser does not work on this platform" );
112 
113  DAW_ATTRIB_NONNULL( )
114  inline constexpr UInt64 parse_16_digits( char const *const str ) {
115  auto const upper = parse_8_digits( str );
116  auto const lower = parse_8_digits( str + 8 );
117  return upper * 100'000'000_u64 + lower;
118  }
119 
120  static_assert( parse_16_digits( "1234567890123456" ) ==
121  1234567890123456_u64,
122  "16 digit parser does not work on this platform" );
123 
124  template<typename T>
125  struct make_unsigned_with_bool : daw::make_unsigned<T> {};
126 
127  template<>
128  struct make_unsigned_with_bool<bool> {
129  using type = bool;
130  };
131 
132  template<typename Integer, typename T, typename ParseState>
133  [[nodiscard]] constexpr Integer
134  narrow_cast( T value, ParseState const &parse_state ) {
135  if constexpr( std::is_signed_v<T> ) {
136  if constexpr( std::is_signed_v<Integer> ) {
137  if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
138  return value;
139  } else if( value <= static_cast<T>(
140  ( std::numeric_limits<Integer>::max )( ) ) ) {
141  return static_cast<Integer>( value );
142  } else {
143  daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
144  }
145  } else if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
146  if( value >= 0 ) {
147  return value;
148  }
149  daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
150  } else {
151  if( value >= 0 and
152  value <=
153  static_cast<T>( ( std::numeric_limits<Integer>::max )( ) ) ) {
154  return value;
155  }
156  daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
157  }
158  } else if constexpr( std::is_signed_v<Integer> ) {
159  if constexpr( sizeof( T ) < sizeof( Integer ) ) {
160  return static_cast<Integer>( value );
161  } else {
162  if( value >
163  static_cast<T>( ( std::numeric_limits<Integer>::max )( ) ) ) {
164  daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
165  }
166  return static_cast<Integer>( value );
167  }
168  } else if constexpr( sizeof( T ) <= sizeof( Integer ) ) {
169  return static_cast<Integer>( value );
170  } else {
171  if( value <=
172  static_cast<T>( ( std::numeric_limits<Integer>::max )( ) ) ) {
173  return static_cast<Integer>( value );
174  }
175  daw_json_error( ErrorReason::NumberOutOfRange, parse_state );
176  }
177  }
178 
179  template<typename T>
180  using make_unsigned_with_bool_t =
181  typename make_unsigned_with_bool<T>::type;
182 
183  template<typename Unsigned, options::JsonRangeCheck RangeChecked,
184  typename ParseState>
185  [[nodiscard]] static constexpr Unsigned
186  unsigned_parser_known( constexpr_exec_tag const &,
187  ParseState &parse_state ) {
188  using CharT = typename ParseState::CharT;
189  // We know how many digits are in the number
190  using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
191  using uresult_t =
192  max_unsigned_t<RangeChecked, make_unsigned_with_bool_t<Unsigned>,
193  UInt64>;
194  static_assert(
195  not static_cast<bool>( RangeChecked ) or
196  std::is_same_v<uresult_t, UInt64>,
197  "Range checking is only supported for std integral types" );
198 
199  CharT *first = parse_state.first;
200  CharT *const last = parse_state.last;
201  uresult_t result = uresult_t( );
202 
203  while( last - first >= 16 ) {
204  result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
205  result += static_cast<uresult_t>( parse_16_digits( first ) );
206  first += 16;
207  }
208  if( last - first >= 8 ) {
209  result *= static_cast<uresult_t>( 100'000'000ULL );
210  result += static_cast<uresult_t>( parse_8_digits( first ) );
211  first += 8;
212  }
213  if constexpr( ParseState::is_zero_terminated_string ) {
214  auto dig = parse_digit( *first );
215  while( dig < 10U ) {
216  result *= 10U;
217  result += dig;
218  ++first;
219  dig = parse_digit( *first );
220  }
221  } else {
222  while( first < last ) {
223  result *= 10U;
224  result += parse_digit( *first );
225  ++first;
226  }
227  }
228  if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
229  auto const count = ( daw::numeric_limits<result_t>::digits10 + 1U ) -
230  std::size( parse_state );
232  ( ( result <= static_cast<uresult_t>(
233  ( daw::numeric_limits<result_t>::max )( ) ) ) &
234  ( count >= 0 ) ),
235  ErrorReason::NumberOutOfRange, parse_state );
236  }
237  parse_state.first = first;
238  if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
239  return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
240  } else {
241  return daw::construct_a<Unsigned>(
242  narrow_cast<Unsigned>( result, parse_state ) );
243  }
244  }
245 
246  //**************************
247  template<typename Unsigned, options::JsonRangeCheck RangeChecked,
248  typename ParseState>
249  [[nodiscard]] static constexpr Unsigned
250  unsigned_parser_not_known( constexpr_exec_tag const &,
251  ParseState &parse_state ) {
252  using CharT = typename ParseState::CharT;
253  // We do not know how long the string is
254  using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
255  using uresult_t =
256  max_unsigned_t<RangeChecked, make_unsigned_with_bool_t<Unsigned>,
257  UInt64>;
258  static_assert(
259  not static_cast<bool>( RangeChecked ) or
260  std::is_same_v<uresult_t, UInt64>,
261  "Range checking is only supported for std integral types" );
262  daw_json_assert_weak( parse_state.has_more( ),
263  ErrorReason::UnexpectedEndOfData, parse_state );
264  CharT *first = parse_state.first;
265  CharT *const orig_first = first;
266  (void)orig_first; // only used inside if constexpr and gcc9 warns
267  CharT *const last = parse_state.last;
268  uresult_t result = uresult_t( );
269  bool has_eight =
270  last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
271  if( has_eight & ( last - first >= 16 ) ) {
272  bool has_sixteen = is_made_of_eight_digits_cx( first + 8 );
273  while( has_sixteen ) {
274  result *= static_cast<uresult_t>( 10'000'000'000'000'000ULL );
275  result += static_cast<uresult_t>( parse_16_digits( first ) );
276  first += 16;
277  has_eight =
278  last - first >= 8 ? is_made_of_eight_digits_cx( first ) : false;
279  has_sixteen =
280  has_eight and
281  ( last - first >= 16 ? is_made_of_eight_digits_cx( first + 8 )
282  : false );
283  }
284  }
285  if( has_eight ) {
286  result *= static_cast<uresult_t>( 100'000'000ULL );
287  result += static_cast<uresult_t>( parse_8_digits( first ) );
288  first += 8;
289  }
290  if constexpr( ParseState::is_zero_terminated_string ) {
291  auto dig = parse_digit( *first );
292  while( dig < 10U ) {
293  result *= 10U;
294  result += dig;
295  ++first;
296  dig = parse_digit( *first );
297  }
298  } else {
299  while( first < last ) {
300  auto const dig = parse_digit( *first );
301  if( dig >= 10U ) {
302  break;
303  }
304  result *= 10U;
305  result += dig;
306  ++first;
307  }
308  }
309 
310  if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
311  auto const count = static_cast<std::ptrdiff_t>(
312  daw::numeric_limits<result_t>::digits10 + 1 ) -
313  ( first - orig_first );
314  daw_json_ensure( count >= 0, ErrorReason::NumberOutOfRange,
315  parse_state );
316  }
317 
318  parse_state.first = first;
319  if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
320  return daw::construct_a<Unsigned>(
321  static_cast<Unsigned>( static_cast<result_t>( result ) ) );
322  } else {
323  return daw::construct_a<Unsigned>(
324  narrow_cast<Unsigned>( result, parse_state ) );
325  }
326  }
327 
328 #if false and defined( DAW_ALLOW_SSE42 )
329  /*
330  // Adapted from
331  //
332  //
333  https://github.com/lemire/simdjson/blob/102262c7abe64b517a36a6049b39d95f58bf4aea/src/haswell/numberparsing.h
334  DAW_ATTRIB_NONNULL( )
335  inline UInt64 parse_eight_digits_unrolled( char const *ptr ) {
336  // this actually computes *16* values so we are being wasteful.
337  static __m128i const ascii0 = _mm_set1_epi8( '0' );
338 
339  static __m128i const mul_1_10 =
340  _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
341  );
342 
343  static __m128i const mul_1_100 =
344  _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
345 
346  static __m128i const mul_1_10000 =
347  _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
348 
349  __m128i const input = _mm_sub_epi8(
350  _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
351  __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
352  __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
353  __m128i const t3 = _mm_packus_epi32( t2, t2 );
354  __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
355  return to_uint64( static_cast<std::uint32_t>( _mm_cvtsi128_si32(
356  t4 ) ) ); // only captures the sum of the first 8 digits, drop the
357  rest
358  }
359 
360  inline UInt64 parse_sixteen_digits_unrolled( char const *ptr ) {
361  static __m128i const ascii0 = _mm_set1_epi8( '0' );
362 
363  static __m128i const mul_1_10 =
364  _mm_setr_epi8( 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1
365  );
366 
367  static __m128i const mul_1_100 =
368  _mm_setr_epi16( 100, 1, 100, 1, 100, 1, 100, 1 );
369 
370  static __m128i const mul_1_10000 =
371  _mm_setr_epi16( 10000, 1, 10000, 1, 10000, 1, 10000, 1 );
372 
373  __m128i const input = _mm_sub_epi8(
374  _mm_loadu_si128( reinterpret_cast<__m128i const *>( ptr ) ), ascii0 );
375  __m128i const t1 = _mm_maddubs_epi16( input, mul_1_10 );
376  __m128i const t2 = _mm_madd_epi16( t1, mul_1_100 );
377  __m128i const t3 = _mm_packus_epi32( t2, t2 );
378  __m128i const t4 = _mm_madd_epi16( t3, mul_1_10000 );
379  return to_uint64( _mm_cvtsi128_si64( t4 ) );
380  }
381 
382  [[nodiscard]] inline bool
383  is_made_of_eight_digits_fast( char const *ptr ) {
384  UInt64 val;
385  memcpy( &val, ptr, sizeof( std::uint64_t ) );
386  return ( ( ( val & 0xF0F0F0F0F0F0F0F0_u64 ) |
387  ( ( ( val + 0x0606060606060606_u64 ) & 0xF0F0F0F0F0F0F0F0_u64
388  )
389  >> 4_u64 ) ) == 0x3333333333333333_u64 );
390  }
391 
392  template<typename Unsigned, options::JsonRangeCheck RangeChecked, bool, typename
393  ParseState>
394  [[nodiscard]] inline Unsigned
395  unsigned_parser( sse42_exec_tag const &, ParseState &parse_state ) {
396  daw_json_assert_weak( parse_state.has_more( ),
397  ErrorReason::UnexpectedEndOfData, parse_state
398  ); using result_t = max_unsigned_t<RangeChecked, Unsigned, UInt64>;
399  result_t result = result_t( ); CharT *first = parse_state.first; CharT
400  *const last = parse_state.last; CharT *const orig_first =
401  first;
402  {
403  auto sz = last - first;
404  while( ( sz >= 8 ) & is_made_of_eight_digits_fast( first ) ) {
405  if( ( sz < 16 ) | ( not is_made_of_eight_digits_fast( first + 8 ) )
406  ) { result *= 100'000'000_u64; result += static_cast<result_t>(
407  parse_eight_digits_unrolled( first ) ); first += 8; break;
408  }
409  result *= 10'000'000'000'000'000_u64;
410  result +=
411  static_cast<result_t>( parse_sixteen_digits_unrolled( first ) );
412  sz -= 16;
413  first += 16;
414  }
415  }
416 
417  auto dig = parse_digit( *first );
418  while( dig < 10U ) {
419  result *= 10U;
420  result += dig;
421  ++first;
422  dig = parse_digit( *first );
423  }
424  if constexpr( RangeChecked != options::JsonRangeCheck::Never ) {
425  auto const count =
426  static_cast<intmax_t>( daw::numeric_limits<Unsigned>::digits10 + 1 )
427  - ( first - orig_first ); daw_json_ensure( (count >= 0) & (result <=
428  static_cast<result_t>( (daw::numeric_limits<Unsigned>::max)( ) )),
429  ErrorReason::NumberOutOfRange,
430  parse_state
431  );
432  }
433  parse_state.first = first;
434  if constexpr( RangeChecked == options::JsonRangeCheck::Never ) {
435  return daw::construct_a<Unsigned>( static_cast<Unsigned>( result ) );
436  } else {
437  return daw::construct_a<Unsigned>( narrow_cast<Unsigned>( result, parse_state ) );
438  }
439  }
440  */
441 #endif
442 
443  template<typename Unsigned, options::JsonRangeCheck RangeChecked,
444  bool KnownBounds, typename ParseState>
445  [[nodiscard]] DAW_ATTRIB_INLINE static constexpr Unsigned
446  unsigned_parser( constexpr_exec_tag const &tag,
447  ParseState &parse_state ) {
448  if constexpr( KnownBounds ) {
449  return unsigned_parser_known<Unsigned, RangeChecked>( tag,
450  parse_state );
451  } else {
452  return unsigned_parser_not_known<Unsigned, RangeChecked>(
453  tag, parse_state );
454  }
455  }
456  } // namespace json_details
457  } // namespace DAW_JSON_VER
458 } // namespace daw::json
#define daw_json_assert_weak(Bool,...)
Assert that Bool is true when in Checked Input mode. If false, pass the rest of the args to daw_json_error.
#define daw_json_ensure(Bool,...)
Ensure that Bool is true. If false, pass the rest of the args to daw_json_error.
DAW_ATTRIB_NOINLINE void daw_json_error(ErrorReason reason)
std::bool_constant< is_zero_terminated_string_v< T > > is_zero_terminated_string
Customization point traits.
#define DAW_JSON_VER
The version string used in namespace definitions. Must be a valid namespace name.
Definition: version.h:25