From 83276b057b02f125e09870efa2bae6404cecc81c Mon Sep 17 00:00:00 2001 From: Quinton Miller Date: Fri, 15 Nov 2024 20:16:20 +0800 Subject: [PATCH] handle non-ascii whitespace --- spec/std/string_spec.cr | 3 +++ src/float/fast_float.cr | 35 +++++++++++----------------- src/float/fast_float/parse_number.cr | 8 +------ src/string.cr | 6 +++-- 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 595ece196bac..3c63b7b23460 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -504,6 +504,7 @@ describe "String" do " 1234.56 ".to_f?(whitespace: false).should be_nil expect_raises(ArgumentError) { " 1234.56foo".to_f } " 1234.56foo".to_f?.should be_nil + "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f.should eq(1234.56_f64) "123.45 x".to_f64(strict: false).should eq(123.45_f64) expect_raises(ArgumentError) { "x1.2".to_f64 } "x1.2".to_f64?.should be_nil @@ -548,6 +549,7 @@ describe "String" do " 1234.56 ".to_f32?(whitespace: false).should be_nil expect_raises(ArgumentError) { " 1234.56foo".to_f32 } " 1234.56foo".to_f32?.should be_nil + "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f32.should eq(1234.56_f32) "123.45 x".to_f32(strict: false).should eq(123.45_f32) expect_raises(ArgumentError) { "x1.2".to_f32 } "x1.2".to_f32?.should be_nil @@ -591,6 +593,7 @@ describe "String" do " 1234.56 ".to_f64?(whitespace: false).should be_nil expect_raises(ArgumentError) { " 1234.56foo".to_f64 } " 1234.56foo".to_f64?.should be_nil + "\u{A0}\u{2028}\u{2029}1234.56\u{A0}\u{2028}\u{2029}".to_f64.should eq(1234.56_f64) "123.45 x".to_f64(strict: false).should eq(123.45_f64) expect_raises(ArgumentError) { "x1.2".to_f64 } "x1.2".to_f64?.should be_nil diff --git a/src/float/fast_float.cr b/src/float/fast_float.cr index cbb3305657d1..010476db4bca 100644 --- a/src/float/fast_float.cr +++ b/src/float/fast_float.cr @@ -42,11 +42,14 @@ struct Float finish = start + str.bytesize options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general) - ret = BinaryFormat_Float64.new.from_chars_advanced(start, finish, pointerof(value), options, whitespace: whitespace) - if ret.ec == Errno::NONE - if trailing_chars_allowed?(ret.ptr, finish, whitespace, strict) - value - end + if whitespace + start += str.calc_excess_left + finish -= str.calc_excess_right + end + + ret = BinaryFormat_Float64.new.from_chars_advanced(start, finish, pointerof(value), options) + if ret.ec == Errno::NONE && (!strict || ret.ptr == finish) + value end end @@ -56,24 +59,14 @@ struct Float finish = start + str.bytesize options = ParseOptionsT(typeof(str.to_unsafe.value)).new(format: :general) - ret = BinaryFormat_Float32.new.from_chars_advanced(start, finish, pointerof(value), options, whitespace: whitespace) - if ret.ec == Errno::NONE - if trailing_chars_allowed?(ret.ptr, finish, whitespace, strict) - value - end + if whitespace + start += str.calc_excess_left + finish -= str.calc_excess_right end - end - private def self.trailing_chars_allowed?(ptr, finish, whitespace, strict) - if strict - if whitespace - while ptr < finish && ptr.value.unsafe_chr.ascii_whitespace? - ptr += 1 - end - end - ptr == finish - else - true + ret = BinaryFormat_Float32.new.from_chars_advanced(start, finish, pointerof(value), options) + if ret.ec == Errno::NONE && (!strict || ret.ptr == finish) + value end end end diff --git a/src/float/fast_float/parse_number.cr b/src/float/fast_float/parse_number.cr index ef9663dca74e..3c1ac4c1cb24 100644 --- a/src/float/fast_float/parse_number.cr +++ b/src/float/fast_float/parse_number.cr @@ -172,18 +172,12 @@ module Float::FastFloat FromCharsResultT(UC).new(ptr, ec) end - # NOTE(crystal): *whitespace* is a normal argument - def from_chars_advanced(first : UC*, last : UC*, value : T*, options : ParseOptionsT(UC), whitespace : Bool) : FromCharsResultT(UC) forall UC + def from_chars_advanced(first : UC*, last : UC*, value : T*, options : ParseOptionsT(UC)) : FromCharsResultT(UC) forall UC {% raise "only some floating-point types are supported" unless T == Float32 || T == Float64 %} # TODO(crystal): support UInt16 and UInt32 {% raise "only UInt8 is supported" unless UC == UInt8 %} - if whitespace - while first != last && first.value.unsafe_chr.ascii_whitespace? - first += 1 - end - end if first == last return FromCharsResultT(UC).new(first, Errno::EINVAL) end diff --git a/src/string.cr b/src/string.cr index 5cfb1d1f4c54..4014cb0ff52a 100644 --- a/src/string.cr +++ b/src/string.cr @@ -2105,7 +2105,8 @@ class String remove_excess_left(excess_left) end - private def calc_excess_right + # :nodoc: + def calc_excess_right if single_byte_optimizable? i = bytesize - 1 while i >= 0 && to_unsafe[i].unsafe_chr.ascii_whitespace? @@ -2143,7 +2144,8 @@ class String bytesize - byte_index end - private def calc_excess_left + # :nodoc: + def calc_excess_left if single_byte_optimizable? excess_left = 0 # All strings end with '\0', and it's not a whitespace