From 7530731e2dbe0b7119a98c07693626cf8c0a0291 Mon Sep 17 00:00:00 2001
From: ScottPJones
Date: Tue, 2 Oct 2018 11:19:02 -0400
Subject: [PATCH] =?UTF-8?q?Change=20in=20uppercase=20of=20=C3=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Notes:
  * The V6_COMPAT definition of _can_upper_ch tests its own argument `ch`
    throughout (it previously referenced an undefined name `c`).  Because
    that added line differs, the post-image blob id on the src/casefold.jl
    "index" line may be stale; it only matters for 3-way application.
  * TODO confirm: `_is_lower_l` is not among the names visible in the
    src/ChrBase.jl hunk, where the siblings are spelled `_islower_*` and
    `_isupper_*`.  Verify that it is defined before applying.

 Project.toml    | 12 +++++++-----
 src/ChrBase.jl  |  3 +--
 src/casefold.jl | 18 ++++++++++++------
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/Project.toml b/Project.toml
index d94688c..26e4c59 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,13 +4,15 @@ keywords = ["Characters"]
 license = "MIT"
 desc = "Basic functionality for Chr type"
 authors = ["ScottPJones "]
-version = "0.1.5"
+version = "0.1.6"
 
 [deps]
-CharSetEncodings = "cb9422de-a9d8-5b68-86db-ff05833ab307"
-ModuleInterfaceTools = "5cb8414e-7aab-5a03-a681-351269c074bf"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-StrAPI = "69e7dfc3-c4d0-5e14-8d95-d6042a05b383"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+
+ModuleInterfaceTools = "5cb8414e-7aab-5a03-a681-351269c074bf"
+
+StrAPI = "69e7dfc3-c4d0-5e14-8d95-d6042a05b383"
+CharSetEncodings = "cb9422de-a9d8-5b68-86db-ff05833ab307"
diff --git a/src/ChrBase.jl b/src/ChrBase.jl
index 06f4bdc..62ccfc5 100644
--- a/src/ChrBase.jl
+++ b/src/ChrBase.jl
@@ -23,8 +23,7 @@ using ModuleInterfaceTools
     write_utf8, write_utf16, _write_utf8_2, _write_utf8_3, _write_utf8_4, _write_ucs2,
     _lowercase_l, _uppercase_l, _lowercase_u, _uppercase_u, _titlecase_u,
     _islower_a, _islower_u, _isupper_a, _isupper_l, _isupper_al, _isupper_u,
-    _can_upper_latin, _can_upper_only_latin, _can_upper_ch, _can_lower_ch,
-    _can_upper, _can_upper_l
+    _can_upper_ch, _can_lower_ch, _can_upper, _can_upper_l
 
 @api develop!
 _isvalid_chr
diff --git a/src/casefold.jl b/src/casefold.jl
index cf9063c..2a60b24 100644
--- a/src/casefold.jl
+++ b/src/casefold.jl
@@ -28,15 +28,21 @@ uppercase(ch::LatinChr) = LatinChr(_uppercase_l(codepoint(ch)))
 function uppercase(ch::_LatinChr)
     cb = codepoint(ch)
     _can_upper(cb) && return _LatinChr(cb - 0x20)
-    # We don't uppercase 0xdf, the ß character
-    cb == 0xb5 ? UCS2Chr(0x39c) : (cb == 0xff ? UCS2Chr(0x178) : ch)
+    # Previously 0xdf (ß) was left unchanged; it now uppercases to 0x1e9e unless V6_COMPAT is set
+    !V6_COMPAT && cb == 0xdf && return UCS2Chr(0x1e9e)
+    cb == 0xb5 ? UCS2Chr(0x39c) : cb == 0xff ? UCS2Chr(0x178) : ch
 end
 titlecase(ch::LatinChars) = uppercase(ch)
 
-_can_upper_latin(ch) = _can_upper(ch) | (ch == 0xb5) | (ch == 0xff)
-_can_upper_only_latin(ch) = _can_upper_l(ch) | (ch == 0xb5) | (ch == 0xff)
+@static if V6_COMPAT
+    @inline _can_upper_ch(ch) =
+        (ch <= 0x7f
+         ? _islower_a(ch)
+         : (ch > 0xff ? _islower_u(ch) : ifelse(ch > 0xdf, ch != 0xf7, ch == 0xb5)))
+else
+    @inline _can_upper_ch(ch) =
+        ch <= 0x7f ? _islower_a(ch) : (ch <= 0xff ? _is_lower_l(ch) : _islower_u(ch))
+end
 
-@inline _can_upper_ch(ch) =
-    ch <= 0x7f ? _islower_a(ch) : (ch <= 0xff ? _can_upper_only_latin(ch) : _islower_u(ch))
 @inline _can_lower_ch(ch) =
     ch <= 0x7f ? _isupper_a(ch) : (ch <= 0xff ? _isupper_l(ch) : _isupper_u(ch))