From 6dea3600b1cc81e9a0a7c3643497c9453dc24738 Mon Sep 17 00:00:00 2001 From: ScottPJones Date: Tue, 2 Oct 2018 17:55:11 -0400 Subject: [PATCH] New tests for uppercase changes --- Project.toml | 2 +- src/ChrBase.jl | 4 ++-- src/casefold.jl | 25 ++++++++----------------- src/unicode.jl | 6 +++--- test/runtests.jl | 15 +++++++++++++++ 5 files changed, 29 insertions(+), 23 deletions(-) diff --git a/Project.toml b/Project.toml index 26e4c59..d23046e 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,7 @@ keywords = ["Characters"] license = "MIT" desc = "Basic functionality for Chr type" authors = ["ScottPJones "] -version = "0.1.6" +version = "0.1.7" [deps] Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" diff --git a/src/ChrBase.jl b/src/ChrBase.jl index 62ccfc5..79dbbd5 100644 --- a/src/ChrBase.jl +++ b/src/ChrBase.jl @@ -22,8 +22,8 @@ using ModuleInterfaceTools codepoint_cse, codepoint_rng, codepoint_adj, utf8proc_error, write_utf8, write_utf16, _write_utf8_2, _write_utf8_3, _write_utf8_4, _write_ucs2, _lowercase_l, _uppercase_l, _lowercase_u, _uppercase_u, _titlecase_u, - _islower_a, _islower_u, _isupper_a, _isupper_l, _isupper_al, _isupper_u, - _can_upper_ch, _can_lower_ch, _can_upper, _can_upper_l + _islower_a, _islower_l, _islower_u, _isupper_a, _isupper_l, _isupper_al, _isupper_u, + _can_upper, _can_upper_l @api develop! _isvalid_chr diff --git a/src/casefold.jl b/src/casefold.jl index 2a60b24..5daaa1b 100644 --- a/src/casefold.jl +++ b/src/casefold.jl @@ -5,8 +5,12 @@ Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones Licensed under MIT License, see LICENSE.md =# -_lowercase_l(ch) = ifelse(_isupper_al(ch), ch + 0x20, ch) -_uppercase_l(ch) = ifelse(_can_upper(ch), ch - 0x20, ch) +_wide_upper(ch) = + ifelse(ch == 0xb5, 0x39c, + ifelse(ch == 0xff, 0x178, ifelse(!V6_COMPAT && ch == 0xdf, 0x1e9e, ch%UInt16))) + +_lowercase_l(ch) = _isupper_al(ch) ? ch + 0x20 : ch +_uppercase_l(ch) = _can_upper(ch) ? ch - 0x20 : _wide_upper(ch) _lowercase(ch) = is_latin(ch) ? _lowercase_l(ch) : _lowercase_u(ch) _uppercase(ch) = is_latin(ch) ? _uppercase_l(ch) : _uppercase_u(ch) @@ -16,8 +20,8 @@ lowercase(ch::T) where {T<:Chr} = T(_lowercase(codepoint(ch))) uppercase(ch::T) where {T<:Chr} = T(_uppercase(codepoint(ch))) titlecase(ch::T) where {T<:Chr} = T(_titlecase(codepoint(ch))) -lowercase(ch::ASCIIChr) = ifelse(_isupper_a(ch), ASCIIChr(ch + 0x20), ch) -uppercase(ch::ASCIIChr) = ifelse(_islower_a(ch), ASCIIChr(ch - 0x20), ch) +lowercase(ch::ASCIIChr) = _isupper_a(ch) ? ASCIIChr(ch + 0x20) : ch +uppercase(ch::ASCIIChr) = _islower_a(ch) ? ASCIIChr(ch - 0x20) : ch titlecase(ch::ASCIIChr) = uppercase(ch) lowercase(ch::T) where {T<:LatinChars} = T(_lowercase_l(codepoint(ch))) @@ -33,16 +37,3 @@ function uppercase(ch::_LatinChr) cb == 0xb5 ? UCS2Chr(0x39c) : cb == 0xff ? UCS2Chr(0x178) : ch end titlecase(ch::LatinChars) = uppercase(ch) - -@static if V6_COMPAT - @inline _can_upper_ch(ch) = - (ch <= 0x7f - ? _islower_a(ch) - : (ch > 0xff ? _islower_u(ch) : ifelse(c > 0xdf, c != 0xf7, c == 0xb5))) -else - @inline _can_upper_ch(ch) = - ch <= 0x7f ? _islower_a(ch) : (ch <= 0xff ? _is_lower_l(ch) : _islower_u(ch)) -end - -@inline _can_lower_ch(ch) = - ch <= 0x7f ? _isupper_a(ch) : (ch <= 0xff ? _isupper_l(ch) : _isupper_u(ch)) diff --git a/src/unicode.jl b/src/unicode.jl index 45a7dfe..5f5793c 100644 --- a/src/unicode.jl +++ b/src/unicode.jl @@ -77,9 +77,9 @@ const _isalnum_mask = _isnumeric_mask | _isalpha_mask const _isnumeric_a = _isdigit @inline _ispunct_a(ch) = ((UInt128(1) << ch) & 0x2800_0000_b800_0001_8c00_f7ee_0000_0000) != 0 -@inline _isspace_a(ch) = (ch == 32) | (9 <= ch <= 13) -@inline _islower_a(ch) = (ch - 'a'%UInt8) < 26 -@inline _isupper_a(ch) = (ch - 'A'%UInt8) < 26 +@inline _isspace_a(ch) = (ch == 0x20) | (0x9 <= ch <= 0xd) +@inline _islower_a(ch) = (ch%UInt8 - 'a'%UInt8) < 0x1a +@inline _isupper_a(ch) = (ch%UInt8 - 'A'%UInt8) < 0x1a @inline _isalpha_a(ch) = _islower_a(ch) | _isupper_a(ch) @inline _isalnum_a(ch) = _isdigit(ch) | _isalpha_a(ch) @inline _isprint_a(ch) = 0x20 <= ch < 0x7f diff --git a/test/runtests.jl b/test/runtests.jl index 4f4064b..dd0a2fc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -20,6 +20,21 @@ for C in (ASCIIChr, LatinChr, UCS2Chr, UTF32Chr, Char) @test last(rng) === C('\x7f') end + C != Char && @testset "Casefold character" begin + for c = 0:UInt(maxch) + is_valid(C, c) || continue + ch = C(c) + cj = Char(c) + uj = uppercase(cj) + if uj <= maxch + uc = uppercase(ch) + uc == uj || println(" $c: $maxch $uc $uj") + @test uc == uj + end + @test lowercase(ch) == lowercase(cj) + end + end + @testset "Edge conditions" begin for (val, pass) in ( (0, true), (0xd7ff, true),