Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #78 Implementing Number Format Parser #97

Merged
merged 21 commits into from
Dec 13, 2021
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
90f1d33
Issue #78 - Initial (non-working) attempt.
Dec 3, 2021
5404ec5
Getting closer to the desired solution.
Dec 6, 2021
4907814
Even closer.
Dec 7, 2021
5f1539d
It appears I have the number format tests passing now.
Dec 7, 2021
ab50d86
Stop ignoring the number format validation tests.
Dec 7, 2021
5d29ba2
Fixing unit tests to match newer parser behaviour.
Dec 7, 2021
1fd4227
Added unit test to ensure error when maximum fractional digits exceeded.
Dec 7, 2021
ed0ccbb
Refactoring debug logging of exceptions a little.
Dec 7, 2021
481197c
Removing file which I don't believe should be committed to source con…
Dec 7, 2021
577b866
Ensuring we support custom grouping and decimal separator chars.
Dec 7, 2021
d2f608c
Some more improvements and tests.
Dec 7, 2021
08a7bc3
Fix failing tests which don't contain any digits to parse.
Dec 8, 2021
6610f49
Supporting additional unusual quote chars.
Dec 8, 2021
60f69ca
Ensuring that percent and per-mille symbols appropriately scale the v…
Dec 8, 2021
71cd3ab
interesting WIP on supporting negative sub-patterns I don't want to l…
Dec 8, 2021
df7e9ae
Got the negative sub-pattern functionality working.
Dec 9, 2021
5d238be
Ensuring we support +- sign chars in the exponent digits section. Inc…
Dec 9, 2021
399840b
Match on a more generic list of +/- chars in one location.
Dec 9, 2021
629ba64
Match on a more generic list of +/- chars in one location.
Dec 9, 2021
27b8411
Merge remote-tracking branch 'origin/master' into robons-78-number-fo…
Dec 13, 2021
93bc99d
Addressing PR review comments.
Dec 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .bsp/sbt.json

This file was deleted.

210 changes: 71 additions & 139 deletions src/main/scala/Column.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ import com.fasterxml.jackson.databind.node.{
ObjectNode,
TextNode
}
import com.ibm.icu
import CSVValidation.traits.LoggerExtensions.LogDebugException
import CSVValidation.traits.NumberParser
import com.typesafe.scalalogging.Logger
import errors.ErrorWithoutContext
import org.joda.time.{DateTime, DateTimeZone}
Expand Down Expand Up @@ -464,12 +465,19 @@ case class Column private (
lazy val maxExclusiveDateTime: Option[ZonedDateTime] =
maxExclusive.map(v => mapJodaDateTimeToZonedDateTime(DateTime.parse(v)))

lazy val numberFormat: NumberFormat =
NumberFormat(
format.flatMap(f => f.pattern),
format.flatMap(f => f.groupChar),
format.flatMap(f => f.decimalChar)
)
lazy val numberParserForFormat: Either[String, NumberParser] =
try {
Right(
LdmlNumberFormatParser(
format.flatMap(f => f.groupChar).getOrElse(','),
format.flatMap(f => f.decimalChar).getOrElse('.')
).getParserForFormat(format.flatMap(f => f.pattern).get)
)
} catch {
case e =>
cjfairchild marked this conversation as resolved.
Show resolved Hide resolved
logger.debug(e)
Left(e.getMessage)
}

private def mapJodaDateTimeToZonedDateTime(jDt: DateTime) = {
val zoneId: ZoneId = jDt.getZone.toTimeZone.toZoneId
Expand Down Expand Up @@ -657,6 +665,7 @@ case class Column private (
s"(?<=[0-9])$decimalChar(?=[0-9])".r.replaceAllIn(newValue, ".")
case None => newValue
}

newValue
}

Expand All @@ -682,8 +691,7 @@ case class Column private (
Right(BigDecimal(newValue))
} catch {
case e: Throwable =>
logger.debug(e.getMessage)
logger.debug(e.getStackTrace.mkString("\n"))
logger.debug(e)
Left(ErrorWithoutContext("invalid_decimal", e.getMessage))
}
} else {
Expand All @@ -695,13 +703,8 @@ case class Column private (
)
}
} else {
numericParser(value) match {
case Right(parsedValue) =>
parsedValue match {
case bigD: icu.math.BigDecimal => Right(bigD.toBigDecimal)
case _ => Right(BigDecimal(parsedValue.longValue()))
}

parseNumberAgainstFormat(value) match {
case Right(parsedValue) => Right(parsedValue)
case Left(warning) =>
Left(ErrorWithoutContext("invalid_decimal", warning))
}
Expand All @@ -728,8 +731,7 @@ case class Column private (
Right(replaceInfWithInfinity(newValue).toDouble)
} catch {
case e: Throwable =>
logger.debug(e.getMessage)
logger.debug(e.getStackTrace.mkString("\n"))
logger.debug(e)
Left(ErrorWithoutContext("invalid_double", e.getMessage))
}
} else {
Expand All @@ -741,9 +743,9 @@ case class Column private (
)
}
} else {
numericParser(value) match {
parseNumberAgainstFormat(value) match {
case Left(w) => Left(ErrorWithoutContext("invalid_double", w))
case Right(parsedValue) => Right(parsedValue.doubleValue())
case Right(parsedValue) => Right(parsedValue.doubleValue)
}
}
}
Expand All @@ -754,7 +756,7 @@ case class Column private (
if (patternIsEmpty()) {
val newValue = standardisedValue(value)
if (Column.validFloatDatatypeRegex.pattern.matcher(newValue).matches()) {
Right(newValue.toFloat)
Right(replaceInfWithInfinity(newValue).toFloat)
} else {
Left(
ErrorWithoutContext(
Expand All @@ -764,9 +766,9 @@ case class Column private (
)
}
} else {
numericParser(value) match {
parseNumberAgainstFormat(value) match {
case Left(w) => Left(ErrorWithoutContext("invalid_float", w))
case Right(parsedValue) => Right(parsedValue.floatValue())
case Right(parsedValue) => Right(parsedValue.floatValue)
}
}
}
Expand All @@ -790,8 +792,7 @@ case class Column private (
Right(new BigInteger(newValue))
} catch {
case e: Throwable =>
logger.debug(e.getMessage)
logger.debug(e.getStackTrace.mkString("\n"))
logger.debug(e)
Left(ErrorWithoutContext("invalid_integer", e.getMessage))
}
} else {
Expand All @@ -803,7 +804,7 @@ case class Column private (
)
}
} else {
numericParser(value) match {
parseNumberAgainstFormat(value) match {
case Right(parsedValue) => convertToBigIntegerValue(parsedValue)
case Left(warning) =>
Left(ErrorWithoutContext("invalid_integer", warning))
Expand All @@ -812,20 +813,10 @@ case class Column private (
}

private def convertToBigIntegerValue(
parsedValue: Number
parsedValue: BigDecimal
): Either[ErrorWithoutContext, BigInteger] = {
try {
val bigIntValue = parsedValue match {
case _: java.lang.Long | _: Integer | _: java.lang.Short =>
BigInteger.valueOf(parsedValue.longValue())
case bigDecimalValue: icu.math.BigDecimal =>
bigDecimalValue.toBigIntegerExact
case _ =>
throw new IllegalArgumentException(
s"Unexpected type ${parsedValue.getClass}"
)
}
Right(bigIntValue)
Right(parsedValue.toBigInt.bigInteger)
} catch {
case e =>
Left(ErrorWithoutContext("invalid_integer", e.getMessage))
Expand All @@ -847,8 +838,7 @@ case class Column private (
Right(newValue.toLong)
} catch {
case e: Throwable =>
logger.debug(e.getMessage)
logger.debug(e.getStackTrace.mkString("\n"))
logger.debug(e)
Left(ErrorWithoutContext("invalid_long", e.getMessage))
}
} else {
Expand All @@ -860,13 +850,12 @@ case class Column private (
)
}
} else {
numericParser(value) match {
parseNumberAgainstFormat(value) match {
case Left(w) => Left(ErrorWithoutContext("invalid_long", w))
case Right(parsedValue) => {
parsedValue match {
case _: java.lang.Long | _: java.lang.Integer | _: java.lang.Short |
_: java.lang.Byte =>
Right(parsedValue.longValue())
try {
Right(parsedValue.longValue)
} catch {
case _ =>
Left(
ErrorWithoutContext(
Expand Down Expand Up @@ -894,8 +883,7 @@ case class Column private (
Right(newValue.toInt)
} catch {
case e: Throwable =>
logger.debug(e.getMessage)
logger.debug(e.getStackTrace.mkString("\n"))
logger.debug(e)
Left(ErrorWithoutContext("invalid_int", e.getMessage))
}
} else {
Expand All @@ -907,31 +895,18 @@ case class Column private (
)
}
} else {
numericParser(value) match {
parseNumberAgainstFormat(value) match {
case Left(w) => Left(ErrorWithoutContext("invalid_int", w))
case Right(parsedNumber) => {
parsedNumber match {
case _: java.lang.Long | _: java.lang.Integer | _: java.lang.Short |
_: java.lang.Byte => {
val parsedValue = parsedNumber.longValue()
if (parsedValue > Int.MaxValue || parsedValue < Int.MinValue)
Left(
ErrorWithoutContext(
"invalid_int",
s"Outside Int Range ${Int.MinValue} - ${Int.MaxValue} (inclusive)"
)
)
else Right(parsedValue.intValue())
}
case _ =>
Left(
ErrorWithoutContext(
"invalid_int",
s"Outside Int Range ${Int.MinValue} - ${Int.MaxValue} (inclusive)"
)
case Right(parsedNumber) =>
val parsedValue = parsedNumber.longValue
if (parsedValue > Int.MaxValue || parsedValue < Int.MinValue)
Left(
ErrorWithoutContext(
"invalid_int",
s"Outside Int Range ${Int.MinValue} - ${Int.MaxValue} (inclusive)"
)
}
}
)
else Right(parsedValue.intValue)
}
}
}
Expand All @@ -950,8 +925,7 @@ case class Column private (
Right(newValue.toShort)
} catch {
case e: Throwable =>
logger.debug(e.getMessage)
logger.debug(e.getStackTrace.mkString("\n"))
logger.debug(e)
Left(ErrorWithoutContext("invalid_short", e.getMessage))
}
} else {
Expand All @@ -963,30 +937,18 @@ case class Column private (
)
}
} else {
numericParser(value) match {
parseNumberAgainstFormat(value) match {
case Left(w) => Left(ErrorWithoutContext("invalid_short", w))
case Right(parsedNumber) => {
parsedNumber match {
case _: java.lang.Long | _: java.lang.Integer | _: java.lang.Short |
_: java.lang.Byte => {
val parsedValue = parsedNumber.longValue()
if (parsedValue > Short.MaxValue || parsedValue < Short.MinValue)
Left(
ErrorWithoutContext(
"invalid_short",
s"Outside Short Range ${Short.MinValue} - ${Short.MaxValue} (inclusive)"
)
)
else Right(parsedValue.shortValue())
}
case _ =>
Left(
ErrorWithoutContext(
"invalid_short",
s"Outside Short Range ${Short.MinValue} - ${Short.MaxValue} (inclusive)"
)
val parsedValue = parsedNumber.longValue
if (parsedValue > Short.MaxValue || parsedValue < Short.MinValue)
Left(
ErrorWithoutContext(
"invalid_short",
s"Outside Short Range ${Short.MinValue} - ${Short.MaxValue} (inclusive)"
)
}
)
else Right(parsedValue.shortValue())
}
}
}
Expand All @@ -1005,8 +967,7 @@ case class Column private (
Right(value.toByte)
} catch {
case e: Throwable =>
logger.debug(e.getMessage)
logger.debug(e.getStackTrace.mkString("\n"))
logger.debug(e)
Left(ErrorWithoutContext("invalid_byte", e.getMessage))
}
} else {
Expand All @@ -1018,31 +979,18 @@ case class Column private (
)
}
} else {
numericParser(value) match {
parseNumberAgainstFormat(value) match {
case Left(w) => Left(ErrorWithoutContext("invalid_byte", w))
case Right(parsedNumber) => {
parsedNumber match {
case _: java.lang.Long | _: java.lang.Integer | _: java.lang.Short |
_: java.lang.Byte => {
val parsedValue = parsedNumber.byteValue()
if (parsedValue > Byte.MaxValue || parsedValue < Byte.MinValue)
Left(
ErrorWithoutContext(
"invalid_byte",
s"Outside Byte Range ${Byte.MinValue} - ${Byte.MaxValue} (inclusive)"
)
)
else Right(parsedValue.byteValue())
}
case _ =>
Left(
ErrorWithoutContext(
"invalid_byte",
s"Outside Byte Range ${Byte.MinValue} - ${Byte.MaxValue} (inclusive)"
)
case Right(parsedNumber) =>
val parsedValue = parsedNumber.byteValue
if (parsedValue > Byte.MaxValue || parsedValue < Byte.MinValue)
Left(
ErrorWithoutContext(
"invalid_byte",
s"Outside Byte Range ${Byte.MinValue} - ${Byte.MaxValue} (inclusive)"
)
}
}
)
else Right(parsedValue.byteValue())
}
}
}
Expand Down Expand Up @@ -1202,29 +1150,13 @@ case class Column private (
}
}

def numericParser(
def parseNumberAgainstFormat(
value: String
): Either[String, Number] = {
try {
val parsedNumber = numberFormat.parse(value)
val paredNumberInString = numberFormat.format(parsedNumber)
val originalValueWithoutPlusesOrMinuses = stripUnquotedPlusMinus(value)
val parsedNumberWithoutPlusesOrMinuses = stripUnquotedPlusMinus(
paredNumberInString
)
if (
originalValueWithoutPlusesOrMinuses != parsedNumberWithoutPlusesOrMinuses
)
Left("Value does not match expected UTS-35 format")
else Right(parsedNumber)
} catch {
case e: Throwable => {
logger.debug(e.getMessage)
logger.debug(e.getStackTrace.mkString("\n"))
Left(e.getMessage)
}
): Either[String, BigDecimal] =
numberParserForFormat match {
case Right(numericParser) => numericParser.parse(value)
case Left(err) => Left(err)
}
}

def containsUnquotedChar(value: String, char: Char): Boolean = {
var insideQuotes = false
Expand Down
9 changes: 8 additions & 1 deletion src/main/scala/PropertyChecker.scala
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,14 @@ object PropertyChecker {
val patternNode = format.path("pattern")
val patternString: Option[String] =
if (patternNode.isMissingNode) None else Some(patternNode.asText())
NumberFormat(patternString, groupChar, decimalChar)

patternString
.map(pattern =>
LdmlNumberFormatParser(
groupChar.getOrElse(','),
decimalChar.getOrElse('.')
).getParserForFormat(pattern)
)
} catch {
case e: NumberFormatError => {
format.asInstanceOf[ObjectNode].remove("pattern")
Expand Down
Loading