Skip to content

Commit

Permalink
print LT/dictionary version
Browse files Browse the repository at this point in the history
  • Loading branch information
arysin committed Dec 17, 2023
1 parent 8e9ef6e commit ed3f362
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/main/groovy/ua/net/nlp/tools/TextUtils.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ public class TextUtils {
@Option(names = ["-o", "--output"], arity="1", description = ["Output file"])
String output
@Option(names = ["-q", "--quiet"], description = ["Less output"])
boolean quiet
public boolean quiet
@Option(names= ["-h", "--help"], usageHelp= true, description= "Show this help message and exit.")
boolean helpRequested
@Option(names = ["-n", "--outputFormat"], arity="1", description = "Output format: {xml (default), json, txt}", defaultValue = "xml")
Expand Down
25 changes: 18 additions & 7 deletions src/main/groovy/ua/net/nlp/tools/tag/TagTextCore.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ import picocli.CommandLine.ParameterException


class TagTextCore {
public static final Pattern PUNCT_PATTERN = Pattern.compile(/[,.:;!?\/()\[\]{}«»„“"'…\u2013\u2014\u201D\u201C•■♦-]+/)

// One or more punctuation characters: ASCII punctuation, brackets, quotation marks, ellipsis,
// en/em dashes (U+2013, U+2014), curly double quotes (U+201C, U+201D), bullet-like symbols and hyphen.
// NOTE(review): the trailing `// "` presumably re-balances the double quote for editor highlighting - confirm.
public static final Pattern PUNCT_PATTERN = Pattern.compile(/[,.:;!?\/()\[\]{}«»„“"'…\u2013\u2014\u201D\u201C•■♦-]+/) // "
// One or more symbol characters: % & @ $ * + = < > and the code point ranges
// U+00A0-U+00BF, U+2000-U+20CF, U+2100-U+218F and U+2200-U+22FF.
public static final Pattern SYMBOL_PATTERN = Pattern.compile(/[%&@$*+=<>\u00A0-\u00BF\u2000-\u20CF\u2100-\u218F\u2200-\u22FF]+/)
// A Cyrillic letter followed by one or more Cyrillic letters or apostrophes (', U+02BC, U+2019),
// with an optional prefix ending in a hyphen and an optional suffix starting with a hyphen.
static final Pattern UNKNOWN_PATTERN = Pattern.compile(/(.*-)?[а-яіїєґА-ЯІЇЄҐ][а-яіїєґА-ЯІЇЄҐ'\u02BC\u2019]+(-.*)?/)
// Alternatives, matched case-insensitively (including Unicode case):
//  - a leading '#', '№', en dash (U+2013) or hyphen;
//  - a trailing en dash or hyphen;
//  - any of the letters ы, э, ъ, ё;
//  - a Cyrillic letter immediately followed by a Latin letter, or the reverse.
// The en dash in the leading class must be written as \u2013: without the backslash the class
// contains the literal characters 'u', '2', '0', '1' and '3' instead of the dash.
static final Pattern NON_UK_PATTERN = Pattern.compile(/^[\#№\u2013-]|[\u2013-]$|[ыэъё]|[а-яіїєґ][a-z]|[a-z][а-яіїєґ]/, Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE)
Expand All @@ -62,9 +62,9 @@ class TagTextCore {
super(str);
this.stats = stats
}
}
TagStats stats = new TagStats()
}

TagStats stats = new TagStats()
DisambigStats disambigStats = new DisambigStats()
SemTags semTags = new SemTags()
ModZheleh modZheleh = new ModZheleh(langTool)
Expand Down Expand Up @@ -179,10 +179,14 @@ class TagTextCore {
? langTool.analyzeSentences( text.split("\n") as List )
: langTool.analyzeText(text)
}

@CompileStatic
List<List<TTR>> tagTextCore(List<AnalyzedSentence> analyzedSentences, TagStats stats) {
/**
 * Convenience overload: tags the given analyzed sentences by delegating to
 * {@code tagTextCore(analyzedSentences, stats)} with {@code null} passed as the stats argument.
 *
 * @param analyzedSentences sentences to tag
 * @return whatever the two-argument overload returns for these sentences
 */
public List<List<TTR>> tagTextCore(List<AnalyzedSentence> analyzedSentences) {
    return tagTextCore(analyzedSentences, null)
}

@CompileStatic
List<List<TTR>> tagTextCore(List<AnalyzedSentence> analyzedSentences, TagStats stats) {
List<List<TTR>> taggedSentences =
analyzedSentences.parallelStream().map { AnalyzedSentence analyzedSentence ->

Expand Down Expand Up @@ -665,6 +669,13 @@ class TagTextCore {
return
}

if( ! options.quiet ) {
println("LT version: ${JLanguageTool.VERSION}")
def dictUkVersionRes = Ukrainian.class.getClassLoader().getResourceAsStream('org/languagetool/resource/uk/VERSION')
def dictUkversion = dictUkVersionRes ? dictUkVersionRes.text : "<unknown>"
println("dict_uk version: ${dictUkversion}")
}

// TODO: quick hack to support multiple files
if( options.inputFiles && options.inputFiles != ["-"] ) {

Expand Down

0 comments on commit ed3f362

Please sign in to comment.