Skip to content

Commit

Permalink
Implement unicode script extensions with ucd-parse
Browse files Browse the repository at this point in the history
  • Loading branch information
raskad committed Nov 12, 2023
1 parent df9730f commit 5204617
Show file tree
Hide file tree
Showing 8 changed files with 23,152 additions and 23,520 deletions.
1 change: 1 addition & 0 deletions gen-unicode/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ edition = "2021"

[dependencies]
codegen = "0.1.3"
ucd-parse = "0.1.12"
4 changes: 4 additions & 0 deletions gen-unicode/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ This crate generates unicode tables and code specific for regress.
curl -L http://ftp.unicode.org/Public/UNIDATA/emoji/emoji-data.txt -o emoji-data.txt
curl -L http://ftp.unicode.org/Public/UNIDATA/PropList.txt -o PropList.txt
curl -L http://ftp.unicode.org/Public/UNIDATA/Scripts.txt -o Scripts.txt
mkdir /tmp/ucd-15.0.0
cd /tmp/ucd-15.0.0
curl -LO https://www.unicode.org/Public/zipped/15.0.0/UCD.zip
unzip UCD.zip
```

2. Run this crate and redirect the output into the corresponding `.rs` file in the regress crate:
Expand Down
49 changes: 33 additions & 16 deletions gen-unicode/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ mod binary_properties;
mod case_folding;
mod general_category_values;
mod scripts;

use codegen::Scope;
use std::{fs::OpenOptions, io::Write};
use ucd_parse::{parse, PropertyValueAlias, Script, ScriptExtension};

// Should match unicode.rs.
const CODE_POINT_BITS: u32 = 20;
Expand All @@ -14,7 +16,25 @@ const MAX_CODE_POINT: u32 = (1 << CODE_POINT_BITS) - 1;
// Our length is stored with a bias of -1, so no need to subtract 1.
const MAX_LENGTH: u32 = 1 << LENGTH_BITS;

/// Shared generator state: the two output scopes plus the parsed UCD data
/// that the generation steps read from.
pub(crate) struct GenUnicode {
/// Scope collecting the generated table code; `main` renders it with
/// `to_string()` and writes it to the unicode tables file.
pub(crate) scope: Scope,
/// Scope collecting the generated tests; `main` renders it with
/// `to_string()` and writes it to the tests file.
pub(crate) scope_tests: Scope,
/// Records parsed from the UCD directory (PropertyValueAliases.txt).
pub(crate) property_value_aliases: Vec<PropertyValueAlias>,
/// Records parsed from the UCD directory (Scripts.txt).
pub(crate) scripts: Vec<Script>,
/// Records parsed from the UCD directory (ScriptExtensions.txt).
pub(crate) script_extensions: Vec<ScriptExtension>,
}

fn main() {
let ucd_path = "/tmp/ucd-15.0.0";

let mut gen = GenUnicode {
scope: Scope::new(),
scope_tests: Scope::new(),
property_value_aliases: parse(ucd_path).expect("could not parse PropertyValueAliases.txt"),
scripts: parse(ucd_path).expect("could not parse Scripts.txt"),
script_extensions: parse(ucd_path).expect("could not parse ScriptExtensions.txt"),
};

let file_unicode_tables_path = "../src/unicodetables.rs";
let file_tests_path = "../tests/unicode_property_escapes.rs";

Expand All @@ -40,30 +60,27 @@ fn main() {
.write_all(b"// DO NOT EDIT! This file is autogenerated from gen-unicode.\n")
.expect("Failed to write to tests file");

let mut scope = Scope::new();
scope.import("crate::unicode", "CodePointRange");
scope.import("crate::unicode", "CodePointRangeUnpacked");
scope.import("crate::unicode", "FoldRange");
gen.scope.import("crate::unicode", "CodePointRange");
gen.scope.import("crate::unicode", "CodePointRangeUnpacked");
gen.scope.import("crate::unicode", "FoldRange");

binary_properties::generate(&mut scope);
case_folding::generate_folds(&mut scope);
scripts::generate(&mut scope);
general_category_values::generate(&mut scope);
binary_properties::generate(&mut gen.scope);
case_folding::generate_folds(&mut gen.scope);
general_category_values::generate(&mut gen.scope);
gen.generate_scripts();

let mut scope_tests = Scope::new();
scope_tests.import("common", "*");
scope_tests.raw("pub mod common;");
gen.scope_tests.import("common", "*");
gen.scope_tests.raw("pub mod common;");

binary_properties::generate_tests(&mut scope_tests);
scripts::generate_tests(&mut scope_tests);
general_category_values::generate_tests(&mut scope_tests);
binary_properties::generate_tests(&mut gen.scope_tests);
general_category_values::generate_tests(&mut gen.scope_tests);

file_unicode_tables
.write_all(scope.to_string().as_bytes())
.write_all(gen.scope.to_string().as_bytes())
.expect("Failed to write to unicode tables file");

file_tests
.write_all(scope_tests.to_string().as_bytes())
.write_all(gen.scope_tests.to_string().as_bytes())
.expect("Failed to write to tests file");

std::process::Command::new("cargo")
Expand Down
Loading

0 comments on commit 5204617

Please sign in to comment.