Skip to content

Commit

Permalink
Added tests for the number of records in the processed environment statements.
Browse files Browse the repository at this point in the history
  • Loading branch information
mihaeladuta committed Feb 7, 2024
1 parent 6d7c9bc commit 4a749f1
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 5 deletions.
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@ charset-normalizer==3.3.2
conda-inject==1.3.1
ConfigArgParse==1.7
connection_pool==0.0.3
cramjam==2.8.1
datrie==0.8.2
dill==0.3.8
docutils==0.20.1
dpath==2.1.6
et-xmlfile==1.1.0
fastjsonschema==2.19.1
fastparquet==2023.10.1
fsspec==2024.2.0
gitdb==4.0.11
GitPython==3.1.41
humanfriendly==10.0
Expand Down
12 changes: 9 additions & 3 deletions src/REF2021_processing/read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
},
"groups": {
"records": 2036,
}
}
},
},
},
"environment_statements": {
"unit": {
Expand All @@ -54,13 +54,19 @@
"input_extension": ".txt",
"name": "EnvironmentStatementsUnitLevel",
"output_path": "data/processed/environment_statements/prepared/",
"tests": {
"records": 1874,
},
},
"institution": {
"extracted_path": "data/processed/environment_statements/extracted/institution/",
"name": "EnvironmentStatementsInstitutionLevel",
"input_extension": ".txt",
"prefix": "Institution environment statement - ",
"output_path": "data/processed/environment_statements/prepared/",
"tests": {
"records": 143,
},
},
},
"results": {
Expand All @@ -71,7 +77,7 @@
"output_path": "data/processed/sheets/",
"tests": {
"records": 1888,
}
},
},
}

Expand Down
44 changes: 42 additions & 2 deletions tests/submissions_results_data_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import REF2021_processing.read_write as rw


def test_processed_submissions_record_numbers():
def test_processed_submissions_records():
"""Test if the processed submissions files have the expected number of records."""

source = "submissions"
Expand All @@ -21,7 +21,7 @@ def test_processed_submissions_record_numbers():
), f"{sheet_name}: {records} records, expected {expected_records}"


def test_processed_results_record_numbers():
def test_processed_results_records():
"""Test if the processed results file has the expected number of records."""

source = "results"
Expand All @@ -36,3 +36,43 @@ def test_processed_results_record_numbers():
assert (
records == expected_records
), f"{sheet_name}: {records} records, expected {expected_records}"


def test_processed_institution_environment_statements_records():
    """Check the record count of the institution-level environment statements.

    Opens the prepared institution-level file with ``ParquetFile`` and
    compares its record count against the expected value stored under
    ``tests.records`` in ``rw.SOURCES``.
    """

    config = rw.SOURCES["environment_statements"]["institution"]
    name = config["name"]

    # The configured output path is joined onto rw.PROJECT_PATH.
    relative_path = f"{config['output_path']}{name}{rw.OUTPUT_EXTENSION}"
    parquet_path = os.path.join(rw.PROJECT_PATH, relative_path)

    actual = ParquetFile(parquet_path).count()
    expected = config["tests"]["records"]
    assert actual == expected, f"{name}: {actual} records, expected {expected}"


def test_processed_unit_environment_statements_records():
    """Check the record count of the unit-level environment statements.

    Opens the prepared unit-level file with ``ParquetFile`` and compares its
    record count against the expected value stored under ``tests.records``
    in ``rw.SOURCES``.
    """

    config = rw.SOURCES["environment_statements"]["unit"]
    name = config["name"]

    # The configured output path is joined onto rw.PROJECT_PATH.
    relative_path = f"{config['output_path']}{name}{rw.OUTPUT_EXTENSION}"
    parquet_path = os.path.join(rw.PROJECT_PATH, relative_path)

    actual = ParquetFile(parquet_path).count()
    expected = config["tests"]["records"]
    assert actual == expected, f"{name}: {actual} records, expected {expected}"

0 comments on commit 4a749f1

Please sign in to comment.