Skip to content

Commit

Permalink
Create pylint.yml (#46)
Browse files Browse the repository at this point in the history
* Fixed the pylint error about the UPPER_CASE naming style
* Added the python-app workflow to run the tests when the main branch is pushed or a PR targeting main is opened or updated
* Removed unused dependencies from the project and updated README.md
* Try to fix the TensorFlow reshaping error
  • Loading branch information
ChaoPang authored Sep 5, 2024
1 parent f41cdc1 commit ce5c263
Show file tree
Hide file tree
Showing 12 changed files with 72 additions and 489 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: Python application

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

permissions:
contents: read

jobs:
build:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install -e .
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
PYTHONPATH=./: pytest
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ tar -xvf omop_synthea.tar .
```
Convert the OMOP dataset to the MEDS format
```console
meds_etl_omop omop_synthea synthea_meds
pip install meds_etl==0.3.6;
meds_etl_omop omop_synthea synthea_meds;
```
Convert MEDS to the meds_reader database to get the patient level data
```console
Expand Down
4 changes: 0 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ dependencies = [
"dask==2024.1.1",
"dask[dataframe]==2024.1.1",
"datasets==2.16.1",
"docarray==0.40.0",
"docarray[hnswlib]==0.40.0",
"docarray[weaviate]==0.40.0",
"evaluate==0.4.1",
"fast-ml==3.68",
"fastparquet==0.8.1",
Expand Down Expand Up @@ -58,7 +55,6 @@ dependencies = [
"transformers==4.39.3",
"Werkzeug==3.0.1",
"wandb==0.17.8",
"Whoosh==2.7.4",
"xgboost==2.0.3"
]

Expand Down
18 changes: 9 additions & 9 deletions src/cehrbert/config/output_names.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
parquet_data_path = 'patient_sequence'
qualified_concept_list_path = 'qualified_concept_list'
time_attention_model_path = 'time_aware_model.h5'
bert_model_validation_path = 'bert_model.h5'
mortality_data_path = 'mortality'
heart_failure_data_path = 'heart_failure'
hospitalization_data_path = 'hospitalization'
information_content_data_path = 'information_content'
concept_similarity_path = 'concept_similarity'
PARQUET_DATA_PATH = 'patient_sequence'
QUALIFIED_CONCEPT_LIST_PATH = 'qualified_concept_list'
TIME_ATTENTION_MODEL_PATH = 'time_aware_model.h5'
BERT_MODEL_VALIDATION_PATH = 'bert_model.h5'
MORTALITY_DATA_PATH = 'mortality'
HEART_FAILURE_DATA_PATH = 'heart_failure'
HOSPITALIZATION_DATA_PATH = 'hospitalization'
INFORMATION_CONTENT_DATA_PATH = 'information_content'
CONCEPT_SIMILARITY_PATH = 'concept_similarity'
16 changes: 8 additions & 8 deletions src/cehrbert/evaluations/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def evaluate_sequence_models(args):
time_attention_tokenizer_path = find_tokenizer_path(args.time_attention_model_folder)
time_aware_model_path = os.path.join(
args.time_attention_model_folder,
p.time_attention_model_path
p.TIME_ATTENTION_MODEL_PATH
)
BiLstmModelEvaluator(
dataset=dataset,
Expand All @@ -83,7 +83,7 @@ def evaluate_sequence_models(args):
validate_folder(args.vanilla_bert_model_folder)
bert_tokenizer_path = find_tokenizer_path(args.vanilla_bert_model_folder)
bert_model_path = os.path.join(args.vanilla_bert_model_folder,
p.bert_model_validation_path)
p.BERT_MODEL_VALIDATION_PATH)
BertFeedForwardModelEvaluator(
dataset=dataset,
evaluation_folder=args.evaluation_folder,
Expand All @@ -108,7 +108,7 @@ def evaluate_sequence_models(args):
validate_folder(args.vanilla_bert_model_folder)
bert_tokenizer_path = find_tokenizer_path(args.vanilla_bert_model_folder)
bert_model_path = os.path.join(args.vanilla_bert_model_folder,
p.bert_model_validation_path)
p.BERT_MODEL_VALIDATION_PATH)
SlidingBertModelEvaluator(
dataset=dataset,
evaluation_folder=args.evaluation_folder,
Expand All @@ -134,7 +134,7 @@ def evaluate_sequence_models(args):
validate_folder(args.vanilla_bert_model_folder)
bert_tokenizer_path = find_tokenizer_path(args.vanilla_bert_model_folder)
bert_model_path = os.path.join(args.vanilla_bert_model_folder,
p.bert_model_validation_path)
p.BERT_MODEL_VALIDATION_PATH)
BertLstmModelEvaluator(
dataset=dataset,
evaluation_folder=args.evaluation_folder,
Expand All @@ -160,7 +160,7 @@ def evaluate_sequence_models(args):
if RANDOM_VANILLA_BERT_LSTM in args.model_evaluators:
validate_folder(args.vanilla_bert_model_folder)
bert_model_path = os.path.join(args.vanilla_bert_model_folder,
p.bert_model_validation_path)
p.BERT_MODEL_VALIDATION_PATH)
bert_tokenizer_path = find_tokenizer_path(args.vanilla_bert_model_folder)
visit_tokenizer_path = find_visit_tokenizer_path(args.vanilla_bert_model_folder)

Expand Down Expand Up @@ -195,7 +195,7 @@ def evaluate_sequence_models(args):
if HIERARCHICAL_BERT_LSTM in args.model_evaluators:
validate_folder(args.vanilla_bert_model_folder)
bert_model_path = os.path.join(args.vanilla_bert_model_folder,
p.bert_model_validation_path)
p.BERT_MODEL_VALIDATION_PATH)

bert_tokenizer_path = find_tokenizer_path(args.vanilla_bert_model_folder)
bert_visit_tokenizer_path = find_visit_tokenizer_path(args.vanilla_bert_model_folder)
Expand Down Expand Up @@ -227,7 +227,7 @@ def evaluate_sequence_models(args):
if HIERARCHICAL_BERT_POOLING in args.model_evaluators:
validate_folder(args.vanilla_bert_model_folder)
bert_model_path = os.path.join(args.vanilla_bert_model_folder,
p.bert_model_validation_path)
p.BERT_MODEL_VALIDATION_PATH)

bert_tokenizer_path = find_tokenizer_path(args.vanilla_bert_model_folder)
bert_visit_tokenizer_path = find_visit_tokenizer_path(args.vanilla_bert_model_folder)
Expand Down Expand Up @@ -259,7 +259,7 @@ def evaluate_sequence_models(args):
if RANDOM_HIERARCHICAL_BERT_LSTM in args.model_evaluators:
validate_folder(args.vanilla_bert_model_folder)
bert_model_path = os.path.join(args.vanilla_bert_model_folder,
p.bert_model_validation_path)
p.BERT_MODEL_VALIDATION_PATH)

bert_tokenizer_path = find_tokenizer_path(args.vanilla_bert_model_folder)
bert_visit_tokenizer_path = find_visit_tokenizer_path(args.vanilla_bert_model_folder)
Expand Down
Loading

0 comments on commit ce5c263

Please sign in to comment.