Skip to content

Commit

Permalink
feat: add magic-pdf-dev case
Browse files Browse the repository at this point in the history
  • Loading branch information
quyuan committed Sep 12, 2024
1 parent fea2b7b commit 0142085
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
- name: cli test
run: |
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
source ~/.bashrc && cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
Expand Down
File renamed without changes.
3 changes: 2 additions & 1 deletion tests/test_cli/conf/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
"code_path": os.environ.get('GITHUB_WORKSPACE'),
"pdf_dev_path" : os.environ.get('GITHUB_WORKSPACE') + "/tests/test_cli/pdf_dev",
"pdf_res_path": "/tmp/magic-pdf",
"jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl"
"jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl",
"s3_pdf_path": "s3://llm-qatest-pnorm/mineru/test/test.pdf"
}
20 changes: 19 additions & 1 deletion tests/test_cli/test_cli_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import magic_pdf.model as model_config
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter

from magic_pdf.rw.S3ReaderWriter import S3ReaderWriter
model_config.__use_inside_model__ = True
pdf_res_path = conf.conf['pdf_res_path']
code_path = conf.conf['code_path']
Expand Down Expand Up @@ -248,6 +248,24 @@ def test_pdf_dev_cli_pdf_json_ocr(self):
os.system(cmd)


@pytest.mark.P1
def test_s3_sdk_suto(self):
    """Run the UNIPipe flow on a PDF read through S3ReaderWriter.

    Credentials and endpoint come from the ``pdf_ak``, ``pdf_sk`` and
    ``pdf_endpoint`` environment variables, and the bucket name from
    ``bucket``; each falls back to an empty string when unset. The PDF
    location is ``conf.conf["s3_pdf_path"]``. The test passes when the
    markdown produced by the pipe is non-empty.
    """
    # Positional order expected by S3ReaderWriter here: ak, sk, endpoint.
    credentials = tuple(
        os.environ.get(key, "")
        for key in ("pdf_ak", "pdf_sk", "pdf_endpoint")
    )
    bucket_name = os.environ.get('bucket', "")
    output_prefix = f"s3://{bucket_name}/mineru/test/test.md"

    # One client reads the source PDF; the other is handed to the pipe
    # and is rooted at the output prefix.
    reader = S3ReaderWriter(*credentials)
    image_writer = S3ReaderWriter(*credentials, parent_path=output_prefix)

    source_bytes = reader.read(conf.conf["s3_pdf_path"], mode=reader.MODE_BIN)

    # Empty type / model list, as in the original call.
    initial_state = {"_pdf_type": "", "model_list": []}
    pipeline = UNIPipe(source_bytes, initial_state, image_writer)
    pipeline.pipe_classify()
    pipeline.pipe_analyze()
    pipeline.pipe_parse()

    markdown = pipeline.pipe_mk_markdown(output_prefix, drop_mode="none")
    markdown_length = len(markdown)
    assert markdown_length > 0


if __name__ == '__main__':
Expand Down

0 comments on commit 0142085

Please sign in to comment.