Skip to content

Commit

Permalink
fix: add magic-pdf-dev case
Browse files Browse the repository at this point in the history
  • Loading branch information
quyuan committed Sep 12, 2024
1 parent 20212a3 commit 8df8737
Show file tree
Hide file tree
Showing 13 changed files with 178 additions and 60 deletions.
4 changes: 4 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
Expand Up @@ -659,3 +659,7 @@ specific requirements.
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.



$^1$
26 changes: 26 additions & 0 deletions mv_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import os
import shutil

def move_pdfs(root_folder, destination_folder):
    """Move every PDF found under ``root_folder`` into ``destination_folder``.

    The source tree is walked recursively and every file whose name ends in
    ``.pdf`` (compared case-insensitively) is moved into the flat destination
    folder. When a file with the same name is already present there, the
    incoming file gets a numeric suffix (``name_1.pdf``, ``name_2.pdf``, ...)
    instead of overwriting the earlier one.

    Args:
        root_folder: Directory that is searched recursively for PDF files.
        destination_folder: Directory that receives the files. It is created
            when it does not exist yet.
    """
    os.makedirs(destination_folder, exist_ok=True)
    dest_abs = os.path.abspath(destination_folder)

    # Walk the root directory and all of its sub-directories.
    for root, dirs, files in os.walk(root_folder):
        # Prune the destination from the walk so files that were just moved
        # are not visited again when the destination lives inside the root.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != dest_abs
        ]
        if os.path.abspath(root) == dest_abs:
            continue

        for file in files:
            if not file.lower().endswith('.pdf'):
                continue

            # Build the full source path.
            src_path = os.path.join(root, file)

            # Build a destination path that does not clash with a file that
            # is already there; a plain move would silently replace it.
            stem, ext = os.path.splitext(file)
            dst_path = os.path.join(destination_folder, file)
            suffix = 1
            while os.path.exists(dst_path):
                dst_path = os.path.join(
                    destination_folder, f'{stem}_{suffix}{ext}')
                suffix += 1

            # Move the file.
            shutil.move(src_path, dst_path)
            print(f'Moved {file} to {destination_folder}')

# Usage
# NOTE(review): these statements run at import time and use hard-coded
# Windows paths, so importing this module moves files as a side effect.
root_folder = r'D:\mineru\datasets\datasets' # source folder path
destination_folder = r'D:\mineru\datasets\pdf' # destination folder path

# Create the destination folder if it does not exist
if not os.path.exists(destination_folder):
    os.makedirs(destination_folder)

move_pdfs(root_folder, destination_folder)
Binary file added tests.zip
Binary file not shown.
3 changes: 2 additions & 1 deletion tests/retry_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ while true; do
# prepare env
source activate MinerU
pip install -r requirements-qa.txt
pip install magic-pdf[full]==0.7.0b1 --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
pip uninstall magic-pdf
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
exit_code=$?
if [ $exit_code -eq 0 ]; then
Expand Down
6 changes: 3 additions & 3 deletions tests/test_cli/conf/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
# Repository root read from the GITHUB_WORKSPACE environment variable;
# None when the variable is not set.
_workspace = os.environ.get('GITHUB_WORKSPACE')

# Shared settings for the CLI tests.
conf = {
    "code_path": _workspace,
    # Falls back to the current working directory when GITHUB_WORKSPACE is
    # unset, so importing this module no longer fails with
    # ``TypeError: NoneType + str``.
    # NOTE(review): the fallback assumes tests are started from the
    # repository root - confirm.
    "pdf_dev_path": (_workspace or os.getcwd()) + "/tests/test_cli/pdf_dev",
    "pdf_res_path": "/tmp/magic-pdf",
    "jsonl_path": "s3://llm-qatest-pnorm/mineru/test/line1.jsonl"
}
1 change: 1 addition & 0 deletions tests/test_cli/pdf_dev/line1.jsonl

Large diffs are not rendered by default.

54 changes: 0 additions & 54 deletions tests/test_cli/test_bench.py

This file was deleted.

50 changes: 50 additions & 0 deletions tests/test_cli/test_cli_sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,56 @@ def test_pdf_clit_ocr(self):
common.cli_count_folders_and_check_contents(
os.path.join(res_path, demo_name, 'ocr'))

@pytest.mark.P1
def test_pdf_dev_cli_local_jsonl_txt(self):
    """Run ``magic-pdf-dev`` in txt mode on the local ``line1.jsonl``."""
    jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
    cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, "txt")
    logging.info(cmd)
    # os.system returns the command's exit status; without checking it the
    # test passes even when the CLI fails.
    exit_code = os.system(cmd)
    assert exit_code == 0, 'command failed with status %s: %s' % (exit_code, cmd)


@pytest.mark.P1
def test_pdf_dev_cli_local_jsonl_ocr(self):
    """Run ``magic-pdf-dev`` in ocr mode on the local ``line1.jsonl``."""
    jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
    cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, 'ocr')
    logging.info(cmd)
    # os.system returns the command's exit status; without checking it the
    # test passes even when the CLI fails.
    exit_code = os.system(cmd)
    assert exit_code == 0, 'command failed with status %s: %s' % (exit_code, cmd)

@pytest.mark.P1
def test_pdf_dev_cli_local_jsonl_auto(self):
    """Run ``magic-pdf-dev`` in auto mode on the local ``line1.jsonl``."""
    jsonl_path = os.path.join(pdf_dev_path, 'line1.jsonl')
    cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, 'auto')
    logging.info(cmd)
    # os.system returns the command's exit status; without checking it the
    # test passes even when the CLI fails.
    exit_code = os.system(cmd)
    assert exit_code == 0, 'command failed with status %s: %s' % (exit_code, cmd)

@pytest.mark.P1
def test_pdf_dev_cli_s3_jsonl_txt(self):
    """Run ``magic-pdf-dev`` in txt mode on the S3-hosted jsonl file."""
    # Imported locally so the test does not depend on the module's import
    # list, which is not visible in this review.
    from conf import conf
    # NOTE(review): this test used the local line1.jsonl, making it a
    # duplicate of the local txt test. It now reads the S3 location from
    # the test configuration - confirm S3 access is available in CI.
    jsonl_path = conf.conf['jsonl_path']
    cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, "txt")
    logging.info(cmd)
    # os.system returns the command's exit status; without checking it the
    # test passes even when the CLI fails.
    exit_code = os.system(cmd)
    assert exit_code == 0, 'command failed with status %s: %s' % (exit_code, cmd)


@pytest.mark.P1
def test_pdf_dev_cli_s3_jsonl_ocr(self):
    """Run ``magic-pdf-dev`` in ocr mode on the S3-hosted jsonl file."""
    # Imported locally so the test does not depend on the module's import
    # list, which is not visible in this review.
    from conf import conf
    # NOTE(review): this test used the local line1.jsonl, making it a
    # duplicate of the local ocr test. It now reads the S3 location from
    # the test configuration - confirm S3 access is available in CI.
    jsonl_path = conf.conf['jsonl_path']
    cmd = 'magic-pdf-dev --jsonl %s -m %s' % (jsonl_path, 'ocr')
    logging.info(cmd)
    # os.system returns the command's exit status; without checking it the
    # test passes even when the CLI fails.
    exit_code = os.system(cmd)
    assert exit_code == 0, 'command failed with status %s: %s' % (exit_code, cmd)

@pytest.mark.P1
def test_pdf_dev_cli_s3_jsonl_auto(self):
    """Run ``magic-pdf-dev`` in auto mode on the S3-hosted jsonl file."""
    # Imported locally so the test does not depend on the module's import
    # list, which is not visible in this review.
    from conf import conf
    # NOTE(review): this test used the local line1.jsonl, making it a
    # duplicate of the local auto test. It now reads the S3 location from
    # the test configuration - confirm S3 access is available in CI.
    jsonl_path = conf.conf['jsonl_path']
    # The long ``--method`` spelling is kept exactly as the original wrote it.
    cmd = 'magic-pdf-dev --jsonl %s --method %s' % (jsonl_path, 'auto')
    logging.info(cmd)
    # os.system returns the command's exit status; without checking it the
    # test passes even when the CLI fails.
    exit_code = os.system(cmd)
    assert exit_code == 0, 'command failed with status %s: %s' % (exit_code, cmd)


if __name__ == '__main__':
pytest.main()
Empty file.
36 changes: 36 additions & 0 deletions tests/test_cli/test_performence.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
test performance
"""
import os
import shutil
import json
from lib import calculate_score
import pytest
from conf import conf

# Repository root read from the GITHUB_WORKSPACE environment variable
# (None when the variable is not set).
code_path = os.environ.get('GITHUB_WORKSPACE')
# Paths read from the shared test configuration dict.
pdf_dev_path = conf.conf["pdf_dev_path"]
pdf_res_path = conf.conf["pdf_res_path"]

class TestTable():
    """
    test table

    NOTE(review): this module's docstring says "test performance", yet the
    class is named ``TestTable`` - presumably copied from the table tests;
    confirm before renaming.
    """
    def test_perf_close_table(self):
        """
        test perf when close table
        """
        # TODO: placeholder - the body holds only the docstring, so this
        # test performs no checks.




def get_score():
    """
    Compute and return the score summary.

    Builds a ``calculate_score.Scoring`` from ``result.json`` inside
    ``pdf_dev_path``, runs ``calculate_similarity_total("mineru",
    pdf_dev_path)`` on it and returns the value of ``summary_scores()``.
    """
    result_file = os.path.join(pdf_dev_path, "result.json")
    scorer = calculate_score.Scoring(result_file)
    scorer.calculate_similarity_total("mineru", pdf_dev_path)
    return scorer.summary_scores()


54 changes: 54 additions & 0 deletions tests/test_cli/test_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
test table case
"""
import os
import shutil
import json
from lib import calculate_score
import pytest
from conf import conf

# Repository root read from the GITHUB_WORKSPACE environment variable
# (None when the variable is not set).
code_path = os.environ.get('GITHUB_WORKSPACE')
# Paths read from the shared test configuration dict.
pdf_dev_path = conf.conf["pdf_dev_path"]
pdf_res_path = conf.conf["pdf_res_path"]

class TestTable():
    """
    test table

    Every method below is a placeholder: each body holds only a docstring,
    so none of these tests performs any checks yet.
    """
    def test_paddle_table_master_cuda(self):
        """
        select table: paddle table master,mode is cuda
        """
        # TODO: not implemented yet.

    def test_paddle_table_master_cpu(self):
        """
        select table: paddle table master, mode is cpu
        """
        # TODO: not implemented yet.

    def test_st_table_cuda(self):
        """
        select table: ST, mode is cuda
        """
        # TODO: not implemented yet.

    def test_st_table_cpu(self):
        """
        select table: ST, mode is cpu
        """
        # TODO: not implemented yet.

    def test_close_table_cuda(self):
        """
        close table, mode is cuda
        """
        # TODO: not implemented yet.




def get_score():
    """
    Compute and return the score summary.

    Builds a ``calculate_score.Scoring`` from ``result.json`` inside
    ``pdf_dev_path``, runs ``calculate_similarity_total("mineru",
    pdf_dev_path)`` on it and returns the value of ``summary_scores()``.
    """
    result_file = os.path.join(pdf_dev_path, "result.json")
    scorer = calculate_score.Scoring(result_file)
    scorer.calculate_similarity_total("mineru", pdf_dev_path)
    return scorer.summary_scores()


File renamed without changes
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

class TestppTableModel(unittest.TestCase):
def test_image2html(self):
img = Image.open("tests/test_table/assets/table.jpg")
img = Image.open("tests/unittest/test_table/assets/table.jpg")
# 修改table模型路径
config = {"device": "cuda",
"model_dir": "D:/models/PDF-Extract-Kit/models/TabRec/TableMaster"}
"model_dir": "/home/quyuan/PDF-Extract-Kit/models/TabRec/TableMaster"}
table_model = ppTableModel(config)
res = table_model.img2html(img)
true_value = """<td><table border="1"><thead><tr><td><b>Methods</b></td><td><b>R</b></td><td><b>P</b></td><td><b>F</b></td><td><b>FPS</b></td></tr></thead><tbody><tr><td>SegLink [26]</td><td>70.0</td><td>86.0</td><td>77.0</td><td>8.9</td></tr><tr><td>PixelLink [4]</td><td>73.2</td><td>83.0</td><td>77.8</td><td>-</td></tr><tr><td>TextSnake [18]</td><td>73.9</td><td>83.2</td><td>78.3</td><td>1.1</td></tr><tr><td>TextField [37]</td><td>75.9</td><td>87.4</td><td>81.3</td><td>5.2 </td></tr><tr><td>MSR[38]</td><td>76.7</td><td>87.4</td><td>81.7</td><td>-</td></tr><tr><td>FTSN[3]</td><td>77.1</td><td>87.6</td><td>82.0</td><td>-</td></tr><tr><td>LSE[30]</td><td>81.7</td><td>84.2</td><td>82.9</td><td>-</td></tr><tr><td>CRAFT [2]</td><td>78.2</td><td>88.2</td><td>82.9</td><td>8.6</td></tr><tr><td>MCN [16]</td><td>79</td><td>88.</td><td>83</td><td>-</td></tr><tr><td>ATRR[35]</td><td>82.1</td><td>85.2</td><td>83.6</td><td>-</td></tr><tr><td>PAN [34]</td><td>83.8</td><td>84.4</td><td>84.1</td><td>30.2</td></tr><tr><td>DB[12]</td><td>79.2</td><td>91.5</td><td>84.9</td><td>32.0</td></tr><tr><td>DRRG [41]</td><td>82.30</td><td>88.05</td><td>85.08</td><td>-</td></tr><tr><td>Ours (SynText)</td><td>80.68</td><td>85.40</td><td>82.97</td><td>12.68</td></tr><tr><td>Ours (MLT-17)</td><td>84.54</td><td>86.62</td><td>85.57</td><td>12.31</td></tr></tbody></table></td>\n"""
Expand Down

0 comments on commit 8df8737

Please sign in to comment.