From ac3ea3af745b69a05d4d8df105a789473ba3ef12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=88=E5=8A=9F?=
Date: Thu, 26 Oct 2023 18:27:43 -0700
Subject: [PATCH] text and image examples

---
 examples/extract_images.rs | 34 ++++++++++++++++++++++++++++++++++
 examples/extract_text.rs   | 27 +++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 examples/extract_images.rs
 create mode 100644 examples/extract_text.rs

diff --git a/examples/extract_images.rs b/examples/extract_images.rs
new file mode 100644
index 0000000..51afc23
--- /dev/null
+++ b/examples/extract_images.rs
@@ -0,0 +1,34 @@
+//! Extracts every image block found in a document into `output_<n>.png` files.
+//!
+//! Usage: `cargo run --example extract_images -- <filename>`
+
+use std::io::Write;
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Report a missing argument as an error instead of panicking.
+    let filename = std::env::args().nth(1).ok_or("missing filename")?;
+    let document = mupdf::document::Document::open(&filename)?;
+
+    // Running counter that keeps the output file names unique across pages.
+    let mut image_num: u32 = 0;
+
+    for page in document.pages()? {
+        // Ask for images to be kept so that `block.image()` below can yield them.
+        let text_page = page?.to_text_page(mupdf::text_page::TextPageOptions::PRESERVE_IMAGES)?;
+
+        for block in text_page.blocks() {
+            if let Some(image) = block.image() {
+                let pixmap = image.to_pixmap()?;
+                let mut bytes: Vec<u8> = vec![];
+                pixmap.write_to(&mut bytes, mupdf::pixmap::ImageFormat::PNG)?;
+
+                let mut output_file = std::fs::File::create(format!("output_{}.png", image_num))?;
+                output_file.write_all(&bytes)?;
+
+                image_num += 1;
+            }
+        }
+    }
+
+    Ok(())
+}
diff --git a/examples/extract_text.rs b/examples/extract_text.rs
new file mode 100644
index 0000000..5db52d5
--- /dev/null
+++ b/examples/extract_text.rs
@@ -0,0 +1,27 @@
+//! Prints the text of every line in a document to standard output.
+//!
+//! Usage: `cargo run --example extract_text -- <filename>`
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Report a missing argument as an error instead of panicking.
+    let filename = std::env::args().nth(1).ok_or("missing filename")?;
+    let document = mupdf::document::Document::open(&filename)?;
+
+    for page in document.pages()? {
+        let text_page = page?.to_text_page(mupdf::text_page::TextPageOptions::empty())?;
+
+        for block in text_page.blocks() {
+            for line in block.lines() {
+                // `char()` may yield no character; print U+FFFD in its place
+                // rather than aborting the whole extraction with a panic.
+                let chars: String = line
+                    .chars()
+                    .map(|c| c.char().unwrap_or(char::REPLACEMENT_CHARACTER))
+                    .collect();
+                println!("line: {}", chars);
+            }
+        }
+    }
+
+    Ok(())
+}