diff --git a/src/Smalot/PdfParser/PDFObject.php b/src/Smalot/PdfParser/PDFObject.php index 2f8f0396..07aff537 100644 --- a/src/Smalot/PdfParser/PDFObject.php +++ b/src/Smalot/PdfParser/PDFObject.php @@ -187,13 +187,12 @@ public function cleanContent(?string $content): string // Now that all strings and dictionaries are hidden, the only // PDF commands left should all be plain text. - // Detect MIME-type of the current string and prevent reading + // Detect text encoding of the current string to prevent reading // content streams that are images, etc. This prevents PHP // error messages when JPEG content is sent to this function // by the sample file '12249.pdf' from: // https://github.com/smalot/pdfparser/issues/458 - $finfo = new \finfo(\FILEINFO_MIME); - if (false === strpos($finfo->buffer($content), 'text/plain')) { + if (false === mb_detect_encoding($content, null, true)) { return ''; }