diff --git a/hakyll.cabal b/hakyll.cabal index 969f50c4f..fea35bb0e 100644 --- a/hakyll.cabal +++ b/hakyll.cabal @@ -130,7 +130,6 @@ Library Hakyll.Web.Paginate Hakyll.Web.Pandoc Hakyll.Web.Pandoc.Biblio - Hakyll.Web.Pandoc.FileType Hakyll.Web.Redirect Hakyll.Web.Tags Hakyll.Web.Template @@ -236,7 +235,6 @@ Test-suite hakyll-tests Hakyll.Web.CompressCss.Tests Hakyll.Web.Html.RelativizeUrls.Tests Hakyll.Web.Html.Tests - Hakyll.Web.Pandoc.FileType.Tests Hakyll.Web.Template.Context.Tests Hakyll.Web.Template.Tests TestSuite.Util diff --git a/lib/Hakyll.hs b/lib/Hakyll.hs index 7b64bcb8b..e517e8c0d 100644 --- a/lib/Hakyll.hs +++ b/lib/Hakyll.hs @@ -23,7 +23,6 @@ module Hakyll , module Hakyll.Web.Pandoc , module Hakyll.Web.Paginate , module Hakyll.Web.Pandoc.Biblio - , module Hakyll.Web.Pandoc.FileType , module Hakyll.Web.Redirect , module Hakyll.Web.Tags , module Hakyll.Web.Template @@ -54,7 +53,6 @@ import Hakyll.Web.Html.RelativizeUrls import Hakyll.Web.Paginate import Hakyll.Web.Pandoc import Hakyll.Web.Pandoc.Biblio -import Hakyll.Web.Pandoc.FileType import Hakyll.Web.Redirect import Hakyll.Web.Tags import Hakyll.Web.Template diff --git a/lib/Hakyll/Web/Pandoc.hs b/lib/Hakyll/Web/Pandoc.hs index eec0a8a9a..25b975fdc 100644 --- a/lib/Hakyll/Web/Pandoc.hs +++ b/lib/Hakyll/Web/Pandoc.hs @@ -4,6 +4,7 @@ module Hakyll.Web.Pandoc ( -- * The basic building blocks readPandoc , readPandocWith + , readPandocLBSWith , writePandoc , writePandocWith , renderPandoc @@ -23,14 +24,20 @@ module Hakyll.Web.Pandoc -------------------------------------------------------------------------------- import qualified Data.Set as S +import Data.Char (toLower) +import Data.Bifunctor (second) +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TLE import Text.Pandoc import Text.Pandoc.Error (PandocError (..)) +import System.FilePath (splitExtension) -------------------------------------------------------------------------------- import Hakyll.Core.Compiler +import Hakyll.Core.Identifier import Hakyll.Core.Item -import Hakyll.Web.Pandoc.FileType -------------------------------------------------------------------------------- @@ -40,6 +47,72 @@ readPandoc -> Compiler (Item Pandoc) -- ^ Resulting document readPandoc = readPandocWith defaultHakyllReaderOptions +-------------------------------------------------------------------------------- +-- | Get the pandoc reader/writer name for a filename, determined by extension. +-- Adapted from https://github.com/jgm/pandoc/blob/1.19.2.1/pandoc.hs#L1015 +defaultRWName :: String -> FilePath -> String +defaultRWName fallback = + uncurry defaultRWName' . second (map toLower) . splitExtension + where + defaultRWName' _ ".adoc" = "asciidoc" + defaultRWName' _ ".ctx" = "context" + defaultRWName' _ ".db" = "docbook" + defaultRWName' _ ".dbk" = "docbook" -- Hakyll backwards compatibility + defaultRWName' _ ".htm" = "html" + defaultRWName' _ ".xhtml" = "html" + defaultRWName' f ".lhs" = (++"+lhs") $ + case defaultRWName fallback f of + -- If no extension is given, default to Markdown + LiterateHaskell + "" -> "markdown" + -- Otherwise, LaTeX + LiterateHaskell or whatever the user specified + x -> x + defaultRWName' _ ".md" = "markdown" + defaultRWName' _ ".mdn" = "markdown" -- Hakyll backwards compatibility + defaultRWName' _ ".mdown" = "markdown" -- Hakyll backwards compatibility + defaultRWName' _ ".mdwn" = "markdown" -- Hakyll backwards compatibility + defaultRWName' _ ".mkd" = "markdown" -- Hakyll backwards compatibility + defaultRWName' _ ".mkdwn" = "markdown" -- Hakyll backwards compatibility + defaultRWName' _ ".page" = "markdown" + --defaultRWName' _ ".pdf" = "latex" + defaultRWName' _ ".roff" = "ms" + defaultRWName' _ ".tex" = "latex" + defaultRWName' _ ".ltx" = "latex" + defaultRWName' _ ".texi" = "texinfo" + defaultRWName' _ ".text" = "plain" + defaultRWName' _ ".txt" = "plain" + defaultRWName' _ ".wiki" = "mediawiki" + defaultRWName' _ ['.',y] | y `elem` ['1'..'9'] = "man" + defaultRWName' _ ('.':xs) = xs + defaultRWName' _ _ = fallback + +-------------------------------------------------------------------------------- +defaultReaderName :: Identifier -> String +defaultReaderName = defaultRWName "markdown" . toFilePath + +-------------------------------------------------------------------------------- +--defaultWriterName :: Identifier -> String +--defaultWriterName = defaultRWName "html" . toFilePath + +-------------------------------------------------------------------------------- +getReaderForIdentifier :: Identifier -> (String, Reader) +getReaderForIdentifier ident = + let readerName = defaultReaderName ident in + case getReader readerName of + Left _ -> error $ + "Hakyll.Web.readPandocWith: I don't know how to read a file of " ++ + "the type " ++ show readerName ++ " for: " ++ show ident + Right x -> (readerName, x) + +-------------------------------------------------------------------------------- +-- | Transform the Either monad to the Compiler monad +processPandocResult :: Item (Either PandocError Pandoc) -> Compiler (Item Pandoc) +processPandocResult = traverse inner + where + inner (Left (ParseFailure err)) = fail $ + "Hakyll.Web.Pandoc.readPandocWith: parse failed: " ++ err + inner (Left (ParsecError _ err)) = fail $ + "Hakyll.Web.Pandoc.readPandocWith: parse failed: " ++ show err + inner (Right item') = return item' -------------------------------------------------------------------------------- -- | Read a string using pandoc, with the supplied options @@ -48,29 +121,32 @@ readPandocWith -> Item String -- ^ String to read -> Compiler (Item Pandoc) -- ^ Resulting document readPandocWith ropt item = - case traverse (reader ropt (itemFileType item)) item of - Left (ParseFailure err) -> fail $ - "Hakyll.Web.Pandoc.readPandocWith: parse failed: " ++ err - Left (ParsecError _ err) -> fail $ - "Hakyll.Web.Pandoc.readPandocWith: parse failed: " ++ show err - Right item' -> return item' + processPandocResult =<< + (unsafeCompiler $ traverse (reader ropt (itemIdentifier item)) item) where - reader ro t = case t of - DocBook -> readDocBook ro - Html -> readHtml ro - LaTeX -> readLaTeX ro - LiterateHaskell t' -> reader (addExt ro Ext_literate_haskell) t' - Markdown -> readMarkdown ro - MediaWiki -> readMediaWiki ro - OrgMode -> readOrg ro - Rst -> readRST ro - Textile -> readTextile ro - _ -> error $ - "Hakyll.Web.readPandocWith: I don't know how to read a file of " ++ - "the type " ++ show t ++ " for: " ++ show (itemIdentifier item) - - addExt ro e = ro {readerExtensions = S.insert e $ readerExtensions ro} + reader ro i = case getReaderForIdentifier i of + (_, StringReader r) -> r ro + (ext, ByteStringReader _) -> error $ + "Hakyll.Web.readPandocWith: files of the type " ++ (show ext) ++ + " must be read by the ByteString-capable reader, for: " ++ (show i) + -- graceful degradation is impossible, as the UTF8 encoding mangles + -- any invalid characters; at best, could try rereading the file? +-------------------------------------------------------------------------------- +-- | Read a bytestring using pandoc, with the supplied options; gracefully +-- decodes the bytestring into a String whenever accidentally used for a +-- non-binary input format +readPandocLBSWith + :: ReaderOptions -- ^ Parser options + -> Item BL.ByteString -- ^ ByteString to read + -> Compiler (Item Pandoc) -- ^ Resulting document +readPandocLBSWith ropt item = + processPandocResult =<< + (unsafeCompiler $ traverse (reader ropt (itemIdentifier item)) item) + where + reader ro i = case getReaderForIdentifier i of + (_, StringReader r) -> r ro . TL.unpack . TLE.decodeUtf8 + (_, ByteStringReader r) -> \bs -> (return . second fst) =<< r ro bs -------------------------------------------------------------------------------- -- | Write a document (as HTML) using pandoc, with the default options @@ -138,7 +214,19 @@ pandocCompilerWithTransformM :: ReaderOptions -> WriterOptions -> Compiler (Item String) pandocCompilerWithTransformM ropt wopt f = writePandocWith wopt <$> - (traverse f =<< readPandocWith ropt =<< getResourceBody) + (traverse f =<< useUnderlyingReader) + where + getUnderlyingReader :: Compiler (Either String Reader) + getUnderlyingReader = getReader . defaultReaderName <$> getUnderlying + innerRead :: Either String Reader -> Compiler (Item Pandoc) + innerRead (Left _) = + readPandocLBSWith ropt =<< getResourceLBS + innerRead (Right (ByteStringReader _)) = + readPandocLBSWith ropt =<< getResourceLBS + innerRead (Right (StringReader _)) = + readPandocWith ropt =<< getResourceBody + useUnderlyingReader :: Compiler (Item Pandoc) + useUnderlyingReader = getUnderlyingReader >>= innerRead -------------------------------------------------------------------------------- diff --git a/lib/Hakyll/Web/Pandoc/FileType.hs b/lib/Hakyll/Web/Pandoc/FileType.hs deleted file mode 100644 index 3636e41b2..000000000 --- a/lib/Hakyll/Web/Pandoc/FileType.hs +++ /dev/null @@ -1,74 +0,0 @@ --------------------------------------------------------------------------------- --- | A module dealing with pandoc file extensions and associated file types -module Hakyll.Web.Pandoc.FileType - ( FileType (..) - , fileType - , itemFileType - ) where - - --------------------------------------------------------------------------------- -import System.FilePath (splitExtension) - - --------------------------------------------------------------------------------- -import Hakyll.Core.Identifier -import Hakyll.Core.Item - - --------------------------------------------------------------------------------- --- | Datatype to represent the different file types Hakyll can deal with by --- default -data FileType - = Binary - | Css - | DocBook - | Html - | LaTeX - | LiterateHaskell FileType - | Markdown - | MediaWiki - | OrgMode - | PlainText - | Rst - | Textile - deriving (Eq, Ord, Show, Read) - - --------------------------------------------------------------------------------- --- | Get the file type for a certain file. The type is determined by extension. -fileType :: FilePath -> FileType -fileType = uncurry fileType' . splitExtension - where - fileType' _ ".css" = Css - fileType' _ ".dbk" = DocBook - fileType' _ ".htm" = Html - fileType' _ ".html" = Html - fileType' f ".lhs" = LiterateHaskell $ case fileType f of - -- If no extension is given, default to Markdown + LiterateHaskell - Binary -> Markdown - -- Otherwise, LaTeX + LiterateHaskell or whatever the user specified - x -> x - fileType' _ ".markdown" = Markdown - fileType' _ ".mediawiki" = MediaWiki - fileType' _ ".md" = Markdown - fileType' _ ".mdn" = Markdown - fileType' _ ".mdown" = Markdown - fileType' _ ".mdwn" = Markdown - fileType' _ ".mkd" = Markdown - fileType' _ ".mkdwn" = Markdown - fileType' _ ".org" = OrgMode - fileType' _ ".page" = Markdown - fileType' _ ".rst" = Rst - fileType' _ ".tex" = LaTeX - fileType' _ ".text" = PlainText - fileType' _ ".textile" = Textile - fileType' _ ".txt" = PlainText - fileType' _ ".wiki" = MediaWiki - fileType' _ _ = Binary -- Treat unknown files as binary - - --------------------------------------------------------------------------------- --- | Get the file type for the current file -itemFileType :: Item a -> FileType -itemFileType = fileType . toFilePath . itemIdentifier diff --git a/tests/Hakyll/Web/Pandoc/FileType/Tests.hs b/tests/Hakyll/Web/Pandoc/FileType/Tests.hs deleted file mode 100644 index 779710724..000000000 --- a/tests/Hakyll/Web/Pandoc/FileType/Tests.hs +++ /dev/null @@ -1,26 +0,0 @@ --------------------------------------------------------------------------------- -{-# LANGUAGE OverloadedStrings #-} -module Hakyll.Web.Pandoc.FileType.Tests - ( tests - ) where - - --------------------------------------------------------------------------------- -import Test.Tasty (TestTree, testGroup) -import Test.Tasty.HUnit ((@=?)) - - --------------------------------------------------------------------------------- -import Hakyll.Web.Pandoc.FileType -import TestSuite.Util - - --------------------------------------------------------------------------------- -tests :: TestTree -tests = testGroup "Hakyll.Web.Pandoc.FileType.Tests" $ - fromAssertions "fileType" - [ Markdown @=? fileType "index.md" - , Rst @=? fileType "about/foo.rst" - , LiterateHaskell Markdown @=? fileType "posts/bananas.lhs" - , LiterateHaskell LaTeX @=? fileType "posts/bananas.tex.lhs" - ] diff --git a/tests/TestSuite.hs b/tests/TestSuite.hs index 4260eabe5..4c62c2f8a 100644 --- a/tests/TestSuite.hs +++ b/tests/TestSuite.hs @@ -22,7 +22,6 @@ import qualified Hakyll.Core.Util.String.Tests import qualified Hakyll.Web.CompressCss.Tests import qualified Hakyll.Web.Html.RelativizeUrls.Tests import qualified Hakyll.Web.Html.Tests -import qualified Hakyll.Web.Pandoc.FileType.Tests import qualified Hakyll.Web.Template.Context.Tests import qualified Hakyll.Web.Template.Tests @@ -43,7 +42,6 @@ main = defaultMain $ testGroup "Hakyll" , Hakyll.Web.CompressCss.Tests.tests , Hakyll.Web.Html.RelativizeUrls.Tests.tests , Hakyll.Web.Html.Tests.tests - , Hakyll.Web.Pandoc.FileType.Tests.tests , Hakyll.Web.Template.Context.Tests.tests , Hakyll.Web.Template.Tests.tests ]