Skip to content

Commit

Permalink
Web.Pandoc: refactor reader selection
Browse files Browse the repository at this point in the history
Input file formats are no longer restricted to the variants of a union
on Hakyll's side (Web.Pandoc.FileType) which must be updated whenever
Pandoc adds a new input format. Instead, the getReader function from
Pandoc is used, in conjunction with a file-extension-synonym mapping
similar to the one used by Pandoc's command-line application.

(TODO raise an issue on Pandoc to factor their filename-to-reader/writer
translation into a public API.)

With the recent addition of non-textual input formats, there is now also
a split between String and Lazy ByteString input; pandocCompiler handles
this under-the-hood, and for other usecases there is a new function
readPandocLBSWith, whose input must be from Compiler.getResourceLBS
instead of Compiler.getResourceBody.

Since the only lossless reading of binary data is to read it immediately
to a ByteString, it would have been ideal for readPandoc and
readPandocWith to have accepted "Item ByteString", but changing this
would be even more breaking than removing FileType.

Breaks anyone who depended on FileType.
  • Loading branch information
mmirate committed Jul 10, 2017
1 parent a0a819a commit 12ad66e
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 129 deletions.
2 changes: 0 additions & 2 deletions hakyll.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ Library
Hakyll.Web.Paginate
Hakyll.Web.Pandoc
Hakyll.Web.Pandoc.Biblio
Hakyll.Web.Pandoc.FileType
Hakyll.Web.Redirect
Hakyll.Web.Tags
Hakyll.Web.Template
Expand Down Expand Up @@ -236,7 +235,6 @@ Test-suite hakyll-tests
Hakyll.Web.CompressCss.Tests
Hakyll.Web.Html.RelativizeUrls.Tests
Hakyll.Web.Html.Tests
Hakyll.Web.Pandoc.FileType.Tests
Hakyll.Web.Template.Context.Tests
Hakyll.Web.Template.Tests
TestSuite.Util
Expand Down
2 changes: 0 additions & 2 deletions lib/Hakyll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ module Hakyll
, module Hakyll.Web.Pandoc
, module Hakyll.Web.Paginate
, module Hakyll.Web.Pandoc.Biblio
, module Hakyll.Web.Pandoc.FileType
, module Hakyll.Web.Redirect
, module Hakyll.Web.Tags
, module Hakyll.Web.Template
Expand Down Expand Up @@ -54,7 +53,6 @@ import Hakyll.Web.Html.RelativizeUrls
import Hakyll.Web.Paginate
import Hakyll.Web.Pandoc
import Hakyll.Web.Pandoc.Biblio
import Hakyll.Web.Pandoc.FileType
import Hakyll.Web.Redirect
import Hakyll.Web.Tags
import Hakyll.Web.Template
Expand Down
134 changes: 111 additions & 23 deletions lib/Hakyll/Web/Pandoc.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ module Hakyll.Web.Pandoc
( -- * The basic building blocks
readPandoc
, readPandocWith
, readPandocLBSWith
, writePandoc
, writePandocWith
, renderPandoc
Expand All @@ -23,14 +24,20 @@ module Hakyll.Web.Pandoc

--------------------------------------------------------------------------------
import qualified Data.Set as S
import Data.Char (toLower)
import Data.Bifunctor (second)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TLE
import Text.Pandoc
import Text.Pandoc.Error (PandocError (..))
import System.FilePath (splitExtension)


--------------------------------------------------------------------------------
import Hakyll.Core.Compiler
import Hakyll.Core.Identifier
import Hakyll.Core.Item
import Hakyll.Web.Pandoc.FileType


--------------------------------------------------------------------------------
Expand All @@ -40,6 +47,72 @@ readPandoc
-> Compiler (Item Pandoc) -- ^ Resulting document
readPandoc = readPandocWith defaultHakyllReaderOptions

--------------------------------------------------------------------------------
-- | Get the pandoc reader/writer name for a filename, determined by extension.
-- Adapted from https://github.com/jgm/pandoc/blob/1.19.2.1/pandoc.hs#L1015
defaultRWName :: String -> FilePath -> String
defaultRWName fallback =
uncurry defaultRWName' . second (map toLower) . splitExtension
where
defaultRWName' _ ".adoc" = "asciidoc"
defaultRWName' _ ".ctx" = "context"
defaultRWName' _ ".db" = "docbook"
defaultRWName' _ ".dbk" = "docbook" -- Hakyll backwards compatibility
defaultRWName' _ ".htm" = "html"
defaultRWName' _ ".xhtml" = "html"
defaultRWName' f ".lhs" = (++"+lhs") $
case defaultRWName fallback f of
-- If no extension is given, default to Markdown + LiterateHaskell
"" -> "markdown"
-- Otherwise, LaTeX + LiterateHaskell or whatever the user specified
x -> x
defaultRWName' _ ".md" = "markdown"
defaultRWName' _ ".mdn" = "markdown" -- Hakyll backwards compatibility
defaultRWName' _ ".mdown" = "markdown" -- Hakyll backwards compatibility
defaultRWName' _ ".mdwn" = "markdown" -- Hakyll backwards compatibility
defaultRWName' _ ".mkd" = "markdown" -- Hakyll backwards compatibility
defaultRWName' _ ".mkdwn" = "markdown" -- Hakyll backwards compatibility
defaultRWName' _ ".page" = "markdown"
--defaultRWName' _ ".pdf" = "latex"
defaultRWName' _ ".roff" = "ms"
defaultRWName' _ ".tex" = "latex"
defaultRWName' _ ".ltx" = "latex"
defaultRWName' _ ".texi" = "texinfo"
defaultRWName' _ ".text" = "plain"
defaultRWName' _ ".txt" = "plain"
defaultRWName' _ ".wiki" = "mediawiki"
defaultRWName' _ ['.',y] | y `elem` ['1'..'9'] = "man"
defaultRWName' _ ('.':xs) = xs
defaultRWName' _ _ = fallback

--------------------------------------------------------------------------------
defaultReaderName :: Identifier -> String
defaultReaderName = defaultRWName "markdown" . toFilePath

--------------------------------------------------------------------------------
--defaultWriterName :: Identifier -> String
--defaultWriterName = defaultRWName "html" . toFilePath

--------------------------------------------------------------------------------
getReaderForIdentifier :: Identifier -> (String, Reader)
getReaderForIdentifier ident =
let readerName = defaultReaderName ident in
case getReader readerName of
Left _ -> error $
"Hakyll.Web.readPandocWith: I don't know how to read a file of " ++
"the type " ++ show readerName ++ " for: " ++ show ident
Right x -> (readerName, x)

--------------------------------------------------------------------------------
-- | Transform the Either monad to the Compiler monad
processPandocResult :: Item (Either PandocError Pandoc) -> Compiler (Item Pandoc)
processPandocResult = traverse inner
where
inner (Left (ParseFailure err)) = fail $
"Hakyll.Web.Pandoc.readPandocWith: parse failed: " ++ err
inner (Left (ParsecError _ err)) = fail $
"Hakyll.Web.Pandoc.readPandocWith: parse failed: " ++ show err
inner (Right item') = return item'

--------------------------------------------------------------------------------
-- | Read a string using pandoc, with the supplied options
Expand All @@ -48,29 +121,32 @@ readPandocWith
-> Item String -- ^ String to read
-> Compiler (Item Pandoc) -- ^ Resulting document
readPandocWith ropt item =
case traverse (reader ropt (itemFileType item)) item of
Left (ParseFailure err) -> fail $
"Hakyll.Web.Pandoc.readPandocWith: parse failed: " ++ err
Left (ParsecError _ err) -> fail $
"Hakyll.Web.Pandoc.readPandocWith: parse failed: " ++ show err
Right item' -> return item'
processPandocResult =<<
(unsafeCompiler $ traverse (reader ropt (itemIdentifier item)) item)
where
reader ro t = case t of
DocBook -> readDocBook ro
Html -> readHtml ro
LaTeX -> readLaTeX ro
LiterateHaskell t' -> reader (addExt ro Ext_literate_haskell) t'
Markdown -> readMarkdown ro
MediaWiki -> readMediaWiki ro
OrgMode -> readOrg ro
Rst -> readRST ro
Textile -> readTextile ro
_ -> error $
"Hakyll.Web.readPandocWith: I don't know how to read a file of " ++
"the type " ++ show t ++ " for: " ++ show (itemIdentifier item)

addExt ro e = ro {readerExtensions = S.insert e $ readerExtensions ro}
reader ro i = case getReaderForIdentifier i of
(_, StringReader r) -> r ro
(ext, ByteStringReader _) -> error $
"Hakyll.Web.readPandocWith: files of the type " ++ (show ext) ++
" must be read by the ByteString-capable reader, for: " ++ (show i)
-- graceful degradation is impossible, as the UTF8 encoding mangles
-- any invalid characters; at best, could try rereading the file?

--------------------------------------------------------------------------------
-- | Read a bytestring using pandoc, with the supplied options; gracefully
-- decodes the bytestring into a String whenever accidentally used for a
-- non-binary input format
readPandocLBSWith
:: ReaderOptions -- ^ Parser options
-> Item BL.ByteString -- ^ ByteString to read
-> Compiler (Item Pandoc) -- ^ Resulting document
readPandocLBSWith ropt item =
processPandocResult =<<
(unsafeCompiler $ traverse (reader ropt (itemIdentifier item)) item)
where
reader ro i = case getReaderForIdentifier i of
(_, StringReader r) -> r ro . TL.unpack . TLE.decodeUtf8
(_, ByteStringReader r) -> \bs -> (return . second fst) =<< r ro bs

--------------------------------------------------------------------------------
-- | Write a document (as HTML) using pandoc, with the default options
Expand Down Expand Up @@ -138,7 +214,19 @@ pandocCompilerWithTransformM :: ReaderOptions -> WriterOptions
-> Compiler (Item String)
pandocCompilerWithTransformM ropt wopt f =
writePandocWith wopt <$>
(traverse f =<< readPandocWith ropt =<< getResourceBody)
(traverse f =<< useUnderlyingReader)
where
getUnderlyingReader :: Compiler (Either String Reader)
getUnderlyingReader = getReader . defaultReaderName <$> getUnderlying
innerRead :: Either String Reader -> Compiler (Item Pandoc)
innerRead (Left _) =
readPandocLBSWith ropt =<< getResourceLBS
innerRead (Right (ByteStringReader _)) =
readPandocLBSWith ropt =<< getResourceLBS
innerRead (Right (StringReader _)) =
readPandocWith ropt =<< getResourceBody
useUnderlyingReader :: Compiler (Item Pandoc)
useUnderlyingReader = getUnderlyingReader >>= innerRead


--------------------------------------------------------------------------------
Expand Down
74 changes: 0 additions & 74 deletions lib/Hakyll/Web/Pandoc/FileType.hs

This file was deleted.

26 changes: 0 additions & 26 deletions tests/Hakyll/Web/Pandoc/FileType/Tests.hs

This file was deleted.

2 changes: 0 additions & 2 deletions tests/TestSuite.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ import qualified Hakyll.Core.Util.String.Tests
import qualified Hakyll.Web.CompressCss.Tests
import qualified Hakyll.Web.Html.RelativizeUrls.Tests
import qualified Hakyll.Web.Html.Tests
import qualified Hakyll.Web.Pandoc.FileType.Tests
import qualified Hakyll.Web.Template.Context.Tests
import qualified Hakyll.Web.Template.Tests

Expand All @@ -43,7 +42,6 @@ main = defaultMain $ testGroup "Hakyll"
, Hakyll.Web.CompressCss.Tests.tests
, Hakyll.Web.Html.RelativizeUrls.Tests.tests
, Hakyll.Web.Html.Tests.tests
, Hakyll.Web.Pandoc.FileType.Tests.tests
, Hakyll.Web.Template.Context.Tests.tests
, Hakyll.Web.Template.Tests.tests
]

0 comments on commit 12ad66e

Please sign in to comment.