{-# LANGUAGE OverloadedStrings, TupleSections, ViewPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeFamilies #-}
module Text.HTML.TagStream.ByteString where

import qualified Blaze.ByteString.Builder as B
import           Control.Applicative
import           Control.Monad (unless)
import qualified Control.Monad.Fail as Fail
import           Data.Attoparsec.ByteString.Char8
import           Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as S
import           Data.Conduit
import qualified Data.Conduit.List as CL
import           Data.Default
import           Data.Functor.Identity (runIdentity)
import           Data.Monoid
import           Data.Text.Encoding
import qualified Text.XML.Stream.Parse as XML

import           Text.HTML.TagStream.Entities
import           Text.HTML.TagStream.Types
import           Text.HTML.TagStream.Utils (splitAccum)

type Token = Token' ByteString
type Attr = Attr' ByteString

{--
 - match quoted string, can fail.
 -}
quoted :: Char -> Parser ByteString
quoted :: Char -> Parser ByteString
quoted Char
q = ByteString -> ByteString -> ByteString
S.append (ByteString -> ByteString -> ByteString)
-> Parser ByteString
-> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'\\',Char
q))
                    Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Char -> Parser Char
char Char
q Parser Char -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ByteString -> Parser ByteString
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ByteString
""
                      Parser ByteString -> Parser ByteString -> Parser ByteString
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char Char
'\\' Parser Char -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Parser ByteString -> Parser ByteString
atLeast Int
1 (Char -> Parser ByteString
quoted Char
q) )

quotedOr :: Parser ByteString -> Parser ByteString
quotedOr :: Parser ByteString -> Parser ByteString
quotedOr Parser ByteString
p = Parser Char -> Parser (Maybe Char)
forall a. Parser a -> Parser (Maybe a)
maybeP ((Char -> Bool) -> Parser Char
satisfy ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'"',Char
'\''))) Parser (Maybe Char)
-> (Maybe Char -> Parser ByteString) -> Parser ByteString
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
             Parser ByteString
-> (Char -> Parser ByteString) -> Maybe Char -> Parser ByteString
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser ByteString
p Char -> Parser ByteString
quoted

{--
 - attribute value, can't fail.
 -}
attrValue :: Parser ByteString
attrValue :: Parser ByteString
attrValue = Parser ByteString -> Parser ByteString
quotedOr (Parser ByteString -> Parser ByteString)
-> Parser ByteString -> Parser ByteString
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Parser ByteString
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)

{--
 - attribute name, at least one char, can fail when meet tag end.
 - might match self-close tag end "/>" , make sure match `tagEnd' first.
 -}
attrName :: Parser ByteString
attrName :: Parser ByteString
attrName = Parser ByteString -> Parser ByteString
quotedOr (Parser ByteString -> Parser ByteString)
-> Parser ByteString -> Parser ByteString
forall a b. (a -> b) -> a -> b
$
             Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'>')
                    Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'/',Char
'>',Char
'=') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)

{--
 - tag end, return self-close or not, can fail.
 -}
tagEnd :: Parser Bool
tagEnd :: Parser Bool
tagEnd = Char -> Parser Char
char Char
'>' Parser Char -> Parser Bool -> Parser Bool
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
     Parser Bool -> Parser Bool -> Parser Bool
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
string ByteString
"/>" Parser ByteString -> Parser Bool -> Parser Bool
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True

{--
 - attribute pair or tag end, can fail if tag end met.
 -}
attr :: Parser Attr
attr :: Parser Attr
attr = (,) (ByteString -> ByteString -> Attr)
-> Parser ByteString -> Parser ByteString (ByteString -> Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
attrName Parser ByteString (ByteString -> Attr)
-> Parser ByteString () -> Parser ByteString (ByteString -> Attr)
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
skipSpace
           Parser ByteString (ByteString -> Attr)
-> Parser ByteString -> Parser Attr
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Parser Char -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (Char -> Parser Char
char Char
'=') Parser Bool -> (Bool -> Parser ByteString) -> Parser ByteString
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                 Parser ByteString -> Parser ByteString -> Bool -> Parser ByteString
forall a. a -> a -> Bool -> a
cond (Parser ByteString ()
skipSpace Parser ByteString () -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString
attrValue)
                      (ByteString -> Parser ByteString
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ByteString
"")
               )

{--
 - all attributes before tag end. can't fail.
 -}
attrs :: Parser ([Attr], Bool)
attrs :: Parser ([Attr], Bool)
attrs = [Attr] -> Parser ([Attr], Bool)
loop []
  where
    loop :: [Attr] -> Parser ([Attr], Bool)
loop [Attr]
acc = Parser ByteString ()
skipSpace Parser ByteString ()
-> Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Bool -> Either Bool Attr
forall a b. a -> Either a b
Left (Bool -> Either Bool Attr)
-> Parser Bool -> Parser ByteString (Either Bool Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bool
tagEnd Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Attr -> Either Bool Attr
forall a b. b -> Either a b
Right (Attr -> Either Bool Attr)
-> Parser Attr -> Parser ByteString (Either Bool Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Attr
attr) Parser ByteString (Either Bool Attr)
-> (Either Bool Attr -> Parser ([Attr], Bool))
-> Parser ([Attr], Bool)
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
               (Bool -> Parser ([Attr], Bool))
-> (Attr -> Parser ([Attr], Bool))
-> Either Bool Attr
-> Parser ([Attr], Bool)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
                 (([Attr], Bool) -> Parser ([Attr], Bool)
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (([Attr], Bool) -> Parser ([Attr], Bool))
-> (Bool -> ([Attr], Bool)) -> Bool -> Parser ([Attr], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Attr] -> [Attr]
forall a. [a] -> [a]
reverse [Attr]
acc,))
                 ([Attr] -> Parser ([Attr], Bool)
loop ([Attr] -> Parser ([Attr], Bool))
-> (Attr -> [Attr]) -> Attr -> Parser ([Attr], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
:[Attr]
acc))

{--
 - comment tag without prefix.
 -}
comment :: Parser Token
comment :: Parser Token
comment = ByteString -> Token
forall s. s -> Token' s
Comment (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
comment'
  where comment' :: Parser ByteString
comment' = ByteString -> ByteString -> ByteString
S.append (ByteString -> ByteString -> ByteString)
-> Parser ByteString
-> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-')
                            Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( ByteString -> Parser ByteString
string ByteString
"-->" Parser ByteString -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ByteString -> Parser ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
""
                              Parser ByteString -> Parser ByteString -> Parser ByteString
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Int -> Parser ByteString -> Parser ByteString
atLeast Int
1 Parser ByteString
comment' )

{--
 - tags begine with <! , e.g. <!DOCTYPE ...>
 -}
special :: Parser Token
special :: Parser Token
special = ByteString -> ByteString -> Token
forall s. s -> s -> Token' s
Special
          (ByteString -> ByteString -> Token)
-> Parser ByteString -> Parser ByteString (ByteString -> Token)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace))
                       Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
                       Parser ByteString -> Parser ByteString () -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
skipSpace )
          Parser ByteString (ByteString -> Token)
-> Parser ByteString -> Parser Token
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') Parser Token -> Parser Char -> Parser Token
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'

{--
 - parse a tag, can fail.
 -}
tag :: Parser Token
tag :: Parser Token
tag = do
    TagType
t <-    ByteString -> Parser ByteString
string ByteString
"/"     Parser ByteString
-> Parser ByteString TagType -> Parser ByteString TagType
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser ByteString TagType
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeClose
        Parser ByteString TagType
-> Parser ByteString TagType -> Parser ByteString TagType
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
string ByteString
"!" Parser ByteString
-> Parser ByteString TagType -> Parser ByteString TagType
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser ByteString TagType
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeSpecial
        Parser ByteString TagType
-> Parser ByteString TagType -> Parser ByteString TagType
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TagType -> Parser ByteString TagType
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeNormal
    case TagType
t of
        TagType
TagTypeClose ->
            ByteString -> Token
forall s. s -> Token' s
TagClose (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>')
            Parser Token -> Parser Char -> Parser Token
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'
        TagType
TagTypeSpecial -> Parser ByteString -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (ByteString -> Parser ByteString
string ByteString
"--") Parser Bool -> (Bool -> Parser Token) -> Parser Token
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                          Parser Token -> Parser Token -> Bool -> Parser Token
forall a. a -> a -> Bool -> a
cond Parser Token
comment Parser Token
special
        TagType
TagTypeNormal -> do
            ByteString
name <- (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'<',Char
'>',Char
'/') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
            ([Attr]
as, Bool
close) <- Parser ([Attr], Bool)
attrs
            Token -> Parser Token
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ ByteString -> [Attr] -> Bool -> Token
forall s. s -> [Attr' s] -> Bool -> Token' s
TagOpen ByteString
name [Attr]
as Bool
close

{--
 - record incomplete tag for streamline processing.
 -}
incomplete :: Parser Token
incomplete :: Parser Token
incomplete = ByteString -> Token
forall s. s -> Token' s
Incomplete (ByteString -> Token)
-> (ByteString -> ByteString) -> ByteString -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ByteString -> ByteString
S.cons Char
'<' (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
takeByteString

{--
 - parse text node. consume at least one char, to make sure progress.
 -}
text :: Parser Token
text :: Parser Token
text = ByteString -> Token
forall s. s -> Token' s
Text (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser ByteString -> Parser ByteString
atLeast Int
1 ((Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'<'))

-- | Decode the HTML entities e.g. @&amp;@ in some text into @&@.
decodeEntitiesBS :: Monad m => Conduit Token m Token
decodeEntitiesBS :: forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesBS =
  Dec Builder ByteString -> Conduit Token m Token
forall (m :: * -> *) builder string.
(Monad m, Monoid builder, Monoid string, IsString string,
 Eq string) =>
Dec builder string -> Conduit (Token' string) m (Token' string)
decodeEntities
    Dec { decToS :: Builder -> ByteString
decToS     = Builder -> ByteString
B.toByteString
        , decBreak :: (Char -> Bool) -> ByteString -> Attr
decBreak   = (Char -> Bool) -> ByteString -> Attr
S.break
        , decBuilder :: ByteString -> Builder
decBuilder = ByteString -> Builder
B.fromByteString
        , decDrop :: Int -> ByteString -> ByteString
decDrop    = Int -> ByteString -> ByteString
S.drop
        , decEntity :: ByteString -> Maybe ByteString
decEntity  = ByteString -> Maybe ByteString
forall {f :: * -> *}. MonadThrow f => ByteString -> f ByteString
decodeEntity
        , decUncons :: ByteString -> Maybe (Char, ByteString)
decUncons  = ByteString -> Maybe (Char, ByteString)
S.uncons }
  where decodeEntity :: ByteString -> f ByteString
decodeEntity ByteString
entity =
          (Text -> ByteString) -> f Text -> f ByteString
forall a b. (a -> b) -> f a -> f b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> ByteString
encodeUtf8
          (f Text -> f ByteString) -> f Text -> f ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ConduitT () ByteString f ()
forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
CL.sourceList [ByteString
"&",ByteString
entity,ByteString
";"]
          ConduitT () ByteString f ()
-> ConduitT ByteString Event f () -> ConduitT () Event f ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
$= ParseSettings -> ConduitT ByteString Event f ()
forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT ByteString Event m ()
XML.parseBytes ParseSettings
forall a. Default a => a
def { psDecodeEntities :: DecodeEntities
XML.psDecodeEntities = DecodeEntities
XML.decodeHtmlEntities }
          ConduitT () Event f () -> Sink Event f Text -> f Text
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Sink Event f Text
forall (m :: * -> *) o. MonadThrow m => ConduitT Event o m Text
XML.content

token :: Parser Token
token :: Parser Token
token = Char -> Parser Char
char Char
'<' Parser Char -> Parser Token -> Parser Token
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser Token
tag Parser Token -> Parser Token -> Parser Token
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
incomplete)
    Parser Token -> Parser Token -> Parser Token
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
text

{--
 - treat script tag specially, can't fail.
 -}
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd Token
t = [Token] -> [Token]
forall a. [a] -> [a]
reverse ([Token] -> [Token]) -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Token] -> Parser [Token]
loop [Token
t]
              Parser [Token] -> Parser [Token] -> Parser [Token]
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[]) (Token -> [Token])
-> (ByteString -> Token) -> ByteString -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Token
forall s. s -> Token' s
Incomplete (ByteString -> Token)
-> (ByteString -> ByteString) -> ByteString -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString -> ByteString
S.append ByteString
script (ByteString -> [Token]) -> Parser ByteString -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
takeByteString
  where
    script :: ByteString
script = Builder -> ByteString
B.toByteString (Builder -> ByteString) -> Builder -> ByteString
forall a b. (a -> b) -> a -> b
$ (ByteString -> ByteString) -> Token -> Builder
showToken ByteString -> ByteString
forall a. a -> a
id Token
t
    loop :: [Token] -> Parser [Token]
loop [Token]
acc = (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[Token]
acc) (Token -> [Token]) -> Parser Token -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Token
scriptEnd
           Parser [Token] -> Parser [Token] -> Parser [Token]
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Token
text Parser Token -> (Token -> Parser [Token]) -> Parser [Token]
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Token] -> Parser [Token]
loop ([Token] -> Parser [Token])
-> (Token -> [Token]) -> Token -> Parser [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[Token]
acc))
    scriptEnd :: Parser Token
scriptEnd = ByteString -> Parser ByteString
string ByteString
"</script>" Parser ByteString -> Parser Token -> Parser Token
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Token -> Parser Token
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Token
forall s. s -> Token' s
TagClose ByteString
"script")

html :: Parser [Token]
html :: Parser [Token]
html = Parser [Token]
tokens Parser [Token] -> Parser [Token] -> Parser [Token]
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Token] -> Parser [Token]
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
  where
    tokens :: Parser [Token]
    tokens :: Parser [Token]
tokens = do
        Token
t <- Parser Token
token
        case Token
t of
            (TagOpen ByteString
name [Attr]
_ Bool
close)
              | Bool -> Bool
not Bool
close Bool -> Bool -> Bool
&& ByteString
nameByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
==ByteString
"script"
                -> [Token] -> [Token] -> [Token]
forall a. [a] -> [a] -> [a]
(++) ([Token] -> [Token] -> [Token])
-> Parser [Token] -> Parser ByteString ([Token] -> [Token])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Token -> Parser [Token]
tillScriptEnd Token
t Parser ByteString ([Token] -> [Token])
-> Parser [Token] -> Parser [Token]
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser [Token]
html
            Token
_ -> (Token
tToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:) ([Token] -> [Token]) -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser [Token]
html

decode :: ByteString -> Either String [Token]
decode :: ByteString -> Either String [Token]
decode = ([Token] -> [Token])
-> Either String [Token] -> Either String [Token]
forall a b. (a -> b) -> Either String a -> Either String b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Token] -> [Token]
forall {t :: * -> *}. Foldable t => t Token -> [Token]
decodeEntitiesBS' (Either String [Token] -> Either String [Token])
-> (ByteString -> Either String [Token])
-> ByteString
-> Either String [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser [Token] -> ByteString -> Either String [Token]
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Token]
html
  where
    decodeEntitiesBS' :: t Token -> [Token]
decodeEntitiesBS' t Token
tokens = Identity [Token] -> [Token]
forall a. Identity a -> a
runIdentity (Identity [Token] -> [Token]) -> Identity [Token] -> [Token]
forall a b. (a -> b) -> a -> b
$ (Token -> ConduitT () Token Identity ())
-> t Token -> ConduitT () Token Identity ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Token -> ConduitT () Token Identity ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield t Token
tokens ConduitT () Token Identity ()
-> Sink Token Identity [Token] -> Identity [Token]
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Conduit Token Identity Token
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesBS Conduit Token Identity Token
-> Sink Token Identity [Token] -> Sink Token Identity [Token]
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$ Sink Token Identity [Token]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
CL.consume


{--
 - Utils {{{
 -}

atLeast :: Int -> Parser ByteString -> Parser ByteString
atLeast :: Int -> Parser ByteString -> Parser ByteString
atLeast Int
0 Parser ByteString
p = Parser ByteString
p
atLeast Int
n Parser ByteString
p = Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> Parser ByteString -> Parser ByteString
atLeast (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Parser ByteString
p

cond :: a -> a -> Bool -> a
cond :: forall a. a -> a -> Bool -> a
cond a
a1 a
a2 Bool
b = if Bool
b then a
a1 else a
a2

(||.) :: Applicative f => f Bool -> f Bool -> f Bool
||. :: forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
(||.) = (Bool -> Bool -> Bool) -> f Bool -> f Bool -> f Bool
forall a b c. (a -> b -> c) -> f a -> f b -> f c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Bool -> Bool -> Bool
(||)

in2 :: Eq a => (a,a) -> a -> Bool
in2 :: forall a. Eq a => (a, a) -> a -> Bool
in2 (a
a1,a
a2) a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2

in3 :: Eq a => (a,a,a) -> a -> Bool
in3 :: forall a. Eq a => (a, a, a) -> a -> Bool
in3 (a
a1,a
a2,a
a3) a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a3

boolP :: Parser a -> Parser Bool
boolP :: forall a. Parser a -> Parser Bool
boolP Parser a
p = Parser a
p Parser a -> Parser Bool -> Parser Bool
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True Parser Bool -> Parser Bool -> Parser Bool
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool -> Parser Bool
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False

maybeP :: Parser a -> Parser (Maybe a)
maybeP :: forall a. Parser a -> Parser (Maybe a)
maybeP Parser a
p = a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> Parser a -> Parser ByteString (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p Parser ByteString (Maybe a)
-> Parser ByteString (Maybe a) -> Parser ByteString (Maybe a)
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe a -> Parser ByteString (Maybe a)
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing
-- }}}

-- {{{ encode tokens
cc :: [ByteString] -> B.Builder
cc :: [ByteString] -> Builder
cc = [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> ([ByteString] -> [Builder]) -> [ByteString] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Builder) -> [ByteString] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Builder
B.fromByteString

showToken :: (ByteString -> ByteString) -> Token -> B.Builder
showToken :: (ByteString -> ByteString) -> Token -> Builder
showToken ByteString -> ByteString
hl (TagOpen ByteString
name [Attr]
as Bool
close) =
    [ByteString] -> Builder
cc ([ByteString] -> Builder) -> [ByteString] -> Builder
forall a b. (a -> b) -> a -> b
$ [ByteString -> ByteString
hl ByteString
"<", ByteString
name]
      [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ (Attr -> ByteString) -> [Attr] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Attr -> ByteString
showAttr [Attr]
as
      [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString -> ByteString
hl (if Bool
close then ByteString
"/>" else ByteString
">")]
  where
    showAttr :: Attr -> ByteString
    showAttr :: Attr -> ByteString
showAttr (ByteString
key, ByteString
value) = [ByteString] -> ByteString
S.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString
" ", ByteString
key, ByteString -> ByteString
hl ByteString
"=\""] [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ (Char -> ByteString) -> String -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Char -> ByteString
escape (ByteString -> String
S.unpack ByteString
value) [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString -> ByteString
hl ByteString
"\""]
    escape :: Char -> ByteString
escape Char
'"' = ByteString
"\\\""
    escape Char
'\\' = ByteString
"\\\\"
    escape Char
c = Char -> ByteString
S.singleton Char
c
showToken ByteString -> ByteString
hl (TagClose ByteString
name) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl ByteString
"</", ByteString
name, ByteString -> ByteString
hl ByteString
">"]
showToken ByteString -> ByteString
_ (Text ByteString
s) = ByteString -> Builder
B.fromByteString ByteString
s
showToken ByteString -> ByteString
hl (Comment ByteString
s) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl ByteString
"<!--", ByteString
s, ByteString -> ByteString
hl ByteString
"-->"]
showToken ByteString -> ByteString
hl (Special ByteString
name ByteString
s) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl ByteString
"<!", ByteString
name, ByteString
" ", ByteString
s, ByteString -> ByteString
hl ByteString
">"]
showToken ByteString -> ByteString
_ (Incomplete ByteString
s) = ByteString -> Builder
B.fromByteString ByteString
s
-- }}}

-- {{{ Stream
tokenStream :: Fail.MonadFail m
#if MIN_VERSION_conduit(1, 0, 0)
            => Conduit ByteString m Token
#else
            => GInfConduit ByteString m Token
#endif
tokenStream :: forall (m :: * -> *). MonadFail m => Conduit ByteString m Token
tokenStream =
    ByteString -> ConduitT ByteString Token m ()
forall {m :: * -> *}.
MonadFail m =>
ByteString -> ConduitT ByteString Token m ()
loop ByteString
S.empty ConduitT ByteString Token m ()
-> ConduitT Token Token m () -> ConduitT ByteString Token m ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$= ConduitT Token Token m ()
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesBS
  where
#if MIN_VERSION_conduit(1, 0, 0)
    loop :: ByteString -> ConduitT ByteString Token m ()
loop ByteString
accum = ConduitT ByteString Token m (Maybe ByteString)
forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await ConduitT ByteString Token m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Token m ())
-> ConduitT ByteString Token m ()
forall a b.
ConduitT ByteString Token m a
-> (a -> ConduitT ByteString Token m b)
-> ConduitT ByteString Token m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT ByteString Token m ()
-> (ByteString -> ConduitT ByteString Token m ())
-> Maybe ByteString
-> ConduitT ByteString Token m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ByteString -> () -> ConduitT ByteString Token m ()
forall {m :: * -> *} {b} {i}.
Monad m =>
ByteString -> b -> ConduitT i Token m b
close ByteString
accum ()) (ByteString -> ByteString -> ConduitT ByteString Token m ()
push ByteString
accum)
#else
    loop accum = awaitE >>= either (close accum) (push accum)
#endif

    push :: ByteString -> ByteString -> ConduitT ByteString Token m ()
push ByteString
accum ByteString
input =
        case Parser [Token] -> ByteString -> Either String [Token]
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Token]
html (ByteString
accum ByteString -> ByteString -> ByteString
`S.append` ByteString
input) of
            Right ([Token] -> (ByteString, [Token])
forall s. Monoid s => [Token' s] -> (s, [Token' s])
splitAccum -> (ByteString
accum', [Token]
tokens)) -> (Token -> ConduitT ByteString Token m ())
-> [Token] -> ConduitT ByteString Token m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Token -> ConduitT ByteString Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield [Token]
tokens ConduitT ByteString Token m ()
-> ConduitT ByteString Token m () -> ConduitT ByteString Token m ()
forall a b.
ConduitT ByteString Token m a
-> ConduitT ByteString Token m b -> ConduitT ByteString Token m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString -> ConduitT ByteString Token m ()
loop ByteString
accum'
            Left String
err -> String -> ConduitT ByteString Token m ()
forall a. String -> ConduitT ByteString Token m a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
err

    close :: ByteString -> b -> ConduitT i Token m b
close ByteString
s b
r = do
        Bool -> ConduitT i Token m () -> ConduitT i Token m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
s) (ConduitT i Token m () -> ConduitT i Token m ())
-> ConduitT i Token m () -> ConduitT i Token m ()
forall a b. (a -> b) -> a -> b
$ Token -> ConduitT i Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Token -> ConduitT i Token m ()) -> Token -> ConduitT i Token m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Token
forall s. s -> Token' s
Text ByteString
s
        b -> ConduitT i Token m b
forall a. a -> ConduitT i Token m a
forall (m :: * -> *) a. Monad m => a -> m a
return b
r

-- }}}