{-# LANGUAGE OverloadedStrings, TupleSections, ViewPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeFamilies #-}
module Text.HTML.TagStream.ByteString where
import qualified Blaze.ByteString.Builder as B
import Control.Applicative
import Control.Monad (unless)
import qualified Control.Monad.Fail as Fail
import Data.Attoparsec.ByteString.Char8
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.Conduit
import qualified Data.Conduit.List as CL
import Data.Default
import Data.Functor.Identity (runIdentity)
import Data.Monoid
import Data.Text.Encoding
import qualified Text.XML.Stream.Parse as XML
import Text.HTML.TagStream.Entities
import Text.HTML.TagStream.Types
import Text.HTML.TagStream.Utils (splitAccum)
type Token = Token' ByteString
type Attr = Attr' ByteString
quoted :: Char -> Parser ByteString
quoted :: Char -> Parser ByteString
quoted Char
q = ByteString -> ByteString -> ByteString
S.append (ByteString -> ByteString -> ByteString)
-> Parser ByteString
-> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'\\',Char
q))
Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Char -> Parser Char
char Char
q Parser Char -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ByteString -> Parser ByteString
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ByteString
""
Parser ByteString -> Parser ByteString -> Parser ByteString
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char Char
'\\' Parser Char -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Int -> Parser ByteString -> Parser ByteString
atLeast Int
1 (Char -> Parser ByteString
quoted Char
q) )
quotedOr :: Parser ByteString -> Parser ByteString
quotedOr :: Parser ByteString -> Parser ByteString
quotedOr Parser ByteString
p = Parser Char -> Parser (Maybe Char)
forall a. Parser a -> Parser (Maybe a)
maybeP ((Char -> Bool) -> Parser Char
satisfy ((Char, Char) -> Char -> Bool
forall a. Eq a => (a, a) -> a -> Bool
in2 (Char
'"',Char
'\''))) Parser (Maybe Char)
-> (Maybe Char -> Parser ByteString) -> Parser ByteString
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser ByteString
-> (Char -> Parser ByteString) -> Maybe Char -> Parser ByteString
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Parser ByteString
p Char -> Parser ByteString
quoted
attrValue :: Parser ByteString
attrValue :: Parser ByteString
attrValue = Parser ByteString -> Parser ByteString
quotedOr (Parser ByteString -> Parser ByteString)
-> Parser ByteString -> Parser ByteString
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Parser ByteString
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
attrName :: Parser ByteString
attrName :: Parser ByteString
attrName = Parser ByteString -> Parser ByteString
quotedOr (Parser ByteString -> Parser ByteString)
-> Parser ByteString -> Parser ByteString
forall a b. (a -> b) -> a -> b
$
Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'>')
Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'/',Char
'>',Char
'=') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
tagEnd :: Parser Bool
tagEnd :: Parser Bool
tagEnd = Char -> Parser Char
char Char
'>' Parser Char -> Parser Bool -> Parser Bool
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
Parser Bool -> Parser Bool -> Parser Bool
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
string ByteString
"/>" Parser ByteString -> Parser Bool -> Parser Bool
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True
attr :: Parser Attr
attr :: Parser Attr
attr = (,) (ByteString -> ByteString -> Attr)
-> Parser ByteString -> Parser ByteString (ByteString -> Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
attrName Parser ByteString (ByteString -> Attr)
-> Parser ByteString () -> Parser ByteString (ByteString -> Attr)
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
skipSpace
Parser ByteString (ByteString -> Attr)
-> Parser ByteString -> Parser Attr
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( Parser Char -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (Char -> Parser Char
char Char
'=') Parser Bool -> (Bool -> Parser ByteString) -> Parser ByteString
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser ByteString -> Parser ByteString -> Bool -> Parser ByteString
forall a. a -> a -> Bool -> a
cond (Parser ByteString ()
skipSpace Parser ByteString () -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString
attrValue)
(ByteString -> Parser ByteString
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ByteString
"")
)
attrs :: Parser ([Attr], Bool)
attrs :: Parser ([Attr], Bool)
attrs = [Attr] -> Parser ([Attr], Bool)
loop []
where
loop :: [Attr] -> Parser ([Attr], Bool)
loop [Attr]
acc = Parser ByteString ()
skipSpace Parser ByteString ()
-> Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Bool -> Either Bool Attr
forall a b. a -> Either a b
Left (Bool -> Either Bool Attr)
-> Parser Bool -> Parser ByteString (Either Bool Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Bool
tagEnd Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
-> Parser ByteString (Either Bool Attr)
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Attr -> Either Bool Attr
forall a b. b -> Either a b
Right (Attr -> Either Bool Attr)
-> Parser Attr -> Parser ByteString (Either Bool Attr)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Attr
attr) Parser ByteString (Either Bool Attr)
-> (Either Bool Attr -> Parser ([Attr], Bool))
-> Parser ([Attr], Bool)
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
(Bool -> Parser ([Attr], Bool))
-> (Attr -> Parser ([Attr], Bool))
-> Either Bool Attr
-> Parser ([Attr], Bool)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
(([Attr], Bool) -> Parser ([Attr], Bool)
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (([Attr], Bool) -> Parser ([Attr], Bool))
-> (Bool -> ([Attr], Bool)) -> Bool -> Parser ([Attr], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Attr] -> [Attr]
forall a. [a] -> [a]
reverse [Attr]
acc,))
([Attr] -> Parser ([Attr], Bool)
loop ([Attr] -> Parser ([Attr], Bool))
-> (Attr -> [Attr]) -> Attr -> Parser ([Attr], Bool)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Attr -> [Attr] -> [Attr]
forall a. a -> [a] -> [a]
:[Attr]
acc))
comment :: Parser Token
= ByteString -> Token
forall s. s -> Token' s
Comment (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
comment'
where comment' :: Parser ByteString
comment' = ByteString -> ByteString -> ByteString
S.append (ByteString -> ByteString -> ByteString)
-> Parser ByteString
-> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-')
Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> ( ByteString -> Parser ByteString
string ByteString
"-->" Parser ByteString -> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ByteString -> Parser ByteString
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return ByteString
""
Parser ByteString -> Parser ByteString -> Parser ByteString
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Int -> Parser ByteString -> Parser ByteString
atLeast Int
1 Parser ByteString
comment' )
special :: Parser Token
special :: Parser Token
special = ByteString -> ByteString -> Token
forall s. s -> s -> Token' s
Special
(ByteString -> ByteString -> Token)
-> Parser ByteString -> Parser ByteString (ByteString -> Token)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ( Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Char
satisfy (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'-') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace))
Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill ((Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
Parser ByteString -> Parser ByteString () -> Parser ByteString
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
skipSpace )
Parser ByteString (ByteString -> Token)
-> Parser ByteString -> Parser Token
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>') Parser Token -> Parser Char -> Parser Token
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'
tag :: Parser Token
tag :: Parser Token
tag = do
TagType
t <- ByteString -> Parser ByteString
string ByteString
"/" Parser ByteString
-> Parser ByteString TagType -> Parser ByteString TagType
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser ByteString TagType
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeClose
Parser ByteString TagType
-> Parser ByteString TagType -> Parser ByteString TagType
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Parser ByteString
string ByteString
"!" Parser ByteString
-> Parser ByteString TagType -> Parser ByteString TagType
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> TagType -> Parser ByteString TagType
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeSpecial
Parser ByteString TagType
-> Parser ByteString TagType -> Parser ByteString TagType
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TagType -> Parser ByteString TagType
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return TagType
TagTypeNormal
case TagType
t of
TagType
TagTypeClose ->
ByteString -> Token
forall s. s -> Token' s
TagClose (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'>')
Parser Token -> Parser Char -> Parser Token
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
'>'
TagType
TagTypeSpecial -> Parser ByteString -> Parser Bool
forall a. Parser a -> Parser Bool
boolP (ByteString -> Parser ByteString
string ByteString
"--") Parser Bool -> (Bool -> Parser Token) -> Parser Token
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
Parser Token -> Parser Token -> Bool -> Parser Token
forall a. a -> a -> Bool -> a
cond Parser Token
comment Parser Token
special
TagType
TagTypeNormal -> do
ByteString
name <- (Char -> Bool) -> Parser ByteString
takeTill ((Char, Char, Char) -> Char -> Bool
forall a. Eq a => (a, a, a) -> a -> Bool
in3 (Char
'<',Char
'>',Char
'/') (Char -> Bool) -> (Char -> Bool) -> Char -> Bool
forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
||. Char -> Bool
isSpace)
([Attr]
as, Bool
close) <- Parser ([Attr], Bool)
attrs
Token -> Parser Token
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ ByteString -> [Attr] -> Bool -> Token
forall s. s -> [Attr' s] -> Bool -> Token' s
TagOpen ByteString
name [Attr]
as Bool
close
incomplete :: Parser Token
incomplete :: Parser Token
incomplete = ByteString -> Token
forall s. s -> Token' s
Incomplete (ByteString -> Token)
-> (ByteString -> ByteString) -> ByteString -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ByteString -> ByteString
S.cons Char
'<' (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
takeByteString
text :: Parser Token
text :: Parser Token
text = ByteString -> Token
forall s. s -> Token' s
Text (ByteString -> Token) -> Parser ByteString -> Parser Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Int -> Parser ByteString -> Parser ByteString
atLeast Int
1 ((Char -> Bool) -> Parser ByteString
takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'<'))
decodeEntitiesBS :: Monad m => Conduit Token m Token
decodeEntitiesBS :: forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesBS =
Dec Builder ByteString -> Conduit Token m Token
forall (m :: * -> *) builder string.
(Monad m, Monoid builder, Monoid string, IsString string,
Eq string) =>
Dec builder string -> Conduit (Token' string) m (Token' string)
decodeEntities
Dec { decToS :: Builder -> ByteString
decToS = Builder -> ByteString
B.toByteString
, decBreak :: (Char -> Bool) -> ByteString -> Attr
decBreak = (Char -> Bool) -> ByteString -> Attr
S.break
, decBuilder :: ByteString -> Builder
decBuilder = ByteString -> Builder
B.fromByteString
, decDrop :: Int -> ByteString -> ByteString
decDrop = Int -> ByteString -> ByteString
S.drop
, decEntity :: ByteString -> Maybe ByteString
decEntity = ByteString -> Maybe ByteString
forall {f :: * -> *}. MonadThrow f => ByteString -> f ByteString
decodeEntity
, decUncons :: ByteString -> Maybe (Char, ByteString)
decUncons = ByteString -> Maybe (Char, ByteString)
S.uncons }
where decodeEntity :: ByteString -> f ByteString
decodeEntity ByteString
entity =
(Text -> ByteString) -> f Text -> f ByteString
forall a b. (a -> b) -> f a -> f b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> ByteString
encodeUtf8
(f Text -> f ByteString) -> f Text -> f ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ConduitT () ByteString f ()
forall (m :: * -> *) a i. Monad m => [a] -> ConduitT i a m ()
CL.sourceList [ByteString
"&",ByteString
entity,ByteString
";"]
ConduitT () ByteString f ()
-> ConduitT ByteString Event f () -> ConduitT () Event f ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
$= ParseSettings -> ConduitT ByteString Event f ()
forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT ByteString Event m ()
XML.parseBytes ParseSettings
forall a. Default a => a
def { psDecodeEntities :: DecodeEntities
XML.psDecodeEntities = DecodeEntities
XML.decodeHtmlEntities }
ConduitT () Event f () -> Sink Event f Text -> f Text
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Sink Event f Text
forall (m :: * -> *) o. MonadThrow m => ConduitT Event o m Text
XML.content
token :: Parser Token
token :: Parser Token
token = Char -> Parser Char
char Char
'<' Parser Char -> Parser Token -> Parser Token
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Parser Token
tag Parser Token -> Parser Token -> Parser Token
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
incomplete)
Parser Token -> Parser Token -> Parser Token
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
text
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd :: Token -> Parser [Token]
tillScriptEnd Token
t = [Token] -> [Token]
forall a. [a] -> [a]
reverse ([Token] -> [Token]) -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Token] -> Parser [Token]
loop [Token
t]
Parser [Token] -> Parser [Token] -> Parser [Token]
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[]) (Token -> [Token])
-> (ByteString -> Token) -> ByteString -> [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Token
forall s. s -> Token' s
Incomplete (ByteString -> Token)
-> (ByteString -> ByteString) -> ByteString -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString -> ByteString
S.append ByteString
script (ByteString -> [Token]) -> Parser ByteString -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
takeByteString
where
script :: ByteString
script = Builder -> ByteString
B.toByteString (Builder -> ByteString) -> Builder -> ByteString
forall a b. (a -> b) -> a -> b
$ (ByteString -> ByteString) -> Token -> Builder
showToken ByteString -> ByteString
forall a. a -> a
id Token
t
loop :: [Token] -> Parser [Token]
loop [Token]
acc = (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[Token]
acc) (Token -> [Token]) -> Parser Token -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Token
scriptEnd
Parser [Token] -> Parser [Token] -> Parser [Token]
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Token
text Parser Token -> (Token -> Parser [Token]) -> Parser [Token]
forall a b.
Parser ByteString a
-> (a -> Parser ByteString b) -> Parser ByteString b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Token] -> Parser [Token]
loop ([Token] -> Parser [Token])
-> (Token -> [Token]) -> Token -> Parser [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Token -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:[Token]
acc))
scriptEnd :: Parser Token
scriptEnd = ByteString -> Parser ByteString
string ByteString
"</script>" Parser ByteString -> Parser Token -> Parser Token
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Token -> Parser Token
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Token
forall s. s -> Token' s
TagClose ByteString
"script")
html :: Parser [Token]
html :: Parser [Token]
html = Parser [Token]
tokens Parser [Token] -> Parser [Token] -> Parser [Token]
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Token] -> Parser [Token]
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure []
where
tokens :: Parser [Token]
tokens :: Parser [Token]
tokens = do
Token
t <- Parser Token
token
case Token
t of
(TagOpen ByteString
name [Attr]
_ Bool
close)
| Bool -> Bool
not Bool
close Bool -> Bool -> Bool
&& ByteString
nameByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
==ByteString
"script"
-> [Token] -> [Token] -> [Token]
forall a. [a] -> [a] -> [a]
(++) ([Token] -> [Token] -> [Token])
-> Parser [Token] -> Parser ByteString ([Token] -> [Token])
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Token -> Parser [Token]
tillScriptEnd Token
t Parser ByteString ([Token] -> [Token])
-> Parser [Token] -> Parser [Token]
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser [Token]
html
Token
_ -> (Token
tToken -> [Token] -> [Token]
forall a. a -> [a] -> [a]
:) ([Token] -> [Token]) -> Parser [Token] -> Parser [Token]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser [Token]
html
decode :: ByteString -> Either String [Token]
decode :: ByteString -> Either String [Token]
decode = ([Token] -> [Token])
-> Either String [Token] -> Either String [Token]
forall a b. (a -> b) -> Either String a -> Either String b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Token] -> [Token]
forall {t :: * -> *}. Foldable t => t Token -> [Token]
decodeEntitiesBS' (Either String [Token] -> Either String [Token])
-> (ByteString -> Either String [Token])
-> ByteString
-> Either String [Token]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser [Token] -> ByteString -> Either String [Token]
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Token]
html
where
decodeEntitiesBS' :: t Token -> [Token]
decodeEntitiesBS' t Token
tokens = Identity [Token] -> [Token]
forall a. Identity a -> a
runIdentity (Identity [Token] -> [Token]) -> Identity [Token] -> [Token]
forall a b. (a -> b) -> a -> b
$ (Token -> ConduitT () Token Identity ())
-> t Token -> ConduitT () Token Identity ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Token -> ConduitT () Token Identity ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield t Token
tokens ConduitT () Token Identity ()
-> Sink Token Identity [Token] -> Identity [Token]
forall (m :: * -> *) a b.
Monad m =>
Source m a -> Sink a m b -> m b
$$ Conduit Token Identity Token
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesBS Conduit Token Identity Token
-> Sink Token Identity [Token] -> Sink Token Identity [Token]
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$ Sink Token Identity [Token]
forall (m :: * -> *) a o. Monad m => ConduitT a o m [a]
CL.consume
atLeast :: Int -> Parser ByteString -> Parser ByteString
atLeast :: Int -> Parser ByteString -> Parser ByteString
atLeast Int
0 Parser ByteString
p = Parser ByteString
p
atLeast Int
n Parser ByteString
p = Char -> ByteString -> ByteString
S.cons (Char -> ByteString -> ByteString)
-> Parser Char -> Parser ByteString (ByteString -> ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Char
anyChar Parser ByteString (ByteString -> ByteString)
-> Parser ByteString -> Parser ByteString
forall a b.
Parser ByteString (a -> b)
-> Parser ByteString a -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> Parser ByteString -> Parser ByteString
atLeast (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Parser ByteString
p
cond :: a -> a -> Bool -> a
cond :: forall a. a -> a -> Bool -> a
cond a
a1 a
a2 Bool
b = if Bool
b then a
a1 else a
a2
(||.) :: Applicative f => f Bool -> f Bool -> f Bool
||. :: forall (f :: * -> *). Applicative f => f Bool -> f Bool -> f Bool
(||.) = (Bool -> Bool -> Bool) -> f Bool -> f Bool -> f Bool
forall a b c. (a -> b -> c) -> f a -> f b -> f c
forall (f :: * -> *) a b c.
Applicative f =>
(a -> b -> c) -> f a -> f b -> f c
liftA2 Bool -> Bool -> Bool
(||)
in2 :: Eq a => (a,a) -> a -> Bool
in2 :: forall a. Eq a => (a, a) -> a -> Bool
in2 (a
a1,a
a2) a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2
in3 :: Eq a => (a,a,a) -> a -> Bool
in3 :: forall a. Eq a => (a, a, a) -> a -> Bool
in3 (a
a1,a
a2,a
a3) a
a = a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a1 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a2 Bool -> Bool -> Bool
|| a
aa -> a -> Bool
forall a. Eq a => a -> a -> Bool
==a
a3
boolP :: Parser a -> Parser Bool
boolP :: forall a. Parser a -> Parser Bool
boolP Parser a
p = Parser a
p Parser a -> Parser Bool -> Parser Bool
forall a b.
Parser ByteString a -> Parser ByteString b -> Parser ByteString b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Bool -> Parser Bool
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
True Parser Bool -> Parser Bool -> Parser Bool
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool -> Parser Bool
forall a. a -> Parser ByteString a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Bool
False
maybeP :: Parser a -> Parser (Maybe a)
maybeP :: forall a. Parser a -> Parser (Maybe a)
maybeP Parser a
p = a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> Parser a -> Parser ByteString (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser a
p Parser ByteString (Maybe a)
-> Parser ByteString (Maybe a) -> Parser ByteString (Maybe a)
forall a.
Parser ByteString a -> Parser ByteString a -> Parser ByteString a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe a -> Parser ByteString (Maybe a)
forall a. a -> Parser ByteString a
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
forall a. Maybe a
Nothing
cc :: [ByteString] -> B.Builder
cc :: [ByteString] -> Builder
cc = [Builder] -> Builder
forall a. Monoid a => [a] -> a
mconcat ([Builder] -> Builder)
-> ([ByteString] -> [Builder]) -> [ByteString] -> Builder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Builder) -> [ByteString] -> [Builder]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Builder
B.fromByteString
showToken :: (ByteString -> ByteString) -> Token -> B.Builder
showToken :: (ByteString -> ByteString) -> Token -> Builder
showToken ByteString -> ByteString
hl (TagOpen ByteString
name [Attr]
as Bool
close) =
[ByteString] -> Builder
cc ([ByteString] -> Builder) -> [ByteString] -> Builder
forall a b. (a -> b) -> a -> b
$ [ByteString -> ByteString
hl ByteString
"<", ByteString
name]
[ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ (Attr -> ByteString) -> [Attr] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Attr -> ByteString
showAttr [Attr]
as
[ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString -> ByteString
hl (if Bool
close then ByteString
"/>" else ByteString
">")]
where
showAttr :: Attr -> ByteString
showAttr :: Attr -> ByteString
showAttr (ByteString
key, ByteString
value) = [ByteString] -> ByteString
S.concat ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ [ByteString
" ", ByteString
key, ByteString -> ByteString
hl ByteString
"=\""] [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ (Char -> ByteString) -> String -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Char -> ByteString
escape (ByteString -> String
S.unpack ByteString
value) [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString -> ByteString
hl ByteString
"\""]
escape :: Char -> ByteString
escape Char
'"' = ByteString
"\\\""
escape Char
'\\' = ByteString
"\\\\"
escape Char
c = Char -> ByteString
S.singleton Char
c
showToken ByteString -> ByteString
hl (TagClose ByteString
name) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl ByteString
"</", ByteString
name, ByteString -> ByteString
hl ByteString
">"]
showToken ByteString -> ByteString
_ (Text ByteString
s) = ByteString -> Builder
B.fromByteString ByteString
s
showToken ByteString -> ByteString
hl (Comment ByteString
s) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl ByteString
"<!--", ByteString
s, ByteString -> ByteString
hl ByteString
"-->"]
showToken ByteString -> ByteString
hl (Special ByteString
name ByteString
s) = [ByteString] -> Builder
cc [ByteString -> ByteString
hl ByteString
"<!", ByteString
name, ByteString
" ", ByteString
s, ByteString -> ByteString
hl ByteString
">"]
showToken ByteString -> ByteString
_ (Incomplete ByteString
s) = ByteString -> Builder
B.fromByteString ByteString
s
tokenStream :: Fail.MonadFail m
#if MIN_VERSION_conduit(1, 0, 0)
=> Conduit ByteString m Token
#else
=> GInfConduit ByteString m Token
#endif
tokenStream :: forall (m :: * -> *). MonadFail m => Conduit ByteString m Token
tokenStream =
ByteString -> ConduitT ByteString Token m ()
forall {m :: * -> *}.
MonadFail m =>
ByteString -> ConduitT ByteString Token m ()
loop ByteString
S.empty ConduitT ByteString Token m ()
-> ConduitT Token Token m () -> ConduitT ByteString Token m ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$= ConduitT Token Token m ()
forall (m :: * -> *). Monad m => Conduit Token m Token
decodeEntitiesBS
where
#if MIN_VERSION_conduit(1, 0, 0)
loop :: ByteString -> ConduitT ByteString Token m ()
loop ByteString
accum = ConduitT ByteString Token m (Maybe ByteString)
forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await ConduitT ByteString Token m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString Token m ())
-> ConduitT ByteString Token m ()
forall a b.
ConduitT ByteString Token m a
-> (a -> ConduitT ByteString Token m b)
-> ConduitT ByteString Token m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT ByteString Token m ()
-> (ByteString -> ConduitT ByteString Token m ())
-> Maybe ByteString
-> ConduitT ByteString Token m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ByteString -> () -> ConduitT ByteString Token m ()
forall {m :: * -> *} {b} {i}.
Monad m =>
ByteString -> b -> ConduitT i Token m b
close ByteString
accum ()) (ByteString -> ByteString -> ConduitT ByteString Token m ()
push ByteString
accum)
#else
loop accum = awaitE >>= either (close accum) (push accum)
#endif
push :: ByteString -> ByteString -> ConduitT ByteString Token m ()
push ByteString
accum ByteString
input =
case Parser [Token] -> ByteString -> Either String [Token]
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Token]
html (ByteString
accum ByteString -> ByteString -> ByteString
`S.append` ByteString
input) of
Right ([Token] -> (ByteString, [Token])
forall s. Monoid s => [Token' s] -> (s, [Token' s])
splitAccum -> (ByteString
accum', [Token]
tokens)) -> (Token -> ConduitT ByteString Token m ())
-> [Token] -> ConduitT ByteString Token m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ Token -> ConduitT ByteString Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield [Token]
tokens ConduitT ByteString Token m ()
-> ConduitT ByteString Token m () -> ConduitT ByteString Token m ()
forall a b.
ConduitT ByteString Token m a
-> ConduitT ByteString Token m b -> ConduitT ByteString Token m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString -> ConduitT ByteString Token m ()
loop ByteString
accum'
Left String
err -> String -> ConduitT ByteString Token m ()
forall a. String -> ConduitT ByteString Token m a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
err
close :: ByteString -> b -> ConduitT i Token m b
close ByteString
s b
r = do
Bool -> ConduitT i Token m () -> ConduitT i Token m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
S.null ByteString
s) (ConduitT i Token m () -> ConduitT i Token m ())
-> ConduitT i Token m () -> ConduitT i Token m ()
forall a b. (a -> b) -> a -> b
$ Token -> ConduitT i Token m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Token -> ConduitT i Token m ()) -> Token -> ConduitT i Token m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Token
forall s. s -> Token' s
Text ByteString
s
b -> ConduitT i Token m b
forall a. a -> ConduitT i Token m a
forall (m :: * -> *) a. Monad m => a -> m a
return b
r