{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.HTML.TagStream.Entities
(Dec(..)
,isNameChar
,isNameStart
,decodeEntities)
where
import Data.Char
import Data.Monoid
import Data.String
import Data.Conduit
import Text.HTML.TagStream.Types
import qualified Data.Conduit.List as CL
import Data.Maybe (fromMaybe, isJust)
import Control.Arrow (first,second)
decodeEntities :: (Monad m
,Monoid builder
,Monoid string
,IsString string
,Eq string)
=> Dec builder string
-> Conduit (Token' string) m (Token' string)
decodeEntities :: forall (m :: * -> *) builder string.
(Monad m, Monoid builder, Monoid string, IsString string,
Eq string) =>
Dec builder string -> Conduit (Token' string) m (Token' string)
decodeEntities Dec builder string
dec =
ConduitT (Token' string) (Token' string) m ()
start
where
start :: ConduitT (Token' string) (Token' string) m ()
start = ConduitT (Token' string) (Token' string) m (Maybe (Token' string))
forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await ConduitT (Token' string) (Token' string) m (Maybe (Token' string))
-> (Maybe (Token' string)
-> ConduitT (Token' string) (Token' string) m ())
-> ConduitT (Token' string) (Token' string) m ()
forall a b.
ConduitT (Token' string) (Token' string) m a
-> (a -> ConduitT (Token' string) (Token' string) m b)
-> ConduitT (Token' string) (Token' string) m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT (Token' string) (Token' string) m ()
-> (Token' string -> ConduitT (Token' string) (Token' string) m ())
-> Maybe (Token' string)
-> ConduitT (Token' string) (Token' string) m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (() -> ConduitT (Token' string) (Token' string) m ()
forall a. a -> ConduitT (Token' string) (Token' string) m a
forall (m :: * -> *) a. Monad m => a -> m a
return ()) (\Token' string
token -> Token' string -> ConduitT (Token' string) (Token' string) m ()
forall {m :: * -> *}.
Monad m =>
Token' string -> ConduitT (Token' string) (Token' string) m ()
start' Token' string
token ConduitT (Token' string) (Token' string) m ()
-> ConduitT (Token' string) (Token' string) m ()
-> ConduitT (Token' string) (Token' string) m ()
forall a b.
ConduitT (Token' string) (Token' string) m a
-> ConduitT (Token' string) (Token' string) m b
-> ConduitT (Token' string) (Token' string) m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT (Token' string) (Token' string) m ()
start)
start' :: Token' string -> ConduitT (Token' string) (Token' string) m ()
start' (Text string
t) = (string -> ConduitT (Token' string) string m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield string
t ConduitT (Token' string) string m ()
-> ConduitT (Token' string) string m ()
-> ConduitT (Token' string) string m ()
forall a b.
ConduitT (Token' string) string m a
-> ConduitT (Token' string) string m b
-> ConduitT (Token' string) string m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT (Token' string) string m ()
forall (m :: * -> *) string.
Monad m =>
Conduit (Token' string) m string
yieldWhileText) ConduitT (Token' string) string m ()
-> ConduitT string (Token' string) m ()
-> ConduitT (Token' string) (Token' string) m ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$= Dec builder string -> Conduit string m string
forall (m :: * -> *) string builder.
(Monad m, Monoid string, IsString string, Monoid builder,
Eq string) =>
Dec builder string -> Conduit string m string
decodeEntities' Dec builder string
dec Conduit string m string
-> ConduitT string (Token' string) m ()
-> ConduitT string (Token' string) m ()
forall (m :: * -> *) a b c r.
Monad m =>
Conduit a m b -> ConduitT b c m r -> ConduitT a c m r
=$= (string -> Maybe (Token' string))
-> ConduitT string (Token' string) m ()
forall (m :: * -> *) a b.
Monad m =>
(a -> Maybe b) -> ConduitT a b m ()
CL.mapMaybe string -> Maybe (Token' string)
forall {s}. (Eq s, IsString s) => s -> Maybe (Token' s)
go
start' (TagOpen string
name [Attr' string]
attrs Bool
bool) = Token' string -> ConduitT (Token' string) (Token' string) m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (string -> [Attr' string] -> Bool -> Token' string
forall s. s -> [Attr' s] -> Bool -> Token' s
TagOpen string
name ((Attr' string -> Attr' string) -> [Attr' string] -> [Attr' string]
forall a b. (a -> b) -> [a] -> [b]
map ((string -> string) -> Attr' string -> Attr' string
forall b c d. (b -> c) -> (d, b) -> (d, c)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (Dec builder string -> string -> string
forall a builder.
(Eq a, IsString a, Monoid builder, Monoid a) =>
Dec builder a -> a -> a
decodeString Dec builder string
dec)) [Attr' string]
attrs) Bool
bool)
start' Token' string
token = Token' string -> ConduitT (Token' string) (Token' string) m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Token' string
token
go :: s -> Maybe (Token' s)
go s
t
| s
t s -> s -> Bool
forall a. Eq a => a -> a -> Bool
== s
"" = Maybe (Token' s)
forall a. Maybe a
Nothing
| Bool
otherwise = Token' s -> Maybe (Token' s)
forall a. a -> Maybe a
Just (s -> Token' s
forall s. s -> Token' s
Text s
t)
decodeString
:: (Eq a, IsString a, Monoid builder, Monoid a)
=> Dec builder a -> a -> a
decodeString :: forall a builder.
(Eq a, IsString a, Monoid builder, Monoid a) =>
Dec builder a -> a -> a
decodeString Dec builder a
dec a
input =
case Dec builder a -> a -> (a, a)
forall string builder.
(IsString string, Monoid builder, Eq string, Monoid string) =>
Dec builder string -> string -> (string, string)
makeEntityDecoder Dec builder a
dec a
input of
(a
value', a
remainder)
| a
value' a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
forall a. Monoid a => a
mempty -> a
value' a -> a -> a
forall a. Semigroup a => a -> a -> a
<> Dec builder a -> a -> a
forall a builder.
(Eq a, IsString a, Monoid builder, Monoid a) =>
Dec builder a -> a -> a
decodeString Dec builder a
dec a
remainder
| Bool
otherwise -> a
input
decodeEntities' :: (Monad m
,Monoid string
,IsString string
,Monoid builder
,Eq string)
=> Dec builder string
-> Conduit string m string
decodeEntities' :: forall (m :: * -> *) string builder.
(Monad m, Monoid string, IsString string, Monoid builder,
Eq string) =>
Dec builder string -> Conduit string m string
decodeEntities' Dec builder string
dec =
(string -> string) -> ConduitT string string m ()
forall {m :: * -> *}.
Monad m =>
(string -> string) -> ConduitT string string m ()
loop string -> string
forall a. a -> a
id
where
loop :: (string -> string) -> ConduitT string string m ()
loop string -> string
accum = do
Maybe string
mchunk <- ConduitT string string m (Maybe string)
forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await
let chunk :: string
chunk = string -> string
accum (string -> string) -> string -> string
forall a b. (a -> b) -> a -> b
$ string -> Maybe string -> string
forall a. a -> Maybe a -> a
fromMaybe string
forall a. Monoid a => a
mempty Maybe string
mchunk
(string
newStr, string
remainder) = Dec builder string -> string -> (string, string)
forall string builder.
(IsString string, Monoid builder, Eq string, Monoid string) =>
Dec builder string -> string -> (string, string)
makeEntityDecoder Dec builder string
dec string
chunk
string -> ConduitT string string m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield string
newStr
if Maybe string -> Bool
forall a. Maybe a -> Bool
isJust Maybe string
mchunk
then (string -> string) -> ConduitT string string m ()
loop (string -> string -> string
forall a. Monoid a => a -> a -> a
mappend string
remainder)
else string -> ConduitT string string m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield string
remainder
yieldWhileText :: Monad m => Conduit (Token' string) m string
yieldWhileText :: forall (m :: * -> *) string.
Monad m =>
Conduit (Token' string) m string
yieldWhileText =
ConduitT (Token' string) string m ()
forall {o}. ConduitT (Token' o) o m ()
loop
where
loop :: ConduitT (Token' o) o m ()
loop = ConduitT (Token' o) o m (Maybe (Token' o))
forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await ConduitT (Token' o) o m (Maybe (Token' o))
-> (Maybe (Token' o) -> ConduitT (Token' o) o m ())
-> ConduitT (Token' o) o m ()
forall a b.
ConduitT (Token' o) o m a
-> (a -> ConduitT (Token' o) o m b) -> ConduitT (Token' o) o m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ConduitT (Token' o) o m ()
-> (Token' o -> ConduitT (Token' o) o m ())
-> Maybe (Token' o)
-> ConduitT (Token' o) o m ()
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (() -> ConduitT (Token' o) o m ()
forall a. a -> ConduitT (Token' o) o m a
forall (m :: * -> *) a. Monad m => a -> m a
return ()) Token' o -> ConduitT (Token' o) o m ()
go
go :: Token' o -> ConduitT (Token' o) o m ()
go (Text o
t) = o -> ConduitT (Token' o) o m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield o
t ConduitT (Token' o) o m ()
-> ConduitT (Token' o) o m () -> ConduitT (Token' o) o m ()
forall a b.
ConduitT (Token' o) o m a
-> ConduitT (Token' o) o m b -> ConduitT (Token' o) o m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT (Token' o) o m ()
loop
go Token' o
token = Token' o -> ConduitT (Token' o) o m ()
forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Token' o
token
data Dec builder string = Dec
{ forall builder string. Dec builder string -> builder -> string
decToS :: builder -> string
, forall builder string.
Dec builder string -> (Char -> Bool) -> string -> (string, string)
decBreak :: (Char -> Bool) -> string -> (string,string)
, forall builder string. Dec builder string -> string -> builder
decBuilder :: string -> builder
, forall builder string.
Dec builder string -> Int -> string -> string
decDrop :: Int -> string -> string
, forall builder string. Dec builder string -> string -> Maybe string
decEntity :: string -> Maybe string
, forall builder string.
Dec builder string -> string -> Maybe (Char, string)
decUncons :: string -> Maybe (Char,string)
}
makeEntityDecoder :: (IsString string,Monoid builder,Eq string,Monoid string)
=> Dec builder string -> string -> (string, string)
makeEntityDecoder :: forall string builder.
(IsString string, Monoid builder, Eq string, Monoid string) =>
Dec builder string -> string -> (string, string)
makeEntityDecoder Dec{string -> builder
string -> Maybe string
string -> Maybe (Char, string)
builder -> string
Int -> string -> string
(Char -> Bool) -> string -> (string, string)
decToS :: forall builder string. Dec builder string -> builder -> string
decBreak :: forall builder string.
Dec builder string -> (Char -> Bool) -> string -> (string, string)
decBuilder :: forall builder string. Dec builder string -> string -> builder
decDrop :: forall builder string.
Dec builder string -> Int -> string -> string
decEntity :: forall builder string. Dec builder string -> string -> Maybe string
decUncons :: forall builder string.
Dec builder string -> string -> Maybe (Char, string)
decToS :: builder -> string
decBreak :: (Char -> Bool) -> string -> (string, string)
decBuilder :: string -> builder
decDrop :: Int -> string -> string
decEntity :: string -> Maybe string
decUncons :: string -> Maybe (Char, string)
..} = (builder -> string) -> (builder, string) -> (string, string)
forall b c d. (b -> c) -> (b, d) -> (c, d)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first builder -> string
decToS ((builder, string) -> (string, string))
-> (string -> (builder, string)) -> string -> (string, string)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. string -> (builder, string)
go
where
go :: string -> (builder, string)
go string
s =
case (Char -> Bool) -> string -> (string, string)
decBreak (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'&') string
s of
(string
_,string
"") -> (string -> builder
decBuilder string
s, string
"")
(string
before,restPlusAmp :: string
restPlusAmp@(Int -> string -> string
decDrop Int
1 -> string
rest)) ->
case (Char -> Bool) -> string -> (string, string)
decBreak (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\Char
c -> Char -> Bool
isNameChar Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#')) string
rest of
(string
_,string
"") -> (string -> builder
decBuilder string
before, string
restPlusAmp)
(string
entity,string
after) -> (builder
before1 builder -> builder -> builder
forall a. Semigroup a => a -> a -> a
<> builder
before2, string
after')
where
before1 :: builder
before1 = string -> builder
decBuilder string
before
(builder
before2, string
after') =
case Maybe string
mdecoded of
Maybe string
Nothing -> (builder -> builder) -> (builder, string) -> (builder, string)
forall b c d. (b -> c) -> (b, d) -> (c, d)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first ((string -> builder
decBuilder string
"&" builder -> builder -> builder
forall a. Semigroup a => a -> a -> a
<> string -> builder
decBuilder string
entity) builder -> builder -> builder
forall a. Semigroup a => a -> a -> a
<>) (string -> (builder, string)
go string
after)
Just (string -> builder
decBuilder -> builder
decoded) ->
case string -> Maybe (Char, string)
decUncons string
after of
Just (Char
';',string
validAfter) -> (builder -> builder) -> (builder, string) -> (builder, string)
forall b c d. (b -> c) -> (b, d) -> (c, d)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (builder
decoded builder -> builder -> builder
forall a. Semigroup a => a -> a -> a
<>) (string -> (builder, string)
go string
validAfter)
Just (Char
_invalid,string
_rest) -> (builder -> builder) -> (builder, string) -> (builder, string)
forall b c d. (b -> c) -> (b, d) -> (c, d)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (builder
decoded builder -> builder -> builder
forall a. Semigroup a => a -> a -> a
<>) (string -> (builder, string)
go string
after)
Maybe (Char, string)
Nothing -> (builder
forall a. Monoid a => a
mempty, string
s)
mdecoded :: Maybe string
mdecoded =
if string
entity string -> string -> Bool
forall a. Eq a => a -> a -> Bool
== string
forall a. Monoid a => a
mempty
then Maybe string
forall a. Maybe a
Nothing
else string -> Maybe string
decEntity string
entity
isNameStart :: Char -> Bool
isNameStart :: Char -> Bool
isNameStart Char
c =
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
':' Bool -> Bool -> Bool
||
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
||
Char -> Bool
isAsciiUpper Char
c Bool -> Bool -> Bool
||
Char -> Bool
isAsciiLower Char
c Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xC0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD6') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xD8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xF6') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF8' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x370' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x37D') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x37F' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x1FFF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x200C' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x200D') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2070' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x218F') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x2C00' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2FEF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x3001' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xD7FF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xF900' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFDCF') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\xFDF0' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xFFFD') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x10000' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xEFFFF')
isNameChar :: Char -> Bool
isNameChar :: Char -> Bool
isNameChar Char
c =
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-' Bool -> Bool -> Bool
||
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'.' Bool -> Bool -> Bool
||
Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\xB7' Bool -> Bool -> Bool
||
Char -> Bool
isDigit Char
c Bool -> Bool -> Bool
||
Char -> Bool
isNameStart Char
c Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x0300' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x036F') Bool -> Bool -> Bool
||
(Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x203F' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x2040')