Copyright | Bryan O'Sullivan 2007-2015 |
---|---|

License | BSD3 |

Maintainer | bos@serpentine.com |

Stability | experimental |

Portability | unknown |

Safe Haskell | Trustworthy |

Language | Haskell98 |

Simple, efficient combinator parsing that can consume lazy `Text`

strings, loosely based on the Parsec library.

This is essentially the same code as in the `Text`

module, only with a `parse`

function that can consume a lazy
`Text`

incrementally, and a `Result`

type that does not allow
more input to be fed in. Think of this as suitable for use with a
lazily read file, e.g. via `readFile`

or `hGetContents`

.

*Note:* The various parser functions and combinators such as
`string`

still expect *strict* `Text`

parameters, and return
strict `Text`

results. Behind the scenes, strict `Text`

values
are still used internally to store parser input and manipulate it
efficiently.

## Synopsis

- data Result r
- data Number
- compareResults :: (Eq i, Eq r) => IResult i r -> IResult i r -> Maybe Bool
- endOfInput :: forall t. Chunk t => Parser t ()
- atEnd :: Chunk t => Parser t Bool
- try :: Parser i a -> Parser i a
- (<?>) :: Parser i a -> String -> Parser i a
- choice :: Alternative f => [f a] -> f a
- option :: Alternative f => a -> f a -> f a
- many' :: MonadPlus m => m a -> m [a]
- many1 :: Alternative f => f a -> f [a]
- many1' :: MonadPlus m => m a -> m [a]
- sepBy :: Alternative f => f a -> f s -> f [a]
- sepBy' :: MonadPlus m => m a -> m s -> m [a]
- sepBy1 :: Alternative f => f a -> f s -> f [a]
- sepBy1' :: MonadPlus m => m a -> m s -> m [a]
- manyTill :: Alternative f => f a -> f b -> f [a]
- manyTill' :: MonadPlus m => m a -> m b -> m [a]
- skipMany :: Alternative f => f a -> f ()
- skipMany1 :: Alternative f => f a -> f ()
- count :: Monad m => Int -> m a -> m [a]
- eitherP :: Alternative f => f a -> f b -> f (Either a b)
- feed :: Monoid i => IResult i r -> i -> IResult i r
- type Parser = Parser Text
- satisfy :: (Char -> Bool) -> Parser Char
- skip :: (Char -> Bool) -> Parser ()
- satisfyWith :: (Char -> a) -> (a -> Bool) -> Parser a
- take :: Int -> Parser Text
- string :: Text -> Parser Text
- stringCI :: Text -> Parser Text
- asciiCI :: Text -> Parser Text
- skipWhile :: (Char -> Bool) -> Parser ()
- takeTill :: (Char -> Bool) -> Parser Text
- takeWhile :: (Char -> Bool) -> Parser Text
- takeText :: Parser Text
- takeLazyText :: Parser Text
- scan :: s -> (s -> Char -> Maybe s) -> Parser Text
- runScanner :: s -> (s -> Char -> Maybe s) -> Parser (Text, s)
- takeWhile1 :: (Char -> Bool) -> Parser Text
- inClass :: String -> Char -> Bool
- notInClass :: String -> Char -> Bool
- anyChar :: Parser Char
- char :: Char -> Parser Char
- notChar :: Char -> Parser Char
- peekChar :: Parser (Maybe Char)
- peekChar' :: Parser Char
- endOfLine :: Parser ()
- parseOnly :: Parser a -> Text -> Either String a
- match :: Parser a -> Parser (Text, a)
- isEndOfLine :: Char -> Bool
- isHorizontalSpace :: Char -> Bool
- hexadecimal :: (Integral a, Bits a) => Parser a
- decimal :: Integral a => Parser a
- signed :: Num a => Parser a -> Parser a
- rational :: Fractional a => Parser a
- double :: Parser Double
- number :: Parser Number
- scientific :: Parser Scientific
- digit :: Parser Char
- letter :: Parser Char
- space :: Parser Char
- skipSpace :: Parser ()
- (.*>) :: Text -> Parser a -> Parser a
- (<*.) :: Parser a -> Text -> Parser a
- parse :: Parser a -> Text -> Result a
- parseTest :: Show a => Parser a -> Text -> IO ()
- maybeResult :: Result r -> Maybe r
- eitherResult :: Result r -> Either String r

# Documentation

The result of a parse.

Fail Text [String] String | The parse failed. The |

Done Text r | The parse succeeded. The |

A numeric type that can represent integers accurately, and
floating point numbers to the precision of a `Double`

.

*Note*: this type is deprecated, and will be removed in the next
major release. Use the `Scientific`

type instead.

## Instances

Eq Number # | |

Fractional Number # | |

Data Number # | |

Defined in Data.Attoparsec.Number gfoldl :: (forall d b. Data d => c (d -> b) -> d -> c b) -> (forall g. g -> c g) -> Number -> c Number # gunfold :: (forall b r. Data b => c (b -> r) -> c r) -> (forall r. r -> c r) -> Constr -> c Number # toConstr :: Number -> Constr # dataTypeOf :: Number -> DataType # dataCast1 :: Typeable t => (forall d. Data d => c (t d)) -> Maybe (c Number) # dataCast2 :: Typeable t => (forall d e. (Data d, Data e) => c (t d e)) -> Maybe (c Number) # gmapT :: (forall b. Data b => b -> b) -> Number -> Number # gmapQl :: (r -> r' -> r) -> r -> (forall d. Data d => d -> r') -> Number -> r # gmapQr :: (r' -> r -> r) -> r -> (forall d. Data d => d -> r') -> Number -> r # gmapQ :: (forall d. Data d => d -> u) -> Number -> [u] # gmapQi :: Int -> (forall d. Data d => d -> u) -> Number -> u # gmapM :: Monad m => (forall d. Data d => d -> m d) -> Number -> m Number # gmapMp :: MonadPlus m => (forall d. Data d => d -> m d) -> Number -> m Number # gmapMo :: MonadPlus m => (forall d. Data d => d -> m d) -> Number -> m Number # | |

Num Number # | |

Ord Number # | |

Real Number # | |

Defined in Data.Attoparsec.Number toRational :: Number -> Rational # | |

RealFrac Number # | |

Show Number # | |

NFData Number # | |

Defined in Data.Attoparsec.Number |

endOfInput :: forall t. Chunk t => Parser t () #

Match only if all input has been consumed.

atEnd :: Chunk t => Parser t Bool #

Return an indication of whether the end of input has been reached.

try :: Parser i a -> Parser i a #

Attempt a parse, and if it fails, rewind the input so that no input appears to have been consumed.

This combinator is provided for compatibility with Parsec. attoparsec parsers always backtrack on failure.

Name the parser, in case failure occurs.

choice :: Alternative f => [f a] -> f a #

`choice ps`

tries to apply the actions in the list `ps`

in order,
until one of them succeeds. Returns the value of the succeeding
action.

option :: Alternative f => a -> f a -> f a #

`option x p`

tries to apply action `p`

. If `p`

fails without
consuming input, it returns the value `x`

, otherwise the value
returned by `p`

.

priority = option 0 (digitToInt <$> digit)

many' :: MonadPlus m => m a -> m [a] #

`many' p`

applies the action `p`

*zero* or more times. Returns a
list of the returned values of `p`

. The value returned by `p`

is
forced to WHNF.

word = many' letter

many1 :: Alternative f => f a -> f [a] #

`many1 p`

applies the action `p`

*one* or more times. Returns a
list of the returned values of `p`

.

word = many1 letter

many1' :: MonadPlus m => m a -> m [a] #

`many1' p`

applies the action `p`

*one* or more times. Returns a
list of the returned values of `p`

. The value returned by `p`

is
forced to WHNF.

word = many1' letter

sepBy :: Alternative f => f a -> f s -> f [a] #

`sepBy p sep`

applies *zero* or more occurrences of `p`

, separated
by `sep`

. Returns a list of the values returned by `p`

.

commaSep p = p `sepBy` (char ',')

sepBy' :: MonadPlus m => m a -> m s -> m [a] #

`sepBy' p sep`

applies *zero* or more occurrences of `p`

, separated
by `sep`

. Returns a list of the values returned by `p`

. The value
returned by `p`

is forced to WHNF.

commaSep p = p `sepBy'` (char ',')

sepBy1 :: Alternative f => f a -> f s -> f [a] #

`sepBy1 p sep`

applies *one* or more occurrences of `p`

, separated
by `sep`

. Returns a list of the values returned by `p`

.

commaSep p = p `sepBy1` (char ',')

sepBy1' :: MonadPlus m => m a -> m s -> m [a] #

`sepBy1' p sep`

applies *one* or more occurrences of `p`

, separated
by `sep`

. Returns a list of the values returned by `p`

. The value
returned by `p`

is forced to WHNF.

commaSep p = p `sepBy1'` (char ',')

manyTill :: Alternative f => f a -> f b -> f [a] #

`manyTill p end`

applies action `p`

*zero* or more times until
action `end`

succeeds, and returns the list of values returned by
`p`

. This can be used to scan comments:

simpleComment = string "<!--" *> manyTill anyChar (string "-->")

(Note the overlapping parsers `anyChar`

and `string "-->"`

.
While this will work, it is not very efficient, as it will cause a
lot of backtracking.)

manyTill' :: MonadPlus m => m a -> m b -> m [a] #

`manyTill' p end`

applies action `p`

*zero* or more times until
action `end`

succeeds, and returns the list of values returned by
`p`

. This can be used to scan comments:

simpleComment = string "<!--" *> manyTill' anyChar (string "-->")

(Note the overlapping parsers `anyChar`

and `string "-->"`

.
While this will work, it is not very efficient, as it will cause a
lot of backtracking.)

The value returned by `p`

is forced to WHNF.

skipMany :: Alternative f => f a -> f () #

Skip zero or more instances of an action.

skipMany1 :: Alternative f => f a -> f () #

Skip one or more instances of an action.

eitherP :: Alternative f => f a -> f b -> f (Either a b) #

Combine two alternatives.

feed :: Monoid i => IResult i r -> i -> IResult i r #

If a parser has returned a `Partial`

result, supply it with more
input.

satisfy :: (Char -> Bool) -> Parser Char #

The parser `satisfy p`

succeeds for any character for which the
predicate `p`

returns `True`

. Returns the character that is
actually parsed.

digit = satisfy isDigit where isDigit c = c >= '0' && c <= '9'

skip :: (Char -> Bool) -> Parser () #

The parser `skip p`

succeeds for any character for which the
predicate `p`

returns `True`

.

skipDigit = skip isDigit where isDigit c = c >= '0' && c <= '9'

satisfyWith :: (Char -> a) -> (a -> Bool) -> Parser a #

The parser `satisfyWith f p`

transforms a character, and succeeds
if the predicate `p`

returns `True`

on the transformed value. The
parser returns the transformed character that was parsed.

string :: Text -> Parser Text #

`string s`

parses a sequence of characters that identically match
`s`

. Returns the parsed string (i.e. `s`

). This parser consumes no
input if it fails (even if a partial match).

*Note*: The behaviour of this parser is different to that of the
similarly-named parser in Parsec, as this one is all-or-nothing.
To illustrate the difference, the following parser will fail under
Parsec given an input of `"for"`

:

string "foo" <|> string "for"

The reason for its failure is that the first branch is a
partial match, and will consume the letters `'f'`

and `'o'`

before failing. In attoparsec, the above parser will *succeed* on
that input, because the failed first branch will consume nothing.

stringCI :: Text -> Parser Text #

Deprecated: this is very inefficient, use asciiCI instead

Satisfy a literal string, ignoring case.

Note: this function is currently quite inefficient. Unicode case folding can change the length of a string ("ß" becomes "ss"), which makes a simple, efficient implementation tricky. We have (for now) chosen simplicity over efficiency.

asciiCI :: Text -> Parser Text #

Satisfy a literal string, ignoring case for characters in the ASCII range.

skipWhile :: (Char -> Bool) -> Parser () #

Skip past input for as long as the predicate returns `True`

.

takeTill :: (Char -> Bool) -> Parser Text #

Consume input as long as the predicate returns `False`

(i.e. until it returns `True`

), and return the consumed input.

This parser does not fail. It will return an empty string if the
predicate returns `True`

on the first character of input.

*Note*: Because this parser does not fail, do not use it with
combinators such as `many`

, because such
parsers loop until a failure occurs. Careless use will thus result
in an infinite loop.

takeWhile :: (Char -> Bool) -> Parser Text #

Consume input as long as the predicate returns `True`

, and return
the consumed input.

This parser does not fail. It will return an empty string if the
predicate returns `False`

on the first character of input.

*Note*: Because this parser does not fail, do not use it with
combinators such as `many`

, because such
parsers loop until a failure occurs. Careless use will thus result
in an infinite loop.

takeLazyText :: Parser Text #

Consume all remaining input and return it as a single string.

scan :: s -> (s -> Char -> Maybe s) -> Parser Text #

A stateful scanner. The predicate consumes and transforms a
state argument, and each transformed state is passed to successive
invocations of the predicate on each character of the input until one
returns `Nothing`

or the input ends.

This parser does not fail. It will return an empty string if the
predicate returns `Nothing`

on the first character of input.

*Note*: Because this parser does not fail, do not use it with
combinators such as `many`

, because such
parsers loop until a failure occurs. Careless use will thus result
in an infinite loop.

runScanner :: s -> (s -> Char -> Maybe s) -> Parser (Text, s) #

Like `scan`

, but generalized to return the final state of the
scanner.

inClass :: String -> Char -> Bool #

Match any character in a set.

vowel = inClass "aeiou"

Range notation is supported.

halfAlphabet = inClass "a-nA-N"

To add a literal `'-'`

to a set, place it at the beginning or end
of the string.

notInClass :: String -> Char -> Bool #

Match any character not in a set.

peekChar :: Parser (Maybe Char) #

Match any character, to perform lookahead. Returns `Nothing`

if
end of input has been reached. Does not consume any input.

*Note*: Because this parser does not fail, do not use it with
combinators such as `many`

, because such
parsers loop until a failure occurs. Careless use will thus result
in an infinite loop.

Match any character, to perform lookahead. Does not consume any input, but will fail if end of input has been reached.

Match either a single newline character `'\n'`

, or a carriage
return followed by a newline character `"\r\n"`

.

parseOnly :: Parser a -> Text -> Either String a #

Run a parser that cannot be resupplied via a `Partial`

result.

This function does not force a parser to consume all of its input. Instead, any residual input will be discarded. To force a parser to consume all of its input, use something like this:

`parseOnly`

(myParser`<*`

`endOfInput`

)

match :: Parser a -> Parser (Text, a) #

Return both the result of a parse and the portion of the input that was consumed while it was being parsed.

isEndOfLine :: Char -> Bool #

A predicate that matches either a carriage return `'\r'`

or
newline `'\n'`

character.

isHorizontalSpace :: Char -> Bool #

A predicate that matches either a space `' '`

or horizontal tab
`'\t'`

character.

hexadecimal :: (Integral a, Bits a) => Parser a #

Parse and decode an unsigned hexadecimal number. The hex digits
`'a'`

through `'f'`

may be upper or lower case.

This parser does not accept a leading `"0x"`

string.

signed :: Num a => Parser a -> Parser a #

Parse a number with an optional leading `'+'`

or `'-'`

sign
character.

rational :: Fractional a => Parser a #

Parse a rational number.

The syntax accepted by this parser is the same as for `double`

.

*Note*: this parser is not safe for use with inputs from untrusted
sources. An input with a suitably large exponent such as
`"1e1000000000"`

will cause a huge `Integer`

to be allocated,
resulting in what is effectively a denial-of-service attack.

In most cases, it is better to use `double`

or `scientific`

instead.

Parse a rational number.

This parser accepts an optional leading sign character, followed by
at least one decimal digit. The syntax similar to that accepted by
the `read`

function, with the exception that a trailing `'.'`

or
`'e'`

*not* followed by a number is not consumed.

Examples with behaviour identical to `read`

, if you feed an empty
continuation to the first result:

rational "3" == Done 3.0 "" rational "3.1" == Done 3.1 "" rational "3e4" == Done 30000.0 "" rational "3.1e4" == Done 31000.0, ""

Examples with behaviour identical to `read`

:

rational ".3" == Fail "input does not start with a digit" rational "e3" == Fail "input does not start with a digit"

Examples of differences from `read`

:

rational "3.foo" == Done 3.0 ".foo" rational "3e" == Done 3.0 "e"

This function does not accept string representations of "NaN" or "Infinity".

Deprecated: Use `scientific`

instead.

Parse a number, attempting to preserve both speed and precision.

The syntax accepted by this parser is the same as for `double`

.

This function does not accept string representations of "NaN" or "Infinity".

scientific :: Parser Scientific #

Parse a scientific number.

The syntax accepted by this parser is the same as for `double`

.

# Running parsers

parseTest :: Show a => Parser a -> Text -> IO () #

Run a parser and print its result to standard output.