{-# LANGUAGE Trustworthy #-}{-# LANGUAGE NoImplicitPrelude #-}------------------------------------------------------------------------------- |-- Module : Text.Read.Lex-- Copyright : (c) The University of Glasgow 2002-- License : BSD-style (see the file libraries/base/LICENSE)---- Maintainer : libraries@haskell.org-- Stability : provisional-- Portability : non-portable (uses Text.ParserCombinators.ReadP)---- The cut-down Haskell lexer, used by Text.Read-------------------------------------------------------------------------------moduleText.Read.Lex-- lexing types(Lexeme (..),Number ,numberToInteger ,numberToFixed ,numberToRational ,numberToRangedRational -- lexer,lex ,expect ,hsLex ,lexChar ,readIntP ,readOctP ,readDecP ,readHexP ,isSymbolChar )whereimportText.ParserCombinators.ReadP importGHC.Base importGHC.Char importGHC.Num (Num (..),Integer)importGHC.Show (Show (..))importGHC.Unicode (GeneralCategory (..),generalCategory ,isSpace ,isAlpha ,isAlphaNum )importGHC.Real (Rational ,(% ),fromIntegral ,Integral ,toInteger ,(^ ),quot ,even )importGHC.List importGHC.Enum (minBound ,maxBound )importData.Maybe -- local copy to break import-cycle-- | @'guard' b@ is @'return' ()@ if @b@ is 'True',-- and 'mzero' if @b@ is 'False'.guard::(MonadPlus m )=>Bool->m ()guard True=return ()guardFalse=mzero -- ------------------------------------------------------------------------------- Lexing types-- ^ Haskell lexemes.dataLexeme =Char Char-- ^ Character literal|String String -- ^ String literal, with escapes interpreted|Punc String -- ^ Punctuation or reserved symbol, e.g. @(@, @::@|Ident String -- ^ Haskell identifier, e.g. @foo@, @Baz@|Symbol String -- ^ Haskell symbol, e.g. @>>@, @:%@|Number Number -- ^ @since 4.6.0.0|EOF deriving(Eq,Show )-- | @since 4.7.0.0dataNumber =MkNumber Int-- BaseDigits -- Integral part|MkDecimal Digits -- Integral part(Maybe Digits )-- Fractional part(Maybe Integer)-- Exponentderiving(Eq,Show )-- | @since 4.5.1.0numberToInteger::Number ->Maybe IntegernumberToInteger (MkNumber base iPart )=Just (val (fromIntegral base )iPart )numberToInteger(MkDecimal iPart Nothing Nothing )=Just (val 10iPart )numberToInteger_=Nothing -- | @since 4.7.0.0numberToFixed::Integer->Number ->Maybe (Integer,Integer)numberToFixed _(MkNumber base iPart )=Just (val (fromIntegral base )iPart ,0)numberToFixed_(MkDecimal iPart Nothing Nothing )=Just (val 10iPart ,0)numberToFixedp (MkDecimal iPart (Just fPart )Nothing )=leti =val 10iPart f =val 10(integerTake p (fPart ++ repeat 0))-- Sigh, we really want genericTake, but that's above us in-- the hierarchy, so we define our own version here (actually-- specialised to Integer)integerTake::Integer->[a ]->[a ]integerTake n _|n <=0=[]integerTake_[]=[]integerTaken (x :xs )=x :integerTake (n -1)xs inJust (i ,f )numberToFixed__=Nothing -- This takes a floatRange, and if the Rational would be outside of-- the floatRange then it may return Nothing. Not that it will not-- /necessarily/ return Nothing, but it is good enough to fix the-- space problems in #5688-- Ways this is conservative:-- * the floatRange is in base 2, but we pretend it is in base 10-- * we pad the floateRange a bit, just in case it is very small-- and we would otherwise hit an edge case-- * We only worry about numbers that have an exponent. If they don't-- have an exponent then the Rational won't be much larger than the-- Number, so there is no problem-- | @since 4.5.1.0numberToRangedRational::(Int,Int)->Number ->Maybe Rational -- Nothing = InfnumberToRangedRational (neg ,pos )n @(MkDecimal iPart mFPart (Just exp ))-- if exp is out of integer bounds,-- then the number is definitely out of range|exp >fromIntegral (maxBound ::Int)||exp <fromIntegral (minBound ::Int)=Nothing |otherwise =letmFirstDigit =casedropWhile (0==)iPart ofiPart' @(_:_)->Just (length iPart' )[]->casemFPart ofNothing ->Nothing Just fPart ->casespan (0==)fPart of(_,[])->Nothing (zeroes ,_)->Just (negate (length zeroes ))incasemFirstDigit ofNothing ->Just 0Just firstDigit ->letfirstDigit' =firstDigit + fromInteger exp iniffirstDigit' >(pos + 3)thenNothing elseiffirstDigit' <(neg -3)thenJust 0elseJust (numberToRational n )numberToRangedRational_n =Just (numberToRational n )-- | @since 4.6.0.0numberToRational::Number ->Rational numberToRational (MkNumber base iPart )=val (fromIntegral base )iPart % 1numberToRational(MkDecimal iPart mFPart mExp )=leti =val 10iPart incase(mFPart ,mExp )of(Nothing ,Nothing )->i % 1(Nothing ,Just exp )|exp >=0->(i * (10^ exp ))% 1|otherwise ->i % (10^ (-exp ))(Just fPart ,Nothing )->fracExp 0i fPart (Just fPart ,Just exp )->fracExp exp i fPart -- fracExp is a bit more efficient in calculating the Rational.-- Instead of calculating the fractional part alone, then-- adding the integral part and finally multiplying with-- 10 ^ exp if an exponent was given, do it all at once.-- ------------------------------------------------------------------------------- Lexinglex::ReadP Lexeme lex =skipSpaces >> lexToken -- | @since 4.7.0.0expect::Lexeme ->ReadP ()expect lexeme =do{skipSpaces ;thing <-lexToken ;ifthing ==lexeme thenreturn ()elsepfail }hsLex::ReadP String -- ^ Haskell lexer: returns the lexed string, rather than the lexemehsLex =doskipSpaces (s ,_)<-gather lexToken return s lexToken::ReadP Lexeme lexToken =lexEOF +++ lexLitChar +++ lexString +++ lexPunc +++ lexSymbol +++ lexId +++ lexNumber -- ------------------------------------------------------------------------ End of filelexEOF::ReadP Lexeme lexEOF =dos <-look guard (null s )return EOF -- ----------------------------------------------------------------------------- Single character lexemeslexPunc::ReadP Lexeme lexPunc =doc <-satisfy isPuncChar return (Punc [c ])-- | The @special@ character class as defined in the Haskell Report.isPuncChar::Char->BoolisPuncChar c =c `elem `",;()[]{}`"-- ------------------------------------------------------------------------ SymbolslexSymbol::ReadP Lexeme lexSymbol =dos <-munch1 isSymbolChar ifs `elem `reserved_ops thenreturn (Punc s )-- Reserved-ops count as punctuationelsereturn (Symbol s )wherereserved_ops =["..","::","=","\\","|","<-","->","@","~","=>"]isSymbolChar::Char->BoolisSymbolChar c =not(isPuncChar c )&&casegeneralCategory c ofMathSymbol ->TrueCurrencySymbol ->TrueModifierSymbol ->TrueOtherSymbol ->TrueDashPunctuation ->TrueOtherPunctuation ->not(c `elem `"'\"")ConnectorPunctuation ->c /='_'_->False-- ------------------------------------------------------------------------ identifierslexId::ReadP Lexeme lexId =doc <-satisfy isIdsChar s <-munch isIdfChar return (Ident (c :s ))where-- Identifiers can start with a '_'isIdsChar c =isAlpha c ||c =='_'isIdfChar c =isAlphaNum c ||c `elem `"_'"-- ----------------------------------------------------------------------------- Lexing character literalslexLitChar::ReadP Lexeme lexLitChar =do_<-char '\''(c ,esc )<-lexCharE guard (esc ||c /='\'')-- Eliminate '' possibility_<-char '\''return (Char c )lexChar::ReadP CharlexChar =do{(c ,_)<-lexCharE ;return c }lexCharE::ReadP (Char,Bool)-- "escaped or not"?lexCharE =doc1 <-get ifc1 =='\\'thendoc2 <-lexEsc ;return (c2 ,True)elsedoreturn (c1 ,False)wherelexEsc =lexEscChar +++ lexNumeric +++ lexCntrlChar +++ lexAscii lexEscChar =doc <-get casec of'a'->return '\a''b'->return '\b''f'->return '\f''n'->return '\n''r'->return '\r''t'->return '\t''v'->return '\v''\\'->return '\\''\"'->return '\"''\''->return '\''_->pfail lexNumeric =dobase <-lexBaseChar <++ return 10n <-lexInteger base guard (n <=toInteger (ord maxBound ))return (chr (fromInteger n ))lexCntrlChar =do_<-char '^'c <-get casec of'@'->return '\^@''A'->return '\^A''B'->return '\^B''C'->return '\^C''D'->return '\^D''E'->return '\^E''F'->return '\^F''G'->return '\^G''H'->return '\^H''I'->return '\^I''J'->return '\^J''K'->return '\^K''L'->return '\^L''M'->return '\^M''N'->return '\^N''O'->return '\^O''P'->return '\^P''Q'->return '\^Q''R'->return '\^R''S'->return '\^S''T'->return '\^T''U'->return '\^U''V'->return '\^V''W'->return '\^W''X'->return '\^X''Y'->return '\^Y''Z'->return '\^Z''['->return '\^[''\\'->return '\^\'']'->return '\^]''^'->return '\^^''_'->return '\^_'_->pfail lexAscii =dochoice [(string "SOH">> return '\SOH')<++ (string "SO">> return '\SO')-- \SO and \SOH need maximal-munch treatment-- See the Haskell report Sect 2.6,string "NUL">> return '\NUL',string "STX">> return '\STX',string "ETX">> return '\ETX',string "EOT">> return '\EOT',string "ENQ">> return '\ENQ',string "ACK">> return '\ACK',string "BEL">> return '\BEL',string "BS">> return '\BS',string "HT">> return '\HT',string "LF">> return '\LF',string "VT">> return '\VT',string "FF">> return '\FF',string "CR">> return '\CR',string "SI">> return '\SI',string "DLE">> return '\DLE',string "DC1">> return '\DC1',string "DC2">> return '\DC2',string "DC3">> return '\DC3',string "DC4">> return '\DC4',string "NAK">> return '\NAK',string "SYN">> return '\SYN',string "ETB">> return '\ETB',string "CAN">> return '\CAN',string "EM">> return '\EM',string "SUB">> return '\SUB',string "ESC">> return '\ESC',string "FS">> return '\FS',string "GS">> return '\GS',string "RS">> return '\RS',string "US">> return '\US',string "SP">> return '\SP',string "DEL">> return '\DEL']-- ----------------------------------------------------------------------------- string literallexString::ReadP Lexeme lexString =do_<-char '"'body id wherebody f =do(c ,esc )<-lexStrItem ifc /='"'||esc thenbody (f . (c :))elselets =f ""inreturn (String s )lexStrItem =(lexEmpty >> lexStrItem )+++ lexCharE lexEmpty =do_<-char '\\'c <-get casec of'&'->doreturn ()_|isSpace c ->doskipSpaces ;_<-char '\\';return ()_->dopfail -- ----------------------------------------------------------------------------- Lexing numberstypeBase =InttypeDigits =[Int]lexNumber::ReadP Lexeme lexNumber =lexHexOct <++ -- First try for hex or octal 0x, 0o etc-- If that fails, try for a decimal numberlexDecNumber -- Start with ordinary digitslexHexOct::ReadP Lexeme lexHexOct =do_<-char '0'base <-lexBaseChar digits <-lexDigits base return (Number (MkNumber base digits ))lexBaseChar::ReadP Int-- Lex a single character indicating the base; fail if not therelexBaseChar =do{c <-get ;casec of'o'->return 8'O'->return 8'x'->return 16'X'->return 16_->pfail }lexDecNumber::ReadP Lexeme lexDecNumber =doxs <-lexDigits 10mFrac <-lexFrac <++ return Nothing mExp <-lexExp <++ return Nothing return (Number (MkDecimal xs mFrac mExp ))lexFrac::ReadP (Maybe Digits )-- Read the fractional part; fail if it doesn't-- start ".d" where d is a digitlexFrac =do_<-char '.'fraction <-lexDigits 10return (Just fraction )lexExp::ReadP (Maybe Integer)lexExp =do_<-char 'e'+++ char 'E'exp <-signedExp +++ lexInteger 10return (Just exp )wheresignedExp =doc <-char '-'+++ char '+'n <-lexInteger 10return (ifc =='-'then-n elsen )lexDigits::Int->ReadP Digits -- Lex a non-empty sequence of digits in specified baselexDigits base =dos <-look xs <-scan s id guard (not(null xs ))return xs wherescan (c :cs )f =casevalDig base c ofJust n ->do_<-get ;scan cs (f . (n :))Nothing ->doreturn (f [])scan[]f =doreturn (f [])lexInteger::Base ->ReadP IntegerlexInteger base =doxs <-lexDigits base return (val (fromIntegral base )xs )val::Num a =>a ->Digits ->a val =valSimple {-# RULES "val/Integer" val = valInteger #-}{-# INLINE [1] val #-}-- The following algorithm is only linear for types whose Num operations-- are in constant time.valSimple::(Num a ,Integral d )=>a ->[d ]->a valSimple base =go 0wherego r []=r gor (d :ds )=r' `seq`go r' ds wherer' =r * base + fromIntegral d {-# INLINE valSimple #-}-- A sub-quadratic algorithm for Integer. Pairs of adjacent radix b-- digits are combined into a single radix b^2 digit. This process is-- repeated until we are left with a single digit. This algorithm-- performs well only on large inputs, so we use the simple algorithm-- for smaller inputs.valInteger::Integer->Digits ->IntegervalInteger b0 ds0 =go b0 (length ds0 )$ map fromIntegral ds0 wherego __[]=0go__[d ]=d gob l ds |l >40=b' `seq`go b' l' (combine b ds' )|otherwise =valSimple b ds where-- ensure that we have an even number of digits-- before we call combine:ds' =ifeven l thends else0:ds b' =b * b l' =(l + 1)`quot `2combine b (d1 :d2 :ds )=d `seq`(d :combine b ds )whered =d1 * b + d2 combine_[]=[]combine_[_]=errorWithoutStackTrace "this should not happen"-- Calculate a Rational from the exponent [of 10 to multiply with],-- the integral part of the mantissa and the digits of the fractional-- part. Leaving the calculation of the power of 10 until the end,-- when we know the effective exponent, saves multiplications.-- More importantly, this way we need at most one gcd instead of three.---- frac was never used with anything but Integer and base 10, so-- those are hardcoded now (trivial to change if necessary).fracExp::Integer->Integer->Digits ->Rational fracExp exp mant []|exp <0=mant % (10^ (-exp ))|otherwise =fromInteger (mant * 10^ exp )fracExpexp mant (d :ds )=exp' `seq`mant' `seq`fracExp exp' mant' ds whereexp' =exp -1mant' =mant * 10+ fromIntegral d valDig::(Eqa ,Num a )=>a ->Char->Maybe IntvalDig 8c |'0'<=c &&c <='7'=Just (ord c -ord '0')|otherwise =Nothing valDig10c =valDecDig c valDig16c |'0'<=c &&c <='9'=Just (ord c -ord '0')|'a'<=c &&c <='f'=Just (ord c -ord 'a'+ 10)|'A'<=c &&c <='F'=Just (ord c -ord 'A'+ 10)|otherwise =Nothing valDig__=errorWithoutStackTrace "valDig: Bad base"valDecDig::Char->Maybe IntvalDecDig c |'0'<=c &&c <='9'=Just (ord c -ord '0')|otherwise =Nothing -- ------------------------------------------------------------------------ other numeric lexing functionsreadIntP::Num a =>a ->(Char->Bool)->(Char->Int)->ReadP a readIntP base isDigit valDigit =dos <-munch1 isDigit return (val base (map valDigit s )){-# SPECIALISE readIntP :: Integer -> (Char -> Bool) -> (Char -> Int) -> ReadP Integer #-}readIntP'::(Eqa ,Num a )=>a ->ReadP a readIntP' base =readIntP base isDigit valDigit whereisDigit c =maybe False(const True)(valDig base c )valDigit c =maybe 0id (valDig base c ){-# SPECIALISE readIntP' :: Integer -> ReadP Integer #-}readOctP,readDecP,readHexP::(Eqa ,Num a )=>ReadP a readOctP =readIntP' 8readDecP =readIntP' 10readHexP =readIntP' 16{-# SPECIALISE readOctP :: ReadP Integer #-}{-# SPECIALISE readDecP :: ReadP Integer #-}{-# SPECIALISE readHexP :: ReadP Integer #-}