Network/URI.hs

{-# LANGUAGE CPP #-}
--------------------------------------------------------------------------------
-- |
-- Module : Network.URI
-- Copyright : (c) 2004, Graham Klyne
-- License : BSD-style (see end of this file)
--
-- Maintainer : Graham Klyne <gk@ninebynine.org>
-- Stability : provisional
-- Portability : portable
--
-- This module defines functions for handling URIs. It presents substantially the
-- same interface as the older GHC Network.URI module, but is implemented using
-- Parsec rather than a Regex library that is not available with Hugs. The internal
-- representation of URI has been changed so that URI strings are more
-- completely preserved when round-tripping to a URI value and back.
--
-- In addition, four methods are provided for parsing different
-- kinds of URI string (as noted in RFC3986):
-- 'parseURI',
-- 'parseURIReference',
-- 'parseRelativeReference' and
-- 'parseAbsoluteURI'.
--
-- Further, four methods are provided for classifying different
-- kinds of URI string (as noted in RFC3986):
-- 'isURI',
-- 'isURIReference',
-- 'isRelativeReference' and
-- 'isAbsoluteURI'.
--
-- The long-standing official reference for URI handling was RFC2396 [1],
-- as updated by RFC 2732 [2], but this was replaced by a new specification,
-- RFC3986 [3] in January 2005. This latter specification has been used
-- as the primary reference for constructing the URI parser implemented
-- here, and it is intended that there is a direct relationship between
-- the syntax definition in that document and this parser implementation.
--
-- RFC 1808 [4] contains a number of test cases for relative URI handling.
-- Dan Connolly's Python module @uripath.py@ [5] also contains useful details
-- and test cases.
--
-- Some of the code has been copied from the previous GHC implementation,
-- but the parser is replaced with one that performs more complete
-- syntax checking of the URI itself, according to RFC3986 [3].
--
-- References
--
-- (1) <http://www.ietf.org/rfc/rfc2396.txt>
--
-- (2) <http://www.ietf.org/rfc/rfc2732.txt>
--
-- (3) <http://www.ietf.org/rfc/rfc3986.txt>
--
-- (4) <http://www.ietf.org/rfc/rfc1808.txt>
--
-- (5) <http://www.w3.org/2000/10/swap/uripath.py>
--
--------------------------------------------------------------------------------

module Network.URI
 (
 -- * The URI type
 URI(..)
 , URIAuth(..)
 , nullURI
 
 -- * Parsing
 , parseURI
 , parseURIReference
 , parseRelativeReference
 , parseAbsoluteURI
 
 -- * Test for strings containing various kinds of URI
 , isURI
 , isURIReference
 , isRelativeReference
 , isAbsoluteURI
 , isIPv6address
 , isIPv4address
 
 -- * Predicates
 , uriIsAbsolute
 , uriIsRelative
 
 -- * Relative URIs
 , relativeTo
 , nonStrictRelativeTo
 , relativeFrom
 
 -- * Operations on URI strings
 -- | Support for putting strings into URI-friendly
 -- escaped format and getting them back again.
 -- This can't be done transparently in all cases, because certain
 -- characters have different meanings in different kinds of URI.
 -- The URI spec [3], section 2.4, indicates that all URI components
 -- should be escaped before they are assembled as a URI:
 -- \"Once produced, a URI is always in its percent-encoded form\"
 , uriToString
 , isReserved, isUnreserved
 , isAllowedInURI, isUnescapedInURI
 , isUnescapedInURIComponent
 , escapeURIChar
 , escapeURIString
 , unEscapeString
 
 -- * URI Normalization functions
 , normalizeCase
 , normalizeEscape
 , normalizePathSegments
 
 -- * Deprecated functions
 , parseabsoluteURI
 , escapeString
 , reserved, unreserved
 , scheme, authority, path, query, fragment
 ) where

import Text.ParserCombinators.Parsec
 ( GenParser, ParseError
 , parse, (<|>), (<?>), try
 , option, many, many1, count, notFollowedBy
 , char, satisfy, oneOf, string, eof
 , unexpected
 )

import Control.Monad (MonadPlus(..))
import Data.Char (ord, chr, isHexDigit, toLower, toUpper, digitToInt)
import Data.Bits ((.|.),(.&.),shiftL,shiftR)
import Debug.Trace (trace)
import Numeric (showIntAtBase)

import Data.Typeable (Typeable)
#if MIN_VERSION_base(4,0,0)
import Data.Data (Data)
#else
import Data.Generics (Data)
#endif

------------------------------------------------------------
-- The URI datatype
------------------------------------------------------------

-- |Represents a general universal resource identifier using
-- its component parts.
--
-- For example, for the URI
--
-- > foo://anonymous@www.haskell.org:42/ghc?query#frag
--
-- the components are:
--
data URI = URI
 { uriScheme :: String -- ^ @foo:@
 , uriAuthority :: Maybe URIAuth -- ^ @\/\/anonymous\@www.haskell.org:42@
 , uriPath :: String -- ^ @\/ghc@
 , uriQuery :: String -- ^ @?query@
 , uriFragment :: String -- ^ @#frag@
 } deriving (Eq, Ord, Typeable, Data)

-- |Type for authority value within a URI
data URIAuth = URIAuth
 { uriUserInfo :: String -- ^ @anonymous\@@
 , uriRegName :: String -- ^ @www.haskell.org@
 , uriPort :: String -- ^ @:42@
 } deriving (Eq, Ord, Show, Typeable, Data)

-- |Blank URI
nullURI :: URI
nullURI = URI
 { uriScheme = ""
 , uriAuthority = Nothing
 , uriPath = ""
 , uriQuery = ""
 , uriFragment = ""
 }

-- URI as instance of Show. Note that for security reasons, the default
-- behaviour is to suppress any userinfo field (see RFC3986, section 7.5).
-- This can be overridden by using uriToString directly with first
-- argument @id@ (noting that this returns a ShowS value rather than a string).
--
-- [[[Another design would be to embed the userinfo mapping function in
-- the URIAuth value, with the default value suppressing userinfo formatting,
-- but providing a function to return a new URI value with userinfo
-- data exposed by show.]]]
--
instance Show URI where
 showsPrec _ = uriToString defaultUserInfoMap

defaultUserInfoMap :: String -> String
defaultUserInfoMap uinf = user++newpass
 where
 (user,pass) = break (==':') uinf
 newpass = if null pass || (pass == "@")
 || (pass == ":@")
 then pass
 else ":...@"

testDefaultUserInfoMap :: [Bool]
testDefaultUserInfoMap =
 [ defaultUserInfoMap "" == ""
 , defaultUserInfoMap "@" == "@"
 , defaultUserInfoMap "user@" == "user@"
 , defaultUserInfoMap "user:@" == "user:@"
 , defaultUserInfoMap "user:anonymous@" == "user:...@"
 , defaultUserInfoMap "user:pass@" == "user:...@"
 , defaultUserInfoMap "user:pass" == "user:...@"
 , defaultUserInfoMap "user:anonymous" == "user:...@"
 ]

------------------------------------------------------------
-- Parse a URI
------------------------------------------------------------

-- |Turn a string containing a URI into a 'URI'.
-- Returns 'Nothing' if the string is not a valid URI;
-- (an absolute URI with optional fragment identifier).
--
-- NOTE: this is different from the previous network.URI,
-- whose @parseURI@ function works like 'parseURIReference'
-- in this module.
--
parseURI :: String -> Maybe URI
parseURI = parseURIAny uri

-- |Parse a URI reference to a 'URI' value.
-- Returns 'Nothing' if the string is not a valid URI reference.
-- (an absolute or relative URI with optional fragment identifier).
--
parseURIReference :: String -> Maybe URI
parseURIReference = parseURIAny uriReference

-- |Parse a relative URI to a 'URI' value.
-- Returns 'Nothing' if the string is not a valid relative URI.
-- (a relative URI with optional fragment identifier).
--
parseRelativeReference :: String -> Maybe URI
parseRelativeReference = parseURIAny relativeRef

-- |Parse an absolute URI to a 'URI' value.
-- Returns 'Nothing' if the string is not a valid absolute URI.
-- (an absolute URI without a fragment identifier).
--
parseAbsoluteURI :: String -> Maybe URI
parseAbsoluteURI = parseURIAny absoluteURI

-- |Test if string contains a valid URI
-- (an absolute URI with optional fragment identifier).
--
isURI :: String -> Bool
isURI = isValidParse uri

-- |Test if string contains a valid URI reference
-- (an absolute or relative URI with optional fragment identifier).
--
isURIReference :: String -> Bool
isURIReference = isValidParse uriReference

-- |Test if string contains a valid relative URI
-- (a relative URI with optional fragment identifier).
--
isRelativeReference :: String -> Bool
isRelativeReference = isValidParse relativeRef

-- |Test if string contains a valid absolute URI
-- (an absolute URI without a fragment identifier).
--
isAbsoluteURI :: String -> Bool
isAbsoluteURI = isValidParse absoluteURI

-- |Test if string contains a valid IPv6 address
--
isIPv6address :: String -> Bool
isIPv6address = isValidParse ipv6address

-- |Test if string contains a valid IPv4 address
--
isIPv4address :: String -> Bool
isIPv4address = isValidParse ipv4address

-- |Test function: parse and reconstruct a URI reference
--
testURIReference :: String -> String
testURIReference uristr = show (parseAll uriReference "" uristr)

-- Helper function for turning a string into a URI
--
parseURIAny :: URIParser URI -> String -> Maybe URI
parseURIAny parser uristr = case parseAll parser "" uristr of
 Left _ -> Nothing
 Right u -> Just u

-- Helper function to test a string match to a parser
--
isValidParse :: URIParser a -> String -> Bool
isValidParse parser uristr = case parseAll parser "" uristr of
 -- Left e -> error (show e)
 Left _ -> False
 Right _ -> True

parseAll :: URIParser a -> String -> String -> Either ParseError a
parseAll parser filename uristr = parse newparser filename uristr
 where
 newparser =
 do { res <- parser
 ; eof
 ; return res
 }

------------------------------------------------------------
-- Predicates
------------------------------------------------------------

uriIsAbsolute :: URI -> Bool
uriIsAbsolute (URI {uriScheme = scheme}) = scheme /= ""

uriIsRelative :: URI -> Bool
uriIsRelative = not . uriIsAbsolute

------------------------------------------------------------
-- URI parser body based on Parsec elements and combinators
------------------------------------------------------------

-- Parser parser type.
-- Currently
type URIParser a = GenParser Char () a

-- RFC3986, section 2.1
--
-- Parse and return a 'pct-encoded' sequence
--
escaped :: URIParser String
escaped =
 do { char '%'
 ; h1 <- hexDigitChar
 ; h2 <- hexDigitChar
 ; return $ ['%',h1,h2]
 }

-- RFC3986, section 2.2
--
-- |Returns 'True' if the character is a \"reserved\" character in a
-- URI. To include a literal instance of one of these characters in a
-- component of a URI, it must be escaped.
--
isReserved :: Char -> Bool
isReserved c = isGenDelims c || isSubDelims c

isGenDelims :: Char -> Bool
isGenDelims c = c `elem` ":/?#[]@"

isSubDelims :: Char -> Bool
isSubDelims c = c `elem` "!$&'()*+,;="

genDelims :: URIParser String
genDelims = do { c <- satisfy isGenDelims ; return [c] }

subDelims :: URIParser String
subDelims = do { c <- satisfy isSubDelims ; return [c] }

-- RFC3986, section 2.3
--
-- |Returns 'True' if the character is an \"unreserved\" character in
-- a URI. These characters do not need to be escaped in a URI. The
-- only characters allowed in a URI are either \"reserved\",
-- \"unreserved\", or an escape sequence (@%@ followed by two hex digits).
--
isUnreserved :: Char -> Bool
isUnreserved c = isAlphaNumChar c || (c `elem` "-_.~")

unreservedChar :: URIParser String
unreservedChar = do { c <- satisfy isUnreserved ; return [c] }

-- RFC3986, section 3
--
-- URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
--
-- hier-part = "//" authority path-abempty
-- / path-abs
-- / path-rootless
-- / path-empty

uri :: URIParser URI
uri =
 do { us <- try uscheme
 -- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )
 -- ; up <- upath
 ; (ua,up) <- hierPart
 ; uq <- option "" ( do { char '?' ; uquery } )
 ; uf <- option "" ( do { char '#' ; ufragment } )
 ; return $ URI
 { uriScheme = us
 , uriAuthority = ua
 , uriPath = up
 , uriQuery = uq
 , uriFragment = uf
 }
 }

hierPart :: URIParser ((Maybe URIAuth),String)
hierPart =
 do { try (string "//")
 ; ua <- uauthority
 ; up <- pathAbEmpty
 ; return (ua,up)
 }
 <|> do { up <- pathAbs
 ; return (Nothing,up)
 }
 <|> do { up <- pathRootLess
 ; return (Nothing,up)
 }
 <|> do { return (Nothing,"")
 }

-- RFC3986, section 3.1

uscheme :: URIParser String
uscheme =
 do { s <- oneThenMany alphaChar (satisfy isSchemeChar)
 ; char ':'
 ; return $ s++":"
 }

-- RFC3986, section 3.2

uauthority :: URIParser (Maybe URIAuth)
uauthority =
 do { uu <- option "" (try userinfo)
 ; uh <- host
 ; up <- option "" port
 ; return $ Just $ URIAuth
 { uriUserInfo = uu
 , uriRegName = uh
 , uriPort = up
 }
 }

-- RFC3986, section 3.2.1

userinfo :: URIParser String
userinfo =
 do { uu <- many (uchar ";:&=+,ドル")
 ; char '@'
 ; return (concat uu ++"@")
 }

-- RFC3986, section 3.2.2

host :: URIParser String
host = ipLiteral <|> try ipv4address <|> regName

ipLiteral :: URIParser String
ipLiteral =
 do { char '['
 ; ua <- ( ipv6address <|> ipvFuture )
 ; char ']'
 ; return $ "[" ++ ua ++ "]"
 }
 <?> "IP address literal"

ipvFuture :: URIParser String
ipvFuture =
 do { char 'v'
 ; h <- hexDigitChar
 ; char '.'
 ; a <- many1 (satisfy isIpvFutureChar)
 ; return $ 'v':h:'.':a
 }

isIpvFutureChar :: Char -> Bool
isIpvFutureChar c = isUnreserved c || isSubDelims c || (c==';')

ipv6address :: URIParser String
ipv6address =
 try ( do
 { a2 <- count 6 h4c
 ; a3 <- ls32
 ; return $ concat a2 ++ a3
 } )
 <|> try ( do
 { string "::"
 ; a2 <- count 5 h4c
 ; a3 <- ls32
 ; return $ "::" ++ concat a2 ++ a3
 } )
 <|> try ( do
 { a1 <- opt_n_h4c_h4 0
 ; string "::"
 ; a2 <- count 4 h4c
 ; a3 <- ls32
 ; return $ a1 ++ "::" ++ concat a2 ++ a3
 } )
 <|> try ( do
 { a1 <- opt_n_h4c_h4 1
 ; string "::"
 ; a2 <- count 3 h4c
 ; a3 <- ls32
 ; return $ a1 ++ "::" ++ concat a2 ++ a3
 } )
 <|> try ( do
 { a1 <- opt_n_h4c_h4 2
 ; string "::"
 ; a2 <- count 2 h4c
 ; a3 <- ls32
 ; return $ a1 ++ "::" ++ concat a2 ++ a3
 } )
 <|> try ( do
 { a1 <- opt_n_h4c_h4 3
 ; string "::"
 ; a2 <- h4c
 ; a3 <- ls32
 ; return $ a1 ++ "::" ++ a2 ++ a3
 } )
 <|> try ( do
 { a1 <- opt_n_h4c_h4 4
 ; string "::"
 ; a3 <- ls32
 ; return $ a1 ++ "::" ++ a3
 } )
 <|> try ( do
 { a1 <- opt_n_h4c_h4 5
 ; string "::"
 ; a3 <- h4
 ; return $ a1 ++ "::" ++ a3
 } )
 <|> try ( do
 { a1 <- opt_n_h4c_h4 6
 ; string "::"
 ; return $ a1 ++ "::"
 } )
 <?> "IPv6 address"

opt_n_h4c_h4 :: Int -> URIParser String
opt_n_h4c_h4 n = option "" $
 do { a1 <- countMinMax 0 n h4c
 ; a2 <- h4
 ; return $ concat a1 ++ a2
 }

ls32 :: URIParser String
ls32 = try ( do
 { a1 <- h4c
 ; a2 <- h4
 ; return (a1++a2)
 } )
 <|> ipv4address

h4c :: URIParser String
h4c = try $
 do { a1 <- h4
 ; char ':'
 ; notFollowedBy (char ':')
 ; return $ a1 ++ ":"
 }

h4 :: URIParser String
h4 = countMinMax 1 4 hexDigitChar

ipv4address :: URIParser String
ipv4address =
 do { a1 <- decOctet ; char '.'
 ; a2 <- decOctet ; char '.'
 ; a3 <- decOctet ; char '.'
 ; a4 <- decOctet
 ; notFollowedBy regName
 ; return $ a1++"."++a2++"."++a3++"."++a4
 }
 <?> "IPv4 Address"

decOctet :: URIParser String
decOctet =
 do { a1 <- countMinMax 1 3 digitChar
 ; if (read a1 :: Integer) > 255 then
 fail "Decimal octet value too large"
 else
 return a1
 }

regName :: URIParser String
regName =
 do { ss <- countMinMax 0 255 ( unreservedChar <|> escaped <|> subDelims )
 ; return $ concat ss
 }
 <?> "Registered name"

-- RFC3986, section 3.2.3

port :: URIParser String
port =
 do { char ':'
 ; p <- many digitChar
 ; return (':':p)
 }

--
-- RFC3986, section 3.3
--
-- path = path-abempty ; begins with "/" or is empty
-- / path-abs ; begins with "/" but not "//"
-- / path-noscheme ; begins with a non-colon segment
-- / path-rootless ; begins with a segment
-- / path-empty ; zero characters
--
-- path-abempty = *( "/" segment )
-- path-abs = "/" [ segment-nz *( "/" segment ) ]
-- path-noscheme = segment-nzc *( "/" segment )
-- path-rootless = segment-nz *( "/" segment )
-- path-empty = 0<pchar>
--
-- segment = *pchar
-- segment-nz = 1*pchar
-- segment-nzc = 1*( unreserved / pct-encoded / sub-delims / "@" )
--
-- pchar = unreserved / pct-encoded / sub-delims / ":" / "@"

{-
upath :: URIParser String
upath = pathAbEmpty
 <|> pathAbs
 <|> pathNoScheme
 <|> pathRootLess
 <|> pathEmpty
-}

pathAbEmpty :: URIParser String
pathAbEmpty =
 do { ss <- many slashSegment
 ; return $ concat ss
 }

pathAbs :: URIParser String
pathAbs =
 do { char '/'
 ; ss <- option "" pathRootLess
 ; return $ '/':ss
 }

pathNoScheme :: URIParser String
pathNoScheme =
 do { s1 <- segmentNzc
 ; ss <- many slashSegment
 ; return $ concat (s1:ss)
 }

pathRootLess :: URIParser String
pathRootLess =
 do { s1 <- segmentNz
 ; ss <- many slashSegment
 ; return $ concat (s1:ss)
 }

slashSegment :: URIParser String
slashSegment =
 do { char '/'
 ; s <- segment
 ; return ('/':s)
 }

segment :: URIParser String
segment =
 do { ps <- many pchar
 ; return $ concat ps
 }

segmentNz :: URIParser String
segmentNz =
 do { ps <- many1 pchar
 ; return $ concat ps
 }

segmentNzc :: URIParser String
segmentNzc =
 do { ps <- many1 (uchar "@")
 ; return $ concat ps
 }

pchar :: URIParser String
pchar = uchar ":@"

-- helper function for pchar and friends
uchar :: String -> URIParser String
uchar extras =
 unreservedChar
 <|> escaped
 <|> subDelims
 <|> do { c <- oneOf extras ; return [c] }

-- RFC3986, section 3.4

uquery :: URIParser String
uquery =
 do { ss <- many $ uchar (":@"++"/?")
 ; return $ '?':concat ss
 }

-- RFC3986, section 3.5

ufragment :: URIParser String
ufragment =
 do { ss <- many $ uchar (":@"++"/?")
 ; return $ '#':concat ss
 }

-- Reference, Relative and Absolute URI forms
--
-- RFC3986, section 4.1

uriReference :: URIParser URI
uriReference = uri <|> relativeRef

-- RFC3986, section 4.2
--
-- relative-URI = relative-part [ "?" query ] [ "#" fragment ]
--
-- relative-part = "//" authority path-abempty
-- / path-abs
-- / path-noscheme
-- / path-empty

relativeRef :: URIParser URI
relativeRef =
 do { notMatching uscheme
 -- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )
 -- ; up <- upath
 ; (ua,up) <- relativePart
 ; uq <- option "" ( do { char '?' ; uquery } )
 ; uf <- option "" ( do { char '#' ; ufragment } )
 ; return $ URI
 { uriScheme = ""
 , uriAuthority = ua
 , uriPath = up
 , uriQuery = uq
 , uriFragment = uf
 }
 }

relativePart :: URIParser ((Maybe URIAuth),String)
relativePart =
 do { try (string "//")
 ; ua <- uauthority
 ; up <- pathAbEmpty
 ; return (ua,up)
 }
 <|> do { up <- pathAbs
 ; return (Nothing,up)
 }
 <|> do { up <- pathNoScheme
 ; return (Nothing,up)
 }
 <|> do { return (Nothing,"")
 }

-- RFC3986, section 4.3

absoluteURI :: URIParser URI
absoluteURI =
 do { us <- uscheme
 -- ; ua <- option Nothing ( do { try (string "//") ; uauthority } )
 -- ; up <- upath
 ; (ua,up) <- hierPart
 ; uq <- option "" ( do { char '?' ; uquery } )
 ; return $ URI
 { uriScheme = us
 , uriAuthority = ua
 , uriPath = up
 , uriQuery = uq
 , uriFragment = ""
 }
 }

-- Imports from RFC 2234

 -- NOTE: can't use isAlphaNum etc. because these deal with ISO 8859
 -- (and possibly Unicode!) chars.
 -- [[[Above was a comment originally in GHC Network/URI.hs:
 -- when IRIs are introduced then most codepoints above 128(?) should
 -- be treated as unreserved, and higher codepoints for letters should
 -- certainly be allowed.
 -- ]]]

isAlphaChar :: Char -> Bool
isAlphaChar c = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')

isDigitChar :: Char -> Bool
isDigitChar c = (c >= '0' && c <= '9')

isAlphaNumChar :: Char -> Bool
isAlphaNumChar c = isAlphaChar c || isDigitChar c

isHexDigitChar :: Char -> Bool
isHexDigitChar c = isHexDigit c

isSchemeChar :: Char -> Bool
isSchemeChar c = (isAlphaNumChar c) || (c `elem` "+-.")

alphaChar :: URIParser Char
alphaChar = satisfy isAlphaChar -- or: Parsec.letter ?

digitChar :: URIParser Char
digitChar = satisfy isDigitChar -- or: Parsec.digit ?

alphaNumChar :: URIParser Char
alphaNumChar = satisfy isAlphaNumChar

hexDigitChar :: URIParser Char
hexDigitChar = satisfy isHexDigitChar -- or: Parsec.hexDigit ?

-- Additional parser combinators for common patterns

oneThenMany :: GenParser t s a -> GenParser t s a -> GenParser t s [a]
oneThenMany p1 pr =
 do { a1 <- p1
 ; ar <- many pr
 ; return (a1:ar)
 }

countMinMax :: Int -> Int -> GenParser t s a -> GenParser t s [a]
countMinMax m n p | m > 0 =
 do { a1 <- p
 ; ar <- countMinMax (m-1) (n-1) p
 ; return (a1:ar)
 }
countMinMax _ n _ | n <= 0 = return []
countMinMax _ n p = option [] $
 do { a1 <- p
 ; ar <- countMinMax 0 (n-1) p
 ; return (a1:ar)
 }

notMatching :: Show a => GenParser tok st a -> GenParser tok st ()
notMatching p = do { a <- try p ; unexpected (show a) } <|> return ()

------------------------------------------------------------
-- Reconstruct a URI string
------------------------------------------------------------
--
-- |Turn a 'URI' into a string.
--
-- Uses a supplied function to map the userinfo part of the URI.
--
-- The Show instance for URI uses a mapping that hides any password
-- that may be present in the URI. Use this function with argument @id@
-- to preserve the password in the formatted output.
--
uriToString :: (String->String) -> URI -> ShowS
uriToString userinfomap URI { uriScheme=myscheme
 , uriAuthority=myauthority
 , uriPath=mypath
 , uriQuery=myquery
 , uriFragment=myfragment
 } =
 (myscheme++) . (uriAuthToString userinfomap myauthority)
 . (mypath++) . (myquery++) . (myfragment++)

uriAuthToString :: (String->String) -> (Maybe URIAuth) -> ShowS
uriAuthToString _ Nothing = id -- shows ""
uriAuthToString userinfomap
 (Just URIAuth { uriUserInfo = myuinfo
 , uriRegName = myregname
 , uriPort = myport
 } ) =
 ("//"++) . (if null myuinfo then id else ((userinfomap myuinfo)++))
 . (myregname++)
 . (myport++)

------------------------------------------------------------
-- Character classes
------------------------------------------------------------

-- | Returns 'True' if the character is allowed in a URI.
--
isAllowedInURI :: Char -> Bool
isAllowedInURI c = isReserved c || isUnreserved c || c == '%' -- escape char

-- | Returns 'True' if the character is allowed unescaped in a URI.
--
isUnescapedInURI :: Char -> Bool
isUnescapedInURI c = isReserved c || isUnreserved c

-- | Returns 'True' if the character is allowed unescaped in a URI component.
--
isUnescapedInURIComponent :: Char -> Bool
isUnescapedInURIComponent c = not (isReserved c || not (isUnescapedInURI c))

------------------------------------------------------------
-- Escape sequence handling
------------------------------------------------------------

-- |Escape character if supplied predicate is not satisfied,
-- otherwise return character as singleton string.
--
escapeURIChar :: (Char->Bool) -> Char -> String
escapeURIChar p c
 | p c = [c]
 | otherwise = concatMap (\i -> '%' : myShowHex i "") (utf8EncodeChar c)
 where
 myShowHex :: Int -> ShowS
 myShowHex n r = case showIntAtBase 16 (toChrHex) n r of
 [] -> "00"
 [x] -> ['0',x]
 cs -> cs
 toChrHex d
 | d < 10 = chr (ord '0' + fromIntegral d)
 | otherwise = chr (ord 'A' + fromIntegral (d - 10))

-- From http://hackage.haskell.org/package/utf8-string 
-- by Eric Mertens, BSD3
-- Returns [Int] for use with showIntAtBase
utf8EncodeChar :: Char -> [Int]
utf8EncodeChar = map fromIntegral . go . ord
 where
 go oc
 | oc <= 0x7f = [oc]

 | oc <= 0x7ff = [ 0xc0 + (oc `shiftR` 6)
 , 0x80 + oc .&. 0x3f
 ]

 | oc <= 0xffff = [ 0xe0 + (oc `shiftR` 12)
 , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
 , 0x80 + oc .&. 0x3f
 ]
 | otherwise = [ 0xf0 + (oc `shiftR` 18)
 , 0x80 + ((oc `shiftR` 12) .&. 0x3f)
 , 0x80 + ((oc `shiftR` 6) .&. 0x3f)
 , 0x80 + oc .&. 0x3f
 ]

-- |Can be used to make a string valid for use in a URI.
--
escapeURIString
 :: (Char->Bool) -- ^ a predicate which returns 'False'
 -- if the character should be escaped
 -> String -- ^ the string to process
 -> String -- ^ the resulting URI string
escapeURIString p s = concatMap (escapeURIChar p) s

-- |Turns all instances of escaped characters in the string back
-- into literal characters.
--
unEscapeString :: String -> String
unEscapeString [] = ""
unEscapeString s@(c:cs) = case unEscapeByte s of
 Just (byte, rest) -> unEscapeUtf8 byte rest
 Nothing -> c : unEscapeString cs

unEscapeByte :: String -> Maybe (Int, String)
unEscapeByte ('%':x1:x2:s) | isHexDigit x1 && isHexDigit x2 =
 Just (digitToInt x1 * 16 + digitToInt x2, s)
unEscapeByte _ = Nothing

-- Adapted from http://hackage.haskell.org/package/utf8-string 
-- by Eric Mertens, BSD3
unEscapeUtf8 :: Int -> String -> String
unEscapeUtf8 c rest
 | c < 0x80 = chr c : unEscapeString rest
 | c < 0xc0 = replacement_character : unEscapeString rest
 | c < 0xe0 = multi1
 | c < 0xf0 = multi_byte 2 0xf 0x800
 | c < 0xf8 = multi_byte 3 0x7 0x10000
 | c < 0xfc = multi_byte 4 0x3 0x200000
 | c < 0xfe = multi_byte 5 0x1 0x4000000
 | otherwise = replacement_character : unEscapeString rest
 where
 replacement_character = '\xfffd'
 multi1 = case unEscapeByte rest of
 Just (c1, ds) | c1 .&. 0xc0 == 0x80 ->
 let d = ((fromEnum c .&. 0x1f) `shiftL` 6) .|. fromEnum (c1 .&. 0x3f)
 in if d >= 0x000080 then toEnum d : unEscapeString ds
 else replacement_character : unEscapeString ds
 _ -> replacement_character : unEscapeString rest

 multi_byte i mask overlong =
 aux i rest (unEscapeByte rest) (c .&. mask)
 where
 aux 0 rs _ acc
 | overlong <= acc && acc <= 0x10ffff &&
 (acc < 0xd800 || 0xdfff < acc) &&
 (acc < 0xfffe || 0xffff < acc) = chr acc : unEscapeString rs
 | otherwise = replacement_character : unEscapeString rs

 aux n _ (Just (r, rs)) acc
 | r .&. 0xc0 == 0x80 = aux (n-1) rs (unEscapeByte rs)
 $! shiftL acc 6 .|. (r .&. 0x3f)

 aux _ rs _ _ = replacement_character : unEscapeString rs

------------------------------------------------------------
-- Resolving a relative URI relative to a base URI
------------------------------------------------------------

-- |Returns a new 'URI' which represents the value of the
-- first 'URI' interpreted as relative to the second 'URI'.
-- For example:
--
-- > "foo" `relativeTo` "http://bar.org/" = "http://bar.org/foo"
-- > "http:foo" `nonStrictRelativeTo` "http://bar.org/" = "http://bar.org/foo"
--
-- Algorithm from RFC3986 [3], section 5.2.2
--

nonStrictRelativeTo :: URI -> URI -> URI
nonStrictRelativeTo ref base = relativeTo ref' base
 where
 ref' = if uriScheme ref == uriScheme base
 then ref { uriScheme="" }
 else ref

isDefined :: ( MonadPlus m, Eq (m a) ) => m a -> Bool
isDefined a = a /= mzero

-- | Returns a new 'URI' which represents the value of the first 'URI'
-- interpreted as relative to the second 'URI'.
--
-- Algorithm from RFC3986 [3], section 5.2
relativeTo :: URI -> URI -> URI
relativeTo ref base
 | isDefined ( uriScheme ref ) =
 just_segments ref
 | isDefined ( uriAuthority ref ) =
 just_segments ref { uriScheme = uriScheme base }
 | isDefined ( uriPath ref ) =
 if (head (uriPath ref) == '/') then
 just_segments ref
 { uriScheme = uriScheme base
 , uriAuthority = uriAuthority base
 }
 else
 just_segments ref
 { uriScheme = uriScheme base
 , uriAuthority = uriAuthority base
 , uriPath = mergePaths base ref
 }
 | isDefined ( uriQuery ref ) =
 just_segments ref
 { uriScheme = uriScheme base
 , uriAuthority = uriAuthority base
 , uriPath = uriPath base
 }
 | otherwise =
 just_segments ref
 { uriScheme = uriScheme base
 , uriAuthority = uriAuthority base
 , uriPath = uriPath base
 , uriQuery = uriQuery base
 }
 where
 just_segments u =
 u { uriPath = removeDotSegments (uriPath u) }
 mergePaths b r
 | isDefined (uriAuthority b) && null pb = '/':pr
 | otherwise = dropLast pb ++ pr
 where
 pb = uriPath b
 pr = uriPath r
 dropLast = fst . splitLast -- reverse . dropWhile (/='/') . reverse

-- Remove dot segments, but protect leading '/' character
removeDotSegments :: String -> String
removeDotSegments ('/':ps) = '/':elimDots ps []
removeDotSegments ps = elimDots ps []

-- Second arg accumulates segments processed so far in reverse order
elimDots :: String -> [String] -> String
-- elimDots ps rs | traceVal "\nps " ps $ traceVal "rs " rs $ False = error ""
elimDots [] [] = ""
elimDots [] rs = concat (reverse rs)
elimDots ( '.':'/':ps) rs = elimDots ps rs
elimDots ( '.':[] ) rs = elimDots [] rs
elimDots ( '.':'.':'/':ps) rs = elimDots ps (drop 1 rs)
elimDots ( '.':'.':[] ) rs = elimDots [] (drop 1 rs)
elimDots ps rs = elimDots ps1 (r:rs)
 where
 (r,ps1) = nextSegment ps

-- Returns the next segment and the rest of the path from a path string.
-- Each segment ends with the next '/' or the end of string.
--
nextSegment :: String -> (String,String)
nextSegment ps =
 case break (=='/') ps of
 (r,'/':ps1) -> (r++"/",ps1)
 (r,_) -> (r,[])

-- Split last (name) segment from path, returning (path,name)
splitLast :: String -> (String,String)
splitLast p = (reverse revpath,reverse revname)
 where
 (revname,revpath) = break (=='/') $ reverse p

------------------------------------------------------------
-- Finding a URI relative to a base URI
------------------------------------------------------------

-- |Returns a new 'URI' which represents the relative location of
-- the first 'URI' with respect to the second 'URI'. Thus, the
-- values supplied are expected to be absolute URIs, and the result
-- returned may be a relative URI.
--
-- Example:
--
-- > "http://example.com/Root/sub1/name2#frag"
-- > `relativeFrom` "http://example.com/Root/sub2/name2#frag"
-- > == "../sub1/name2#frag"
--
-- There is no single correct implementation of this function,
-- but any acceptable implementation must satisfy the following:
--
-- > (uabs `relativeFrom` ubase) `relativeTo` ubase == uabs
--
-- For any valid absolute URI.
-- (cf. <http://lists.w3.org/Archives/Public/uri/2003Jan/0008.html>
-- <http://lists.w3.org/Archives/Public/uri/2003Jan/0005.html>)
--
relativeFrom :: URI -> URI -> URI
relativeFrom uabs base
 | diff uriScheme uabs base = uabs
 | diff uriAuthority uabs base = uabs { uriScheme = "" }
 | diff uriPath uabs base = uabs
 { uriScheme = ""
 , uriAuthority = Nothing
 , uriPath = relPathFrom (removeBodyDotSegments $ uriPath uabs)
 (removeBodyDotSegments $ uriPath base)
 }
 | diff uriQuery uabs base = uabs
 { uriScheme = ""
 , uriAuthority = Nothing
 , uriPath = ""
 }
 | otherwise = uabs -- Always carry fragment from uabs
 { uriScheme = ""
 , uriAuthority = Nothing
 , uriPath = ""
 , uriQuery = ""
 }
 where
 diff :: Eq b => (a -> b) -> a -> a -> Bool
 diff sel u1 u2 = sel u1 /= sel u2
 -- Remove dot segments except the final segment
 removeBodyDotSegments p = removeDotSegments p1 ++ p2
 where
 (p1,p2) = splitLast p

relPathFrom :: String -> String -> String
relPathFrom [] _ = "/"
relPathFrom pabs [] = pabs
relPathFrom pabs base = -- Construct a relative path segments
 if sa1 == sb1 -- if the paths share a leading segment
 then if (sa1 == "/") -- other than a leading '/'
 then if (sa2 == sb2)
 then relPathFrom1 ra2 rb2
 else pabs
 else relPathFrom1 ra1 rb1
 else pabs
 where
 (sa1,ra1) = nextSegment pabs
 (sb1,rb1) = nextSegment base
 (sa2,ra2) = nextSegment ra1
 (sb2,rb2) = nextSegment rb1

-- relPathFrom1 strips off trailing names from the supplied paths,
-- and calls difPathFrom to find the relative path from base to
-- target
relPathFrom1 :: String -> String -> String
relPathFrom1 pabs base = relName
 where
 (sa,na) = splitLast pabs
 (sb,nb) = splitLast base
 rp = relSegsFrom sa sb
 relName = if null rp then
 if (na == nb) then ""
 else if protect na then "./"++na
 else na
 else
 rp++na
 -- Precede name with some path if it is null or contains a ':'
 protect s = null s || ':' `elem` s

-- relSegsFrom discards any common leading segments from both paths,
-- then invokes difSegsFrom to calculate a relative path from the end
-- of the base path to the end of the target path.
-- The final name is handled separately, so this deals only with
-- "directory" segtments.
--
relSegsFrom :: String -> String -> String
{-
relSegsFrom sabs base
 | traceVal "\nrelSegsFrom\nsabs " sabs $ traceVal "base " base $
 False = error ""
-}
relSegsFrom [] [] = "" -- paths are identical
relSegsFrom sabs base =
 if sa1 == sb1
 then relSegsFrom ra1 rb1
 else difSegsFrom sabs base
 where
 (sa1,ra1) = nextSegment sabs
 (sb1,rb1) = nextSegment base

-- difSegsFrom calculates a path difference from base to target,
-- not including the final name at the end of the path
-- (i.e. results always ends with '/')
--
-- This function operates under the invariant that the supplied
-- value of sabs is the desired path relative to the beginning of
-- base. Thus, when base is empty, the desired path has been found.
--
difSegsFrom :: String -> String -> String
{-
difSegsFrom sabs base
 | traceVal "\ndifSegsFrom\nsabs " sabs $ traceVal "base " base $
 False = error ""
-}
difSegsFrom sabs "" = sabs
difSegsFrom sabs base = difSegsFrom ("../"++sabs) (snd $ nextSegment base)

------------------------------------------------------------
-- Other normalization functions
------------------------------------------------------------

-- |Case normalization; cf. RFC3986 section 6.2.2.1
-- NOTE: authority case normalization is not performed
--
normalizeCase :: String -> String
normalizeCase uristr = ncScheme uristr
 where
 ncScheme (':':cs) = ':':ncEscape cs
 ncScheme (c:cs) | isSchemeChar c = toLower c:ncScheme cs
 ncScheme _ = ncEscape uristr -- no scheme present
 ncEscape ('%':h1:h2:cs) = '%':toUpper h1:toUpper h2:ncEscape cs
 ncEscape (c:cs) = c:ncEscape cs
 ncEscape [] = []

-- |Encoding normalization; cf. RFC3986 section 6.2.2.2
--
normalizeEscape :: String -> String
normalizeEscape ('%':h1:h2:cs)
 | isHexDigit h1 && isHexDigit h2 && isUnreserved escval =
 escval:normalizeEscape cs
 where
 escval = chr (digitToInt h1*16+digitToInt h2)
normalizeEscape (c:cs) = c:normalizeEscape cs
normalizeEscape [] = []

-- |Path segment normalization; cf. RFC3986 section 6.2.2.4
--
normalizePathSegments :: String -> String
normalizePathSegments uristr = normstr juri
 where
 juri = parseURI uristr
 normstr Nothing = uristr
 normstr (Just u) = show (normuri u)
 normuri u = u { uriPath = removeDotSegments (uriPath u) }

------------------------------------------------------------
-- Local trace helper functions
------------------------------------------------------------

traceShow :: Show a => String -> a -> a
traceShow msg x = trace (msg ++ show x) x

traceVal :: Show a => String -> a -> b -> b
traceVal msg x y = trace (msg ++ show x) y

------------------------------------------------------------
-- Deprecated functions
------------------------------------------------------------

{-# DEPRECATED parseabsoluteURI "use parseAbsoluteURI" #-}
parseabsoluteURI :: String -> Maybe URI
parseabsoluteURI = parseAbsoluteURI

{-# DEPRECATED escapeString "use escapeURIString, and note the flipped arguments" #-}
escapeString :: String -> (Char->Bool) -> String
escapeString = flip escapeURIString

{-# DEPRECATED reserved "use isReserved" #-}
reserved :: Char -> Bool
reserved = isReserved

{-# DEPRECATED unreserved "use isUnreserved" #-}
unreserved :: Char -> Bool
unreserved = isUnreserved

-- Additional component access functions for backward compatibility

{-# DEPRECATED scheme "use uriScheme" #-}
scheme :: URI -> String
scheme = orNull init . uriScheme

{-# DEPRECATED authority "use uriAuthority, and note changed functionality" #-}
authority :: URI -> String
authority = dropss . ($"") . uriAuthToString id . uriAuthority
 where
 -- Old-style authority component does not include leading '//'
 dropss ('/':'/':s) = s
 dropss s = s

{-# DEPRECATED path "use uriPath" #-}
path :: URI -> String
path = uriPath

{-# DEPRECATED query "use uriQuery, and note changed functionality" #-}
query :: URI -> String
query = orNull tail . uriQuery

{-# DEPRECATED fragment "use uriFragment, and note changed functionality" #-}
fragment :: URI -> String
fragment = orNull tail . uriFragment

orNull :: ([a]->[a]) -> [a] -> [a]
orNull _ [] = []
orNull f as = f as

--------------------------------------------------------------------------------
--
-- Copyright (c) 2004, G. KLYNE. All rights reserved.
-- Distributed as free software under the following license.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
--
-- - Redistributions of source code must retain the above copyright notice,
-- this list of conditions and the following disclaimer.
--
-- - Redistributions in binary form must reproduce the above copyright
-- notice, this list of conditions and the following disclaimer in the
-- documentation and/or other materials provided with the distribution.
--
-- - Neither name of the copyright holders nor the names of its
-- contributors may be used to endorse or promote products derived from
-- this software without specific prior written permission.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND THE CONTRIBUTORS
-- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-- HOLDERS OR THE CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-- INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-- BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-- OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
-- TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
-- USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--
--------------------------------------------------------------------------------