src\Development\Shake\FilePattern.hs

{-# LANGUAGE PatternGuards, ViewPatterns #-}

module Development.Shake.FilePattern(
 -- * Primitive API, as exposed
 FilePattern, (?==), (<//>),
 -- * Optimisation opportunities
 simple,
 -- * Multipattern file rules
 compatible, extract, substitute,
 -- * Accelerated searching
 Walk(..), walk,
 -- * Testing only
 internalTest, isRelativePath, isRelativePattern
 ) where

import Development.Shake.Errors
import System.FilePath(isPathSeparator)
import Data.List.Extra
import Control.Applicative
import Control.Monad
import Data.Char
import Data.Tuple.Extra
import Data.Maybe
import System.Info.Extra
import Prelude


-- | A type synonym for file patterns, containing @\/\/@ and @*@. For the syntax
-- and semantics of 'FilePattern' see '?=='.
--
-- Most 'normaliseEx'd 'FilePath' values are suitable as 'FilePattern' values which match
-- only that specific file. On Windows @\\@ is treated as equivalent to @\/@.
--
-- You can write 'FilePattern' values as a literal string, or build them
-- up using the operators 'Development.Shake.FilePath.<.>', 'Development.Shake.FilePath.</>'
-- and 'Development.Shake.<//>'. However, beware that:
--
-- * On Windows, use 'Development.Shake.FilePath.<.>' from "Development.Shake.FilePath" instead of from
-- "System.FilePath" - otherwise @\"\/\/*\" \<.\> exe@ results in @\"\/\/*\\\\.exe\"@.
--
-- * If the second argument of 'Development.Shake.FilePath.</>' has a leading path separator (namely @\/@)
-- then the second argument will be returned.
type FilePattern = String

infixr 5 <//>

 -- | Join two 'FilePattern' values by inserting two @\/@ characters between them.
-- Will first remove any trailing path separators on the first argument, and any leading
-- separators on the second.
--
-- > "dir" <//> "*" == "dir//*"
(<//>) :: FilePattern -> FilePattern -> FilePattern
a <//> b = dropWhileEnd isPathSeparator a ++ "//" ++ dropWhile isPathSeparator b


---------------------------------------------------------------------
-- PATTERNS

data Pat = Lit String -- ^ foo
 | Star -- ^ /*/
 | Skip -- ^ //
 | Skip1 -- ^ //, but must be at least 1 element
 | Stars String [String] String -- ^ *foo*, prefix (fixed), infix floaters, suffix
 -- e.g. *foo*bar = Stars "" ["foo"] "bar"
 deriving (Show,Eq,Ord)

isLit Lit{} = True; isLit _ = False
fromLit (Lit x) = x


data Lexeme = Str String | Slash | SlashSlash

lexer :: FilePattern -> [Lexeme]
lexer "" = []
lexer (x1:x2:xs) | isPathSeparator x1, isPathSeparator x2 = SlashSlash : lexer xs
lexer (x1:xs) | isPathSeparator x1 = Slash : lexer xs
lexer xs = Str a : lexer b
 where (a,b) = break isPathSeparator xs


-- | Parse a FilePattern. All optimisations I can think of are invalid because they change the extracted expressions.
parse :: FilePattern -> [Pat]
parse = f False True . lexer
 where
 -- str = I have ever seen a Str go past (equivalent to "can I be satisfied by no paths")
 -- slash = I am either at the start, or my previous character was Slash
 f str slash [] = [Lit "" | slash]
 f str slash (Str "**":xs) = Skip : f True False xs
 f str slash (Str x:xs) = parseLit x : f True False xs
 f str slash (SlashSlash:Slash:xs) | not str = Skip1 : f str True xs
 f str slash (SlashSlash:xs) = Skip : f str False xs
 f str slash (Slash:xs) = [Lit "" | not str] ++ f str True xs


parseLit :: String -> Pat
parseLit "*" = Star
parseLit x = case split (== '*') x of
 [x] -> Lit x
 pre:xs | Just (mid,post) <- unsnoc xs -> Stars pre mid post


internalTest :: IO ()
internalTest = do
 let x # y = when (parse x /= y) $ fail $ show ("FilePattern.internalTest",x,parse x,y)
 "" # [Lit ""]
 "x" # [Lit "x"]
 "/" # [Lit "",Lit ""]
 "x/" # [Lit "x",Lit ""]
 "/x" # [Lit "",Lit "x"]
 "x/y" # [Lit "x",Lit "y"]
 "//" # [Skip]
 "**" # [Skip]
 "//x" # [Skip, Lit "x"]
 "**/x" # [Skip, Lit "x"]
 "x//" # [Lit "x", Skip]
 "x/**" # [Lit "x", Skip]
 "x//y" # [Lit "x",Skip, Lit "y"]
 "x/**/y" # [Lit "x",Skip, Lit "y"]
 "///" # [Skip1, Lit ""]
 "**/**" # [Skip,Skip]
 "**/**/" # [Skip, Skip, Lit ""]
 "///x" # [Skip1, Lit "x"]
 "**/x" # [Skip, Lit "x"]
 "x///" # [Lit "x", Skip, Lit ""]
 "x/**/" # [Lit "x", Skip, Lit ""]
 "x///y" # [Lit "x",Skip, Lit "y"]
 "x/**/y" # [Lit "x",Skip, Lit "y"]
 "////" # [Skip, Skip]
 "**/**/**" # [Skip, Skip, Skip]
 "////x" # [Skip, Skip, Lit "x"]
 "x////" # [Lit "x", Skip, Skip]
 "x////y" # [Lit "x",Skip, Skip, Lit "y"]
 "**//x" # [Skip, Skip, Lit "x"]


-- | Optimisations that may change the matched expressions
optimise :: [Pat] -> [Pat]
optimise (Skip:Skip:xs) = optimise $ Skip:xs
optimise (Skip:Star:xs) = optimise $ Skip1:xs
optimise (Star:Skip:xs) = optimise $ Skip1:xs
optimise (x:xs) = x : optimise xs
optimise [] =[]


-- | A 'FilePattern' that will only match 'isRelativePath' values.
isRelativePattern :: FilePattern -> Bool
isRelativePattern ('*':'*':xs)
 | [] <- xs = True
 | x:xs <- xs, isPathSeparator x = True
isRelativePattern _ = False

-- | A non-absolute 'FilePath'.
isRelativePath :: FilePath -> Bool
isRelativePath (x:_) | isPathSeparator x = False
isRelativePath (x:':':_) | isWindows, isAlpha x = False
isRelativePath _ = True


-- | Given a pattern, and a list of path components, return a list of all matches
-- (for each wildcard in order, what the wildcard matched).
match :: [Pat] -> [String] -> [[String]]
match (Skip:xs) (y:ys) = map ("":) (match xs (y:ys)) ++ match (Skip1:xs) (y:ys)
match (Skip1:xs) (y:ys) = [(y++"/"++r):rs | r:rs <- match (Skip:xs) ys]
match (Skip:xs) [] = map ("":) $ match xs []
match (Star:xs) (y:ys) = map (y:) $ match xs ys
match (Lit x:xs) (y:ys) | x == y = match xs ys
match (x@Stars{}:xs) (y:ys) | Just rs <- matchStars x y = map (rs ++) $ match xs ys
match [] [] = [[]]
match _ _ = []


matchOne :: Pat -> String -> Bool
matchOne (Lit x) y = x == y
matchOne x@Stars{} y = isJust $ matchStars x y
matchOne Star _ = True


-- Only return the first (all patterns left-most) valid star matching
matchStars :: Pat -> String -> Maybe [String]
matchStars (Stars pre mid post) x = do
 x <- stripPrefix pre x
 x <- if null post then Just x else stripSuffix post x
 stripInfixes mid x
 where
 stripInfixes [] x = Just [x]
 stripInfixes (m:ms) x = do
 (a,x) <- stripInfix m x
 (a:) <$> stripInfixes ms x


-- | Match a 'FilePattern' against a 'FilePath', There are three special forms:
--
-- * @*@ matches an entire path component, excluding any separators.
--
-- * @\/\/@ matches an arbitrary number of path components, including absolute path
-- prefixes.
--
-- * @**@ as a path component matches an arbitrary number of path components, but not
-- absolute path prefixes.
-- Currently considered experimental.
--
-- Some examples:
--
-- * @test.c@ matches @test.c@ and nothing else.
--
-- * @*.c@ matches all @.c@ files in the current directory, so @file.c@ matches,
-- but @file.h@ and @dir\/file.c@ don't.
--
-- * @\/\/*.c@ matches all @.c@ files anywhere on the filesystem,
-- so @file.c@, @dir\/file.c@, @dir1\/dir2\/file.c@ and @\/path\/to\/file.c@ all match,
-- but @file.h@ and @dir\/file.h@ don't.
--
-- * @dir\/*\/*@ matches all files one level below @dir@, so @dir\/one\/file.c@ and
-- @dir\/two\/file.h@ match, but @file.c@, @one\/dir\/file.c@, @dir\/file.h@
-- and @dir\/one\/two\/file.c@ don't.
--
-- Patterns with constructs such as @foo\/..\/bar@ will never match
-- normalised 'FilePath' values, so are unlikely to be correct.
(?==) :: FilePattern -> FilePath -> Bool
(?==) p = case optimise $ parse p of
 [x] | x == Skip || x == Skip1 -> if rp then isRelativePath else const True
 p -> let f = not . null . match p . split isPathSeparator
 in if rp then (\x -> isRelativePath x && f x) else f
 where rp = isRelativePattern p


---------------------------------------------------------------------
-- MULTIPATTERN COMPATIBLE SUBSTITUTIONS

specials :: FilePattern -> [Pat]
specials = concatMap f . parse
 where
 f Lit{} = []
 f Star = [Star]
 f Skip = [Skip]
 f Skip1 = [Skip]
 f (Stars _ xs _) = replicate (length xs + 1) Star

-- | Is the pattern free from any * and //.
simple :: FilePattern -> Bool
simple = null . specials

-- | Do they have the same * and // counts in the same order
compatible :: [FilePattern] -> Bool
compatible [] = True
compatible (x:xs) = all ((==) (specials x) . specials) xs

-- | Extract the items that match the wildcards. The pair must match with '?=='.
extract :: FilePattern -> FilePath -> [String]
extract p = let pat = parse p in \x ->
 case match pat (split isPathSeparator x) of
 [] | p ?== x -> err $ "extract with " ++ show p ++ " and " ++ show x
 | otherwise -> error $ "Pattern " ++ show p ++ " does not match " ++ x ++ ", when trying to extract the FilePattern matches"
 ms:_ -> ms


-- | Given the result of 'extract', substitute it back in to a 'compatible' pattern.
--
-- > p '?==' x ==> substitute (extract p x) p == x
substitute :: [String] -> FilePattern -> FilePath
substitute oms oxs = intercalate "/" $ concat $ snd $ mapAccumL f oms (parse oxs)
 where
 f ms (Lit x) = (ms, [x])
 f (m:ms) Star = (ms, [m])
 f (m:ms) Skip = (ms, split m)
 f (m:ms) Skip1 = (ms, split m)
 f ms (Stars pre mid post) = (ms2, [concat $ pre : zipWith (++) ms1 (mid++[post])])
 where (ms1,ms2) = splitAt (length mid + 1) ms
 f _ _ = error $ "Substitution failed into pattern " ++ show oxs ++ " with " ++ show (length oms) ++ " matches, namely " ++ show oms

 split = linesBy (== '/')


---------------------------------------------------------------------
-- EFFICIENT PATH WALKING

-- | Given a list of files, return a list of things I can match in this directory
-- plus a list of subdirectories and walks that apply to them.
-- Use WalkTo when the list can be predicted in advance
data Walk = Walk ([String] -> ([String],[(String,Walk)]))
 | WalkTo ([String],[(String,Walk)])

walk :: [FilePattern] -> (Bool, Walk)
walk ps = let ps2 = map (optimise . parse) ps in (any (\p -> isEmpty p || not (null $ match p [""])) ps2, f ps2)
 where
 f (nubOrd -> ps)
 | all isLit fin, all (isLit . fst) nxt = WalkTo (map fromLit fin, map (fromLit *** f) nxt)
 | otherwise = Walk $ \xs ->
 (if finStar then xs else filter (\x -> any (`matchOne` x) fin) xs
 ,[(x, f ys) | x <- xs, let ys = concat [b | (a,b) <- nxt, matchOne a x], not $ null ys])
 where
 finStar = Star `elem` fin
 fin = nubOrd $ mapMaybe final ps
 nxt = groupSort $ concatMap next ps


next :: [Pat] -> [(Pat, [Pat])]
next (Skip1:xs) = [(Star,Skip:xs)]
next (Skip:xs) = (Star,Skip:xs) : next xs
next (x:xs) = [(x,xs) | not $ null xs]
next [] = []

final :: [Pat] -> Maybe Pat
final (Skip:xs) = if isEmpty xs then Just Star else final xs
final (Skip1:xs) = if isEmpty xs then Just Star else Nothing
final (x:xs) = if isEmpty xs then Just x else Nothing
final [] = Nothing

isEmpty = all (== Skip)

AltStyle によって変換されたページ (->オリジナル) /