Here is code that will output maven coordinates from given pom.xml file. How can I remove boilerplate (possibly convert it to xml picklers)?
import Control.Lens ((&))
import Text.XML.HXT.Core
import System.Environment (getArgs)
import Text.XML.HXT.Arrow.XmlArrow
import Control.Monad (liftM)
import Data.Maybe
import Control.Applicative
main = do
(filename:_) <- getArgs
_groupId <- extractOne filename $ project >>> groupId
_artifactId <- extractOne filename $ project >>> artifactId
_version <- extractOne filename $ project >>> version
_parentGroupId <- extractOne filename $ project >>> parent >>> groupId
_parentArtifactId <- extractOne filename $ project >>> parent >>> artifactId
_parentVersion <- extractOne filename $ project >>> parent >>> version
let coordinate = (,,) <$> _groupId <*> _artifactId <*> _version
parentCoordinate = (,,) <$> _parentGroupId <*> _parentArtifactId <*> _parentVersion
in do
print coordinate
print parentCoordinate
extract :: String -> IOSArrow XmlTree a -> IO [a]
extract filename cat = runX $ readDocument [withValidate no] filename >>> cat
extractOne :: String -> IOSArrow XmlTree a -> IO (Maybe a)
extractOne filename cat = extract filename cat >>= return . listToMaybe
version :: ArrowXml a => a XmlTree String
version = getChildren >>> hasName "version" >>> getChildren >>> getText
artifactId :: ArrowXml a => a XmlTree String
artifactId = getChildren >>> hasName "artifactId" >>> getChildren >>> getText
groupId :: ArrowXml a => a XmlTree String
groupId = getChildren >>> hasName "groupId" >>> getChildren >>> getText
parent :: ArrowXml a => a XmlTree XmlTree
parent = getChildren >>> hasName "parent"
project :: ArrowXml a => a XmlTree XmlTree
project = getChildren >>> hasName "project"
1 Answer 1
The main issue I see is that the code opens the XML file six times to read a single value each time. Edit: With a bit more a work I have a now a rather nice solution for this. The original minified version is below.
So the point here is to only read the file once, then filter the tree in
two stages, once for the top-level coordinate and once for the "parent"
node below. By combining the two stages with &&&
and handling the
edge-cases of missing values, the output is same (I hope) and
performance, as well as the number of lines is improved significantly.
(There is still potential for reduction in at least the coordinate
function, but I'll leave that to a kind editor for now.)
import Control.Lens ((&))
import Text.XML.HXT.Core
import System.Environment (getArgs)
import Text.XML.HXT.Arrow.XmlArrow
import Control.Monad (liftM)
import Data.Maybe
import Control.Applicative
type Coordinate = (String, String, String)
main = do
(filename:_) <- getArgs
(coordinate, parentCoordinate):_ <- runX (readFileDoc filename >>> coordinatesFilter)
print coordinate
print parentCoordinate
readFileDoc = readDocument [withValidate no]
coordinatesFilter :: ArrowXml a => a XmlTree (Maybe Coordinate, Maybe Coordinate)
coordinatesFilter =
project >>>
maybeSelector coordinate &&&
maybeSelector (parent >>> coordinate)
maybeSelector :: ArrowList a => a b c -> a b (Maybe c)
maybeSelector selector = withDefault (selector >>> arr Just) Nothing
namedChildren :: ArrowXml a => String -> a XmlTree XmlTree
namedChildren name = getChildren >>> hasName name
project :: ArrowXml a => a XmlTree XmlTree
project = namedChildren "project"
parent :: ArrowXml a => a XmlTree XmlTree
parent = namedChildren "parent"
field :: ArrowXml a => String -> a XmlTree String
field name = namedChildren name >>> getChildren >>> getText
threeAndSelector :: Arrow a => (t -> a b c) -> t -> t -> t -> a b (c, c, c)
threeAndSelector f a b c = (f a &&& f b &&& f c) >>> arr (\(x, (y, z)) -> (x, y, z))
coordinate :: ArrowXml a => a XmlTree Coordinate
coordinate = threeAndSelector field "groupId" "artifactId" "version"