Skip to content
This repository was archived by the owner on Apr 1, 2025. It is now read-only.

Commit 2ba61a2

Browse files
committed
Use Attoparsec to parse git output instead of manually splitting Text
1 parent c562987 commit 2ba61a2

File tree

2 files changed

+62
-9
lines changed

2 files changed

+62
-9
lines changed

src/Semantic/Git.hs

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,18 @@ module Semantic.Git
99
, ObjectType(..)
1010
, ObjectMode(..)
1111
, OID(..)
12+
13+
-- Testing Purposes
14+
, parseEntries
15+
, parseEntry
1216
) where
1317

1418
import Control.Monad.IO.Class
15-
import Data.Text as Text
16-
import Shelly hiding (FilePath)
17-
import System.IO (hSetBinaryMode)
19+
import Data.Attoparsec.Text (Parser)
20+
import Data.Attoparsec.Text as AP
21+
import Data.Text as Text
22+
import Shelly hiding (FilePath)
23+
import System.IO (hSetBinaryMode)
1824

1925
-- | git clone --bare
2026
clone :: Text -> FilePath -> IO ()
@@ -30,16 +36,45 @@ catFile gitDir (OID oid) = sh $ do
3036
lsTree :: FilePath -> OID -> IO [TreeEntry]
3137
lsTree gitDir (OID sha) = sh $ do
3238
out <- run "git" [pack ("--git-dir=" <> gitDir), "ls-tree", "-rz", sha]
33-
pure $ mkEntry <$> splitOn "\NUL" out
34-
where
35-
mkEntry row | [mode, ty, rest] <- splitOn " " row
36-
, [oid, path] <- splitOn "\t" rest
37-
= TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path)
38-
| otherwise = nullTreeEntry
39+
pure $ parseEntries out
3940

4041
sh :: MonadIO m => Sh a -> m a
4142
sh = shelly . silently . onCommandHandles (initOutputHandles (`hSetBinaryMode` True))
4243

44+
-- | Parses an list of entries separated by \NUL, and on failure return []
45+
parseEntries :: Text -> [TreeEntry]
46+
parseEntries = either (const []) id . AP.parseOnly (AP.sepBy entryParser (AP.char '\NUL'))
47+
48+
-- | Parse the entire input with entryParser, and on failure return a default
49+
-- For testing purposes only
50+
parseEntry :: Text -> TreeEntry
51+
parseEntry = either (const nullTreeEntry) id . AP.parseOnly entryParser
52+
53+
-- | Parses an entry successfully, falling back to the failure case if necessary.
54+
entryParser :: Parser TreeEntry
55+
entryParser = AP.choice [entrySuccessParser, entryDefaultParser]
56+
57+
-- | Attoparsec parser for a block af text ending with \NUL
58+
-- in order to consume invalid input
59+
entryDefaultParser :: Parser TreeEntry
60+
entryDefaultParser = do
61+
_ <- AP.takeWhile (/= '\NUL')
62+
pure $ nullTreeEntry
63+
64+
-- | Attoparsec parser for a single line of git ls-tree -rz output
65+
entrySuccessParser :: Parser TreeEntry
66+
entrySuccessParser = do
67+
mode <- takeWhileToNul (/= ' ')
68+
_ <- AP.char ' '
69+
ty <- takeWhileToNul (/= ' ')
70+
_ <- AP.char ' '
71+
oid <- takeWhileToNul (/= '\t')
72+
_ <- AP.char '\t'
73+
path <- takeWhileToNul (const True)
74+
pure $ TreeEntry (objectMode mode) (objectType ty) (OID oid) (unpack path)
75+
where
76+
takeWhileToNul f = AP.takeWhile (\x -> f x && x /= '\NUL')
77+
4378
newtype OID = OID Text
4479
deriving (Eq, Show, Ord)
4580

test/Semantic/Spec.hs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module Semantic.Spec (spec) where
33
import Data.Diff
44
import Data.Patch
55
import Semantic.Api hiding (Blob)
6+
import Semantic.Git
67
import System.Exit
78

89
import SpecHelpers
@@ -24,5 +25,22 @@ spec = parallel $ do
2425
it "renders with the specified renderer" $ do
2526
output <- fmap runBuilder . runTaskOrDie $ parseTermBuilder TermSExpression [methodsBlob]
2627
output `shouldBe` "(Statements\n (Method\n (Empty)\n (Identifier)\n (Statements)))\n"
28+
29+
describe "gitParsing" $ do
30+
it "parses a git output string" $ do
31+
let input = "100644 tree ThisIsTheOid\t/this/is/the/path"
32+
let expected = TreeEntry NormalMode TreeObject (OID "ThisIsTheOid") "/this/is/the/path"
33+
parseEntry input `shouldBe` expected
34+
35+
it "parses nonsense into a default value" $ do
36+
let input = "iel jgh\nf2 8i4p\r8f2y4fpoxin u3y2 unz"
37+
let expected = TreeEntry OtherMode OtherObjectType (OID mempty) mempty
38+
parseEntry input `shouldBe` expected
39+
40+
it "parses many outputs separated by \\NUL" $ do
41+
let input = "100644 tree ThisIsTheOid\t/this/is/the/path\NULiel jgh\nf2 8i4p\r8f2y4fpoxin u3y2 unz\NUL120000 blob 17776\t/dev/urandom"
42+
let expected = [ TreeEntry NormalMode TreeObject (OID "ThisIsTheOid") "/this/is/the/path", TreeEntry OtherMode OtherObjectType (OID mempty) mempty, TreeEntry SymlinkMode BlobObject (OID "17776") "/dev/urandom"]
43+
parseEntries input `shouldBe` expected
44+
2745
where
2846
methodsBlob = makeBlob "def foo\nend\n" "methods.rb" Ruby mempty

0 commit comments

Comments
 (0)