Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit f28e447

Browse files
taruti authored and strib committed
Make prune object walker generic
1 parent 9dcb096 commit f28e447

File tree

2 files changed

+107
-91
lines changed

2 files changed

+107
-91
lines changed

object_walker.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package git
2+
3+
import (
4+
"fmt"
5+
6+
"gopkg.in/src-d/go-git.v4/plumbing"
7+
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
8+
"gopkg.in/src-d/go-git.v4/plumbing/object"
9+
"gopkg.in/src-d/go-git.v4/storage"
10+
)
11+
12+
// objectWalker walks objects read from Storer and records the hash of
// every object it has visited in seen.
type objectWalker struct {
13+
// Storer is the storage whose references are iterated and from which
// objects are fetched during the walk.
Storer storage.Storer
14+
// seen is the set of object hashes visited so far.
15+
// The map can become huge when walking over large
16+
// repos, hence the empty struct{} value type.
17+
seen map[plumbing.Hash]struct{}
18+
}
19+
20+
// newObjectWalker returns an objectWalker that reads from s and starts
// with an empty seen set.
func newObjectWalker(s storage.Storer) *objectWalker {
21+
return &objectWalker{s, map[plumbing.Hash]struct{}{}}
22+
}
23+
24+
// walkAllRefs walks all (hash) references from the repo: for every hash
// reference returned by the Storer it calls walkObjectTree on the
// referenced hash. Errors from IterReferences or from the iteration are
// returned unchanged.
25+
func (p *objectWalker) walkAllRefs() error {
26+
// Walk over all the references in the repo.
27+
it, err := p.Storer.IterReferences()
28+
if err != nil {
29+
return err
30+
}
31+
defer it.Close()
32+
err = it.ForEach(func(ref *plumbing.Reference) error {
33+
// Exit this iteration early for non-hash references; they are
// skipped without being treated as an error.
34+
if ref.Type() != plumbing.HashReference {
35+
return nil
36+
}
37+
return p.walkObjectTree(ref.Hash())
38+
})
39+
if err != nil {
40+
return err
41+
}
42+
return nil
43+
}
44+
45+
// isSeen reports whether hash is already present in the seen set.
func (p *objectWalker) isSeen(hash plumbing.Hash) bool {
46+
_, seen := p.seen[hash]
47+
return seen
48+
}
49+
50+
// add records hash in the seen set; adding a hash that is already
// present leaves the set unchanged.
func (p *objectWalker) add(hash plumbing.Hash) {
51+
p.seen[hash] = struct{}{}
52+
}
53+
54+
// walkObjectTree recursively walks the object identified by hash and
// everything reachable from it, recording each visited hash
55+
// in the objectWalker. This is used instead of the revlist
56+
// walks because memory usage is tight with huge repos.
//
// It returns nil when hash was already seen, and an error when an object
// cannot be fetched or has a type other than commit or tree.
// NOTE(review): only *object.Commit and *object.Tree are handled, so any
// other type returned by object.GetObject (possibly annotated tags that a
// reference points at; confirm) aborts the walk in the default branch.
57+
func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error {
58+
// Return early if this object was already seen; otherwise mark it.
// The hash is marked before the fetch below, so it stays in the seen
// set even when fetching it fails.
59+
if p.isSeen(hash) {
60+
return nil
61+
}
62+
p.add(hash)
63+
// Fetch the object.
64+
obj, err := object.GetObject(p.Storer, hash)
65+
if err != nil {
66+
return fmt.Errorf("Getting object %s failed: %v", hash, err)
67+
}
68+
// Walk all children depending on object type.
69+
switch obj := obj.(type) {
70+
case *object.Commit:
// A commit leads to its tree and then to each of its parents.
71+
err = p.walkObjectTree(obj.TreeHash)
72+
if err != nil {
73+
return err
74+
}
75+
for _, h := range obj.ParentHashes {
76+
err = p.walkObjectTree(h)
77+
if err != nil {
78+
return err
79+
}
80+
}
81+
case *object.Tree:
82+
for i := range obj.Entries {
83+
// Shortcut for blob objects:
84+
// 'or' the 0755 bits into the mode and check that the
85+
// result matches filemode.Executable. The type information
86+
// is in the higher bits, but this is the cleanest way
87+
// to handle plain files with different modes.
88+
// Other non-tree objects are somewhat rare, so they
89+
// are not special-cased.
// Entries taking the shortcut are marked as seen without being
// fetched from the Storer.
90+
if obj.Entries[i].Mode|0755 == filemode.Executable {
91+
p.add(obj.Entries[i].Hash)
92+
continue
93+
}
94+
// Normal walk for sub-trees (and symlinks etc).
95+
err = p.walkObjectTree(obj.Entries[i].Hash)
96+
if err != nil {
97+
return err
98+
}
99+
}
100+
default:
101+
// Error out on unhandled object types.
102+
return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj)
103+
}
104+
return nil
105+
}

prune.go

Lines changed: 2 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
11
package git
22

33
import (
4-
"fmt"
54
"time"
65

76
"gopkg.in/src-d/go-git.v4/plumbing"
8-
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
9-
"gopkg.in/src-d/go-git.v4/plumbing/object"
10-
"gopkg.in/src-d/go-git.v4/storage"
117
)
128

139
// PruneHandler is a callback that receives the hash of an unreferenced
// object and returns an error.
type PruneHandler func(unreferencedObjectHash plumbing.Hash) error
@@ -26,23 +22,8 @@ func (r *Repository) DeleteObject(hash plumbing.Hash) error {
2622
}
2723

2824
func (r *Repository) Prune(opt PruneOptions) error {
29-
pw := &pruneWalker{
30-
Storer: r.Storer,
31-
seen: map[plumbing.Hash]struct{}{},
32-
}
33-
// Walk over all the references in the repo.
34-
it, err := r.Storer.IterReferences()
35-
if err != nil {
36-
return nil
37-
}
38-
defer it.Close()
39-
err = it.ForEach(func(ref *plumbing.Reference) error {
40-
// Exit this iteration early for non-hash references.
41-
if ref.Type() != plumbing.HashReference {
42-
return nil
43-
}
44-
return pw.walkObjectTree(ref.Hash())
45-
})
25+
pw := newObjectWalker(r.Storer)
26+
err := pw.walkAllRefs()
4627
if err != nil {
4728
return err
4829
}
@@ -73,73 +54,3 @@ func (r *Repository) Prune(opt PruneOptions) error {
7354
}
7455
return nil
7556
}
76-
77-
// pruneWalker walks objects read from Storer and records the hash of
// every object it has visited in seen.
type pruneWalker struct {
78-
// Storer is the storage from which objects are fetched during the walk.
Storer storage.Storer
79-
// seen is the set of object hashes visited so far.
80-
// The map can become huge when walking over large
81-
// repos, hence the empty struct{} value type.
82-
seen map[plumbing.Hash]struct{}
83-
}
84-
85-
// isSeen reports whether hash is already present in the seen set.
func (p *pruneWalker) isSeen(hash plumbing.Hash) bool {
86-
_, seen := p.seen[hash]
87-
return seen
88-
}
89-
90-
// add records hash in the seen set; adding a hash that is already
// present leaves the set unchanged.
func (p *pruneWalker) add(hash plumbing.Hash) {
91-
p.seen[hash] = struct{}{}
92-
}
93-
94-
// walkObjectTree recursively walks the object identified by hash and
// everything reachable from it, recording each visited hash
95-
// in the pruneWalker. This is used instead of the revlist
96-
// walks because memory usage is tight with huge repos.
//
// It returns nil when hash was already seen, and an error when an object
// cannot be fetched or has a type other than commit or tree.
// NOTE(review): only *object.Commit and *object.Tree are handled, so any
// other type returned by object.GetObject (possibly annotated tags that a
// reference points at; confirm) aborts the walk in the default branch.
97-
func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error {
98-
// Return early if this object was already seen; otherwise mark it.
// The hash is marked before the fetch below, so it stays in the seen
// set even when fetching it fails.
99-
if p.isSeen(hash) {
100-
return nil
101-
}
102-
p.add(hash)
103-
// Fetch the object.
104-
obj, err := object.GetObject(p.Storer, hash)
105-
if err != nil {
106-
return fmt.Errorf("Getting object %s failed: %v", hash, err)
107-
}
108-
// Walk all children depending on object type.
109-
switch obj := obj.(type) {
110-
case *object.Commit:
// A commit leads to its tree and then to each of its parents.
111-
err = p.walkObjectTree(obj.TreeHash)
112-
if err != nil {
113-
return err
114-
}
115-
for _, h := range obj.ParentHashes {
116-
err = p.walkObjectTree(h)
117-
if err != nil {
118-
return err
119-
}
120-
}
121-
case *object.Tree:
122-
for i := range obj.Entries {
123-
// Shortcut for blob objects:
124-
// 'or' the 0755 bits into the mode and check that the
125-
// result matches filemode.Executable. The type information
126-
// is in the higher bits, but this is the cleanest way
127-
// to handle plain files with different modes.
128-
// Other non-tree objects are somewhat rare, so they
129-
// are not special-cased.
// Entries taking the shortcut are marked as seen without being
// fetched from the Storer.
130-
if obj.Entries[i].Mode|0755 == filemode.Executable {
131-
p.add(obj.Entries[i].Hash)
132-
continue
133-
}
134-
// Normal walk for sub-trees (and symlinks etc).
135-
err = p.walkObjectTree(obj.Entries[i].Hash)
136-
if err != nil {
137-
return err
138-
}
139-
}
140-
default:
141-
// Error out on unhandled object types.
142-
return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj)
143-
}
144-
return nil
145-
}

0 commit comments

Comments
 (0)