Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit ca59809

Browse files
authored
Merge pull request #697 from ajnavarro/performance/improve-delta-reusing
plumbing: packafile, improve delta reutilization
2 parents 55b5d73 + c932bd4 commit ca59809

File tree

5 files changed

+152
-29
lines changed

5 files changed

+152
-29
lines changed

plumbing/format/packfile/delta_selector.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,6 @@ func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*O
174174
return dw.undeltify(otp)
175175
}
176176

177-
if base.Size() <= otp.Size() {
178-
// Bases should be bigger
179-
return dw.undeltify(otp)
180-
}
181-
182177
if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
183178
return err
184179
}

plumbing/format/packfile/encoder.go

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@ type Encoder struct {
1818
w *offsetWriter
1919
zw *zlib.Writer
2020
hasher plumbing.Hasher
21-
// offsets is a map of object hashes to corresponding offsets in the packfile.
22-
// It is used to determine offset of the base of a delta when a OFS_DELTA is
23-
// used.
24-
offsets map[plumbing.Hash]int64
21+
2522
useRefDeltas bool
2623
}
2724

@@ -40,7 +37,6 @@ func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *E
4037
w: ow,
4138
zw: zw,
4239
hasher: h,
43-
offsets: make(map[plumbing.Hash]int64),
4440
useRefDeltas: useRefDeltas,
4541
}
4642
}
@@ -85,11 +81,34 @@ func (e *Encoder) head(numEntries int) error {
8581
}
8682

8783
func (e *Encoder) entry(o *ObjectToPack) error {
88-
offset := e.w.Offset()
89-
e.offsets[o.Hash()] = offset
84+
if o.WantWrite() {
85+
// A cycle exists in this delta chain. This should only occur if a
86+
// selected object representation disappeared during writing
87+
// (for example due to a concurrent repack) and a different base
88+
// was chosen, forcing a cycle. Select something other than a
89+
// delta, and write this object.
90+
o.BackToOriginal()
91+
}
92+
93+
if o.IsWritten() {
94+
return nil
95+
}
96+
97+
o.MarkWantWrite()
98+
99+
if err := e.writeBaseIfDelta(o); err != nil {
100+
return err
101+
}
102+
103+
// We need to check if we already write that object due a cyclic delta chain
104+
if o.IsWritten() {
105+
return nil
106+
}
107+
108+
o.Offset = e.w.Offset()
90109

91110
if o.IsDelta() {
92-
if err := e.writeDeltaHeader(o, offset); err != nil {
111+
if err := e.writeDeltaHeader(o); err != nil {
93112
return err
94113
}
95114
} else {
@@ -112,7 +131,16 @@ func (e *Encoder) entry(o *ObjectToPack) error {
112131
return e.zw.Close()
113132
}
114133

115-
func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
134+
func (e *Encoder) writeBaseIfDelta(o *ObjectToPack) error {
135+
if o.IsDelta() && !o.Base.IsWritten() {
136+
// We must write base first
137+
return e.entry(o.Base)
138+
}
139+
140+
return nil
141+
}
142+
143+
func (e *Encoder) writeDeltaHeader(o *ObjectToPack) error {
116144
// Write offset deltas by default
117145
t := plumbing.OFSDeltaObject
118146
if e.useRefDeltas {
@@ -126,23 +154,18 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
126154
if e.useRefDeltas {
127155
return e.writeRefDeltaHeader(o.Base.Hash())
128156
} else {
129-
return e.writeOfsDeltaHeader(offset, o.Base.Hash())
157+
return e.writeOfsDeltaHeader(o)
130158
}
131159
}
132160

133161
func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
134162
return binary.Write(e.w, base)
135163
}
136164

137-
func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error {
138-
baseOffset, ok := e.offsets[base]
139-
if !ok {
140-
return fmt.Errorf("base for delta not found, base hash: %v", base)
141-
}
142-
165+
func (e *Encoder) writeOfsDeltaHeader(o *ObjectToPack) error {
143166
// for OFS_DELTA, offset of the base is interpreted as negative offset
144167
// relative to the type-byte of the header of the ofs-delta entry.
145-
relativeOffset := deltaOffset - baseOffset
168+
relativeOffset := o.Offset - o.Base.Offset
146169
if relativeOffset <= 0 {
147170
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
148171
}

plumbing/format/packfile/encoder_advanced_test.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,18 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, pac
6868

6969
buf := bytes.NewBuffer(nil)
7070
enc := NewEncoder(buf, storage, false)
71-
_, err = enc.Encode(hashes, packWindow)
71+
encodeHash, err := enc.Encode(hashes, packWindow)
7272
c.Assert(err, IsNil)
7373

7474
scanner := NewScanner(buf)
7575
storage = memory.NewStorage()
7676
d, err := NewDecoder(scanner, storage)
7777
c.Assert(err, IsNil)
78-
_, err = d.Decode()
78+
decodeHash, err := d.Decode()
7979
c.Assert(err, IsNil)
8080

81+
c.Assert(encodeHash, Equals, decodeHash)
82+
8183
objIter, err = storage.IterEncodedObjects(plumbing.AnyObject)
8284
c.Assert(err, IsNil)
8385
obtainedObjects := map[plumbing.Hash]bool{}

plumbing/format/packfile/encoder_test.go

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeOFS(c *C) {
106106
s.deltaOverDeltaTest(c)
107107
}
108108

109+
func (s *EncoderSuite) TestDecodeEncodeWithCycleREF(c *C) {
110+
s.enc = NewEncoder(s.buf, s.store, true)
111+
s.deltaOverDeltaCyclicTest(c)
112+
}
113+
114+
func (s *EncoderSuite) TestDecodeEncodeWithCycleOFS(c *C) {
115+
s.enc = NewEncoder(s.buf, s.store, false)
116+
s.deltaOverDeltaCyclicTest(c)
117+
}
118+
109119
func (s *EncoderSuite) simpleDeltaTest(c *C) {
110120
srcObject := newObject(plumbing.BlobObject, []byte("0"))
111121
targetObject := newObject(plumbing.BlobObject, []byte("01"))
@@ -114,7 +124,7 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) {
114124
c.Assert(err, IsNil)
115125

116126
srcToPack := newObjectToPack(srcObject)
117-
_, err = s.enc.encode([]*ObjectToPack{
127+
encHash, err := s.enc.encode([]*ObjectToPack{
118128
srcToPack,
119129
newDeltaObjectToPack(srcToPack, targetObject, deltaObject),
120130
})
@@ -126,9 +136,11 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) {
126136
d, err := NewDecoder(scanner, storage)
127137
c.Assert(err, IsNil)
128138

129-
_, err = d.Decode()
139+
decHash, err := d.Decode()
130140
c.Assert(err, IsNil)
131141

142+
c.Assert(encHash, Equals, decHash)
143+
132144
decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash())
133145
c.Assert(err, IsNil)
134146
c.Assert(decSrc, DeepEquals, srcObject)
@@ -153,7 +165,8 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) {
153165

154166
srcToPack := newObjectToPack(srcObject)
155167
targetToPack := newObjectToPack(targetObject)
156-
_, err = s.enc.encode([]*ObjectToPack{
168+
encHash, err := s.enc.encode([]*ObjectToPack{
169+
targetToPack,
157170
srcToPack,
158171
newDeltaObjectToPack(srcToPack, targetObject, deltaObject),
159172
newDeltaObjectToPack(targetToPack, otherTargetObject, otherDeltaObject),
@@ -165,9 +178,11 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) {
165178
d, err := NewDecoder(scanner, storage)
166179
c.Assert(err, IsNil)
167180

168-
_, err = d.Decode()
181+
decHash, err := d.Decode()
169182
c.Assert(err, IsNil)
170183

184+
c.Assert(encHash, Equals, decHash)
185+
171186
decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash())
172187
c.Assert(err, IsNil)
173188
c.Assert(decSrc, DeepEquals, srcObject)
@@ -180,3 +195,61 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) {
180195
c.Assert(err, IsNil)
181196
c.Assert(decOtherTarget, DeepEquals, otherTargetObject)
182197
}
198+
199+
func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) {
200+
o1 := newObject(plumbing.BlobObject, []byte("0"))
201+
o2 := newObject(plumbing.BlobObject, []byte("01"))
202+
o3 := newObject(plumbing.BlobObject, []byte("011111"))
203+
o4 := newObject(plumbing.BlobObject, []byte("01111100000"))
204+
205+
d2, err := GetDelta(o1, o2)
206+
c.Assert(err, IsNil)
207+
208+
d3, err := GetDelta(o4, o3)
209+
c.Assert(err, IsNil)
210+
211+
d4, err := GetDelta(o3, o4)
212+
c.Assert(err, IsNil)
213+
214+
po1 := newObjectToPack(o1)
215+
pd2 := newDeltaObjectToPack(po1, o2, d2)
216+
pd3 := newObjectToPack(o3)
217+
pd4 := newObjectToPack(o4)
218+
219+
pd3.SetDelta(pd4, d3)
220+
pd4.SetDelta(pd3, d4)
221+
222+
encHash, err := s.enc.encode([]*ObjectToPack{
223+
po1,
224+
pd2,
225+
pd3,
226+
pd4,
227+
})
228+
c.Assert(err, IsNil)
229+
230+
scanner := NewScanner(s.buf)
231+
storage := memory.NewStorage()
232+
d, err := NewDecoder(scanner, storage)
233+
c.Assert(err, IsNil)
234+
235+
decHash, err := d.Decode()
236+
c.Assert(err, IsNil)
237+
238+
c.Assert(encHash, Equals, decHash)
239+
240+
decSrc, err := storage.EncodedObject(o1.Type(), o1.Hash())
241+
c.Assert(err, IsNil)
242+
c.Assert(decSrc, DeepEquals, o1)
243+
244+
decTarget, err := storage.EncodedObject(o2.Type(), o2.Hash())
245+
c.Assert(err, IsNil)
246+
c.Assert(decTarget, DeepEquals, o2)
247+
248+
decOtherTarget, err := storage.EncodedObject(o3.Type(), o3.Hash())
249+
c.Assert(err, IsNil)
250+
c.Assert(decOtherTarget, DeepEquals, o3)
251+
252+
decAnotherTarget, err := storage.EncodedObject(o4.Type(), o4.Hash())
253+
c.Assert(err, IsNil)
254+
c.Assert(decAnotherTarget, DeepEquals, o4)
255+
}

plumbing/format/packfile/object_pack.go

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,16 @@ type ObjectToPack struct {
1313
// If the main object is not a delta, Base will be null
1414
Base *ObjectToPack
1515
// Original is the object that we can generate applying the delta to
16-
// Base, or the same object as EncodedObject in the case of a non-delta
16+
// Base, or the same object as Object in the case of a non-delta
1717
// object.
1818
Original plumbing.EncodedObject
1919
// Depth is the amount of deltas needed to resolve to obtain Original
2020
// (delta based on delta based on ...)
2121
Depth int
22+
23+
// offset in pack when object has been already written, or 0 if it
24+
// has not been written yet
25+
Offset int64
2226
}
2327

2428
// newObjectToPack creates a correct ObjectToPack based on a non-delta object
@@ -41,6 +45,32 @@ func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedOb
4145
}
4246
}
4347

48+
// BackToOriginal converts that ObjectToPack to a non-deltified object if it was one
49+
func (o *ObjectToPack) BackToOriginal() {
50+
if o.IsDelta() {
51+
o.Object = o.Original
52+
o.Base = nil
53+
o.Depth = 0
54+
}
55+
}
56+
57+
// IsWritten returns if that ObjectToPack was
58+
// already written into the packfile or not
59+
func (o *ObjectToPack) IsWritten() bool {
60+
return o.Offset > 1
61+
}
62+
63+
// MarkWantWrite marks this ObjectToPack as WantWrite
64+
// to avoid delta chain loops
65+
func (o *ObjectToPack) MarkWantWrite() {
66+
o.Offset = 1
67+
}
68+
69+
// WantWrite checks if this ObjectToPack was marked as WantWrite before
70+
func (o *ObjectToPack) WantWrite() bool {
71+
return o.Offset == 1
72+
}
73+
4474
func (o *ObjectToPack) Type() plumbing.ObjectType {
4575
if o.Original != nil {
4676
return o.Original.Type()

0 commit comments

Comments
 (0)