Skip to content

Commit bfcd218

Browse files
bk2204 authored and gitster committed
rust: add functionality to hash an object
In a future commit, we'll want to hash some data when dealing with a loose object map. Let's make this easy by creating a structure to hash objects and calling into the C functions as necessary to perform the hashing. For now, we only implement safe hashing, but in the future we could add unsafe hashing if we want. Implement Clone and Drop to appropriately manage our memory. Additionally implement Write to make it easy to use with other formats that implement this trait. While we're at it, add some tests for the various cases in this file. Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 970e48c commit bfcd218

File tree

1 file changed

+157
-0
lines changed

1 file changed

+157
-0
lines changed

src/hash.rs

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
// You should have received a copy of the GNU General Public License along
1111
// with this program; if not, see <https://www.gnu.org/licenses/>.
1212

13+
use std::io::{self, Write};
1314
use std::os::raw::c_void;
1415

1516
pub const GIT_MAX_RAWSZ: usize = 32;
@@ -39,6 +40,81 @@ impl ObjectID {
3940
}
4041
}
4142

43+
pub struct Hasher {
44+
algo: HashAlgorithm,
45+
safe: bool,
46+
ctx: *mut c_void,
47+
}
48+
49+
impl Hasher {
50+
/// Create a new safe hasher.
51+
pub fn new(algo: HashAlgorithm) -> Hasher {
52+
let ctx = unsafe { c::git_hash_alloc() };
53+
unsafe { c::git_hash_init(ctx, algo.hash_algo_ptr()) };
54+
Hasher {
55+
algo,
56+
safe: true,
57+
ctx,
58+
}
59+
}
60+
61+
/// Return whether this is a safe hasher.
62+
pub fn is_safe(&self) -> bool {
63+
self.safe
64+
}
65+
66+
/// Update the hasher with the specified data.
67+
pub fn update(&mut self, data: &[u8]) {
68+
unsafe { c::git_hash_update(self.ctx, data.as_ptr() as *const c_void, data.len()) };
69+
}
70+
71+
/// Return an object ID, consuming the hasher.
72+
pub fn into_oid(self) -> ObjectID {
73+
let mut oid = ObjectID {
74+
hash: [0u8; 32],
75+
algo: self.algo as u32,
76+
};
77+
unsafe { c::git_hash_final_oid(&mut oid as *mut ObjectID as *mut c_void, self.ctx) };
78+
oid
79+
}
80+
81+
/// Return a hash as a `Vec`, consuming the hasher.
82+
pub fn into_vec(self) -> Vec<u8> {
83+
let mut v = vec![0u8; self.algo.raw_len()];
84+
unsafe { c::git_hash_final(v.as_mut_ptr(), self.ctx) };
85+
v
86+
}
87+
}
88+
89+
impl Write for Hasher {
90+
fn write(&mut self, data: &[u8]) -> io::Result<usize> {
91+
self.update(data);
92+
Ok(data.len())
93+
}
94+
95+
fn flush(&mut self) -> io::Result<()> {
96+
Ok(())
97+
}
98+
}
99+
100+
impl Clone for Hasher {
101+
fn clone(&self) -> Hasher {
102+
let ctx = unsafe { c::git_hash_alloc() };
103+
unsafe { c::git_hash_clone(ctx, self.ctx) };
104+
Hasher {
105+
algo: self.algo,
106+
safe: self.safe,
107+
ctx,
108+
}
109+
}
110+
}
111+
112+
impl Drop for Hasher {
113+
fn drop(&mut self) {
114+
unsafe { c::git_hash_free(self.ctx) };
115+
}
116+
}
117+
42118
/// A hash algorithm,
43119
#[repr(C)]
44120
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
@@ -167,12 +243,93 @@ impl HashAlgorithm {
167243
pub fn hash_algo_ptr(self) -> *const c_void {
168244
unsafe { c::hash_algo_ptr_by_offset(self as u32) }
169245
}
246+
247+
/// Create a hasher for this algorithm.
248+
pub fn hasher(self) -> Hasher {
249+
Hasher::new(self)
250+
}
170251
}
171252

172253
/// Raw declarations of the C functions this file calls into.
///
/// All pointers are opaque (`c_void`) on the Rust side.
pub mod c {
    use std::os::raw::c_void;

    extern "C" {
        /// Algorithm lookup; `HashAlgorithm::hash_algo_ptr` passes the enum
        /// discriminant as `n`.
        pub fn hash_algo_ptr_by_offset(n: u32) -> *const c_void;
        /// Not called from the code in this file.
        /// NOTE(review): presumably maps an algorithm pointer to its unsafe
        /// variant; confirm against the C declaration.
        pub fn unsafe_hash_algo(algop: *const c_void) -> *const c_void;
        /// Returns the context pointer that `Hasher` stores.
        pub fn git_hash_alloc() -> *mut c_void;
        /// Receives the stored context pointer when a `Hasher` is dropped.
        pub fn git_hash_free(ctx: *mut c_void);
        /// Called by `Hasher::new` on a freshly allocated context together
        /// with the algorithm pointer.
        pub fn git_hash_init(dst: *mut c_void, algop: *const c_void);
        /// Called by `Hasher::clone` with the new context as `dst` and the
        /// existing one as `src`.
        pub fn git_hash_clone(dst: *mut c_void, src: *const c_void);
        /// Called by `Hasher::update` with the start and length of the input
        /// slice.
        pub fn git_hash_update(ctx: *mut c_void, inp: *const c_void, len: usize);
        /// Called by `Hasher::into_vec` with a byte buffer to receive the
        /// result.
        pub fn git_hash_final(hash: *mut u8, ctx: *mut c_void);
        /// Called by `Hasher::into_oid` with a pointer to an `ObjectID` to
        /// receive the result.
        pub fn git_hash_final_oid(hash: *mut c_void, ctx: *mut c_void);
    }
}
268+
269+
#[cfg(test)]
mod tests {
    use super::{HashAlgorithm, ObjectID};
    use std::io::Write;

    /// Every algorithm the tests below iterate over.
    fn all_algos() -> &'static [HashAlgorithm] {
        &[HashAlgorithm::SHA1, HashAlgorithm::SHA256]
    }

    /// Converting to a format ID and back yields the same algorithm.
    #[test]
    fn format_id_round_trips() {
        for algo in all_algos() {
            assert_eq!(
                *algo,
                HashAlgorithm::from_format_id(algo.format_id()).unwrap()
            );
        }
    }

    /// Converting to the `u32` discriminant and back yields the same algorithm.
    #[test]
    fn offset_round_trips() {
        for algo in all_algos() {
            assert_eq!(*algo, HashAlgorithm::from_u32(*algo as u32).unwrap());
        }
    }

    /// The well-known object IDs expose slices of the algorithm's raw length.
    #[test]
    fn slices_have_correct_length() {
        for algo in all_algos() {
            for oid in [algo.null_oid(), algo.empty_blob(), algo.empty_tree()] {
                assert_eq!(oid.as_slice().len(), algo.raw_len());
            }
        }
    }

    /// Hashing the empty blob and empty tree headers reproduces the
    /// well-known IDs via `update`, via a cloned hasher, and via `Write`.
    #[test]
    fn hasher_works_correctly() {
        for algo in all_algos() {
            let tests: &[(&[u8], &ObjectID)] = &[
                (b"blob 0\0", algo.empty_blob()),
                (b"tree 0\0", algo.empty_tree()),
            ];
            for (data, oid) in tests {
                let mut h = algo.hasher();
                assert!(h.is_safe());
                // Test that this works incrementally.
                h.update(&data[0..2]);
                h.update(&data[2..]);

                // Clone before finalizing so both copies hold the full input.
                let h2 = h.clone();

                let actual_oid = h.into_oid();
                assert_eq!(**oid, actual_oid);

                let v = h2.into_vec();
                assert_eq!((*oid).as_slice(), &v);

                // Same input again, this time through the `Write` impl.
                let mut h = algo.hasher();
                h.write_all(&data[0..2]).unwrap();
                h.write_all(&data[2..]).unwrap();

                let actual_oid = h.into_oid();
                assert_eq!(**oid, actual_oid);
            }
        }
    }
}

0 commit comments

Comments
 (0)