Skip to content

Commit 1234336

Browse files
uqstnyblom
authored andcommitted
Make conversion runs deterministic by sorting what's returned via hash from the SVN API.
An SVN commit spanning multiple paths that shall be turned into multiple branches in git would result in the git commits to be fed into git fast-import in arbitrary order. This is problematic for merge commits, as it means the list of parent commits for the merge commit will have an arbitrary order. Fix this by always handling the paths in an SVN commit in order. This makes svn2git conversion runs reproducible. Unfortunately, commits where paths have been removed and added again, might no longer be handled correctly. I haven't found such a case in the FreeBSD repository however. This is IHMO a bug in git, but it all hinges on semantics like list of parents vs. set of parents and how you define a hash on a set of objects that have no natural order.
1 parent 8314eb2 commit 1234336

File tree

1 file changed

+51
-23
lines changed

1 file changed

+51
-23
lines changed

src/svn.cpp

Lines changed: 51 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -338,21 +338,29 @@ static int recursiveDumpDir(Repository::Transaction *txn, svn_fs_root_t *fs_root
338338
SVN_ERR(svn_fs_dir_entries(&entries, fs_root, pathname, pool));
339339
AprAutoPool dirpool(pool);
340340

341+
// While we get a hash, put it in a map for sorted lookup, so we can
342+
// repeat the conversions and get the same git commit hashes.
343+
QMap<QByteArray, svn_node_kind_t> map;
341344
for (apr_hash_index_t *i = apr_hash_first(pool, entries); i; i = apr_hash_next(i)) {
342-
dirpool.clear();
343345
const void *vkey;
344346
void *value;
345347
apr_hash_this(i, &vkey, NULL, &value);
346-
347348
svn_fs_dirent_t *dirent = reinterpret_cast<svn_fs_dirent_t *>(value);
348-
QByteArray entryName = pathname + '/' + dirent->name;
349-
QString entryFinalName = finalPathName + QString::fromUtf8(dirent->name);
349+
map.insertMulti(QByteArray(dirent->name), dirent->kind);
350+
}
351+
352+
QMapIterator<QByteArray, svn_node_kind_t> i(map);
353+
while (i.hasNext()) {
354+
dirpool.clear();
355+
i.next();
356+
QByteArray entryName = pathname + '/' + i.key();
357+
QString entryFinalName = finalPathName + QString::fromUtf8(i.key());
350358

351-
if (dirent->kind == svn_node_dir) {
359+
if (i.value() == svn_node_dir) {
352360
entryFinalName += '/';
353361
if (recursiveDumpDir(txn, fs_root, entryName, entryFinalName, dirpool) == EXIT_FAILURE)
354362
return EXIT_FAILURE;
355-
} else if (dirent->kind == svn_node_file) {
363+
} else if (i.value() == svn_node_file) {
356364
printf("+");
357365
fflush(stdout);
358366
if (dumpBlob(txn, fs_root, entryName, entryFinalName, dirpool) == EXIT_FAILURE)
@@ -469,31 +477,41 @@ int SvnPrivate::exportRevision(int revnum)
469477

470478
int SvnRevision::prepareTransactions()
471479
{
472-
QLinkedList< QPair<svn_fs_path_change_t*, const char*> > sortedChanges;
473480
// find out what was changed in this revision:
474481
apr_hash_t *changes;
475482
SVN_ERR(svn_fs_paths_changed(&changes, fs_root, pool));
483+
484+
QMap<QByteArray, svn_fs_path_change_t*> map;
476485
for (apr_hash_index_t *i = apr_hash_first(pool, changes); i; i = apr_hash_next(i)) {
477486
const void *vkey;
478487
void *value;
479488
apr_hash_this(i, &vkey, NULL, &value);
480489
const char *key = reinterpret_cast<const char *>(vkey);
481490
svn_fs_path_change_t *change = reinterpret_cast<svn_fs_path_change_t *>(value);
482-
483-
// If we mix path deletions with path adds/replaces we might erase a branch after that it has been reset -> history truncated
484-
if(change->change_kind == svn_fs_path_change_delete) {
485-
sortedChanges.prepend( qMakePair(change, key) );
486-
} else {
487-
sortedChanges.append( qMakePair(change, key) );
491+
// If we mix path deletions with path adds/replaces we might erase a
492+
// branch after that it has been reset -> history truncated
493+
if (map.contains(QByteArray(key))) {
494+
// If the same path is deleted and added, we need to put the
495+
// deletions into the map first, then the addition.
496+
if (change->change_kind == svn_fs_path_change_delete) {
497+
// XXX
498+
}
499+
fprintf(stderr, "\nDuplicate key found in rev %d: %s\n", revnum, key);
500+
fprintf(stderr, "This needs more code to be handled, file a bug report\n");
501+
fflush(stderr);
502+
exit(1);
488503
}
504+
map.insertMulti(QByteArray(key), change);
489505
}
490506

491-
QPair<svn_fs_path_change_t*, const char*> pair;
492-
foreach (pair, sortedChanges) {
493-
if (exportEntry(pair.second, pair.first, changes) == EXIT_FAILURE)
507+
QMapIterator<QByteArray, svn_fs_path_change_t*> i(map);
508+
while (i.hasNext()) {
509+
i.next();
510+
if (exportEntry(i.key(), i.value(), changes) == EXIT_FAILURE)
494511
return EXIT_FAILURE;
495512
}
496-
return EXIT_SUCCESS;
513+
514+
return EXIT_SUCCESS;
497515
}
498516

499517
int SvnRevision::fetchRevProps()
@@ -823,7 +841,6 @@ int SvnRevision::recurse(const char *path, const svn_fs_path_change_t *change,
823841
SVN_ERR(svn_fs_revision_root(&fs_root, fs, revnum - 1, pool));
824842

825843
// get the dir listing
826-
apr_hash_t *entries;
827844
svn_node_kind_t kind;
828845
SVN_ERR(svn_fs_check_path(&kind, fs_root, path, pool));
829846
if(kind == svn_node_none) {
@@ -834,21 +851,32 @@ int SvnRevision::recurse(const char *path, const svn_fs_path_change_t *change,
834851
return EXIT_SUCCESS;
835852
}
836853

854+
apr_hash_t *entries;
837855
SVN_ERR(svn_fs_dir_entries(&entries, fs_root, path, pool));
838-
839856
AprAutoPool dirpool(pool);
857+
858+
// While we get a hash, put it in a map for sorted lookup, so we can
859+
// repeat the conversions and get the same git commit hashes.
860+
QMap<QByteArray, svn_node_kind_t> map;
840861
for (apr_hash_index_t *i = apr_hash_first(pool, entries); i; i = apr_hash_next(i)) {
841862
dirpool.clear();
842863
const void *vkey;
843864
void *value;
844865
apr_hash_this(i, &vkey, NULL, &value);
845-
846866
svn_fs_dirent_t *dirent = reinterpret_cast<svn_fs_dirent_t *>(value);
867+
if (dirent->kind != svn_node_dir)
868+
continue; // not a directory, so can't recurse; skip
869+
map.insertMulti(QByteArray(dirent->name), dirent->kind);
870+
}
847871

848-
QByteArray entry = path + QByteArray("/") + dirent->name;
872+
QMapIterator<QByteArray, svn_node_kind_t> i(map);
873+
while (i.hasNext()) {
874+
dirpool.clear();
875+
i.next();
876+
QByteArray entry = path + QByteArray("/") + i.key();
849877
QByteArray entryFrom;
850878
if (path_from)
851-
entryFrom = path_from + QByteArray("/") + dirent->name;
879+
entryFrom = path_from + QByteArray("/") + i.key();
852880

853881
// check if this entry is in the changelist for this revision already
854882
svn_fs_path_change_t *otherchange =
@@ -860,7 +888,7 @@ int SvnRevision::recurse(const char *path, const svn_fs_path_change_t *change,
860888
}
861889

862890
QString current = QString::fromUtf8(entry);
863-
if (dirent->kind == svn_node_dir)
891+
if (i.value() == svn_node_dir)
864892
current += '/';
865893

866894
// find the first rule that matches this pathname

0 commit comments

Comments
 (0)