From f32ce3e59acfd963651dc0e82a9c91b46f0dbc52 Mon Sep 17 00:00:00 2001 From: FUTATSUKI Yasuhito Date: Thu, 16 Dec 2021 17:58:44 +0900 Subject: [PATCH 1/5] Add test for GitHub issue 18 * run-tests.py (internal_co_broken_rcsfile): New test. (test_list): Add it. --- run-tests.py | 45 ++++++++-- .../irregular.txt,v | 88 +++++++++++++++++++ 2 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v diff --git a/run-tests.py b/run-tests.py index 7eaee085..dd7594d8 100755 --- a/run-tests.py +++ b/run-tests.py @@ -3345,6 +3345,40 @@ def internal_co_keywords(): raise Failure() +@Cvs2SvnTestFunction +def internal_co_broken_rcsfile(): + "check if internal co can handle broken delta" + + # This is a test for GitHub issue 18. + + # Check if internal co can handle broken delta which inserts unterminated + # line other than after last line. Although it does not seem such a + # "broken" RCS file could be generated by normal RCS/CVS operation, + # RCS/CVS can handle it. + + ### TODO: Once an unterminated line is inserted in a middle of lines + ### in line buffer, there can be more critical situations than one + ### that the irregular.txt,v has. So we need to add such test data. 
+ + rcs_conv = ensure_conversion( + 'gh-issue-18-irregular-rcsfile', + args=['--use-rcs', '--default-eol=native'], + dumpfile='broken-rcsfile-rcs.dump', + ) + conv = ensure_conversion( + 'gh-issue-18-irregular-rcsfile', + args=['--default-eol=native'], + dumpfile='broken-rcsfile-int.dump', + ) + if conv.output_found(r'WARNING\: internal problem\: leftover revisions'): + raise Failure() + rcs_lines = list(open(rcs_conv.dumpfile, 'rb')) + lines = list(open(conv.dumpfile, 'rb')) + # Compare all lines following the repository UUID: + if lines[3:] != rcs_lines[3:]: + raise Failure() + + @Cvs2SvnTestFunction def timestamp_chaos(): "test timestamp adjustments" @@ -4237,10 +4271,11 @@ def vendor_1_1_not_root(): internal_co_exclude, internal_co_trunk_only, internal_co_keywords, + internal_co_broken_rcsfile, leftover_revs, requires_internal_co, - timestamp_chaos, # 140: + timestamp_chaos, symlinks, empty_trunk_path, preferred_parent_cycle, @@ -4250,8 +4285,8 @@ def vendor_1_1_not_root(): add_on_branch, main_git, main_git2, - main_git_merged, # 150: + main_git_merged, main_git2_merged, git_options, main_hg, @@ -4261,8 +4296,8 @@ def vendor_1_1_not_root(): EOLVariants('LF'), EOLVariants('CR'), EOLVariants('CRLF'), - EOLVariants('native'), # 160: + EOLVariants('native'), no_revs_file, mirror_keyerror_test, exclude_ntdb_test, @@ -4272,8 +4307,8 @@ def vendor_1_1_not_root(): missing_deltatext, transform_unlabeled_branch_name, ignore_unlabeled_branch, - exclude_unlabeled_branch, # 170: + exclude_unlabeled_branch, unlabeled_branch_name_collision, collision_with_unlabeled_branch_name, many_deletes, @@ -4283,8 +4318,8 @@ def vendor_1_1_not_root(): exclude_symbol_default, add_on_branch2, branch_from_vendor_branch, - strange_default_branch, # 180: + strange_default_branch, move_parent, log_message_eols, missing_vendor_branch, diff --git a/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v b/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v new file mode 
100644 index 00000000..6dd03304 --- /dev/null +++ b/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v @@ -0,0 +1,88 @@ +head 1.5; +access; +symbols; +locks; strict; +comment @# @; + + +1.5 +date 2021.11.26.04.16.45; author futatuki; state Exp; +branches; +next 1.4; + +1.4 +date 2021.11.26.04.14.23; author futatuki; state Exp; +branches; +next 1.3; + +1.3 +date 2021.11.26.03.46.06; author futatuki; state Exp; +branches; +next 1.2; + +1.2 +date 2021.11.26.03.41.39; author futatuki; state Exp; +branches; +next 1.1; + +1.1 +date 2021.11.26.03.34.28; author futatuki; state Exp; +branches; +next ; + + +desc +@create a new file irregular.txt +@ + + +1.5 +log +@r1.5: +@ +text +@aaa +bbb +ccc +hhh@ + + +1.4 +log +@r1.4: +@ +text +@d4 1 +a4 1 +ggg@ + + +1.3 +log +@r1.3: s/eee/fff/ +@ +text +@d3 1 +a3 1 +fff@ + + +1.2 +log +@r1.2: s/ddd/eee/ +@ +text +@d3 1 +a3 1 +eee@ + + +1.1 +log +@r1.1 +@ +text +@d3 1 +a3 1 +ddd@ + From ae478afd1fa83f86c25924fc21e7663580f6d329 Mon Sep 17 00:00:00 2001 From: FUTATSUKI Yasuhito Date: Mon, 20 Dec 2021 15:05:55 +0900 Subject: [PATCH 2/5] issue #18: Use internal lines instead of plain text in checkout_db. As the return value of RCSStream.invert_diff() is not to be applied for flat text content but for internal logical lines in RCSStream, which may contain some unterminated logical lines at any position, we should use the content of internal logical lines in RCSStream as base text which is used to get the content of the next newer revision. With this commit, it is implemented by splitting the checkout() method in TextRecord, checkout() method for external use as in the past and checkout_as_lines() method for internal use. * cvs2svn_lib/checkout_internal.py (TextRecord.checkout_as_lines): New method. Replacement of checkout() method but returns internal lines in RCSStream instead of a plain text. (TextRecord.checkout): Use checkout_as_lines() for default implementation. 
(FullTextRecord.checkout, DeltaTextRecord.checkout): Removed to use default implementation. (FullTextRecord.checkout_as_lines): New method. Just same logic as the past checkout() method. (DeltaTextRecord.checkout_as_lines): New method. Just same logic as the past checkout() method but uses internal lines in rcs_stream instead of its text. (_Sink.set_revision_info): Record the internal lines in rcs_stream instead of its text at revision 1.1. * cvs2svn_lib/rcs_stream.py (RCSStream.__init__): Allow to set lines directly in addition to a text. (RCSStream.get_lines): New method. --- cvs2svn_lib/checkout_internal.py | 41 +++++++++++++++++++------------- cvs2svn_lib/rcs_stream.py | 10 +++++++- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py index 8fdc132c..166eda21 100644 --- a/cvs2svn_lib/checkout_internal.py +++ b/cvs2svn_lib/checkout_internal.py @@ -134,15 +134,22 @@ def decrement_refcount(self, text_record_db): if self.refcount == 0: text_record_db.discard(self.id) - def checkout(self, text_record_db): + def checkout_as_lines(self, text_record_db): """Workhorse of the checkout process. - Return the text for this revision, decrement our reference count, - and update the databases depending on whether there will be future - checkouts.""" + Return the text for this revision as list of logical lines, + decrement our reference count, and update the databases depending + on whether there will be future checkouts.""" raise NotImplementedError() + def checkout(self, text_record_db): + """Return the text for this revision. + + Just same as checkout_as_lines() but returns text as flat text string.""" + + return "".join(self.checkout_as_lines(text_record_db)) + def free(self, text_record_db): """This instance will never again be checked out; free it. 
@@ -168,10 +175,10 @@ def __getstate__(self): def __setstate__(self, state): (self.id, self.refcount,) = state - def checkout(self, text_record_db): - text = text_record_db.delta_db[self.id] + def checkout_as_lines(self, text_record_db): + lines = text_record_db.delta_db[self.id] self.decrement_refcount(text_record_db) - return text + return lines def free(self, text_record_db): del text_record_db.delta_db[self.id] @@ -205,12 +212,12 @@ def __setstate__(self, state): def increment_dependency_refcounts(self, text_record_db): text_record_db[self.pred_id].refcount += 1 - def checkout(self, text_record_db): - base_text = text_record_db[self.pred_id].checkout(text_record_db) - rcs_stream = RCSStream(base_text) + def checkout_as_lines(self, text_record_db): + base_lines = text_record_db[self.pred_id].checkout_as_lines(text_record_db) + rcs_stream = RCSStream(base_lines) delta_text = text_record_db.delta_db[self.id] rcs_stream.apply_diff(delta_text) - text = rcs_stream.get_text() + lines = rcs_stream.get_lines() del rcs_stream self.refcount -= 1 if self.refcount == 0: @@ -220,11 +227,11 @@ def checkout(self, text_record_db): del text_record_db[self.id] else: # Store a new CheckedOutTextRecord in place of ourselves: - text_record_db.checkout_db['%x' % self.id] = text + text_record_db.checkout_db['%x' % self.id] = lines new_text_record = CheckedOutTextRecord(self.id) new_text_record.refcount = self.refcount text_record_db.replace(new_text_record) - return text + return lines def free(self, text_record_db): del text_record_db.delta_db[self.id] @@ -251,10 +258,10 @@ def __getstate__(self): def __setstate__(self, state): (self.id, self.refcount,) = state - def checkout(self, text_record_db): - text = text_record_db.checkout_db['%x' % self.id] + def checkout_as_lines(self, text_record_db): + lines = text_record_db.checkout_db['%x' % self.id] self.decrement_refcount(text_record_db) - return text + return lines def free(self, text_record_db): del text_record_db.checkout_db['%x' % 
self.id] @@ -533,7 +540,7 @@ def set_revision_info(self, revision, log, text): # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev_id) self.revision_collector._writeout( - text_record, self._rcs_stream.get_text() + text_record, self._rcs_stream.get_lines() ) # There will be no more trunk revisions delivered, so free the diff --git a/cvs2svn_lib/rcs_stream.py b/cvs2svn_lib/rcs_stream.py index 0963956d..c4ad79a0 100644 --- a/cvs2svn_lib/rcs_stream.py +++ b/cvs2svn_lib/rcs_stream.py @@ -174,13 +174,21 @@ class RCSStream: def __init__(self, text): """Instantiate and initialize the file content with TEXT.""" - self.set_text(text) + if isinstance(text, bytes): + self.set_text(text) + else: + self.set_lines(text) def get_text(self): """Return the current file content.""" return "".join(self._lines) + def get_lines(self): + """Return the current file content as list of logical lines.""" + + return self._lines + def set_lines(self, lines): """Set the current contents to the specified LINES. From eb5aab78abad7d0887c2caace89a8b6acf5abbc3 Mon Sep 17 00:00:00 2001 From: FUTATSUKI Yasuhito Date: Tue, 21 Dec 2021 21:01:51 +0900 Subject: [PATCH 3/5] Fix comments in cvs2svn_lib/checkout_internal.py. No functional change. --- cvs2svn_lib/checkout_internal.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py index 166eda21..6ce43761 100644 --- a/cvs2svn_lib/checkout_internal.py +++ b/cvs2svn_lib/checkout_internal.py @@ -102,7 +102,10 @@ class TextRecord(object): - """Bookkeeping data for the text of a single CVSRevision.""" + """Bookkeeping data for the text of a single CVSRevision. + + The text is stored as the list of logical lines manipulated + by RCSStream.""" __slots__ = ['id', 'refcount'] @@ -137,7 +140,7 @@ def decrement_refcount(self, text_record_db): def checkout_as_lines(self, text_record_db): """Workhorse of the checkout process. 
- Return the text for this revision as list of logical lines, + Return the text for this revision as a list of logical lines, decrement our reference count, and update the databases depending on whether there will be future checkouts.""" @@ -146,7 +149,8 @@ def checkout_as_lines(self, text_record_db): def checkout(self, text_record_db): """Return the text for this revision. - Just same as checkout_as_lines() but returns text as flat text string.""" + Just as checkout_as_lines(), but returns the text as a flat text + string.""" return "".join(self.checkout_as_lines(text_record_db)) @@ -165,7 +169,7 @@ class FullTextRecord(TextRecord): These records are used for revisions whose fulltext was determined by the InternalRevisionCollector during FilterSymbolsPass. The fulltext for such a revision is is stored in the delta_db as a - single string.""" + list of logical lines manipulated by RCSStream.""" __slots__ = [] @@ -221,7 +225,7 @@ def checkout_as_lines(self, text_record_db): del rcs_stream self.refcount -= 1 if self.refcount == 0: - # This text will never be needed again; just delete ourselves + # These lines will never be needed again; just delete ourselves # without ever having stored the fulltext to the checkout # database: del text_record_db[self.id] @@ -248,7 +252,8 @@ class CheckedOutTextRecord(TextRecord): These records are used for revisions whose fulltext has been computed already during OutputPass. The fulltext for such a - revision is stored in the text_record_db as a single string.""" + revision is stored in the text_record_db as a list of logical + lines manipulated by RCSStream.""" __slots__ = [] From 5d1a8dfcb7d732dc0b9b7a60008dc1c4a02c85a0 Mon Sep 17 00:00:00 2001 From: FUTATSUKI Yasuhito Date: Tue, 21 Dec 2021 21:05:08 +0900 Subject: [PATCH 4/5] test-data: improve the test data for test internal_co_broken_rcsfile. 
* test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v - Insert a broken delta in r1.5 - Add a branch revision r1.2.1.1, branched from corrupted revision r1.2. --- .../irregular.txt,v | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v b/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v index 6dd03304..c668e2eb 100644 --- a/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v +++ b/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v @@ -22,7 +22,8 @@ next 1.2; 1.2 date 2021.11.26.03.41.39; author futatuki; state Exp; -branches; +branches + 1.2.1.1; next 1.1; 1.1 @@ -30,6 +31,11 @@ date 2021.11.26.03.34.28; author futatuki; state Exp; branches; next ; +1.2.1.1 +date 2021.12.21.11.44.31; author futatuki; state Exp; +branches; +next ; + desc @create a new file irregular.txt @@ -77,6 +83,18 @@ a3 1 eee@ +1.2.1.1 +log +@rev:1.2.1.1 +@ +text +@d3 1 +a3 2 +ccc +eeeggg +@ + + 1.1 log @r1.1 From 86c896cea2e2aa8f3ea8c646b6180912780a2e93 Mon Sep 17 00:00:00 2001 From: FUTATSUKI Yasuhito Date: Tue, 21 Dec 2021 22:03:10 +0900 Subject: [PATCH 5/5] Follow up to previous: Insert a corrupt delta r1.5 in irregular.txt,v Previous commit did not contain new delta r1.5. This is a correction. 
--- .../irregular.txt,v | 44 +++++++++++++------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v b/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v index c668e2eb..c6a7b247 100644 --- a/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v +++ b/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v @@ -1,10 +1,15 @@ -head 1.5; +head 1.6; access; symbols; locks; strict; comment @# @; +1.6 +date 2021.12.21.11.21.57; author futatuki; state Exp; +branches; +next 1.5; + 1.5 date 2021.11.26.04.16.45; author futatuki; state Exp; branches; @@ -42,24 +47,35 @@ desc @ -1.5 +1.6 log -@r1.5: +@r1.6: @ text @aaa bbb -ccc +bc +cc hhh@ +1.5 +log +@r1.5: +@ +text +@d3 1 +a3 1 +c@ + + 1.4 log @r1.4: @ text -@d4 1 -a4 1 +@d5 1 +a5 1 ggg@ @@ -68,8 +84,8 @@ log @r1.3: s/eee/fff/ @ text -@d3 1 -a3 1 +@d4 1 +a4 1 fff@ @@ -78,8 +94,8 @@ log @r1.2: s/ddd/eee/ @ text -@d3 1 -a3 1 +@d4 1 +a4 1 eee@ @@ -88,8 +104,8 @@ log @rev:1.2.1.1 @ text -@d3 1 -a3 2 +@d4 1 +a4 2 ccc eeeggg @ @@ -100,7 +116,7 @@ log @r1.1 @ text -@d3 1 -a3 1 +@d4 1 +a4 1 ddd@