Revise docstrings/comments in performance tests

EliahKagan · EliahKagan · commit 30f49d9deaa0 · 2023-10-24T03:45:21.000-04:00
diff --git a/test/performance/test_commit.py b/test/performance/test_commit.py
@@ -1,8 +1,10 @@
-# test_performance.py
 # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
 #
 # This module is part of GitPython and is released under
 # the BSD License: https://opensource.org/license/bsd-3-clause/
+
+"""Performance tests for commits (iteration, traversal, and serialization)."""
+
 from io import BytesIO
 from time import time
 import sys
@@ -19,7 +21,7 @@ def tearDown(self):
 
         gc.collect()
 
-    # ref with about 100 commits in its history
+    # A ref with about 100 commits in its history.
     ref_100 = "0.1.6"
 
     def _query_commit_info(self, c):
@@ -36,9 +38,9 @@ def test_iteration(self):
         no = 0
         nc = 0
 
-        # find the first commit containing the given path - always do a full
-        # iteration ( restricted to the path in question ), but in fact it should
-        # return quite a lot of commits, we just take one and hence abort the operation
+        # Find the first commit containing the given path. Always do a full iteration
+        # (restricted to the path in question). This should return quite a lot of
+        # commits. We just take one and hence abort the operation.
 
         st = time()
         for c in self.rorepo.iter_commits(self.ref_100):
@@ -57,7 +59,7 @@ def test_iteration(self):
         )
 
     def test_commit_traversal(self):
-        # bound to cat-file parsing performance
+        # Bound to cat-file parsing performance.
         nc = 0
         st = time()
         for c in self.gitrorepo.commit().traverse(branch_first=False):
@@ -71,7 +73,7 @@ def test_commit_traversal(self):
         )
 
     def test_commit_iteration(self):
-        # bound to stream parsing performance
+        # Bound to stream parsing performance.
         nc = 0
         st = time()
         for c in Commit.iter_items(self.gitrorepo, self.gitrorepo.head):
@@ -89,8 +91,8 @@ def test_commit_serialization(self):
 
         rwrepo = self.gitrwrepo
         make_object = rwrepo.odb.store
-        # direct serialization - deserialization can be tested afterwards
-        # serialization is probably limited on IO
+        # Direct serialization - deserialization can be tested afterwards.
+        # Serialization is probably limited on IO.
         hc = rwrepo.commit(rwrepo.head)
 
         nc = 5000
diff --git a/test/performance/test_odb.py b/test/performance/test_odb.py
@@ -1,4 +1,5 @@
-"""Performance tests for object store"""
+"""Performance tests for object store."""
+
 import sys
 from time import time
 
@@ -24,7 +25,7 @@ def test_random_access(self):
             results[0].append(elapsed)
 
             # GET TREES
-            # walk all trees of all commits
+            # Walk all trees of all commits.
             st = time()
             blobs_per_commit = []
             nt = 0
@@ -35,7 +36,7 @@ def test_random_access(self):
                     nt += 1
                     if item.type == "blob":
                         blobs.append(item)
-                    # direct access for speed
+                    # Direct access for speed.
                 # END while trees are there for walking
                 blobs_per_commit.append(blobs)
             # END for each commit
@@ -75,7 +76,7 @@ def test_random_access(self):
             results[2].append(elapsed)
         # END for each repo type
 
-        # final results
+        # Final results.
         for test_name, a, b in results:
             print(
                 "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a),
diff --git a/test/performance/test_streams.py b/test/performance/test_streams.py
@@ -1,4 +1,5 @@
-"""Performance data streaming performance"""
+"""Performance tests for data streaming."""
+
 import os
 import subprocess
 import sys
@@ -15,13 +16,13 @@
 
 
 class TestObjDBPerformance(TestBigRepoR):
-    large_data_size_bytes = 1000 * 1000 * 10  # some MiB should do it
-    moderate_data_size_bytes = 1000 * 1000 * 1  # just 1 MiB
+    large_data_size_bytes = 1000 * 1000 * 10  # 10 MB (decimal) should do it.
+    moderate_data_size_bytes = 1000 * 1000 * 1  # Just 1 MB (decimal).
 
     @with_rw_repo("HEAD", bare=True)
     def test_large_data_streaming(self, rwrepo):
-        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
-        # It should be shared if possible
+        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream.
+        # It should be shared if possible.
         ldb = LooseObjectDB(osp.join(rwrepo.git_dir, "objects"))
 
         for randomize in range(2):
@@ -32,7 +33,7 @@ def test_large_data_streaming(self, rwrepo):
             elapsed = time() - st
             print("Done (in %f s)" % elapsed, file=sys.stderr)
 
-            # writing - due to the compression it will seem faster than it is
+            # Writing - due to the compression it will seem faster than it is.
             st = time()
             binsha = ldb.store(IStream("blob", size, stream)).binsha
             elapsed_add = time() - st
@@ -45,7 +46,7 @@ def test_large_data_streaming(self, rwrepo):
             msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
             print(msg, file=sys.stderr)
 
-            # reading all at once
+            # Reading all at once.
             st = time()
             ostream = ldb.stream(binsha)
             shadata = ostream.read()
@@ -57,7 +58,7 @@ def test_large_data_streaming(self, rwrepo):
             msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
             print(msg, file=sys.stderr)
 
-            # reading in chunks of 1 MiB
+            # Reading in chunks of 512 * 1000 bytes.
             cs = 512 * 1000
             chunks = []
             st = time()
@@ -86,7 +87,7 @@ def test_large_data_streaming(self, rwrepo):
                 file=sys.stderr,
             )
 
-            # del db file so git has something to do
+            # Delete the db file so git has something to do.
             ostream = None
             import gc
 
@@ -95,34 +96,34 @@ def test_large_data_streaming(self, rwrepo):
 
             # VS. CGIT
             ##########
-            # CGIT ! Can using the cgit programs be faster ?
+            # CGIT! Can using the cgit programs be faster?
             proc = rwrepo.git.hash_object("-w", "--stdin", as_process=True, istream=subprocess.PIPE)
 
-            # write file - pump everything in at once to be a fast as possible
-            data = stream.getvalue()  # cache it
+            # Write file - pump everything in at once to be as fast as possible.
+            data = stream.getvalue()  # Cache it.
             st = time()
             proc.stdin.write(data)
             proc.stdin.close()
             gitsha = proc.stdout.read().strip()
             proc.wait()
             gelapsed_add = time() - st
             del data
-            assert gitsha == bin_to_hex(binsha)  # we do it the same way, right ?
+            assert gitsha == bin_to_hex(binsha)  # We do it the same way, right?
 
-            #  as its the same sha, we reuse our path
+            # As it's the same sha, we reuse our path.
             fsize_kib = osp.getsize(db_file) / 1000
             msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
             msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
             print(msg, file=sys.stderr)
 
-            # compare ...
+            # Compare.
             print(
                 "Git-Python is %f %% faster than git when adding big %s files"
                 % (100.0 - (elapsed_add / gelapsed_add) * 100, desc),
                 file=sys.stderr,
             )
 
-            # read all
+            # Read all.
             st = time()
             _hexsha, _typename, size, data = rwrepo.git.get_object_data(gitsha)
             gelapsed_readall = time() - st
@@ -132,14 +133,14 @@ def test_large_data_streaming(self, rwrepo):
                 file=sys.stderr,
             )
 
-            # compare
+            # Compare.
             print(
                 "Git-Python is %f %% faster than git when reading big %sfiles"
                 % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc),
                 file=sys.stderr,
             )
 
-            # read chunks
+            # Read chunks.
             st = time()
             _hexsha, _typename, size, stream = rwrepo.git.stream_object_data(gitsha)
             while True:
@@ -158,7 +159,7 @@ def test_large_data_streaming(self, rwrepo):
             )
             print(msg, file=sys.stderr)
 
-            # compare
+            # Compare.
             print(
                 "Git-Python is %f %% faster than git when reading big %s files in chunks"
                 % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc),