Commit fe090d1

Merge pull request #87 from dflook/remove-explicit-return-none
Add transform to change `return None` into `return`
2 parents 01af245 + e103295 commit fe090d1
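
The transform named in the PR title rewrites an explicit `return None` as a bare `return`, which is shorter and means the same thing (a function that falls off a bare `return` still returns None). The minifier's own transform code is not shown in this view; as a hedged illustration of the idea only, not python-minifier's actual implementation, the rewrite could be expressed with the standard library ast module like this:

# Illustrative sketch only, not python-minifier's implementation: rewrite
# `return None` into a bare `return` using the standard ast module.
import ast

class RemoveExplicitReturnNone(ast.NodeTransformer):
    def visit_Return(self, node):
        # `return None` parses as a Return node whose value is the constant None
        if isinstance(node.value, ast.Constant) and node.value.value is None:
            return ast.Return(value=None)  # equivalent bare `return`
        return node

source = 'def f(a):\n    if a < 2:\n        return None\n    return a\n'
tree = RemoveExplicitReturnNone().visit(ast.parse(source))
print(ast.unparse(ast.fix_missing_locations(tree)))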

File tree: 12 files changed (+510, -60 lines)

.github/workflows/test_corpus.yaml
Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ on:
         type: boolean
         description: 'Regenerate results'
         required: true
-        default: true
+        default: false
   workflow_call:
     inputs:
       ref:

.github/workflows/xtest.yaml
Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@ jobs:

      - name: Run tests
        run: |
-
+
          if [[ "${{ matrix.python }}" == "python3.4" ]]; then
            (cd /usr/lib64/python3.4/test && python3.4 make_ssl_certs.py)
          elif [[ "${{ matrix.python }}" == "python3.5" ]]; then

corpus_test/generate_report.py
Lines changed: 121 additions & 45 deletions

@@ -6,7 +6,7 @@

 from result import Result, ResultReader

-ENHANCED_REPORT = os.environ.get('ENHANCED_REPORT', False)
+ENHANCED_REPORT = os.environ.get('ENHANCED_REPORT', True)


 @dataclass
@@ -64,6 +64,9 @@ def mean_percent_of_original(self) -> float:
     def larger_than_original(self) -> Iterable[Result]:
         """Return those entries that have a larger minified size than the original size"""
         for result in self.entries.values():
+            if result.outcome != 'Minified':
+                continue
+
             if result.original_size < result.minified_size:
                 yield result

@@ -91,10 +94,18 @@ def compare_size_increase(self, base: 'ResultSet') -> Iterable[Result]:
         """

         for result in self.entries.values():
+            if result.outcome != 'Minified':
+                # This result was not minified, so we can't compare
+                continue
+
             if result.corpus_entry not in base.entries:
                 continue

             base_result = base.entries[result.corpus_entry]
+            if base_result.outcome != 'Minified':
+                # The base result was not minified, so we can't compare
+                continue
+
             if result.minified_size > base_result.minified_size:
                 yield result

@@ -104,10 +115,17 @@ def compare_size_decrease(self, base: 'ResultSet') -> Iterable[Result]:
         """

         for result in self.entries.values():
+            if result.outcome != 'Minified':
+                continue
+
             if result.corpus_entry not in base.entries:
                 continue

             base_result = base.entries[result.corpus_entry]
+            if base_result.outcome != 'Minified':
+                # The base result was not minified, so we can't compare
+                continue
+
             if result.minified_size < base_result.minified_size:
                 yield result

@@ -164,6 +182,103 @@ def format_difference(compare: Iterable[Result], base: Iterable[Result]) -> str:
     else:
         return s

+def report_larger_than_original(results_dir: str, python_versions: str, minifier_sha: str) -> str:
+    yield '''
+## Larger than original
+
+| Corpus Entry | Original Size | Minified Size |
+|--------------|--------------:|--------------:|'''
+
+    for python_version in python_versions:
+        try:
+            summary = result_summary(results_dir, python_version, minifier_sha)
+        except FileNotFoundError:
+            continue
+
+        larger_than_original = sorted(summary.larger_than_original(), key=lambda result: result.original_size)
+
+        for entry in larger_than_original:
+            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - entry.original_size:+}) |'
+
+def report_unstable(results_dir: str, python_versions: str, minifier_sha: str) -> str:
+    yield '''
+## Unstable
+
+| Corpus Entry | Python Version | Original Size |
+|--------------|----------------|--------------:|'''
+
+    for python_version in python_versions:
+        try:
+            summary = result_summary(results_dir, python_version, minifier_sha)
+        except FileNotFoundError:
+            continue
+
+        unstable = sorted(summary.unstable_minification(), key=lambda result: result.original_size)
+
+        for entry in unstable:
+            yield f'| {entry.corpus_entry} | {python_version} | {entry.original_size} |'
+
+def report_exceptions(results_dir: str, python_versions: str, minifier_sha: str) -> str:
+    yield '''
+## Exceptions
+
+| Corpus Entry | Python Version | Exception |
+|--------------|----------------|-----------|'''
+
+    exceptions_found = False
+
+    for python_version in python_versions:
+        try:
+            summary = result_summary(results_dir, python_version, minifier_sha)
+        except FileNotFoundError:
+            continue
+
+        exceptions = sorted(summary.exception(), key=lambda result: result.original_size)
+
+        for entry in exceptions:
+            exceptions_found = True
+            yield f'| {entry.corpus_entry} | {python_version} | {entry.outcome} |'
+
+    if not exceptions_found:
+        yield ' None | | |'
+
+def report_larger_than_base(results_dir: str, python_versions: str, minifier_sha: str, base_sha: str) -> str:
+    yield '''
+## Top 10 Larger than base
+
+| Corpus Entry | Original Size | Minified Size |
+|--------------|--------------:|--------------:|'''
+
+    there_are_some_larger_than_base = False
+
+    for python_version in python_versions:
+        try:
+            summary = result_summary(results_dir, python_version, minifier_sha)
+        except FileNotFoundError:
+            continue
+
+        base_summary = result_summary(results_dir, python_version, base_sha)
+        larger_than_original = sorted(summary.compare_size_increase(base_summary), key=lambda result: result.original_size)[:10]
+
+        for entry in larger_than_original:
+            there_are_some_larger_than_base = True
+            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - base_summary.entries[entry.corpus_entry].minified_size:+}) |'
+
+    if not there_are_some_larger_than_base:
+        yield '| N/A | N/A | N/A |'
+
+def report_slowest(results_dir: str, python_versions: str, minifier_sha: str) -> str:
+    yield '''
+## Top 10 Slowest
+
+| Corpus Entry | Original Size | Minified Size | Time |
+|--------------|--------------:|--------------:|-----:|'''
+
+    for python_version in python_versions:
+        summary = result_summary(results_dir, python_version, minifier_sha)
+
+        for entry in sorted(summary.entries.values(), key=lambda entry: entry.time, reverse=True)[:10]:
+            yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} | {entry.time:.3f} |'

 def report(results_dir: str, minifier_ref: str, minifier_sha: str, base_ref: str, base_sha: str) -> Iterable[str]:
     """
@@ -236,50 +351,11 @@ def format_size_change_detail() -> str:
     )

     if ENHANCED_REPORT:
-        yield '''
-## Larger than original
-
-| Corpus Entry | Original Size | Minified Size |
-|--------------|--------------:|--------------:|'''
-
-        for python_version in ['3.11']:
-            summary = result_summary(results_dir, python_version, minifier_sha)
-            larger_than_original = sorted(summary.larger_than_original(), key=lambda result: result.original_size)
-
-            for entry in larger_than_original:
-                yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - entry.original_size:+}) |'
-
-        yield '''
-## Top 10 Larger than base
-
-| Corpus Entry | Original Size | Minified Size |
-|--------------|--------------:|--------------:|'''
-
-        there_are_some_larger_than_base = False
-
-        for python_version in ['3.11']:
-            summary = result_summary(results_dir, python_version, minifier_sha)
-            base_summary = result_summary(results_dir, python_version, base_sha)
-            larger_than_original = sorted(summary.compare_size_increase(base_summary), key=lambda result: result.original_size)[:10]
-
-            for entry in larger_than_original:
-                there_are_some_larger_than_base = True
-                yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} ({entry.minified_size - base_summary.entries[entry.corpus_entry].minified_size:+}) |'
-
-        if not there_are_some_larger_than_base:
-            yield '| N/A | N/A | N/A |'
-
-        yield '''
-## Top 10 Slowest
-
-| Corpus Entry | Original Size | Minified Size | Time |
-|--------------|--------------:|--------------:|-----:|'''
-
-        for python_version in ['3.11']:
-            summary = result_summary(results_dir, python_version, minifier_sha)
-
-            for entry in sorted(summary.entries.values(), key=lambda entry: entry.time, reverse=True)[:10]:
-                yield f'| {entry.corpus_entry} | {entry.original_size} | {entry.minified_size} | {entry.time:.3f} |'
+        yield from report_larger_than_original(results_dir, ['3.11'], minifier_sha)
+        yield from report_larger_than_base(results_dir, ['3.11'], minifier_sha, base_sha)
+        yield from report_slowest(results_dir, ['3.11'], minifier_sha)
+        yield from report_unstable(results_dir, ['2.7', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], minifier_sha)
+        yield from report_exceptions(results_dir, ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], minifier_sha)


 def main():
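
The refactor above moves each enhanced-report section into its own generator function that yields Markdown lines, and report() now assembles the sections with yield from, so each section can cover its own list of Python versions. A minimal, self-contained sketch of that composition pattern (names here are illustrative, not taken from the file):

# Sketch of the generator-composition pattern: a section yields Markdown
# lines, and the top-level report stitches sections together with yield from.
from typing import Iterable

def section(title: str, rows: Iterable[str]) -> Iterable[str]:
    yield f'## {title}'
    yield ''
    yield '| Corpus Entry | Original Size |'
    yield '|--------------|--------------:|'
    for row in rows:
        yield row

def report() -> Iterable[str]:
    yield '# Corpus report'
    yield from section('Larger than original', ['| example.py | 1234 |'])

print('\n'.join(report()))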

corpus_test/generate_results.py
Lines changed: 52 additions & 8 deletions

@@ -1,8 +1,14 @@
 import argparse
+import datetime
+import gzip
 import os
 import sys
 import time

+
+import logging
+
+
 import python_minifier
 from result import Result, ResultWriter

@@ -23,8 +29,13 @@ def minify_corpus_entry(corpus_path, corpus_entry):
     :rtype: Result
     """

-    with open(os.path.join(corpus_path, corpus_entry), 'rb') as f:
-        source = f.read()
+    if os.path.isfile(os.path.join(corpus_path, corpus_entry + '.py.gz')):
+        with gzip.open(os.path.join(corpus_path, corpus_entry + '.py.gz'), 'rb') as f:
+            source = f.read()
+    else:
+        with open(os.path.join(corpus_path, corpus_entry), 'rb') as f:
+            source = f.read()
+

     result = Result(corpus_entry, len(source), 0, 0, '')

@@ -72,21 +83,54 @@ def corpus_test(corpus_path, results_path, sha, regenerate_results):
     :param str sha: The python-minifier sha we are testing
     :param bool regenerate_results: Regenerate results even if they are present
     """
-    corpus_entries = os.listdir(corpus_path)
-
     python_version = '.'.join([str(s) for s in sys.version_info[:2]])
+
+    log_path = 'results_' + python_version + '_' + sha + '.log'
+    print('Logging in GitHub Actions is absolute garbage. Logs are going to ' + log_path)
+
+    logging.basicConfig(filename=os.path.join(results_path, log_path), level=logging.DEBUG)
+
+    corpus_entries = [entry[:-len('.py.gz')] for entry in os.listdir(corpus_path)]
+
     results_file_path = os.path.join(results_path, 'results_' + python_version + '_' + sha + '.csv')

-    if os.path.isfile(results_file_path) and not regenerate_results:
-        print('Results file already exists: %s', results_file_path)
-        return
+    if os.path.isfile(results_file_path):
+        logging.info('Results file already exists: %s', results_file_path)
+        if regenerate_results:
+            os.remove(results_file_path)
+
+    total_entries = len(corpus_entries)
+    logging.info('Testing python-minifier on %d entries' % total_entries)
+    tested_entries = 0
+
+    start_time = time.time()
+    next_checkpoint = time.time() + 60

     with ResultWriter(results_file_path) as result_writer:
+        logging.info('%d results already present' % len(result_writer))
+
         for entry in corpus_entries:
-            print(entry)
+            if entry in result_writer:
+                continue
+
+            logging.debug(entry)
+
             result = minify_corpus_entry(corpus_path, entry)
             result_writer.write(result)
+            tested_entries += 1
+
+            sys.stdout.flush()
+
+            if time.time() > next_checkpoint:
+                percent = len(result_writer) / total_entries * 100
+                time_per_entry = (time.time() - start_time) / tested_entries
+                entries_remaining = len(corpus_entries) - len(result_writer)
+                time_remaining = int(entries_remaining * time_per_entry)
+                logging.info('Tested %d/%d entries (%d%%) %s seconds remaining' % (len(result_writer), total_entries, percent, time_remaining))
+                sys.stdout.flush()
+                next_checkpoint = time.time() + 60

+    logging.info('Finished')

 def bool_parse(value):
     return value == 'true'
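
corpus_test now reads gzip-compressed corpus entries, skips entries that already have a recorded result, and logs a progress estimate roughly once a minute. A stripped-down, hedged sketch of that resume-and-checkpoint loop, with process standing in for minify_corpus_entry and a plain set standing in for ResultWriter:

# Hedged sketch of the resume-and-checkpoint pattern; names are illustrative.
import time

def run(entries, already_done, process):
    start = time.time()
    next_checkpoint = start + 60
    done = len(already_done)
    tested = 0

    for entry in entries:
        if entry in already_done:
            continue  # resume: skip entries that already have a result

        process(entry)
        done += 1
        tested += 1

        if time.time() > next_checkpoint:
            time_per_entry = (time.time() - start) / tested
            remaining = int((len(entries) - done) * time_per_entry)
            print('Tested %d/%d entries, ~%d seconds remaining' % (done, len(entries), remaining))
            next_checkpoint = time.time() + 60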

corpus_test/result.py
Lines changed: 31 additions & 1 deletion

@@ -1,3 +1,6 @@
+import os
+
+
 class Result(object):

     def __init__(self, corpus_entry, original_size, minified_size, time, outcome):
@@ -21,15 +24,37 @@ def __init__(self, results_path):
         :param str results_path: The path to the results file
         """
         self._results_path = results_path
+        self._size = 0
+        self._existing_result_set = set()
+
+        if not os.path.isfile(self._results_path):
+            return
+
+        with open(self._results_path, 'r') as f:
+            for line in f:
+                if line != 'corpus_entry,original_size,minified_size,time,result\n':
+                    self._existing_result_set.add(line.split(',')[0])
+
+        self._size += len(self._existing_result_set)

     def __enter__(self):
-        self.results = open(self._results_path, 'w')
+        self.results = open(self._results_path, 'a')
         self.results.write('corpus_entry,original_size,minified_size,time,result\n')
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
         self.results.close()

+    def __contains__(self, item):
+        """
+        :param str item: The name of the entry in the corpus
+        :return bool: True if the entry already exists in the results file
+        """
+        return item in self._existing_result_set
+
+    def __len__(self):
+        return self._size
+
     def write(self, result):
         """
         :param Result result: The result to write to the file
@@ -41,6 +66,7 @@ def write(self, result):
             str(result.time) + ',' + result.outcome + '\n'
         )
         self.results.flush()
+        self._size += 1


 class ResultReader:
@@ -66,7 +92,11 @@ def __next__(self):
         """
         :return Result: The next result in the file
         """
+
         line = self.results.readline()
+        while line == 'corpus_entry,original_size,minified_size,time,result\n':
+            line = self.results.readline()
+
         if line == '':
             raise StopIteration
         else:
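
With these changes ResultWriter opens its CSV in append mode and remembers which corpus entries already have a row, so a run can resume where it stopped; because the header line is written on every __enter__, ResultReader correspondingly skips repeated header rows while reading. A small hedged usage sketch (the filename and Result values are made up):

# Hedged usage sketch of the resumable ResultWriter; values are illustrative.
from result import Result, ResultWriter

with ResultWriter('results_3.11_example.csv') as writer:
    print('%d results already present' % len(writer))

    if 'example.py' not in writer:
        # corpus_entry, original_size, minified_size, time, outcome
        writer.write(Result('example.py', 1000, 800, 0.01, 'Minified'))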
Lines changed: 7 additions & 0 deletions

@@ -0,0 +1,7 @@
+def important(a):
+    if a > 3:
+        return a
+    if a < 2:
+        return None
+    a.adjust(1)
+    return None
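
With the new transform, each return None in an input like the one above would be rewritten to a bare return. An illustrative before/after sketch, ignoring the minifier's other transforms (this is not captured output from the tool):

# Illustrative expected shape after the `return None` transform only:
def important(a):
    if a > 3:
        return a
    if a < 2:
        return
    a.adjust(1)
    return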
