diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6ca27fe --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +LOCAL* diff --git a/.travis.yml b/.travis.yml index 927a485..844944a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,10 +4,9 @@ python: - "3.4" install: - pip install . -before_script: # configure a headless display to test plot generation - - "export DISPLAY=:99.0" - - "sh -e /etc/init.d/xvfb start" - - sleep 3 # give xvfb some time to start +dist: xenial +services: + - xvfb script: - git clone https://github.com/erikbern/git-of-theseus - git-of-theseus-analyze git-of-theseus --outdir got diff --git a/README.md b/README.md index c761813..1b4977f 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ After that, you can generate plots! Here are some ways you can do that: 1. Run `git-of-theseus-stack-plot cohorts.json` which will write to `stack_plot.png` 1. Run `git-of-theseus-survival-plot survival.json` which will write to `survival_plot.png` (run it with `--help` for some options) -If you want to plot multiple repositories, have to run `git-of-theseus-analyze` separately for each project and store the data in separate directories using the `--outdir` flag. Then you can run `git-of-theseus-survival-plot ` (optionally with the `--exp-fit` flag to fit an exponential decay) +If you want to plot multiple repositories, have to run `git-of-theseus-analyze` separately for each project and store the data in separate directories using the `--outdir` flag. Then you can run `git-of-theseus-survival-plot ` (optionally with the `--exp-fit` flag to fit an exponential decay) and `git-of-theseus-stack-plot `. 
Help ---- diff --git a/git_of_theseus/stack_plot.py b/git_of_theseus/stack_plot.py index 6d8ee1f..543a6cd 100644 --- a/git_of_theseus/stack_plot.py +++ b/git_of_theseus/stack_plot.py @@ -19,6 +19,7 @@ import argparse, dateutil.parser, itertools, json, numpy, sys from matplotlib import pyplot +from collections import defaultdict def generate_n_colors(n): @@ -32,8 +33,59 @@ def euclidean(a, b): return colors -def stack_plot(input_fn, display=False, outfile='stack_plot.png', max_n=20, normalize=False, dont_stack=False): - data = json.load(open(input_fn)) # TODO do we support multiple arguments here? +def stack_plot(input_fns, display=False, + outfile='stack_plot.png', max_n=20, normalize=False, dont_stack=False, outmerged=False): + + loc = {} # Helper data structure + authors = set() # All authors + tss = set() # All timestamps + for fn in input_fns: + print('Reading %s' % fn) + data = json.load(open(fn)) + locr = defaultdict(defaultdict) + for i, a in enumerate(data['labels']): + authors.add(a) + locr[a] = {} + for j, t in enumerate(data['ts']): + tss.add(t) + locr[a][t] = data['y'][i][j] + loc[fn] = locr + + authorss = sorted(authors) # Authors, sorted + tsss = sorted(tss) # Timestamps, sorted + + merged = [[0 for j in range(len(tsss))] for i in range(len(authorss))] + + for i, r in enumerate(loc): + # print("repo: ", r) + for j, a in enumerate(authorss): + # print(" ", a) + l = 0 + for k, t in enumerate(tsss): + # print(r, a, t) + if a in loc[r].keys(): + if t in loc[r][a].keys(): + l = loc[r][a][t] + # print("l = ", l) + merged[j][k] = merged[j][k] + l + + data = { + 'y': merged, + 'ts': [t for t in tsss], + 'labels': [a for a in authorss] + } + if outmerged: + mergefn = 'merged.json' + print('Writing data to %s' % mergefn) + f = open(mergefn, 'w') + json.dump( + { + 'y': merged, + 'ts': [t for t in tsss], + 'labels': [a for a in authorss] + }, f) + f.close() + y = numpy.array(data['y']) if y.shape[0] > max_n: js = sorted(range(len(data['labels'])), key=lambda j: 
max(y[j]), reverse=True) @@ -74,7 +126,8 @@ def stack_plot_cmdline(): parser.add_argument('--max-n', default=20, type=int, help='Max number of dataseries (will roll everything else into "other") (default: %(default)s)') parser.add_argument('--normalize', action='store_true', help='Normalize the plot to 100%%') parser.add_argument('--dont-stack', action='store_true', help='Don\'t stack plot') - parser.add_argument('input_fn') + parser.add_argument('--outmerged', action='store_true', help='Output merged data to merged.json') + parser.add_argument('input_fns', nargs='*') kwargs = vars(parser.parse_args()) stack_plot(**kwargs) diff --git a/tests/test_data_merged_1_2.json b/tests/test_data_merged_1_2.json new file mode 100644 index 0000000..baef7be --- /dev/null +++ b/tests/test_data_merged_1_2.json @@ -0,0 +1,19 @@ +{ + "y": [ + [100, 170, 600, 400, 700], + [0, 0, 150, 150, 300], + [0, 50, 150, 200, 200] + ], + "ts": [ + "2019-01-01T07:00:00", + "2019-02-01T07:00:00", + "2019-04-01T08:00:00", + "2019-06-01T08:00:00", + "2019-08-01T08:00:00" + ], + "labels": [ + "Author A", + "Author B", + "Author C" + ] +} \ No newline at end of file diff --git a/tests/test_data_repo_1.json b/tests/test_data_repo_1.json new file mode 100644 index 0000000..0af57d8 --- /dev/null +++ b/tests/test_data_repo_1.json @@ -0,0 +1,15 @@ +{ + "y": [ + [100, 200, 500], + [0, 150, 300] + ], + "ts": [ + "2019-01-01T07:00:00", + "2019-04-01T08:00:00", + "2019-08-01T08:00:00" + ], + "labels": [ + "Author A", + "Author B" + ] +} \ No newline at end of file diff --git a/tests/test_data_repo_2.json b/tests/test_data_repo_2.json new file mode 100644 index 0000000..d0559ec --- /dev/null +++ b/tests/test_data_repo_2.json @@ -0,0 +1,15 @@ +{ + "y": [ + [70, 400, 200], + [50, 150, 200] + ], + "ts": [ + "2019-02-01T07:00:00", + "2019-04-01T08:00:00", + "2019-06-01T08:00:00" + ], + "labels": [ + "Author A", + "Author C" + ] +} \ No newline at end of file diff --git a/tests/tests.py b/tests/tests.py new file 
mode 100644 index 0000000..a58c33a --- /dev/null +++ b/tests/tests.py @@ -0,0 +1,35 @@ +# Tests for stack_plot +# +# To run tests: +# (1) ensure that git-of-theseus is installed +# (2) go to folder tests and +# (3) python tests.py + +import json +from git_of_theseus import stack_plot + +print('Testing stack_plot...') + +print('Test 1 - Run stack_plot for repos 1 and 2') +out_fn = 'stack_plot.png' +in_fns = ['test_data_repo_1.json', 'test_data_repo_2.json'] + +stack_plot(outfile=out_fn, input_fns=in_fns, outmerged=True) + +# merged.json and test_data_merged_1_2.json must have equal JSON contents. +if json.load(open('merged.json')) == json.load(open('test_data_merged_1_2.json')): + print('Test succeeded') +else: + print('Test failed') + +print('Test 2 - Run stack_plot for repo 1') +out_fn = 'stack_plot.png' +in_fns = ['test_data_repo_1.json'] + +stack_plot(outfile=out_fn, input_fns=in_fns, outmerged=True) + +# merged.json and test_data_repo_1.json must have equal JSON contents. +if json.load(open('merged.json')) == json.load(open('test_data_repo_1.json')): + print('Test succeeded') +else: + print('Test failed')