Skip to content

Commit 5a14047

Browse files
committed
Update legacy data
1 parent 2b0e49b commit 5a14047

File tree

3 files changed

+251
-2
lines changed

3 files changed

+251
-2
lines changed

generate-legacy-timestamps.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
"""
2+
generate-legacy-timestamps.py WIKI_PAGES_PATH
3+
4+
"""
5+
6+
import os
7+
import re
8+
import sys
9+
10+
# Notebook-name fragments and the spelling used by the matching wiki page
# directory.  Applied in this order, after the anchored 'Matplotlib_' rule.
_PAGE_NAME_REPLACEMENTS = (
    ('MayaVi_', 'MayaVi(2f)'),
    ('KDTree_example', 'KDTree'),
    ('PIL_example', 'PIL'),
    ('ScriptingMayavi2_', 'ScriptingMayavi2(2f)'),
    ('C_Extensions_NumPy_arrays', 'C_Extensions(2f)NumPy_arrays'),
    ('Theoretical_Ecology_', 'Theoretical_Ecology(2f)'),
    ('FortranIO_', 'FortranIO(2f)'),
    ('TimeSeries_', 'TimeSeries(2f)'),
)

# Notebooks whose wiki page directory has no 'Cookbook(2f)' prefix.
_TOP_LEVEL_PAGES = ('ParallelProgramming', 'PerformancePython')


def _legacy_page_name(base):
    """Translate a notebook base name into its wiki page directory name."""
    name = re.sub('^Matplotlib_', 'Matplotlib(2f)', base)
    for old, new in _PAGE_NAME_REPLACEMENTS:
        name = name.replace(old, new)
    return name


def _edit_log_path(wiki_root, base):
    """Return the path of the edit-log belonging to notebook *base*."""
    page_dir = _legacy_page_name(base)
    if base not in _TOP_LEVEL_PAGES:
        page_dir = 'Cookbook(2f)' + page_dir
    return os.path.join(wiki_root, page_dir, 'edit-log')


def _read_timestamps(editlog):
    """Return the timestamps (whole seconds) of every entry in *editlog*.

    The first whitespace-separated field of each non-blank line is divided
    by 1e6 and truncated to an int.
    """
    timestamps = []
    with open(editlog, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            timestamps.append(int(float(fields[0]) / 1e6))
    return timestamps


def main():
    """Print ``name first_edit last_edit`` for each notebook in ./ipython.

    The wiki pages directory is taken from ``sys.argv[1]``.  Files that are
    not ``.ipynb`` notebooks, notebooks without an edit-log, and edit-logs
    without any entries are skipped.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: generate-legacy-timestamps.py WIKI_PAGES_PATH")

    # Resolve the wiki root once instead of on every iteration.
    wiki_root = os.path.abspath(sys.argv[1])

    for fn in os.listdir('ipython'):
        base, ext = os.path.splitext(fn)
        if ext != '.ipynb':
            continue

        editlog = _edit_log_path(wiki_root, base)
        if not os.path.isfile(editlog):
            continue

        timestamps = _read_timestamps(editlog)
        if not timestamps:
            # An empty edit-log has no first/last edit; min()/max() would
            # raise ValueError on it.
            continue

        print(base, min(timestamps), max(timestamps))


if __name__ == "__main__":
    main()

generate-legacy-users.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
"""
2+
generate-legacy-users.py WIKI_PAGES_PATH
3+
4+
"""
5+
from __future__ import absolute_import, print_function, division
6+
7+
import os
8+
import re
9+
import sys
10+
from HTMLParser import HTMLParser
11+
12+
13+
# Page names that match the home-page heuristic but are not personal pages;
# they are never used as a user name.
_NOT_USER_PAGES = ('About_SciPy', 'SciPy', 'Cookbook(2f)MayaVi(2f)tvtk')

# A page directory is reported when its name contains one of these.
_REPORTED_PAGE_MARKERS = ('Cookbook', 'PerformancePython',
                          'ParallelProgramming')

# Positions, in a whitespace-split edit-log line, of the two fields used here.
_PAGE_FIELD = 3
_UID_FIELD = 6

# Hand-maintained user id -> name mapping; takes precedence over the names
# detected from home pages.
_PREDEFINED_USERS = {
    '1273234778.27.13541': 'arjen',
    '1181049059.11.16046': 'WarrenWeckesser',
    '1232509635.1.1790': 'WarrenWeckesser',
    '1143464513.17.11899': 'GaelVaroquaux',
    '1359829272.72.54252': 'FrankBreitling',
    '1196968472.52.21357': 'jesrl',
    '1310512145.5.35406': 'RalphMoore',
    '1134987132.31.5715': 'AndrewStraw',
    '1283944978.14.25260': 'UnuTbu',
    '1150066934.85.44238': 'FredericPetit',
    '1157157190.0.28500': 'AMArchibald',
    '1193155369.79.45281': 'Elby',
    '1162990926.75.41968': 'PauliVirtanen',
    '1144823769.21.43377': 'AngusMcMorland',
    '1199025820.05.62034': 'TimMichelsen',
    '1165998335.9.59069': 'MartinSpacek',
    '1169591527.88.61566': 'MattKnox',
    '1278911090.12.12663': 'ChristopherCampo',
    '1230492524.42.55666': 'nokfi',
    '1166654035.38.11968': 'VincentNijs',
    '1160664185.24.177': 'NeilMB',
    '1148241299.31.23452': 'GabrielGellner',
    '1143248516.72.17557': 'FrancescAltet',
    '1138755498.13.1844': 'BillBaxter',
    '1138639075.54.47297': 'jh',
    '1135217126.43.265': 'FernandoPerez',
    '1228612570.79.23812': 'EgorZindy',
    '1166684071.04.43914': 'ScottSinclair',
    '1153060908.53.58092': 'EmmanuelleGouillart',
    '1152996811.03.49324': 'NickFotopoulos',
    '1135013651.92.25239': 'PearuPeterson',
    '1263714477.79.46523': 'newacct',
    '1321067029.14.1791': 'KristjanOnu',
    '1244315014.64.10666': 'IvoMaljevic',
    '1342900640.41.32910': 'thomas.haslwanter',
    '1138834037.11.63568': 'TimCera',
    '1306523623.53.4799': 'DmitriyRybalkin',
    '1316810730.93.46683': 'TimSwast',
    '1294906831.24.3474': 'MikeToews',
    '1259530275.5.20672': 'JorgeEduardoCardona',
    '1254476605.52.59655': 'wolfganglechner',
    '1220051786.85.3734': 'SimonHook',
    '1321851999.81.53674': 'BAlexRobinson',
    '1245975199.53.27497': 'DavidPowell',
    '1277317890.88.15794': 'AlanLue',
    '1249699417.54.61063': 'mauro',
    '1151666835.94.32020': 'WilliamHunter',
    '1209753612.57.31138': 'JamesNagel',
    '1241897483.76.24144': 'DatChu',
    '1245526844.29.46176': 'RalfGommers',
    '1312558832.94.40303': 'Pierre_deBuyl',
    '1205277370.55.64453': 'keflavich',
    '1147324201.78.18433': 'MichaelMcNeilForbes',
    '1139447249.42.46498': 'RobManagan',
    '1246487580.75.24764': 'MarshallPerrin',
    '1340544644.02.6056': 'WesTurner',
}


def _read_edit_log(edit_log):
    """Return the edit-log entries that carry a user id, as field lists.

    Lines with fewer than seven whitespace-separated fields have no field at
    the user-id position and are dropped, so callers may index it safely.
    """
    with open(edit_log, 'r') as handle:
        log_text = handle.read().rstrip()
    entries = [line.split() for line in log_text.splitlines()]
    return [entry for entry in entries if len(entry) > _UID_FIELD]


def _looks_like_home_page(revs):
    """Return True if any revision file in *revs* has a home-page marker."""
    for fn in os.listdir(revs):
        with open(os.path.join(revs, fn), 'r') as handle:
            r_text = handle.read()
        if 'CategoryHomepage' in r_text or 'home page' in r_text:
            return True
    return False


def _gather_authors(path):
    """Walk *path* and return ``(users, pages)``.

    ``users`` maps user id -> page name for pages that look like a personal
    home page; ``pages`` maps a reported page directory name -> list of the
    user ids in its edit-log, in log order.
    """
    users = {}
    pages = {}

    for root, dirs, _ in os.walk(path):
        for d in dirs:
            revs = os.path.join(root, d, 'revisions')
            edit_log = os.path.join(root, d, 'edit-log')
            if not os.path.isdir(revs) or not os.path.isfile(edit_log):
                continue

            log_items = _read_edit_log(edit_log)

            if _looks_like_home_page(revs):
                # The first usable log entry names the owner of the page.
                for item in log_items:
                    if item[_PAGE_FIELD] in _NOT_USER_PAGES:
                        continue
                    users[item[_UID_FIELD]] = item[_PAGE_FIELD]
                    break

            if log_items and any(m in d for m in _REPORTED_PAGE_MARKERS):
                uids = [item[_UID_FIELD] for item in log_items]
                pages.setdefault(d, []).extend(uids)

    return users, pages


def _print_report(users, pages):
    """Print each page's editors, then the unknown user ids by edit count."""
    unknown_counts = {}
    unknown_names = {}
    page_uid = {}

    for page, uids in sorted(pages.items()):
        editors = []
        seen = set()
        for uid in uids:
            known = uid in users
            if not known:
                # Counted once per edit, not once per page.
                unknown_counts[uid] = unknown_counts.get(uid, 0) + 1

            if uid in seen:
                continue
            seen.add(uid)

            if known:
                editors.append(users[uid])
                continue
            if uid not in unknown_names:
                label = "Unknown[{0}]".format(len(unknown_names) + 1)
                unknown_names[uid] = label
            editors.append(unknown_names[uid])

        # Remember a page for the user of the last log entry, as a hint for
        # identifying unknown ids in the listing below.
        if page != 'Cookbook(2f)MayaVi(2f)examples':
            page_uid[uids[-1]] = page

        page = page.replace('(2f)', '/')
        page = page.replace('Cookbook/', '')
        page = page.replace('/', '_')
        print("{0}: {1}".format(page, ", ".join(editors)))

    # Most frequent unknown ids first; ties broken by id, descending.
    items = sorted(unknown_counts.items(),
                   key=lambda kv: (kv[1], kv[0]), reverse=True)
    for uid, count in items:
        print(unknown_names[uid], ":", uid, count, page_uid.get(uid, ''))


def main():
    """Report the editors of the legacy wiki pages under ``sys.argv[1]``.

    Prints one ``page: editor, editor, ...`` line per reported page, followed
    by one line per user id that could not be mapped to a name.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: generate-legacy-users.py WIKI_PAGES_PATH")

    users, pages = _gather_authors(sys.argv[1])
    # Predefined names override anything detected from home pages.
    users.update(_PREDEFINED_USERS)
    _print_report(users, pages)
152+
153+
154+
class MLStripper(HTMLParser):
    """HTML parser that drops all markup and keeps only the character data.

    Feed it markup with ``feed()`` and read the accumulated text back with
    ``get_data()``.
    """

    def __init__(self):
        # Run the full base-class initialiser instead of only ``reset()``:
        # on Python 3 it also sets attributes (e.g. ``convert_charrefs``)
        # that ``feed()`` needs.  The explicit call, rather than ``super()``,
        # keeps working where HTMLParser is an old-style class (Python 2).
        HTMLParser.__init__(self)
        self.fed = []

    def handle_data(self, d):
        """Collect one chunk of character data found between tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)
162+
163+
164+
def strip_tags(html):
    """Return the text of *html* with the markup removed.

    The string is fed through a fresh ``MLStripper`` and the character data
    it collected is returned.
    """
    stripper = MLStripper()
    stripper.feed(html)
    text = stripper.get_data()
    return text
168+
169+
170+
class StringMatcher(object):
    """Fuzzy lookup of a string against a fixed collection of items.

    NOTE(review): this class uses ``fuzzyset``, ``normalize``, ``splitsub``
    and ``self.aliases``, none of which is imported, defined or assigned in
    the visible part of this file, and nothing visible instantiates it.  As
    shown it cannot run -- confirm whether it is leftover code or whether the
    missing pieces live elsewhere.
    """

    def __init__(self, items):
        # Index every item, in sorted order, in a FuzzySet configured with
        # gram sizes 3 through 5.
        self.fuzzyset = fuzzyset.FuzzySet(gram_size_lower=3,
                                          gram_size_upper=5)

        for item in sorted(items):
            self.fuzzyset.add(item)

    def get(self, item):
        """Return ``(match, score)`` for the best match, or ``(None, 0)``.

        *item* is looked up once per preprocessing function (``normalize``,
        ``splitsub``); all candidates are pooled and the highest-scoring one
        wins.
        """
        r = []

        for fmt in [normalize, splitsub]:
            x = fmt(item)
            if x:
                # presumably a list of (score, string) pairs or None --
                # TODO confirm against the fuzzyset API
                q = self.fuzzyset.get(x)
                if q is not None:
                    r += q

        # Highest score first.
        r.sort(key=lambda x: -x[0])
        if r:
            score, r = r[0]
            # NOTE(review): ``self.aliases`` is never set in ``__init__``;
            # this lookup would raise AttributeError as written.
            return self.aliases[r], score
        else:
            return None, 0
194+
195+
196+
# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()

wiki-legacy-users.txt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ multiprocessing: Unknown[18], PauliVirtanen
152152
vtkVolumeRendering: Unknown[20], PauliVirtanen
153153
wxPython_dialogs: GabrielGellner, Unknown[150]
154154
xplt: TravisOliphant
155+
ParallelProgramming: AMArchibald, Unknown[151], Unknown[152], Unknown[153], MartinSpacek
156+
PerformancePython: Unknown[6], Unknown[4], DavidLinke, Unknown[154], Unknown[155], MartinSpacek, Unknown[156]
155157
wikis_topical_software_MatplotlibCookbook: AndrewStraw
156158

157159
#
@@ -161,6 +163,7 @@ Unknown[2] : 1140130090.73.26701 34
161163
Unknown[19] : 1192792982.32.33646 33
162164
Unknown[18] : 1222198261.41.13085 30
163165
Unknown[33] : 1161108413.93.13451 23
166+
Unknown[6] : 1136348958.12.55682 19
164167
Unknown[13] : 1161122172.92.12452 18
165168
Unknown[97] : 1143292253.36.33282 18
166169
Unknown[141] : 1273180957.22.38451 17
@@ -170,7 +173,6 @@ Unknown[1] : 1136350821.17.3858 14
170173
Unknown[5] : 1147350245.48.33297 13
171174
Unknown[47] : 1205084922.92.52394 11
172175
Unknown[8] : 1158133077.15.7840 11
173-
Unknown[6] : 1136348958.12.55682 11
174176
Unknown[15] : 1204762319.1.55436 10
175177
Unknown[95] : 1139528181.75.18655 10
176178
Unknown[74] : 1155134305.93.22544 9
@@ -180,10 +182,12 @@ Unknown[16] : 1204809158.67.49889 7
180182
Unknown[11] : 1173198834.69.25082 7
181183
Unknown[17] : 1155941540.81.54944 7
182184
Unknown[3] : 1139487334.99.50561 7
185+
Unknown[156] : 1257929087.73.42451 6
183186
Unknown[9] : 1158098774.38.64183 6
184187
Unknown[77] : 1154035039.71.41599 6
185188
Unknown[60] : 1243729904.58.14443 5
186189
Unknown[80] : 1234762887.66.14948 5
190+
Unknown[154] : 1168079382.36.25673 5
187191
Unknown[83] : 1141920306.92.21510 5
188192
Unknown[46] : 1242755893.43.12185 4
189193
Unknown[29] : 1178818414.24.14672 4
@@ -205,14 +209,17 @@ Unknown[48] : 1165257394.77.27638 3
205209
Unknown[72] : 1154198122.95.54949 3
206210
Unknown[55] : 1139176990.16.17019 3
207211
Unknown[129] : 1135159993.15.48123 3
212+
Unknown[4] : 1135026695.12.62922 3
208213
Unknown[82] : 1309988513.98.27729 2
209214
Unknown[43] : 1244511110.6.14864 2
210215
Unknown[143] : 1242186055.15.3458 2
211216
Unknown[89] : 1231625087.07.46719 2 Cookbook(2f)Matplotlib(2f)CompilingMatPlotLibOnSolaris10
212217
Unknown[61] : 1228370846.7.32755 2
218+
Unknown[153] : 1225139931.59.59734 2
213219
Unknown[93] : 1222182878.27.65500 2
214220
Unknown[58] : 1208880410.18.31097 2
215221
Unknown[126] : 1192485481.02.45422 2
222+
Unknown[151] : 1191610731.18.28222 2
216223
Unknown[103] : 1178427146.28.32767 2
217224
Unknown[36] : 1176717370.19.8188 2
218225
Unknown[133] : 1171379031.1.5592 2
@@ -269,6 +276,8 @@ Unknown[119] : 1228360607.51.54594 1 Cookbook(2f)Matplotlib(2f)UsingTex
269276
Unknown[113] : 1226988042.35.46252 1
270277
Unknown[59] : 1226656173.89.39060 1
271278
Unknown[150] : 1225700327.55.58243 1 Cookbook(2f)wxPython_dialogs
279+
Unknown[152] : 1219622952.53.49139 1
280+
Unknown[155] : 1208933160.91.56584 1
272281
Unknown[57] : 1207325198.0.20628 1
273282
Unknown[87] : 1199372458.44.702 1 Cookbook(2f)Matplotlib(2f)BarCharts
274283
Unknown[39] : 1194351481.19.9091 1
@@ -306,4 +315,3 @@ Unknown[106] : 1142444931.14.20090 1
306315
Unknown[26] : 1139360105.85.1212 1
307316
Unknown[115] : 1138659073.19.20633 1
308317
Unknown[138] : 1137718487.2.45248 1
309-
Unknown[4] : 1135026695.12.62922 1

0 commit comments

Comments
 (0)