migrateissues.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import csv
import getpass
import logging
import optparse
import re
import sys
import urllib2
import time
from datetime import datetime
from github import Github
from github import GithubException
from pyquery import PyQuery as pq
# The maximum number of records to retrieve from Google Code in a single request
GOOGLE_MAX_RESULTS = 25
GOOGLE_ISSUE_TEMPLATE = '_Original issue: {}_'
GOOGLE_ISSUES_URL = 'https://code.google.com/p/{}/issues/csv?can=1&num={}&start={}&colspec=ID%20Type%20Status%20Owner%20Summary%20Opened%20Closed%20Reporter%20Stars&sort=id'
GOOGLE_URL = 'http://code.google.com/p/{}/issues/detail?id={}'
GOOGLE_URL_RE = 'http://code.google.com/p/%s/issues/detail\?id=(\d+)'
GOOGLE_ID_RE = GOOGLE_ISSUE_TEMPLATE.format(GOOGLE_URL_RE)
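# For illustration (with a hypothetical project name 'myproject'):
#   GOOGLE_ISSUES_URL.format('myproject', 25, 0) requests the first 25 issue
#   summaries as CSV, and GOOGLE_URL.format('myproject', 42) is the detail page
#   for issue 42, which GOOGLE_URL_RE / GOOGLE_ID_RE later match against migrated issue bodies.
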
# The minimum number of remaining Github rate-limited API requests before we pre-emptively
# abort to avoid hitting the limit part-way through migrating an issue.
GITHUB_SPARE_REQUESTS = 50
# Mapping from Google Code issue labels to Github labels
LABEL_MAPPING = {
    'Type-Defect' : 'bug',
    'Type-Enhancement' : 'enhancement'
}
# Mapping from Google Code issue states to Github labels
STATE_MAPPING = {
    'invalid': 'invalid',
    'duplicate': 'duplicate',
    'wontfix': 'wontfix'
}
def stars_to_label(stars):
    '''Return a label string corresponding to a star range.
    For example, '1' -> '1 star', '2' -> '2-5 stars', etc.
    '''
    stars = int(stars)
    if stars == 1:
        return '1 star'
    elif stars <= 5:
        return '2–5 stars'
    elif stars <= 10:
        return '6–10 stars'
    elif stars <= 20:
        return '11–20 stars'
    else:
        return '21+ stars'

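# Illustrative example: stars_to_label('7') returns '6–10 stars', since 7 falls
# into the 6-10 bucket above; anything over 20 collapses into '21+ stars'.
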
def output(string):
    sys.stdout.write(string)
    sys.stdout.flush()

def spacing_template(wordList, spacing=12):
    output = []
    template = '\t{0:%d}' % (spacing)
    for word in wordList:
        output.append(template.format(word))
    return ' : '.join(output)

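# Illustrative example: spacing_template(['Title', 'My issue']) produces roughly
# "\tTitle        : \tMy issue", i.e. tab-prefixed fields padded to 12 characters
# and joined with ' : '.
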
def escape(s):
"""Process text to convert markup and escape things which need escaping"""
if s:
s = s.replace('%', '%') # Escape % signs
return s
def transform_to_markdown_compliant(string):
    # Escape chars interpreted as markdown formatting by GH
    string = re.sub(r'(\s)~~', r'\1\\~~', string)
    string = re.sub(r'\n(\s*)>', r'\n\1\\>', string)
    string = re.sub(r'\n(\s*)#', r'\n\1\\#', string)
    string = re.sub(r'(?m)^-([- \r]*)$', r'\\-\1', string)
    # '==' is also making headers, but can't nicely escape ('\' shows up)
    string = re.sub(r'(\S\s*\n)(=[= ]*(\r?\n|$))', r'\1\n\2', string)
    # Escape < to avoid being treated as an html tag
    string = re.sub(r'(\s)<', r'\1\\<', string)
    # Avoid links that should not be links.
    # I can find no way to escape the # w/o using backticks:
    string = re.sub(r'(\s+)(#\d+)(\W)', r'\1`\2`\3', string)
    # Create issue links
    string = re.sub(r'\bi#(\d+)', r'issue #\1', string)
    string = re.sub(r'\bissue (\d+)', r'issue #\1', string)
    return string

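# Illustrative example: transform_to_markdown_compliant('see i#12 and #34 here')
# yields 'see issue #12 and `#34` here' -- bare 'i#NN' references become GitHub
# issue references, while stray '#NN' tokens are wrapped in backticks so they are
# not misread as issue links.
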
def github_label(name, color = "FFFFFF"):
""" Returns the Github label with the given name, creating it if necessary. """
try:
return label_cache[name]
except KeyError:
try:
return label_cache.setdefault(name, github_repo.get_label(name))
except GithubException:
return label_cache.setdefault(name, github_repo.create_label(name, color))
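# Note: label_cache and github_repo are module-level names bound in the
# __main__ block below; repeated calls such as github_label('imported') are
# served from the cache without extra API requests.
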
def get_github_milestone(name):
""" Returns the Github milestone with the given name, creating it if necessary. """
try:
return milestone_cache[name]
except KeyError:
for milestone in list(github_repo.get_milestones()):
if milestone.title == name:
return milestone_cache.setdefault(name, milestone)
return milestone_cache.setdefault(name, github_repo.create_milestone(name))
def parse_gcode_date(date_text):
""" Transforms a Google Code date into a more human readable string. """
try:
parsed = datetime.strptime(date_text, '%a %b %d %H:%M:%S %Y')
except ValueError:
return date_text
return parsed.strftime("%B %d, %Y %H:%M:%S")
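# Illustrative example: parse_gcode_date('Tue Jun 25 13:45:00 2013') returns
# 'June 25, 2013 13:45:00'; strings that do not match the expected format are
# returned unchanged.
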
def add_issue_to_github(issue):
""" Migrates the given Google Code issue to Github. """
# Github rate-limits API requests to 5000 per hour, and if we hit that limit part-way
# through adding an issue it could end up in an incomplete state. To avoid this we'll
# ensure that there are enough requests remaining before we start migrating an issue.
if github.rate_limiting[0] < GITHUB_SPARE_REQUESTS:
raise Exception('Aborting to to impending Github API rate-limit cutoff.')
body = issue['content'].replace('%', '%')
output('Adding issue %d' % issue['gid'])
if options.verbose:
output('\n')
outList = [
spacing_template(['Title', issue['title']]),
spacing_template(['State', issue['state']]),
spacing_template(['Labels', issue['labels']]),
spacing_template(['Milestone', issue['milestone']]),
spacing_template(['Source link', issue['link']])
]
output('\n'.join(outList))
github_issue = None
if not options.dry_run:
github_labels = [github_label(label) for label in issue['labels']]
text = body.encode('utf-8')
text = transform_to_markdown_compliant(text)
if issue['milestone']:
github_milestone = get_github_milestone(issue['milestone'])
github_issue = github_repo.create_issue(issue['title'], body = text, labels = github_labels,
milestone = github_milestone)
else:
github_issue = github_repo.create_issue(issue['title'], body = text, labels = github_labels)
if options.verbose and github_issue:
output('\n')
output(spacing_template(['Dest link', github_issue.url]))
# Assigns issues that originally had an owner to the current user
if issue['owner'] and options.assign_owner:
assignee = github.get_user(github_user.login)
if not options.dry_run:
github_issue.edit(assignee = assignee)
return github_issue
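# Note: with --dry-run no issue is created, so this function returns None and
# process_gcode_issues() below skips comment syncing for it.
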
def add_comments_to_issue(github_issue, gcode_issue):
""" Migrates all comments from a Google Code issue to its Github copy. """
# Retrieve existing Github comments, to figure out which Google Code comments are new
existing_comments = [comment.body for comment in github_issue.get_comments()]
# Add any remaining comments to the Github issue
output("\nSyncing comments ")
for i, comment in enumerate(gcode_issue['comments']):
body = u'_From {author} on {date}_\n\n{body}'.format(**comment)
topost = transform_to_markdown_compliant(body)
if topost in existing_comments:
logging.info('Skipping comment %d: already present', i + 1)
else:
logging.info('Adding comment %d', i + 1)
if options.verbose:
output('\n\tAdd: From {author} on {date}'.format(**comment))
if not options.dry_run:
topost = topost.encode('utf-8')
github_issue.create_comment(topost)
# We use a delay to avoid comments being created on GitHub
# in the wrong order, due to network non-determinism.
# Without this delay, I consistently observed a full 1 in 3
# GoogleCode issue comments being reordered.
# XXX: querying GitHub in a loop to see when the comment has
# been posted may be faster, but will cut into the rate limit.
time.sleep(5)
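# Note: a comment counts as "already present" when its transformed body (including
# the '_From {author} on {date}_' header) exactly matches an existing Github
# comment, so re-running the migration is roughly idempotent for comments.
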
def get_attachments(link, attachments):
    if not attachments:
        return ''

    body = u'\n\n'
    for attachment in (pq(a) for a in attachments):
        if not attachment('a'): # Skip deleted attachments
            continue
        # Linking to the comment with the attachment rather than the
        # attachment itself since Google Code uses download tokens for
        # attachments
        body += u'**Attachment:** [{}]({})'.format(attachment('b').text(), link)
    return body

def get_gcode_issue(issue_summary):
    def get_author(doc):
        userlink = doc('.userlink')
        return '[{}](https://code.google.com{})'.format(userlink.text(), userlink.attr('href'))

    # Populate properties available from the summary CSV
    issue = {
        'gid': int(issue_summary['ID']),
        'title': issue_summary['Summary'].replace('%', '&#37;'),
        'link': GOOGLE_URL.format(google_project_name, issue_summary['ID']),
        'owner': issue_summary['Owner'],
        'state': 'closed' if issue_summary['Closed'] else 'open',
        'date': datetime.fromtimestamp(float(issue_summary['OpenedTimestamp'])),
        'status': issue_summary['Status'].lower()
    }

    # Build a list of labels to apply to the new issue, including an 'imported' tag that
    # we can use to identify this issue as one that's passed through migration.
    # Also, build a milestone (google code sees it as a label)
    milestone = None
    labels = ['imported']
    for label in issue_summary['AllLabels'].split(', '):
        if label.startswith('Milestone-'):
            milestone = label.replace('Milestone-', '')
            continue
        if label.startswith('Priority-') and options.omit_priority:
            continue
        if not label:
            continue
        labels.append(LABEL_MAPPING.get(label, label))
    if options.migrate_stars and 'Stars' in issue_summary:
        labels.append(stars_to_label(issue_summary['Stars']))

    # Add additional labels based on the issue's state
    if issue['status'] in STATE_MAPPING:
        labels.append(STATE_MAPPING[issue['status']])

    issue['labels'] = labels
    issue['milestone'] = milestone

    # Scrape the issue details page for the issue body and comments
    opener = urllib2.build_opener()
    if options.google_code_cookie:
        opener.addheaders = [('Cookie', options.google_code_cookie)]

    # Missing issues may still exist in the csv; make sure the link is good
    try:
        connection = opener.open(issue['link'])
    except urllib2.HTTPError:
        return None
    encoding = connection.headers['content-type'].split('charset=')[-1]
    # Pass "ignore" so malformed page data doesn't abort us
    doc = pq(connection.read().decode(encoding, "ignore"))

    description = doc('.issuedescription .issuedescription')
    issue['author'] = get_author(description)

    issue['comments'] = []
    def split_comment(comment, text):
        # Github has an undocumented maximum comment size (unless I just failed
        # to find where it was documented), so split comments up into multiple
        # posts as needed.
        while text:
            comment['body'] = text[:7000]
            text = text[7000:]
            if text:
                comment['body'] += '...'
                text = '...' + text
            issue['comments'].append(comment.copy())

    split_comment(issue, description('pre').text())
    issue['content'] = u'_From {author} on {date:%B %d, %Y %H:%M:%S}_\n\n{content}{attachments}\n\n{footer}'.format(
        content = issue['comments'].pop(0)['body'],
        footer = GOOGLE_ISSUE_TEMPLATE.format(GOOGLE_URL.format(google_project_name, issue['gid'])),
        attachments = get_attachments(issue['link'], doc('.issuedescription .issuedescription .attachments')),
        **issue)

    issue['comments'] = []
    for comment in doc('.issuecomment'):
        comment = pq(comment)
        if not comment('.date'):
            continue # Sign in prompt line uses same class
        if comment.hasClass('delcom'):
            continue # Skip deleted comments
        date = parse_gcode_date(comment('.date').attr('title'))
        try:
            body = comment('pre').text()
        except:
            body = "(There was an error importing this comment body. See original issue on Google Code.)"
            logging.error("Error importing comment body")
        author = get_author(comment)
        updates = comment('.updates .box-inner')
        if updates:
            body += '\n\n' + updates.html().strip().replace('\n', '').replace('<b>', '**').replace('</b>', '**').replace('<br/>', '\n')
        body += get_attachments('{}#{}'.format(issue['link'], comment.attr('id')), comment('.attachments'))

        # Strip the placeholder text if there's any other updates
        body = body.replace('(No comment was entered for this change.)\n\n', '')
        split_comment({'date': date, 'author': author}, body)

    return issue

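# The returned dict looks roughly like: {'gid': 42, 'title': ..., 'link': ...,
# 'state': 'open' or 'closed', 'labels': [...], 'milestone': ... or None,
# 'content': <markdown body>, 'comments': [{'date', 'author', 'body'}, ...]},
# or None when the issue page no longer exists (42 is just an illustrative id).
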
def get_gcode_issues(onlyissues=None):
    count = 100
    start_index = 0
    issues = []
    while True:
        url = GOOGLE_ISSUES_URL.format(google_project_name, count, start_index)
        if onlyissues:
            issues.extend(row for row in csv.DictReader(urllib2.urlopen(url), dialect=csv.excel) if row['ID'] in onlyissues)
        else:
            issues.extend(row for row in csv.DictReader(urllib2.urlopen(url), dialect=csv.excel))
        if issues and 'truncated' in issues[-1]['ID']:
            issues.pop()
            start_index += count
        else:
            return issues

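# Note: the loop above pages through the CSV feed 100 rows at a time (the
# GOOGLE_MAX_RESULTS constant defined earlier is not used here); a trailing row
# whose ID contains 'truncated' is treated as the marker that more results
# remain, which triggers fetching the next page.
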
def process_gcode_issues(existing_issues, onlyissues=None):
""" Migrates all Google Code issues in the given dictionary to Github. """
issues = get_gcode_issues(onlyissues)
previous_gid = 1
if options.start_at is not None:
issues = [x for x in issues if int(x['ID']) >= options.start_at]
previous_gid = options.start_at - 1
output('Starting at issue %d\n' % options.start_at)
for issue in issues:
issue = get_gcode_issue(issue)
# problem occured getting issue information from url, may be deleted
if issue is None:
continue
if options.skip_closed and (issue['state'] == 'closed'):
continue
# If we're trying to do a complete migration to a fresh Github project,
# and want to keep the issue numbers synced with Google Code's, then we
# need to create dummy closed issues for deleted or missing Google Code
# issues.
if options.synchronize_ids:
for gid in xrange(previous_gid + 1, issue['gid']):
if gid in existing_issues:
continue
output('Creating dummy entry for missing issue %d\n' % gid)
title = 'Google Code skipped issue %d' % gid
body = '_Skipping this issue number to maintain synchronization with Google Code issue IDs._'
footer = GOOGLE_ISSUE_TEMPLATE.format(GOOGLE_URL.format(google_project_name, gid))
body += '\n\n' + footer
github_issue = github_repo.create_issue(title, body = body, labels = [github_label('imported')])
github_issue.edit(state = 'closed')
existing_issues[previous_gid] = github_issue
previous_gid = issue['gid']
# Add the issue and its comments to Github, if we haven't already
if issue['gid'] in existing_issues:
github_issue = existing_issues[issue['gid']]
output('Not adding issue %d (exists)' % issue['gid'])
else:
github_issue = add_issue_to_github(issue)
if github_issue:
add_comments_to_issue(github_issue, issue)
if github_issue.state != issue['state']:
github_issue.edit(state = issue['state'])
output('\n')
log_rate_info()
def get_existing_github_issues():
""" Returns a dictionary of Github issues previously migrated from Google Code.
The result maps Google Code issue numbers to Github issue objects.
"""
output("Retrieving existing Github issues...\n")
id_re = re.compile(GOOGLE_ID_RE % google_project_name)
try:
existing_issues = list(github_repo.get_issues(state='open')) + list(github_repo.get_issues(state='closed'))
existing_count = len(existing_issues)
issue_map = {}
for issue in existing_issues:
id_match = id_re.search(issue.body)
if not id_match:
continue
google_id = int(id_match.group(1))
issue_map[google_id] = issue
labels = [l.name for l in issue.get_labels()]
if not 'imported' in labels:
# TODO we could fix up the label here instead of just warning
logging.warn('Issue missing imported label %s- %r - %s', google_id, labels, issue.title)
imported_count = len(issue_map)
logging.info('Found %d Github issues, %d imported',existing_count,imported_count)
except:
logging.error('Failed to enumerate existing issues')
raise
return issue_map
def log_rate_info():
    logging.info('Rate limit (remaining/total) %r', github.rate_limiting)
    # Note: this requires extended version of PyGithub from tfmorris/PyGithub repo
    #logging.info('Rate limit (remaining/total) %s', repr(github.rate_limit(refresh=True)))

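# Example invocation (hypothetical project and account names):
#   python migrateissues.py myproject mygithubuser mygithubuser/myrepo --dry-run -v
# This would read issues from code.google.com/p/myproject and report what would be
# created in the mygithubuser/myrepo Github repository without modifying anything.
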
if __name__ == "__main__":
usage = "usage: %prog [options] <google project name> <github username> <github project>"
description = "Migrate all issues from a Google Code project to a Github project."
parser = optparse.OptionParser(usage = usage, description = description)
parser.add_option("-a", "--assign-owner", action = "store_true", dest = "assign_owner", help = "Assign owned issues to the Github user", default = False)
parser.add_option("-d", "--dry-run", action = "store_true", dest = "dry_run", help = "Don't modify anything on Github", default = False)
parser.add_option("-p", "--omit-priority", action = "store_true", dest = "omit_priority", help = "Don't migrate priority labels", default = False)
parser.add_option("-s", "--synchronize-ids", action = "store_true", dest = "synchronize_ids", help = "Ensure that migrated issues keep the same ID", default = False)
parser.add_option("-c", "--google-code-cookie", dest = "google_code_cookie", help = "Cookie to use for Google Code requests. Required to get unmangled names", default = '')
parser.add_option('--skip-closed', action = 'store_true', dest = 'skip_closed', help = 'Skip all closed bugs', default = False)
parser.add_option('--start-at', dest = 'start_at', help = 'Start at the given Google Code issue number', default = None, type = int)
parser.add_option('--migrate-stars', action = 'store_true', dest = 'migrate_stars', help = 'Migrate binned star counts as labels', default = False)
parser.add_option("-v", '--verbose', action = 'store_true', dest = 'verbose', help = 'Print more detailed information during migration', default = False)
parser.add_option("-o", "--only", dest="only", help="Migrate only the specified issues. Pass a single parameter containing a list of issues IDs, like \"1 3 98 110\"", default=None)
options, args = parser.parse_args()
if len(args) != 3:
parser.print_help()
sys.exit()
if options.only:
options.only = options.only.split()
if options.verbose:
logging.basicConfig(level = logging.INFO)
else:
logging.basicConfig(level = logging.ERROR)
label_cache = {} # Cache Github tags, to avoid unnecessary API requests
milestone_cache = {}
google_project_name, github_user_name, github_project = args
while True:
github_password = getpass.getpass("Github password: ")
try:
Github(github_user_name, github_password).get_user().login
break
except Exception:
print "Bad credentials, try again."
github = Github(github_user_name, github_password)
log_rate_info()
github_user = github.get_user()
# If the project name is specified as owner/project, assume that it's owned by either
# a different user than the one we have credentials for, or an organization.
if "/" in github_project:
owner_name, github_project = github_project.split("/")
try:
github_owner = github.get_user(owner_name)
except GithubException:
try:
github_owner = github.get_organization(owner_name)
except GithubException:
github_owner = github_user
else:
github_owner = github_user
github_repo = github_owner.get_repo(github_project)
try:
existing_issues = get_existing_github_issues()
log_rate_info()
process_gcode_issues(existing_issues, options.only)
except Exception:
parser.print_help()
raise