-
Notifications
You must be signed in to change notification settings - Fork 4
/
utfurl.py
executable file
·45 lines (38 loc) · 1.42 KB
/
utfurl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python2
# -*- coding: utf8 -*-
#
# This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 Unported License.
# http://creativecommons.org/licenses/by-sa/3.0/
#
# Based on code from http://stackoverflow.com/questions/804336
#
import urlparse
import urllib
def fixurl(url):
    """Return *url* with non-ASCII and unsafe characters encoded.

    The URL is split into components and each one is encoded on its own:

    * user name and password are UTF-8 encoded and percent-quoted exactly
      as given (escapes already present there are NOT decoded first, so a
      literal ``%`` becomes ``%25``);
    * the host name is converted with the ``idna`` codec;
    * path, query and fragment are UTF-8 encoded, existing ``%XX`` escapes
      are decoded to raw bytes, and the bytes are percent-quoted again, so
      an already-encoded URL comes back unchanged instead of being encoded
      a second time;
    * scheme and port are passed through.

    The literal delimiters ``/`` (path) and ``&``, ``=``, ``+`` (query) are
    kept as they are, while their escaped forms (``%2F``, ``%26``, ``%3D``,
    ``%2B``) stay escaped, so re-quoting never changes how the URL splits.

    :param url: the URL, either as text or as a UTF-8 encoded byte string.
    :returns: the encoded URL as a native ``str``.
    :raises UnicodeDecodeError: if a byte-string *url* is not valid UTF-8.
    :raises UnicodeError: if the host name cannot be IDNA-encoded.
    """
    # Import the helpers under whichever names this interpreter provides.
    try:  # Python 2: unquote() of a byte string already returns bytes
        from urllib import quote, unquote as unquote_to_bytes
        from urlparse import urlsplit, urlunsplit
    except ImportError:  # Python 3
        from urllib.parse import (
            quote, unquote_to_bytes, urlsplit, urlunsplit,
        )

    def native(text):
        # Python 2: ``str`` is bytes, so encode the text.
        # Python 3: the text already is a ``str``; leave it alone.
        return text if isinstance(text, str) else text.encode('utf8')

    def requote(text, safe='', delimiters=''):
        # Split on each literal delimiter in turn and re-quote only the
        # pieces in between; an escaped delimiter therefore decodes and
        # re-encodes to the same escape instead of becoming a live one.
        if delimiters:
            sep = delimiters[0]
            return sep.join(
                requote(piece, safe, delimiters[1:])
                for piece in text.split(sep)
            )
        # Encode BEFORE unquoting: unquoting text would turn each %XX into
        # a Latin-1 code point, and encoding that afterwards double-encodes
        # escapes that were UTF-8 to begin with.
        return quote(unquote_to_bytes(text.encode('utf8')), safe)

    # turn string into unicode
    if not isinstance(url, type(u'')):
        url = url.decode('utf8')

    # parse it
    parsed = urlsplit(url)

    # divide the netloc further
    userpass, at, hostport = parsed.netloc.rpartition('@')
    user, colon1, pass_ = userpass.partition(':')
    host, colon2, port = hostport.partition(':')

    # encode each component
    host = host.encode('idna')
    if not isinstance(host, str):  # Python 3: encode() returned bytes
        host = host.decode('ascii')
    path = requote(parsed.path, '', '/')  # could be encoded slashes!
    query = requote(parsed.query, '?/', '&=+')
    fragment = requote(parsed.fragment, '/')

    # put it back together
    netloc = ''.join((
        quote(user.encode('utf8')),
        native(colon1),
        quote(pass_.encode('utf8')),
        native(at),
        host,
        native(colon2),
        native(port),
    ))
    return urlunsplit((native(parsed.scheme), netloc, path, query, fragment))