aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichał Górny <mgorny@gentoo.org>2020-09-10 15:55:03 +0200
committerMichał Górny <mgorny@gentoo.org>2020-09-10 15:55:03 +0200
commitce93953b7412c5287fa92e6c5c824595b846f687 (patch)
tree0f0732b5532bff4f167859ec55cfc2465303c4ae /lib-python
parenttest, implement easy part of PyMemoryView_GetContiguous (diff)
downloadpypy-ce93953b7412c5287fa92e6c5c824595b846f687.tar.gz
pypy-ce93953b7412c5287fa92e6c5c824595b846f687.tar.bz2
pypy-ce93953b7412c5287fa92e6c5c824595b846f687.zip
sync httplib2 and urllib2 to cpython 2.7 git with security backports
Sync httplib and urllib2 stdlib modules and the respective tests to the current state of CPython 2.7 git (EOL-ed) + two patches backported from 3.6 that are present in the Gentoo patchset. This has only trivial changes compared to backporting the four relevant patches and should make it easier to apply a final stdlib update post-release. The Gentoo patches can be found as the two top patches on https://gitweb.gentoo.org/fork/cpython.git/log/?h=gentoo-2.7.18-r2
Diffstat (limited to 'lib-python')
-rw-r--r--lib-python/2.7/httplib.py67
-rw-r--r--lib-python/2.7/test/test_httplib.py53
-rw-r--r--lib-python/2.7/test/test_urllib2.py150
-rw-r--r--lib-python/2.7/urllib.py17
-rw-r--r--lib-python/2.7/urllib2.py68
5 files changed, 288 insertions, 67 deletions
diff --git a/lib-python/2.7/httplib.py b/lib-python/2.7/httplib.py
index f3bb22c2b6..e108d6f21c 100644
--- a/lib-python/2.7/httplib.py
+++ b/lib-python/2.7/httplib.py
@@ -247,6 +247,20 @@ _MAXHEADERS = 100
_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match
_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search
+# These characters are not allowed within HTTP URL paths.
+# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
+# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
+# Prevents CVE-2019-9740. Includes control characters such as \r\n.
+# Restrict non-ASCII characters above \x7f (0x80-0xff).
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f-\xff]')
+# Arguably only these _should_ allowed:
+# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
+# We are more lenient for assumed real world compatibility purposes.
+
+# These characters are not allowed within HTTP method names
+# to prevent http header injection.
+_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')
+
# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
@@ -399,7 +413,7 @@ class HTTPResponse:
if not line:
# Presumably, the server closed the connection before
# sending a valid response.
- raise BadStatusLine(line)
+ raise BadStatusLine("No status line received - the server has closed the connection")
try:
[version, status, reason] = line.split(None, 2)
except ValueError:
@@ -735,6 +749,8 @@ class HTTPConnection:
(self.host, self.port) = self._get_hostport(host, port)
+ self._validate_host(self.host)
+
# This is stored as an instance variable to allow unittests
# to replace with a suitable mock
self._create_connection = socket.create_connection
@@ -923,13 +939,17 @@ class HTTPConnection:
else:
raise CannotSendRequest()
- # Save the method we use, we need it later in the response phase
+ self._validate_method(method)
+
+ # Save the method for use later in the response phase
self._method = method
- if not url:
- url = '/'
- hdr = '%s %s %s' % (method, url, self._http_vsn_str)
- self._output(hdr)
+ url = url or '/'
+ self._validate_path(url)
+
+ request = '%s %s %s' % (method, url, self._http_vsn_str)
+
+ self._output(self._encode_request(request))
if self._http_vsn == 11:
# Issue some standard headers for better HTTP/1.1 compliance
@@ -1002,6 +1022,41 @@ class HTTPConnection:
# For HTTP/1.0, the server will assume "not chunked"
pass
+ def _encode_request(self, request):
+ # On Python 2, request is already encoded (default)
+ return request
+
+ def _validate_method(self, method):
+ """Validate a method name for putrequest."""
+ # prevent http header injection
+ match = _contains_disallowed_method_pchar_re.search(method)
+ if match:
+ raise ValueError(
+ "method can't contain control characters. %r (found "
+ "at least %r)" % (method, match.group()))
+
+ def _validate_path(self, url):
+ """Validate a url for putrequest."""
+ # Prevent CVE-2019-9740.
+ match = _contains_disallowed_url_pchar_re.search(url)
+ if match:
+ msg = (
+ "URL can't contain control characters. {url!r} "
+ "(found at least {matched!r})"
+ ).format(matched=match.group(), url=url)
+ raise InvalidURL(msg)
+
+ def _validate_host(self, host):
+ """Validate a host so it doesn't contain control characters."""
+ # Prevent CVE-2019-18348.
+ match = _contains_disallowed_url_pchar_re.search(host)
+ if match:
+ msg = (
+ "URL can't contain control characters. {host!r} "
+ "(found at least {matched!r})"
+ ).format(matched=match.group(), host=host)
+ raise InvalidURL(msg)
+
def putheader(self, header, *values):
"""Send a request header line to the server.
diff --git a/lib-python/2.7/test/test_httplib.py b/lib-python/2.7/test/test_httplib.py
index 7e8b058e8b..e20a0986dc 100644
--- a/lib-python/2.7/test/test_httplib.py
+++ b/lib-python/2.7/test/test_httplib.py
@@ -384,6 +384,26 @@ class HeaderTests(TestCase):
with self.assertRaisesRegexp(ValueError, 'Invalid header'):
conn.putheader(name, value)
+ def test_invalid_method_names(self):
+ methods = (
+ 'GET\r',
+ 'POST\n',
+ 'PUT\n\r',
+ 'POST\nValue',
+ 'POST\nHOST:abc',
+ 'GET\nrHost:abc\n',
+ 'POST\rRemainder:\r',
+ 'GET\rHOST:\n',
+ '\nPUT'
+ )
+
+ for method in methods:
+ with self.assertRaisesRegexp(
+ ValueError, "method can't contain control characters"):
+ conn = httplib.HTTPConnection('example.com')
+ conn.sock = FakeSocket(None)
+ conn.request(method=method, url="/")
+
class BasicTest(TestCase):
def test_status_lines(self):
@@ -702,6 +722,31 @@ class BasicTest(TestCase):
with self.assertRaisesRegexp(socket.error, "Invalid response"):
conn._tunnel()
+ def test_putrequest_override_domain_validation(self):
+ """
+ It should be possible to override the default validation
+ behavior in putrequest (bpo-38216).
+ """
+ class UnsafeHTTPConnection(httplib.HTTPConnection):
+ def _validate_path(self, url):
+ pass
+
+ conn = UnsafeHTTPConnection('example.com')
+ conn.sock = FakeSocket('')
+ conn.putrequest('GET', '/\x00')
+
+ def test_putrequest_override_host_validation(self):
+ class UnsafeHTTPConnection(httplib.HTTPConnection):
+ def _validate_host(self, url):
+ pass
+
+ conn = UnsafeHTTPConnection('example.com\r\n')
+ conn.sock = FakeSocket('')
+ # set skip_host so a ValueError is not raised upon adding the
+ # invalid URL as the value of the "Host:" header
+ conn.putrequest('GET', '/', skip_host=1)
+
+
class OfflineTest(TestCase):
def test_responses(self):
self.assertEqual(httplib.responses[httplib.NOT_FOUND], "Not Found")
@@ -860,7 +905,7 @@ class HTTPSTest(TestCase):
import ssl
test_support.requires('network')
with test_support.transient_internet('self-signed.pythontest.net'):
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS)
context.verify_mode = ssl.CERT_REQUIRED
context.load_verify_locations(CERT_selfsigned_pythontestdotnet)
h = httplib.HTTPSConnection('self-signed.pythontest.net', 443, context=context)
@@ -874,7 +919,7 @@ class HTTPSTest(TestCase):
import ssl
test_support.requires('network')
with test_support.transient_internet('self-signed.pythontest.net'):
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS)
context.verify_mode = ssl.CERT_REQUIRED
context.load_verify_locations(CERT_localhost)
h = httplib.HTTPSConnection('self-signed.pythontest.net', 443, context=context)
@@ -895,7 +940,7 @@ class HTTPSTest(TestCase):
# The (valid) cert validates the HTTP hostname
import ssl
server = self.make_server(CERT_localhost)
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS)
context.verify_mode = ssl.CERT_REQUIRED
context.load_verify_locations(CERT_localhost)
h = httplib.HTTPSConnection('localhost', server.port, context=context)
@@ -907,7 +952,7 @@ class HTTPSTest(TestCase):
# The (valid) cert doesn't validate the HTTP hostname
import ssl
server = self.make_server(CERT_fakehostname)
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS)
context.verify_mode = ssl.CERT_REQUIRED
context.check_hostname = True
context.load_verify_locations(CERT_fakehostname)
diff --git a/lib-python/2.7/test/test_urllib2.py b/lib-python/2.7/test/test_urllib2.py
index 6783ba41c1..0bdfd179f0 100644
--- a/lib-python/2.7/test/test_urllib2.py
+++ b/lib-python/2.7/test/test_urllib2.py
@@ -15,6 +15,9 @@ try:
except ImportError:
ssl = None
+from test.test_urllib import FakeHTTPMixin
+
+
# XXX
# Request
# CacheFTPHandler (hard to write)
@@ -285,15 +288,12 @@ class MockHTTPResponse:
self.reason = reason
def read(self):
return ''
- def _reuse(self): pass
- def _drop(self): pass
class MockHTTPClass:
def __init__(self):
self.req_headers = []
self.data = None
self.raise_on_endheaders = False
- self.sock = None
self._tunnel_headers = {}
def __call__(self, host, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
@@ -1128,42 +1128,64 @@ class HandlerTests(unittest.TestCase):
self.assertEqual(req.get_host(), "proxy.example.com:3128")
self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
- def test_basic_auth(self, quote_char='"'):
+ def check_basic_auth(self, headers, realm):
opener = OpenerDirector()
password_manager = MockPasswordManager()
auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
- realm = "ACME Widget Store"
- http_handler = MockHTTPHandler(
- 401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
- (quote_char, realm, quote_char) )
+ body = '\r\n'.join(headers) + '\r\n\r\n'
+ http_handler = MockHTTPHandler(401, body)
opener.add_handler(auth_handler)
opener.add_handler(http_handler)
self._test_basic_auth(opener, auth_handler, "Authorization",
realm, http_handler, password_manager,
"http://acme.example.com/protected",
- "http://acme.example.com/protected"
- )
-
- def test_basic_auth_with_single_quoted_realm(self):
- self.test_basic_auth(quote_char="'")
-
- def test_basic_auth_with_unquoted_realm(self):
- opener = OpenerDirector()
- password_manager = MockPasswordManager()
- auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
- realm = "ACME Widget Store"
- http_handler = MockHTTPHandler(
- 401, 'WWW-Authenticate: Basic realm=%s\r\n\r\n' % realm)
- opener.add_handler(auth_handler)
- opener.add_handler(http_handler)
- msg = "Basic Auth Realm was unquoted"
- with test_support.check_warnings((msg, UserWarning)):
- self._test_basic_auth(opener, auth_handler, "Authorization",
- realm, http_handler, password_manager,
- "http://acme.example.com/protected",
- "http://acme.example.com/protected"
- )
-
+ "http://acme.example.com/protected")
+
+ def test_basic_auth(self):
+ realm = "realm2@example.com"
+ realm2 = "realm2@example.com"
+ basic = 'Basic realm="%s"' % (realm,)
+ basic2 = 'Basic realm="%s"' % (realm2,)
+ other_no_realm = 'Otherscheme xxx'
+ digest = ('Digest realm="%s", '
+ 'qop="auth, auth-int", '
+ 'nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093", '
+ 'opaque="5ccc069c403ebaf9f0171e9517f40e41"') % (
+ (realm2,))
+ for realm_str in (
+ # test "quote" and 'quote'
+ 'Basic realm="%s"' % (realm,),
+ "Basic realm='%s'" % (realm,),
+
+ # charset is ignored
+ 'Basic realm="%s", charset="UTF-8"' % (realm,),
+
+ # Multiple challenges per header
+ ', '.join((basic, basic2)),
+ ', '.join((basic, other_no_realm)),
+ ', '.join((other_no_realm, basic)),
+ ', '.join((basic, digest)),
+ ', '.join((digest, basic)),
+ ):
+ headers = ['WWW-Authenticate: %s' % (realm_str,)]
+ self.check_basic_auth(headers, realm)
+
+ # no quote: expect a warning
+ with test_support.check_warnings(("Basic Auth Realm was unquoted",
+ UserWarning)):
+ headers = ['WWW-Authenticate: Basic realm=%s' % (realm,)]
+ self.check_basic_auth(headers, realm)
+
+ # Multiple headers: one challenge per header.
+ # Use the first Basic realm.
+ for challenges in (
+ [basic, basic2],
+ [basic, digest],
+ [digest, basic],
+ ):
+ headers = ['WWW-Authenticate: %s' % (challenge,)
+ for challenge in challenges]
+ self.check_basic_auth(headers, realm)
def test_proxy_basic_auth(self):
opener = OpenerDirector()
@@ -1265,7 +1287,7 @@ class HandlerTests(unittest.TestCase):
self.assertEqual(len(http_handler.requests), 1)
self.assertFalse(http_handler.requests[0].has_header(auth_header))
-class MiscTests(unittest.TestCase):
+class MiscTests(unittest.TestCase, FakeHTTPMixin):
def test_build_opener(self):
class MyHTTPHandler(urllib2.HTTPHandler): pass
@@ -1320,6 +1342,70 @@ class MiscTests(unittest.TestCase):
"Unsupported digest authentication algorithm 'invalid'"
)
+ @unittest.skipUnless(ssl, "ssl module required")
+ def test_url_path_with_control_char_rejected(self):
+ for char_no in range(0, 0x21) + range(0x7f, 0x100):
+ char = chr(char_no)
+ schemeless_url = "//localhost:7777/test%s/" % char
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+ try:
+ # We explicitly test urllib.request.urlopen() instead of the top
+ # level 'def urlopen()' function defined in this... (quite ugly)
+ # test suite. They use different url opening codepaths. Plain
+ # urlopen uses FancyURLOpener which goes via a codepath that
+ # calls urllib.parse.quote() on the URL which makes all of the
+ # above attempts at injection within the url _path_ safe.
+ escaped_char_repr = repr(char).replace('\\', r'\\')
+ InvalidURL = httplib.InvalidURL
+ with self.assertRaisesRegexp(
+ InvalidURL, "contain control.*" + escaped_char_repr):
+ urllib2.urlopen("http:" + schemeless_url)
+ with self.assertRaisesRegexp(
+ InvalidURL, "contain control.*" + escaped_char_repr):
+ urllib2.urlopen("https:" + schemeless_url)
+ finally:
+ self.unfakehttp()
+
+ @unittest.skipUnless(ssl, "ssl module required")
+ def test_url_path_with_newline_header_injection_rejected(self):
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+ host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
+ schemeless_url = "//" + host + ":8080/test/?test=a"
+ try:
+ # We explicitly test urllib2.urlopen() instead of the top
+ # level 'def urlopen()' function defined in this... (quite ugly)
+ # test suite. They use different url opening codepaths. Plain
+ # urlopen uses FancyURLOpener which goes via a codepath that
+ # calls urllib.parse.quote() on the URL which makes all of the
+ # above attempts at injection within the url _path_ safe.
+ InvalidURL = httplib.InvalidURL
+ with self.assertRaisesRegexp(InvalidURL,
+ r"contain control.*\\r.*(found at least . .)"):
+ urllib2.urlopen("http:{}".format(schemeless_url))
+ with self.assertRaisesRegexp(InvalidURL,
+ r"contain control.*\\n"):
+ urllib2.urlopen("https:{}".format(schemeless_url))
+ finally:
+ self.unfakehttp()
+
+ @unittest.skipUnless(ssl, "ssl module required")
+ def test_url_host_with_control_char_rejected(self):
+ for char_no in list(range(0, 0x21)) + [0x7f]:
+ char = chr(char_no)
+ schemeless_url = "//localhost{}/test/".format(char)
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+ try:
+ escaped_char_repr = repr(char).replace('\\', r'\\')
+ InvalidURL = httplib.InvalidURL
+ with self.assertRaisesRegexp(InvalidURL,
+ "contain control.*{}".format(escaped_char_repr)):
+ urllib2.urlopen("http:{}".format(schemeless_url))
+ with self.assertRaisesRegexp(InvalidURL,
+ "contain control.*{}".format(escaped_char_repr)):
+ urllib2.urlopen("https:{}".format(schemeless_url))
+ finally:
+ self.unfakehttp()
+
class RequestTests(unittest.TestCase):
diff --git a/lib-python/2.7/urllib.py b/lib-python/2.7/urllib.py
index 33962968b1..87bcd94db5 100644
--- a/lib-python/2.7/urllib.py
+++ b/lib-python/2.7/urllib.py
@@ -203,7 +203,9 @@ class URLopener:
name = 'open_' + urltype
self.type = urltype
name = name.replace('-', '_')
- if not hasattr(self, name):
+
+ # bpo-35907: disallow the file reading with the type not allowed
+ if not hasattr(self, name) or name == 'open_local_file':
if proxy:
return self.open_unknown_proxy(proxy, fullurl, data)
else:
@@ -932,7 +934,13 @@ class ftpwrapper:
return (ftpobj, retrlen)
def endtransfer(self):
+ if not self.busy:
+ return
self.busy = 0
+ try:
+ self.ftp.voidresp()
+ except ftperrors():
+ pass
def close(self):
self.keepalive = False
@@ -1093,8 +1101,7 @@ def splithost(url):
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
- import re
- _hostprog = re.compile('^//([^/?]*)(.*)$')
+ _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)
match = _hostprog.match(url)
if match:
@@ -1237,8 +1244,7 @@ def unquote(s):
return s
res = [bits[0]]
append = res.append
- for j in xrange(1, len(bits)):
- item = bits[j]
+ for item in bits[1:]:
try:
append(_hextochr[item[:2]])
append(item[2:])
@@ -1428,6 +1434,7 @@ def proxy_bypass_environment(host, proxies=None):
no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
for name in no_proxy_list:
if name:
+ name = name.lstrip('.') # ignore leading dots
name = re.escape(name)
pattern = r'(.+\.)?%s$' % name
if (re.match(pattern, hostonly, re.I)
diff --git a/lib-python/2.7/urllib2.py b/lib-python/2.7/urllib2.py
index 62d639fa42..b2d1fad6f2 100644
--- a/lib-python/2.7/urllib2.py
+++ b/lib-python/2.7/urllib2.py
@@ -856,8 +856,15 @@ class AbstractBasicAuthHandler:
# allow for double- and single-quoted realm values
# (single quotes are a violation of the RFC, but appear in the wild)
- rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
- 'realm=(["\']?)([^"\']*)\\2', re.I)
+ rx = re.compile('(?:^|,)' # start of the string or ','
+ '[ \t]*' # optional whitespaces
+ '([^ \t]+)' # scheme like "Basic"
+ '[ \t]+' # mandatory whitespaces
+ # realm=xxx
+ # realm='xxx'
+ # realm="xxx"
+ 'realm=(["\']?)([^"\']*)\\2',
+ re.I)
# XXX could pre-emptively send auth info already accepted (RFC 2617,
# end of section 2, and section 1.2 immediately after "credentials"
@@ -869,23 +876,52 @@ class AbstractBasicAuthHandler:
self.passwd = password_mgr
self.add_password = self.passwd.add_password
+ def _parse_realm(self, header):
+ # parse WWW-Authenticate header: accept multiple challenges per header
+ found_challenge = False
+ for mo in AbstractBasicAuthHandler.rx.finditer(header):
+ scheme, quote, realm = mo.groups()
+ if quote not in ['"', "'"]:
+ warnings.warn("Basic Auth Realm was unquoted",
+ UserWarning, 3)
+
+ yield (scheme, realm)
+
+ found_challenge = True
+
+ if not found_challenge:
+ if header:
+ scheme = header.split()[0]
+ else:
+ scheme = ''
+ yield (scheme, None)
def http_error_auth_reqed(self, authreq, host, req, headers):
# host may be an authority (without userinfo) or a URL with an
# authority
- # XXX could be multiple headers
- authreq = headers.get(authreq, None)
+ headers = headers.getheaders(authreq)
+ if not headers:
+ # no header found
+ return
- if authreq:
- mo = AbstractBasicAuthHandler.rx.search(authreq)
- if mo:
- scheme, quote, realm = mo.groups()
- if quote not in ['"', "'"]:
- warnings.warn("Basic Auth Realm was unquoted",
- UserWarning, 2)
- if scheme.lower() == 'basic':
+ unsupported = None
+ for header in headers:
+ for scheme, realm in self._parse_realm(header):
+ if scheme.lower() != 'basic':
+ unsupported = scheme
+ continue
+
+ if realm is not None:
+ # Use the first matching Basic challenge.
+ # Ignore following challenges even if they use the Basic
+ # scheme.
return self.retry_http_basic_auth(host, req, realm)
+ if unsupported is not None:
+ raise ValueError("AbstractBasicAuthHandler does not "
+ "support the following scheme: %r"
+ % (scheme,))
+
def retry_http_basic_auth(self, host, req, realm):
user, pw = self.passwd.find_user_password(realm, host)
if pw is not None:
@@ -1201,12 +1237,6 @@ class AbstractHTTPHandler(BaseHandler):
r = h.getresponse(buffering=True)
except TypeError: # buffering kw not supported
r = h.getresponse()
- # If the server does not send us a 'Connection: close' header,
- # HTTPConnection assumes the socket should be left open. Manually
- # mark the socket to be closed when this response object goes away.
- if h.sock:
- h.sock.close()
- h.sock = None
# Pick apart the HTTPResponse object to get the addinfourl
# object initialized properly.
@@ -1220,8 +1250,6 @@ class AbstractHTTPHandler(BaseHandler):
# out of socket._fileobject() and into a base class.
r.recv = r.read
- r._reuse = lambda: None
- r._drop = lambda: None
fp = socket._fileobject(r, close=True)
resp = addinfourl(fp, r.msg, req.get_full_url())