From 225f24da9cc7a54ca293d23006fd575a9151ec02 Mon Sep 17 00:00:00 2001
From: Heikki Toivonen <heikki@heikkitoivonen.net>
Date: Tue, 5 Dec 2006 19:24:14 +0000
Subject: [PATCH] Bug 6975, CONNECT proxy for httpslib and m2urllib2, by James
 Bowes.

git-svn-id: http://svn.osafoundation.org/m2crypto/trunk@486 2715db39-9adf-0310-9c64-84f055769b4b
---
 CHANGES               |   1 +
 M2Crypto/httpslib.py  | 111 ++++++++++++++++++++++++++++++++++++++++++
 M2Crypto/m2urllib2.py |  15 ++++--
 3 files changed, 124 insertions(+), 3 deletions(-)

diff --git a/CHANGES b/CHANGES
index 10fb9d9..f94f434 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,7 @@
 0.17
 ----
 - Added m2urllib2, by James Bowes (python 2.4 and later, at least for now)
+- Added CONNECT proxy for httpslib and m2urllib2, by James Bowes
 - Fixed m2urllib.open_https to return the response headers, otherwise code
   that relied on that would break (for example msnlib-3.5), by Arno bakker
 - Fixed twisted wrapper to work with >16kb BIO buffers, by Martin Paljak
diff --git a/M2Crypto/httpslib.py b/M2Crypto/httpslib.py
index b5fb638..dbc9922 100644
--- a/M2Crypto/httpslib.py
+++ b/M2Crypto/httpslib.py
@@ -3,6 +3,10 @@
 Copyright (c) 1999-2004 Ng Pheng Siong. All rights reserved."""
 
 import string, sys
+import socket
+import urllib
+import base64
+
 from httplib import *
 from httplib import HTTPS_PORT # This is not imported with just '*'
 import SSL
@@ -72,3 +76,110 @@ class HTTPS(HTTP):
             self.ssl_ctx = SSL.Context('sslv23')
         assert isinstance(self._conn, HTTPSConnection)
         self._conn.ssl_ctx = self.ssl_ctx
+
+
+class ProxyHTTPSConnection(HTTPSConnection):
+
+    """
+    An HTTPS Connection that uses a proxy and the CONNECT request.
+
+    When the connection is initiated, CONNECT is first sent to the proxy (along
+    with authorization headers, if supplied). If successful, an SSL connection
+    will be established over the socket through the proxy and to the target
+    host.
+
+    Finally, the actual request is sent over the SSL connection tunneling
+    through the proxy.
+    """
+
+    _ports = {'http' : 80, 'https' : 443}
+    _AUTH_HEADER = "Proxy-Authorization"
+
+    def __init__(self, host, port=None, strict=None, username=None,
+        password=None, **ssl):
+        """
+        Create the ProxyHTTPSConnection object.
+
+        host and port are the hostname and port number of the proxy server.
+        """
+        HTTPSConnection.__init__(self, host, port, strict, **ssl)
+
+        self._username = username
+        self._password = password
+        self._proxy_auth = None
+
+    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
+        #putrequest is called before connect, so can interpret url and get
+        #real host/port to be used to make CONNECT request to proxy
+        proto, rest = urllib.splittype(url)
+        if proto is None:
+            raise ValueError, "unknown URL type: %s" % url
+        #get host
+        host, rest = urllib.splithost(rest)
+        #try to get port
+        host, port = urllib.splitport(host)
+        #if port is not defined try to get from proto
+        if port is None:
+            try:
+                port = self._ports[proto]
+            except KeyError:
+                raise ValueError, "unknown protocol for: %s" % url
+        self._real_host = host
+        self._real_port = port
+        HTTPSConnection.putrequest(self, method, url, skip_host, skip_accept_encoding)
+
+    def putheader(self, header, value):
+        # Store the auth header if passed in.
+        if header.lower() == self._AUTH_HEADER.lower():
+            self._proxy_auth = value
+        else:
+            HTTPSConnection.putheader(self, header, value)
+
+    def endheaders(self):
+        # We've received all of the headers. Fall back to the supplied
+        # username and password for authorization only when no authstring
+        # was supplied in the headers.
+        if not self._proxy_auth:
+            self._proxy_auth = self._encode_auth()
+
+        HTTPSConnection.endheaders(self)
+
+    def connect(self):
+        HTTPConnection.connect(self)
+
+        #send proxy CONNECT request
+        self.sock.sendall(self._get_connect_msg())
+        response = HTTPResponse(self.sock)
+        response.begin()
+        
+        code = response.status
+        if code != 200:
+            #proxy returned an error, abort connection, and raise exception
+            self.close()
+            raise socket.error, "Proxy connection failed: %d" % code
+       
+        self._start_ssl()
+
+    def _get_connect_msg(self):
+        """ Return an HTTP CONNECT request to send to the proxy. """
+        msg = "CONNECT %s:%d HTTP/1.1\r\n" % (self._real_host, self._real_port)
+        if self._proxy_auth:
+            msg = msg + "%s: %s\r\n" % (self._AUTH_HEADER, self._proxy_auth) 
+        msg = msg + "\r\n"
+        return msg
+
+    def _start_ssl(self):
+        """ Make this connection's socket SSL-aware. """
+        self.sock = SSL.Connection(self.ssl_ctx, self.sock)
+        self.sock.setup_ssl()
+        self.sock.set_connect_state()
+        self.sock.connect_ssl()
+
+    def _encode_auth(self):
+        """ Encode the username and password for use in the auth header. """
+        if not (self._username and self._password):
+            return None
+        # Authenticated proxy
+        userpass = "%s:%s" % (self._username, self._password)
+        enc_userpass = base64.encodestring(userpass).replace("\n", "")
+        return "Basic %s" % enc_userpass
diff --git a/M2Crypto/m2urllib2.py b/M2Crypto/m2urllib2.py
index 9ae5014..3dc700a 100644
--- a/M2Crypto/m2urllib2.py
+++ b/M2Crypto/m2urllib2.py
@@ -6,11 +6,13 @@ Code from urllib2 is Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006
 Python Software Foundation; All Rights Reserved
 
 Summary of changes:
+ * Use a ProxyHTTPSConnection if the request is going through a proxy.
  * Add the SSL context to the https connection when performing https_open.
  * Add the M2Crypto HTTPSHandler when building a default opener.
 """
 
 from urllib2 import *
+import urlparse
 
 import SSL
 import httpslib
@@ -39,8 +41,15 @@ class HTTPSHandler(AbstractHTTPHandler):
         if not host:
             raise URLError('no host given')
 
-        # Our change: add the ssl context.
-        h = httpslib.HTTPSConnection(host = host, ssl_context = self.ctx)
+        # Our change: Check to see if we're using a proxy.
+        # Then create an appropriate ssl-aware connection.
+        full_url = req.get_full_url() 
+        target_host = urlparse.urlparse(full_url)[1]
+
+        if (target_host != host):
+            h = httpslib.ProxyHTTPSConnection(host = host, ssl_context = self.ctx)
+        else:
+            h = httpslib.HTTPSConnection(host = host, ssl_context = self.ctx)
         # End our change
         h.set_debuglevel(self._debuglevel)
 
@@ -54,7 +63,7 @@ class HTTPSHandler(AbstractHTTPHandler):
         # request.
         headers["Connection"] = "close"
         try:
-            h.request(req.get_method(), req.get_selector(), req.data, headers)
+            h.request(req.get_method(), req.get_full_url(), req.data, headers)
             r = h.getresponse()
         except socket.error, err: # XXX what error?
             raise URLError(err)
-- 
GitLab