1 files changed, 713 insertions, 0 deletions
diff --git a/tools/bug_tool/ClientCookie/_urllib2_support.py b/tools/bug_tool/ClientCookie/_urllib2_support.py
new file mode 100644
index 0000000000..d767d08b25
--- /dev/null
+++ b/tools/bug_tool/ClientCookie/_urllib2_support.py
@@ -0,0 +1,713 @@
+"""Integration with Python standard library module urllib2.
+
+Also includes a redirection bugfix, support for parsing HTML HEAD blocks for
+the META HTTP-EQUIV tag contents, and following Refresh header redirects.
+
+Copyright 2002-2003 John J Lee <jjl@pobox.com>
+
+This code is free software; you can redistribute it and/or modify it under
+the terms of the BSD License (see the file COPYING included with the
+distribution).
+
+"""
+
+import copy, time
+
+import ClientCookie
+from _ClientCookie import CookieJar, request_host
+from _Util import isstringlike
+from _Debug import _debug
+
+try: True  # probe for the builtin; a NameError means it is missing
+except NameError:
+    True = 1
+    False = 0
+
+CHUNK = 1024  # size of chunks fed to HTML HEAD parser, in bytes
+
+try:
+    from urllib2 import AbstractHTTPHandler
+except ImportError:
+    pass
+else:
+    import urlparse, urllib2, urllib, httplib, htmllib, formatter, string
+    from urllib2 import URLError, HTTPError
+    import types, string, socket
+    from cStringIO import StringIO
+    from _Util import seek_wrapper
+    try:
+        import threading
+        _threading = threading; del threading
+    except ImportError:
+        import dummy_threading
+        _threading = dummy_threading; del dummy_threading
+
+    # This fixes a bug in urllib2 as of Python 2.1.3 and 2.2.2
+    #  (http://www.python.org/sf/549151)
+    # 2.2.3 is broken here (my fault!), 2.3 is fixed.
+    class HTTPRedirectHandler(urllib2.BaseHandler):
+        """Follow 301, 302, 303 and 307 redirects, guarding against loops."""
+        # maximum number of redirections before assuming we're in a loop
+        max_redirections = 10
+
+        # Implementation notes:
+        # To avoid the server sending us into an infinite loop, the request
+        # object needs to track what URLs we have already seen.  Do this by
+        # adding a handler-specific attribute to the Request object.  The value
+        # of the dict is used to count the number of times the same url has
+        # been visited.  This is needed because this isn't necessarily a loop:
+        # there is more than one way to redirect (Refresh, 302, 303, 307).
+
+        # Another handler-specific Request attribute, original_url, is used to
+        # remember the URL of the original request so that it is possible to
+        # decide whether or not RFC 2965 cookies should be turned on during
+        # redirect.
+
+        # Always unhandled redirection codes:
+        # 300 Multiple Choices: should not handle this here.
+        # 304 Not Modified: no need to handle here: only of interest to caches
+        #     that do conditional GETs
+        # 305 Use Proxy: probably not worth dealing with here
+        # 306 Unused: what was this for in the previous versions of protocol??
+
+        def redirect_request(self, newurl, req, fp, code, msg, headers):
+            """Return a Request or None in response to a redirect.
+
+            This is called by the http_error_30x methods when a redirection
+            response is received.  If a redirection should take place, return a
+            new Request to allow http_error_30x to perform the redirect.  This
+            implementation raises HTTPError for anything it will not follow;
+            if an override returns None, http_error_30x returns None as well.
+
+            """
+            if code in (301, 302, 303) or (code == 307 and not req.has_data()):
+                # Strictly (according to RFC 2616), 301 or 302 in response to
+                # a POST MUST NOT cause a redirection without confirmation
+                # from the user (of urllib2, in this case).  In practice,
+                # essentially all clients do redirect in this case, so we do
+                # the same.
+                return Request(newurl, headers=req.headers)
+            else:
+                raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+        def http_error_302(self, req, fp, code, msg, headers):
+            """Open the redirect target; return None if no target is given."""
+            if headers.has_key('location'):
+                newurl = headers['location']
+            elif headers.has_key('uri'):
+                newurl = headers['uri']
+            else:
+                return
+            newurl = urlparse.urljoin(req.get_full_url(), newurl)
+
+            # XXX Probably want to forget about the state of the current request,
+            # though that may upset other handlers' request-specific attributes
+            new = self.redirect_request(newurl, req, fp, code, msg, headers)
+            if new is None:
+                return
+
+            # remember where we started from
+            if hasattr(req, "original_url"):
+                new.original_url = req.original_url
+            else:
+                new.original_url = req.get_full_url()
+
+            # origin_req_host is set by HTTPCookieProcessor, which may be absent
+            if hasattr(req, "origin_req_host"):
+                new.origin_req_host = req.origin_req_host
+            # loop detection: .error_302_dict[(url, code)] counts earlier visits
+            # to url via this code (error_30x_dict would be a better name).
+            if not hasattr(req, 'error_302_dict'):
+                new.error_302_dict = req.error_302_dict = {(newurl, code): 1}
+            else:
+                ed = new.error_302_dict = req.error_302_dict
+                nr_visits = ed.get((newurl, code), 0)
+                # Refreshes generate fake 302s, so we can hit the same URL as
+                # a result of the same redirection code twice without
+                # necessarily being in a loop!  So, allow two visits to each
+                # URL as a result of each redirection code.
+                if len(ed) < self.max_redirections and nr_visits < 2:
+                    ed[(newurl, code)] = nr_visits + 1
+                else:
+                    raise HTTPError(req.get_full_url(), code,
+                                    self.inf_msg + msg, headers, fp)
+
+            if ClientCookie.REDIRECT_DEBUG:
+                _debug("redirecting to %s", newurl)
+
+            # Don't close the fp until we are sure that we won't use it
+            # with HTTPError.
+            fp.read()
+            fp.close()
+
+            return self.parent.open(new)
+
+        http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+        inf_msg = "The HTTP server returned a redirect error that would " \
+                  "lead to an infinite loop.\n" \
+                  "The last 30x error message was:\n"
+
+
+    class Request(urllib2.Request):
+        """urllib2.Request with extra headers that redirects do not inherit."""
+        def __init__(self, url, data=None, headers={}):
+            urllib2.Request.__init__(self, url, data, headers)
+            self.unredirected_hdrs = {}
+
+        def add_unredirected_header(self, key, val):
+            # these headers do not persist from one request to the next in a chain
+            # of requests
+            self.unredirected_hdrs[string.capitalize(key)] = val
+
+        def has_key(self, header_name):
+            """Return true if header_name is in either header dictionary."""
+            return (self.headers.has_key(header_name) or
+                    self.unredirected_hdrs.has_key(header_name))
+
+        def get(self, header_name, failobj=None):
+            """Return the header's value (normal headers first), else failobj."""
+            for hdrs in (self.headers, self.unredirected_hdrs):
+                if hdrs.has_key(header_name):
+                    return hdrs[header_name]
+            return failobj
+
+
+    class BaseProcessor:
+        """Common base: parent bookkeeping plus ordering by processor_order."""
+        processor_order = 500
+        def add_parent(self, parent):
+            self.parent = parent
+        def close(self):
+            self.parent = None
+        def __lt__(self, other):
+            if hasattr(other, "processor_order"):
+                return self.processor_order < other.processor_order
+            return True  # we compare less than anything without an order
+
+    class HTTPRequestUpgradeProcessor(BaseProcessor):
+        """Swap plain requests for this module's Request class."""
+        # the upgraded class supports headers that don't get redirected
+        processor_order = 0  # before anything else
+
+        def http_request(self, request):
+            if hasattr(request, "add_unredirected_header"):
+                return request  # already upgraded
+            original_url = request._Request__original
+            return Request(original_url, request.data, request.headers)
+
+        https_request = http_request
+
+    class HTTPEquivProcessor(BaseProcessor):
+        """Append META HTTP-EQUIV headers to regular HTTP headers."""
+        def http_response(self, request, response):
+            if not hasattr(response, "seek"):
+                response = seek_wrapper(response)
+            headers = response.info()
+            for hdr, val in parse_head(response):
+                if val is not None:  # skip META tags that have no CONTENT
+                    headers[hdr] = val
+            response.seek(0)
+            return response
+
+        https_response = http_response
+
+    # XXX ATM this only takes notice of http responses -- probably
+    #   should be independent of protocol scheme (http, ftp, etc.)
+    class SeekableProcessor(BaseProcessor):
+        """Wrap responses that lack a seek method in seek_wrapper."""
+        def http_response(self, request, response):
+            # responses that can already seek are passed through untouched
+            if hasattr(response, "seek"):
+                return response
+            return seek_wrapper(response)
+
+        https_response = http_response
+
+    # XXX if this gets added to urllib2, unverifiable would end up as an
+    #   attribute on Request.
+    class HTTPCookieProcessor(BaseProcessor):
+        """Handle HTTP cookies."""
+        def __init__(self, cookies=None):
+            """Use the given cookie jar, or a fresh CookieJar if none is given."""
+            if cookies is None:
+                cookies = CookieJar()
+            self.cookies = cookies
+
+        def _unverifiable(self, request):
+            """Return True for redirected or explicitly unverifiable requests."""
+            # a non-empty error_302_dict marks a request made by a redirect
+            if hasattr(request, "error_302_dict") and request.error_302_dict:
+                return True
+            if hasattr(request, "unverifiable") and request.unverifiable:
+                return True
+            return False
+
+        def http_request(self, request):
+            """Let the cookie jar add its header to an outgoing request."""
+            unverified = self._unverifiable(request)
+            if not unverified:
+                # Stuff request-host of this origin transaction into Request
+                # object, because we need to know it to know whether cookies
+                # should be in operation during derived requests (redirects,
+                # specifically -- including refreshes).
+                request.origin_req_host = request_host(request)
+            self.cookies.add_cookie_header(request, unverified)
+            return request
+
+        def http_response(self, request, response):
+            """Let the cookie jar extract cookies from an incoming response."""
+            unverified = self._unverifiable(request)
+            self.cookies.extract_cookies(response, request, unverified)
+            return response
+
+        https_request = http_request
+        https_response = http_response
+
+    class HTTPRefererProcessor(BaseProcessor):
+        """Add Referer header to requests.
+
+        Each response's URL is remembered and sent as the Referer of the next
+        request, so an instance is only meaningful for a single chain of
+        requests: sharing one HTTPRefererProcessor while fetching a series of
+        URLs extracted from a single page will break.
+
+        """
+        def __init__(self):
+            self.referer = None
+
+        def http_request(self, request):
+            # never override a Referer that is already on the request
+            if self.referer is not None and not request.has_key("Referer"):
+                request.add_unredirected_header("Referer", self.referer)
+            return request
+
+        def http_response(self, request, response):
+            self.referer = response.geturl()
+            return response
+
+        https_request = http_request
+        https_response = http_response
+
+    class HTTPStandardHeadersProcessor(BaseProcessor):
+        """Add Content-*, Host and the opener's addheaders when not yet set."""
+        def http_request(self, request):
+            host = request.get_host()
+            if not host:
+                raise URLError('no host given')
+
+            # headers already present on the request are never overwritten
+            add = request.add_unredirected_header
+            if request.has_data():  # POST
+                body = request.get_data()
+                if not request.has_key('Content-type'):
+                    add('Content-type', 'application/x-www-form-urlencoded')
+                if not request.has_key('Content-length'):
+                    add('Content-length', '%d' % len(body))
+
+            scheme, rest = urllib.splittype(request.get_selector())
+            sel_host, sel_path = urllib.splithost(rest)
+            if not request.has_key('Host'):
+                add('Host', sel_host or host)
+            for name, value in self.parent.addheaders:
+                name = string.capitalize(name)
+                if not request.has_key(name):
+                    add(name, value)
+
+            return request
+
+        https_request = http_request
+
+    class HTTPResponseDebugProcessor(BaseProcessor):
+        """Dump each response body through _debug, then rewind the response."""
+        processor_order = 900  # before redirections, after everything else
+        def http_response(self, request, response):
+            if not hasattr(response, "seek"):
+                response = seek_wrapper(response)
+            for text in (response.read(),
+                         "*****************************************************"):
+                _debug(text)
+            response.seek(0)
+            return response
+        https_response = http_response
+
+    class HTTPRefreshProcessor(BaseProcessor):
+        """Perform HTTP Refresh redirections.
+
+        Note that if a non-200 HTTP code has occurred (for example, a 30x
+        redirect), this processor will do nothing.
+
+        By default, only zero-time Refresh headers are redirected.  Use the
+        max_time constructor argument to allow Refresh with longer pauses.
+        Use the honor_time argument to control whether the requested pause
+        is honoured (with a time.sleep()) or skipped in favour of immediate
+        redirection.  Refresh headers that cannot be parsed are ignored.
+
+        """
+        processor_order = 1000
+
+        def __init__(self, max_time=0, honor_time=True):
+            self.max_time = max_time
+            self.honor_time = honor_time
+
+        def http_response(self, request, response):
+            code, msg, hdrs = response.code, response.msg, response.info()
+            if code != 200 or not hdrs.has_key("refresh"):
+                return response
+            # the header is expected to look like "<seconds>; url=<target>"
+            refresh = hdrs["refresh"]
+            i = string.find(refresh, ";")
+            j = string.find(refresh, "=", i + 1)
+            if i == -1 or j == -1:
+                return response
+            try:
+                pause = int(refresh[:i])
+            except ValueError:
+                return response  # malformed pause: leave the response alone
+            if pause > self.max_time:
+                return response
+            if pause != 0 and self.honor_time:
+                time.sleep(pause)
+            # fake a 302 response
+            hdrs["location"] = string.strip(refresh[j+1:])
+            return self.parent.error('http', request, response, 302, msg, hdrs)
+
+        https_response = http_response
+
+    class HTTPErrorProcessor(BaseProcessor):
+        """Process non-200 HTTP error responses.
+
+        Hands the response to the OpenerDirector.error method, which passes
+        the job on to the Handler.<proto>_error_<code> methods.
+
+        """
+        processor_order = 1000
+
+        def http_response(self, request, response):
+            status, reason, headers = response.code, response.msg, response.info()
+            # 200 is the only status that is passed through untouched
+            if status == 200:
+                return response
+            # otherwise whatever the error call returns replaces the response
+            return self.parent.error(
+                'http', request, response, status, reason, headers)
+
+        https_response = http_response
+
+
+    class OpenerDirector(urllib2.OpenerDirector):
+        """urllib2.OpenerDirector that also runs request/response processors."""
+        # XXX might be useful to have remove_processor, too (say you want to
+        #   set a new RefererProcessor, but keep the old CookieProcessor --
+        #   could always just create everything anew, though (using old
+        #   CookieJar object to create CookieProcessor)
+        def __init__(self):
+            urllib2.OpenerDirector.__init__(self)
+            #self.processors = []
+            # both map a protocol name to a sorted list of processors
+            self.process_response = {}
+            self.process_request = {}
+
+        def _add_sorted(self, table, key, handler):
+            """Append handler to the list at table[key] and re-sort that list."""
+            if table.has_key(key):
+                table[key].append(handler)
+                table[key].sort()
+            else:
+                table[key] = [handler]
+
+        def add_handler(self, handler):
+            """Register handler under each method name pattern it defines.
+
+            Methods named <proto>_open, <proto>_error_<kind>, <proto>_request
+            and <proto>_response are indexed; a handler with none of these is
+            ignored.
+
+            """
+            # XXX
+            # tidy me
+            # the same handler could be added twice without detection
+            added = 0
+            for meth in dir(handler.__class__):
+                if meth[-5:] == '_open':
+                    self._add_sorted(self.handle_open, meth[:-5], handler)
+                    added = 1
+                    continue
+                # i and j locate the first two underscores, when present
+                i = string.find(meth, '_')
+                j = string.find(meth[i+1:], '_') + i + 1
+                if j != -1 and meth[i+1:j] == 'error':
+                    proto = meth[:i]
+                    kind = meth[j+1:]
+                    try:
+                        kind = int(kind)
+                    except ValueError:
+                        pass
+                    by_kind = self.handle_error.get(proto, {})
+                    self._add_sorted(by_kind, kind, handler)
+                    self.handle_error[proto] = by_kind
+                    added = 1
+                    continue
+                if meth[-9:] == "_response":
+                    self._add_sorted(self.process_response, meth[:-9], handler)
+                    added = True
+                    continue
+                elif meth[-8:] == "_request":
+                    self._add_sorted(self.process_request, meth[:-8], handler)
+                    added = True
+                    continue
+            # only handlers that matched at least one pattern are kept
+            if added:
+                self.handlers.append(handler)
+                self.handlers.sort()
+                handler.add_parent(self)
+
+##         def add_processor(self, processor):
+##             added = False
+##             for meth in dir(processor):
+##                 if meth[-9:] == "_response":
+##                     protocol = meth[:-9]
+##                     if self.process_response.has_key(protocol):
+##                         self.process_response[protocol].append(processor)
+##                         self.process_response[protocol].sort()
+##                     else:
+##                         self.process_response[protocol] = [processor]
+##                     added = True
+##                     continue
+##                 elif meth[-8:] == "_request":
+##                     protocol = meth[:-8]
+##                     if self.process_request.has_key(protocol):
+##                         self.process_request[protocol].append(processor)
+##                         self.process_request[protocol].sort()
+##                     else:
+##                         self.process_request[protocol] = [processor]
+##                     added = True
+##                     continue
+##             if added:
+##                 self.processors.append(processor)
+##                 # XXX base class sorts .handlers, but I have no idea why
+##                 #self.processors.sort()
+##                 processor.add_parent(self)
+
+        def _request(self, url_or_req, data):
+            """Return a request object for a URL string or an existing request."""
+            if isstringlike(url_or_req):
+                return Request(url_or_req, data)
+            # already a urllib2.Request instance
+            if data is not None:
+                url_or_req.add_data(data)
+            return url_or_req
+
+        def open(self, fullurl, data=None):
+            """Run request processors, open the URL, run response processors."""
+            req = self._request(fullurl, data)
+            scheme = req.get_type()
+
+            # pre-process request
+            # XXX should we allow a Processor to change the type (URL
+            #   scheme) of the request?
+            meth_name = scheme+"_request"
+            for processor in self.process_request.get(scheme, []):
+                pre_process = getattr(processor, meth_name)
+                req = pre_process(req)
+
+            response = urllib2.OpenerDirector.open(self, req, data)
+
+            # post-process response
+            meth_name = scheme+"_response"
+            for processor in self.process_response.get(scheme, []):
+                post_process = getattr(processor, meth_name)
+                response = post_process(req, response)
+
+            return response
+
+##         def close(self):
+##             urllib2.OpenerDirector.close(self)
+##             for processor in self.processors:
+##                 processor.close()
+##             self.processors = []
+
+
+    # Note the absence of redirect and header-adding code here
+    # (AbstractHTTPHandler), and the lack of other clutter that would be
+    # here without Processors.
+    class AbstractHTTPHandler(urllib2.BaseHandler):
+        def do_open(self, http_class, req):
+            """Send req via http_class(host); return an addinfourl response."""
+            host = req.get_host()
+            if not host:
+                raise URLError('no host given')
+
+            conn = http_class(host) # will parse host:port
+            if ClientCookie.HTTP_DEBUG:
+                conn.set_debuglevel(1)
+
+            if req.has_data():
+                method = 'POST'
+            else:
+                method = 'GET'
+            conn.putrequest(method, req.get_selector())
+            for k, v in req.headers.items():
+                conn.putheader(k, v)
+            for k, v in req.unredirected_hdrs.items():
+                conn.putheader(k, v)
+
+            # httplib will attempt to connect() here.  be prepared
+            # to convert a socket error to a URLError.
+            try:
+                conn.endheaders()
+            except socket.error, err:
+                raise URLError(err)
+            if req.has_data():
+                conn.send(req.get_data())
+
+            status, reason, reply_hdrs = conn.getreply()
+            response = urllib.addinfourl(
+                conn.getfile(), reply_hdrs, req.get_full_url())
+            response.code = status
+            response.msg = reason
+
+            return response
+
+
+    # XXX would self.reset() work, instead of raising this exception?
+    class EndOfHeadError(Exception): pass  # raised by HeadParser to stop parsing
+    class HeadParser(htmllib.HTMLParser):
+        """Collect (http-equiv, content) pairs from META tags in the HEAD."""
+        # only these elements are allowed in or before HEAD of document
+        head_elems = ("html", "head",
+                      "title", "base",
+                      "script", "style", "meta", "link", "object")
+        def __init__(self):
+            htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
+            self.http_equiv = []
+
+        def start_meta(self, attrs):
+            """Record the tag's http-equiv/content pair; content may be None."""
+            name = body = None
+            for key, value in attrs:
+                if key == "http-equiv":
+                    name = value
+                elif key == "content":
+                    body = value
+            if name is not None:
+                self.http_equiv.append((name, body))
+
+        def handle_starttag(self, tag, method, attrs):
+            if tag not in self.head_elems:
+                raise EndOfHeadError()
+            method(attrs)
+
+        def handle_endtag(self, tag, method):
+            if tag not in self.head_elems:
+                raise EndOfHeadError()
+            method()
+
+        def end_head(self):
+            raise EndOfHeadError()
+
+    def parse_head(file):
+        """Return the (http-equiv, content) pairs found in file's HTML HEAD."""
+        parser = HeadParser()
+        more = 1
+        while more:
+            chunk = file.read(CHUNK)
+            # a short read ends the loop: this should only happen if there
+            # is no HTML body, or if CHUNK is big
+            more = len(chunk) == CHUNK
+            try:
+                parser.feed(chunk)
+            except EndOfHeadError:
+                more = 0
+        return parser.http_equiv
+
+
+    class HTTPHandler(AbstractHTTPHandler):
+        def http_open(self, req):  # do_open with httplib.HTTP as http_class
+            return self.do_open(httplib.HTTP, req)
+
+    if hasattr(httplib, 'HTTPS'):  # only defined when httplib offers HTTPS
+        class HTTPSHandler(AbstractHTTPHandler):
+            def https_open(self, req):  # same as http_open, over HTTPS
+                return self.do_open(httplib.HTTPS, req)
+
+
+    def build_opener(*handlers):
+        """Create an opener object from a list of handlers and processors.
+
+        The opener will use several default handlers and processors, including
+        support for HTTP and FTP.  If there is a ProxyHandler, it must be at the
+        front of the list of handlers.  (Yuck.  This is fixed in 2.3.)
+
+        If any of the handlers passed as arguments are subclasses of the
+        default handlers, the default handlers will not be used.  Handlers
+        may be passed either as classes, which are instantiated here with no
+        arguments, or as instances.
+        """
+        opener = OpenerDirector()
+        default_classes = [
+            # handlers
+            urllib2.ProxyHandler,
+            urllib2.UnknownHandler,
+            HTTPHandler,  # from this module (derived from new AbstractHTTPHandler)
+            urllib2.HTTPDefaultErrorHandler,
+            HTTPRedirectHandler,  # from this module (bugfixed)
+            urllib2.FTPHandler,
+            urllib2.FileHandler,
+            # processors
+            HTTPRequestUpgradeProcessor,
+            #HTTPEquivProcessor,
+            #SeekableProcessor,
+            HTTPCookieProcessor,
+            #HTTPRefererProcessor,
+            HTTPStandardHeadersProcessor,
+            #HTTPRefreshProcessor,
+            HTTPErrorProcessor
+            ]
+        if hasattr(httplib, 'HTTPS'):
+            default_classes.append(HTTPSHandler)
+        skip = []
+        for klass in default_classes:
+            for check in handlers:
+                if type(check) == types.ClassType:
+                    if issubclass(check, klass):
+                        skip.append(klass)
+                elif type(check) == types.InstanceType:
+                    if isinstance(check, klass):
+                        skip.append(klass)
+        for klass in skip:
+            # a default matched by several handlers is listed more than once
+            if klass in default_classes:
+                default_classes.remove(klass)
+
+        # remaining defaults go first, then the caller's handlers in order
+        to_add = []
+        for klass in default_classes:
+            to_add.append(klass())
+        for h in handlers:
+            if type(h) == types.ClassType:
+                h = h()
+            to_add.append(h)
+
+        for instance in to_add:
+            opener.add_handler(instance)
+
+        return opener
+
+
+    _opener = None  # shared default opener, created on first use by urlopen
+    urlopen_lock = _threading.Lock()  # held while the default opener is built
+    def urlopen(url, data=None):
+        global _opener
+        if _opener is None:
+            urlopen_lock.acquire()
+            try:
+                if _opener is None:  # re-check now that we hold the lock
+                    _opener = build_opener()
+            finally:
+                urlopen_lock.release()
+        return _opener.open(url, data)
+
+    def install_opener(opener):
+        global _opener
+        _opener = opener  # urlopen uses this opener from now on