summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorRobin Gareus <robin@gareus.org>2019-02-28 18:21:05 +0100
committerRobin Gareus <robin@gareus.org>2019-02-28 18:21:05 +0100
commit06911bd7e25a156ff4ee57ff764a6b1ec9128c19 (patch)
tree43947cfb7877f6f7b01559e11fe8c387067c916a /tools
parentc83ba533990f15c95b167d5f6e5b4762966d050d (diff)
Move bug-tool into the Python Museum
Diffstat (limited to 'tools')
-rw-r--r--tools/bug_tool/ClientCookie/_ClientCookie.py1833
-rw-r--r--tools/bug_tool/ClientCookie/_Debug.py9
-rw-r--r--tools/bug_tool/ClientCookie/_HeadersUtil.py224
-rw-r--r--tools/bug_tool/ClientCookie/_MSIECookieJar.py377
-rw-r--r--tools/bug_tool/ClientCookie/_MozillaCookieJar.py171
-rw-r--r--tools/bug_tool/ClientCookie/_Util.py459
-rw-r--r--tools/bug_tool/ClientCookie/__init__.py49
-rw-r--r--tools/bug_tool/ClientCookie/_urllib2_support.py713
-rw-r--r--tools/bug_tool/ClientForm.py2699
-rwxr-xr-xtools/bug_tool/ardour_bugs349
10 files changed, 0 insertions, 6883 deletions
diff --git a/tools/bug_tool/ClientCookie/_ClientCookie.py b/tools/bug_tool/ClientCookie/_ClientCookie.py
deleted file mode 100644
index abcb2c86f1..0000000000
--- a/tools/bug_tool/ClientCookie/_ClientCookie.py
+++ /dev/null
@@ -1,1833 +0,0 @@
-"""HTTP cookie handling for web clients, plus some other stuff.
-
-This module originally developed from my port of Gisle Aas' Perl module
-HTTP::Cookies, from the libwww-perl library.
-
-Docstrings, comments and debug strings in this code refer to the
-attributes of the HTTP cookie system as cookie-attributes, to distinguish
-them clearly from Python attributes.
-
-Comments to John J Lee <jjl@pobox.com>.
-
-
-Copyright 2002-2003 John J Lee <jjl@pobox.com>
-Copyright 1997-1999 Gisle Aas (original libwww-perl code)
-Copyright 2002-2003 Johnny Lee (original MSIE Perl code)
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD License (see the file COPYING included with the
-distribution).
-
-"""
-
-VERSION = "0.4.9"
-
-
-# Public health warning: anyone who thought 'cookies are simple, aren't they?',
-# run away now :-(
-
-import sys, re, urlparse, string, copy, time, struct
-try:
- import threading
- _threading = threading; del threading
-except ImportError:
- import dummy_threading
- _threading = dummy_threading; del dummy_threading
-import httplib # only for the default HTTP port
-
-MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
- "instance initialised with one)")
-DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT)
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-try: StopIteration
-except NameError:
- class StopIteration(Exception): pass
-
-import ClientCookie
-from _HeadersUtil import split_header_words, join_header_words, \
- parse_ns_headers
-from _Util import startswith, endswith, iso2time, time2isoz
-from _Debug import debug
-
-try: bool
-except NameError:
- def bool(expr):
- if expr: return True
- else: return False
-
-try: issubclass(Exception, (Exception,))
-except TypeError:
- real_issubclass = issubclass
- from _Util import compat_issubclass
- issubclass = compat_issubclass
- del compat_issubclass
-
-SPACE_DICT = {}
-for c in string.whitespace:
- SPACE_DICT[c] = None
-del c
-def isspace(string):
- for c in string:
- if not SPACE_DICT.has_key(c): return False
- return True
-
-def getheaders(msg, name):
- """Get all values for a header.
-
- This returns a list of values for headers given more than once; each
- value in the result list is stripped in the same way as the result of
- getheader(). If the header is not given, return an empty list.
- """
- result = []
- current = ''
- have_header = 0
- for s in msg.getallmatchingheaders(name):
- if isspace(s[0]):
- if current:
- current = "%s\n %s" % (current, string.strip(s))
- else:
- current = string.strip(s)
- else:
- if have_header:
- result.append(current)
- current = string.strip(s[string.find(s, ":") + 1:])
- have_header = 1
- if have_header:
- result.append(current)
- return result
-
-def reraise_unmasked_exceptions(unmasked=()):
- # There are a few catch-all except: statements in this module, for
- # catching input that's bad in unexpected ways.
- # This function re-raises some exceptions we don't want to trap.
- if ClientCookie.CLIENTCOOKIE_DEBUG:
- raise
- unmasked = unmasked + (KeyboardInterrupt, SystemExit)
- etype = sys.exc_info()[0]
- if issubclass(etype, unmasked):
- raise
-
-
-IPV4_RE = re.compile(r"\.\d+$")
-def is_HDN(text):
- """Return True if text is a host domain name."""
- # XXX
- # This may well be wrong. Which RFC is HDN defined in, if any (for
- # the purposes of RFC 2965)?
- # For the current implementation, what about IPv6? Remember to look
- # at other uses of IPV4_RE also, if change this.
- if IPV4_RE.search(text):
- return False
- if text == "":
- return False
- if text[0] == "." or text[-1] == ".":
- return False
- return True
-
-def domain_match(A, B):
- """Return True if domain A domain-matches domain B, according to RFC 2965.
-
- A and B may be host domain names or IP addresses.
-
- RFC 2965, section 1:
-
- Host names can be specified either as an IP address or a HDN string.
- Sometimes we compare one host name with another. (Such comparisons SHALL
- be case-insensitive.) Host A's name domain-matches host B's if
-
- * their host name strings string-compare equal; or
-
- * A is a HDN string and has the form NB, where N is a non-empty
- name string, B has the form .B', and B' is a HDN string. (So,
- x.y.com domain-matches .Y.com but not Y.com.)
-
- Note that domain-match is not a commutative operation: a.b.c.com
- domain-matches .c.com, but not the reverse.
-
- """
- # Note that, if A or B are IP addresses, the only relevant part of the
- # definition of the domain-match algorithm is the direct string-compare.
- A = string.lower(A)
- B = string.lower(B)
- if A == B:
- return True
- if not is_HDN(A):
- return False
- i = string.rfind(A, B)
- if i == -1 or i == 0:
- # A does not have form NB, or N is the empty string
- return False
- if not startswith(B, "."):
- return False
- if not is_HDN(B[1:]):
- return False
- return True
-
-def liberal_is_HDN(text):
- """Return True if text is a sort-of-like a host domain name.
-
- For accepting/blocking domains.
-
- """
- if IPV4_RE.search(text):
- return False
- return True
-
-def user_domain_match(A, B):
- """For blocking/accepting domains.
-
- A and B may be host domain names or IP addresses.
-
- """
- A = string.lower(A)
- B = string.lower(B)
- if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
- if A == B:
- # equal IP addresses
- return True
- return False
- initial_dot = startswith(B, ".")
- if initial_dot and endswith(A, B):
- return True
- if not initial_dot and A == B:
- return True
- return False
-
-cut_port_re = re.compile(r":\d+$")
-def request_host(request):
- """Return request-host, as defined by RFC 2965.
-
- Variation from RFC: returned value is lowercased, for convenient
- comparison.
-
- """
- url = request.get_full_url()
- host = urlparse.urlparse(url)[1]
- if host == "":
- host = request.headers.get("Host", "")
-
- # remove port, if present
- host = cut_port_re.sub("", host, 1)
- return string.lower(host)
-
-def eff_request_host(request):
- """Return a tuple (request-host, effective request-host name).
-
- As defined by RFC 2965, except both are lowercased.
-
- """
- erhn = req_host = request_host(request)
- if string.find(req_host, ".") == -1 and not IPV4_RE.search(req_host):
- erhn = req_host + ".local"
- return req_host, erhn
-
-def request_path(request):
- """request-URI, as defined by RFC 2965."""
- url = request.get_full_url()
- #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url)
- req_path = normalize_path(string.join(urlparse.urlparse(url)[2:], ""))
- if not startswith(req_path, "/"):
- # fix bad RFC 2396 absoluteURI
- req_path = "/"+req_path
- return req_path
-
-def request_port(request):
- # ATM (Python 2.3) request.port is always None, and unused by urllib2
- port = request.port
- host = request.get_host()
- if port is None:
- i = string.find(host, ':')
- if i >= 0:
- port = host[i+1:]
- try:
- int(port)
- except ValueError:
- debug("nonnumeric port: '%s'" % port)
- return None
- else:
- port = DEFAULT_HTTP_PORT
- return port
-
-def unescape_path_fn(match):
- x = string.upper(match.group(1))
- if x == "2F" or x == "25":
- return "%%%s" % (x,)
- else:
- # string.atoi deprecated in 2.0, but 1.5.2 int function won't do
- # radix conversion
- return struct.pack("B", string.atoi(x, 16))
-def normalize_path_fn(match):
- return "%%%02X" % ord(match.group(1))
-
-unescape_re = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
-normalize_re = re.compile(r"([\0-\x20\x7f-\xff])")
-def normalize_path(path):
- """Normalise URI path so that plain string compare can be used.
-
- >>> normalize_path("%19\xd3%Fb%2F%25%26")
- '%19%D3%FB%2F%25&'
- >>>
-
- In normalised form, all non-printable characters are %-escaped, and all
- printable characters are given literally (not escaped). All remaining
- %-escaped characters are capitalised. %25 and %2F are special-cased,
- because they represent the printable characters"%" and "/", which are used
- as escape and URI path separator characters respectively.
-
- """
- path = unescape_re.sub(unescape_path_fn, path)
- path = normalize_re.sub(normalize_path_fn, path)
- return path
-
-def reach(h):
- """Return reach of host h, as defined by RFC 2965, section 1.
-
- The reach R of a host name H is defined as follows:
-
- * If
-
- - H is the host domain name of a host; and,
-
- - H has the form A.B; and
-
- - A has no embedded (that is, interior) dots; and
-
- - B has at least one embedded dot, or B is the string "local".
- then the reach of H is .B.
-
- * Otherwise, the reach of H is H.
-
- >>> reach("www.acme.com")
- '.acme.com'
- >>> reach("acme.com")
- 'acme.com'
- >>> reach("acme.local")
- '.local'
-
- """
- i = string.find(h, ".")
- if i >= 0:
- #a = h[:i] # this line is only here to show what a is
- b = h[i+1:]
- i = string.find(b, ".")
- if is_HDN(h) and (i >= 0 or b == "local"):
- return "."+b
- return h
-
-def is_third_party(request):
- """
-
- RFC 2965, section 3.3.6:
-
- An unverifiable transaction is to a third-party host if its request-
- host U does not domain-match the reach R of the request-host O in the
- origin transaction.
-
- """
- req_host = string.lower(request_host(request))
- # the origin request's request-host was stuffed into request by
- # _urllib2_support.AbstractHTTPHandler
- if not domain_match(req_host, reach(request.origin_req_host)):
- return True
- else:
- return False
-
-
-class Cookie:
- """HTTP Cookie.
-
- This class represents both Netscape and RFC 2965 cookies.
-
- This is deliberately a very simple class. It just holds attributes. It's
- possible to construct Cookie instances that don't comply with the cookie
- standards. CookieJar.make_cookies is the factory function for Cookie
- objects -- it deals with cookie parsing, supplying defaults, and
- normalising to the representation used in this class. CookiePolicy is
- responsible for checking them to see whether they should be accepted from
- and returned to the server.
-
- version: integer;
- name: string (may be None);
- value: string;
- port: string; None indicates no attribute was supplied (eg. "Port", rather
- than eg. "Port=80"); otherwise, a port string (eg. "80") or a port list
- string (eg. "80,8080")
- port_specified: boolean; true if a value was supplied with the Port
- cookie-attribute
- domain: string;
- domain_specified: boolean; true if Domain was explicitly set
- domain_initial_dot: boolean; true if Domain as set in HTTP header by server
- started with a dot (yes, this really is necessary!)
- path: string;
- path_specified: boolean; true if Path was explicitly set
- secure: boolean; true if should only be returned over secure connection
- expires: integer; seconds since epoch (RFC 2965 cookies should calculate
- this value from the Max-Age attribute)
- discard: boolean, true if this is a session cookie; (if no expires value,
- this should be true)
- comment: string;
- comment_url: string;
- rest: mapping of other attributes
-
- Note that the port may be present in the headers, but unspecified ("Port"
- rather than"Port=80", for example); if this is the case, port is None.
-
- """
-
- def __init__(self, version, name, value,
- port, port_specified,
- domain, domain_specified, domain_initial_dot,
- path, path_specified,
- secure,
- expires,
- discard,
- comment,
- comment_url,
- rest):
-
- if version is not None: version = int(version)
- if expires is not None: expires = int(expires)
- if port is None and port_specified is True:
- raise ValueError("if port is None, port_specified must be false")
-
- self.version = version
- self.name = name
- self.value = value
- self.port = port
- self.port_specified = port_specified
- # normalise case, as per RFC 2965 section 3.3.3
- self.domain = string.lower(domain)
- self.domain_specified = domain_specified
- # Sigh. We need to know whether the domain given in the
- # cookie-attribute had an initial dot, in order to follow RFC 2965
- # (as clarified in draft errata). Needed for the returned $Domain
- # value.
- self.domain_initial_dot = domain_initial_dot
- self.path = path
- self.path_specified = path_specified
- self.secure = secure
- self.expires = expires
- self.discard = discard
- self.comment = comment
- self.comment_url = comment_url
-
- self.rest = copy.copy(rest)
-
- def is_expired(self, now=None):
- if now is None: now = time.time()
- if (self.expires is not None) and (self.expires <= now):
- return True
- return False
-
- def __str__(self):
- if self.port is None: p = ""
- else: p = ":"+self.port
- limit = self.domain + p + self.path
- if self.name is not None:
- namevalue = "%s=%s" % (self.name, self.value)
- else:
- namevalue = self.value
- return "<Cookie %s for %s>" % (namevalue, limit)
-
- def __repr__(self):
- args = []
- for name in ["version", "name", "value",
- "port", "port_specified",
- "domain", "domain_specified", "domain_initial_dot",
- "path", "path_specified",
- "secure", "expires", "discard", "comment", "comment_url"]:
- attr = getattr(self, name)
- args.append("%s=%s" % (name, attr))
- args.append(repr(self.rest))
- return "Cookie(%s)" % string.join(args, ", ")
-
-
-class CookiePolicy:
- """Defines which cookies get accepted from and returned to server.
-
- The subclass DefaultCookiePolicy defines the standard rules for Netscape
- and RFC 2965 cookies -- override that if you want a customised policy.
-
- As well as implementing set_ok and return_ok, implementations of this
- interface must also supply the following attributes, indicating which
- protocols should be used, and how. These can be read and set at any time,
- though whether that makes complete sense from the protocol point of view is
- doubtful.
-
- Public attributes:
-
- netscape: implement netscape protocol
- rfc2965: implement RFC 2965 protocol
- hide_cookie2: don't add Cookie2 header to requests (the presence of
- this header indicates to the server that we understand RFC 2965
- cookies)
-
- """
- def set_ok(self, cookie, request, unverifiable):
- """Return true if (and only if) cookie should be accepted from server.
-
- Currently, pre-expired cookies never get this far -- the CookieJar
- class deletes such cookies itself.
-
- cookie: ClientCookie.Cookie object
- request: object implementing the interface defined by
- CookieJar.extract_cookies.__doc__
- unverifiable: flag indicating whether the transaction is unverifiable,
- as defined by RFC 2965
-
- """
- raise NotImplementedError()
-
- def return_ok(self, cookie, request, unverifiable):
- """Return true if (and only if) cookie should be returned to server.
-
- cookie: ClientCookie.Cookie object
- request: object implementing the interface defined by
- CookieJar.add_cookie_header.__doc__
- unverifiable: flag indicating whether the transaction is unverifiable,
- as defined by RFC 2965
-
- """
- raise NotImplementedError()
-
- def domain_return_ok(self, domain, request, unverifiable):
- """Return false if cookies should not be returned, given cookie domain.
-
- This is here as an optimization, to remove the need for checking every
- cookie with a particular domain (which may involve reading many files).
- The default implementations of domain_return_ok and path_return_ok
- (return True) leave all the work to return_ok.
-
- If domain_return_ok returns true for the cookie domain, path_return_ok
- is called for the cookie path. Otherwise, path_return_ok and return_ok
- are never called for that cookie domain. If path_return_ok returns
- true, return_ok is called with the Cookie object itself for a full
- check. Otherwise, return_ok is never called for that cookie path.
-
- Note that domain_return_ok is called for every *cookie* domain, not
- just for the *request* domain. For example, the function might be
- called with both ".acme.com" and "www.acme.com" if the request domain is
- "www.acme.com". The same goes for path_return_ok.
-
- For argument documentation, see the docstring for return_ok.
-
- """
- return True
-
- def path_return_ok(self, path, request, unverifiable):
- """Return false if cookies should not be returned, given cookie path.
-
- See the docstring for domain_return_ok.
-
- """
- return True
-
-
-class DefaultCookiePolicy(CookiePolicy):
- """Implements the standard rules for accepting and returning cookies.
-
- Both RFC 2965 and Netscape cookies are covered.
-
- The easiest way to provide your own policy is to override this class and
- call its methods in your overridden implementations before adding your own
- additional checks.
-
- import ClientCookie
- class MyCookiePolicy(ClientCookie.DefaultCookiePolicy):
- def set_ok(self, cookie, request, unverifiable):
- if not ClientCookie.DefaultCookiePolicy.set_ok(
- self, cookie, request, unverifiable):
- return False
- if i_dont_want_to_store_this_cookie():
- return False
- return True
-
- In addition to the features required to implement the CookiePolicy
- interface, this class allows you to block and allow domains from setting
- and receiving cookies. There are also some strictness switches that allow
- you to tighten up the rather loose Netscape protocol rules a little bit (at
- the cost of blocking some benign cookies).
-
- A domain blacklist and whitelist is provided (both off by default). Only
- domains not in the blacklist and present in the whitelist (if the whitelist
- is active) participate in cookie setting and returning. Use the
- blocked_domains constructor argument, and blocked_domains and
- set_blocked_domains methods (and the corresponding argument and methods for
- allowed_domains). If you set a whitelist, you can turn it off again by
- setting it to None.
-
- Domains in block or allow lists that do not start with a dot must
- string-compare equal. For example, "acme.com" matches a blacklist entry of
- "acme.com", but "www.acme.com" does not. Domains that do start with a dot
- are matched by more specific domains too. For example, both "www.acme.com"
- and "www.munitions.acme.com" match ".acme.com" (but "acme.com" itself does
- not). IP addresses are an exception, and must match exactly. For example,
- if blocked_domains contains "192.168.1.2" and ".168.1.2" 192.168.1.2 is
- blocked, but 193.168.1.2 is not.
-
- Additional Public Attributes:
-
- General strictness switches
-
- strict_domain: don't allow sites to set two-component domains with
- country-code top-level domains like .co.uk, .gov.uk, .co.nz. etc.
- This is far from perfect and isn't guaranteed to work!
-
- RFC 2965 protocol strictness switches
-
- strict_rfc2965_unverifiable: follow RFC 2965 rules on unverifiable
- transactions (usually, an unverifiable transaction is one resulting from
- a redirect or an image hosted on another site); if this is false, cookies
- are NEVER blocked on the basis of verifiability
-
- Netscape protocol strictness switches
-
- strict_ns_unverifiable: apply RFC 2965 rules on unverifiable transactions
- even to Netscape cookies
- strict_ns_domain: flags indicating how strict to be with domain-matching
- rules for Netscape cookies:
- DomainStrictNoDots: when setting cookies, host prefix must not contain a
- dot (eg. www.foo.bar.com can't set a cookie for .bar.com, because
- www.foo contains a dot)
- DomainStrictNonDomain: cookies that did not explicitly specify a Domain
- cookie-attribute can only be returned to a domain that string-compares
- equal to the domain that set the cookie (eg. rockets.acme.com won't
- be returned cookies from acme.com that had no Domain cookie-attribute)
- DomainRFC2965Match: when setting cookies, require a full RFC 2965
- domain-match
- DomainLiberal and DomainStrict are the most useful combinations of the
- above flags, for convenience
- strict_ns_set_initial_dollar: ignore cookies in Set-Cookie: headers that
- have names starting with '$'
- strict_ns_set_path: don't allow setting cookies whose path doesn't
- path-match request URI
-
- """
-
- DomainStrictNoDots = 1
- DomainStrictNonDomain = 2
- DomainRFC2965Match = 4
-
- DomainLiberal = 0
- DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
-
- def __init__(self,
- blocked_domains=None, allowed_domains=None,
- netscape=True, rfc2965=True,
- hide_cookie2=False,
- strict_domain=False,
- strict_rfc2965_unverifiable=True,
- strict_ns_unverifiable=False,
- strict_ns_domain=DomainLiberal,
- strict_ns_set_initial_dollar=False,
- strict_ns_set_path=False):
- """
- blocked_domains: sequence of domain names that we never accept cookies
- from, nor return cookies to
- allowed_domains: if not None, this is a sequence of the only domains
- for which we accept and return cookies
-
- For other arguments, see CookiePolicy.__doc__ and
- DefaultCookiePolicy.__doc__..
-
- """
- self.netscape = netscape
- self.rfc2965 = rfc2965
- self.hide_cookie2 = hide_cookie2
- self.strict_domain = strict_domain
- self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
- self.strict_ns_unverifiable = strict_ns_unverifiable
- self.strict_ns_domain = strict_ns_domain
- self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
- self.strict_ns_set_path = strict_ns_set_path
-
- if blocked_domains is not None:
- self._blocked_domains = tuple(blocked_domains)
- else:
- self._blocked_domains = ()
-
- if allowed_domains is not None:
- allowed_domains = tuple(allowed_domains)
- self._allowed_domains = allowed_domains
-
- def blocked_domains(self):
- """Return the sequence of blocked domains (as a tuple)."""
- return self._blocked_domains
- def set_blocked_domains(self, blocked_domains):
- """Set the sequence of blocked domains."""
- self._blocked_domains = tuple(blocked_domains)
-
- def is_blocked(self, domain):
- for blocked_domain in self._blocked_domains:
- if user_domain_match(domain, blocked_domain):
- return True
- return False
-
- def allowed_domains(self):
- """Return None, or the sequence of allowed domains (as a tuple)."""
- return self._allowed_domains
- def set_allowed_domains(self, allowed_domains):
- """Set the sequence of allowed domains, or None."""
- if allowed_domains is not None:
- allowed_domains = tuple(allowed_domains)
- self._allowed_domains = allowed_domains
-
- def is_not_allowed(self, domain):
- if self._allowed_domains is None:
- return False
- for allowed_domain in self._allowed_domains:
- if user_domain_match(domain, allowed_domain):
- return False
- return True
-
- def set_ok(self, cookie, request, unverifiable):
- """
- If you override set_ok, be sure to call this method. If it returns
- false, so should your subclass (assuming your subclass wants to be more
- strict about which cookies to accept).
-
- """
- debug(" - checking cookie %s=%s" % (cookie.name, cookie.value))
-
- assert cookie.value is not None
-
- for n in "version", "verifiability", "name", "path", "domain", "port":
- fn_name = "set_ok_"+n
- fn = getattr(self, fn_name)
- if not fn(cookie, request, unverifiable):
- return False
- return True
-
- def set_ok_version(self, cookie, request, unverifiable):
- if cookie.version is None:
- # Version is always set to 0 by parse_ns_headers if it's a Netscape
- # cookie, so this must be an invalid RFC 2965 cookie.
- debug(" Set-Cookie2 without version attribute (%s=%s)" %
- (cookie.name, cookie.value))
- return False
- if cookie.version > 0 and not self.rfc2965:
- debug(" RFC 2965 cookies are switched off")
- return False
- elif cookie.version == 0 and not self.netscape:
- debug(" Netscape cookies are switched off")
- return False
- return True
-
- def set_ok_verifiability(self, cookie, request, unverifiable):
- if unverifiable and is_third_party(request):
- if cookie.version > 0 and self.strict_rfc2965_unverifiable:
- debug(" third-party RFC 2965 cookie during unverifiable "
- "transaction")
- return False
- elif cookie.version == 0 and self.strict_ns_unverifiable:
- debug(" third-party Netscape cookie during unverifiable "
- "transaction")
- return False
- return True
-
- def set_ok_name(self, cookie, request, unverifiable):
- # Try and stop servers setting V0 cookies designed to hack other
- # servers that know both V0 and V1 protocols.
- if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
- (cookie.name is not None) and startswith(cookie.name, "$")):
- debug(" illegal name (starts with '$'): '%s'" % cookie.name)
- return False
- return True
-
- def set_ok_path(self, cookie, request, unverifiable):
- if cookie.path_specified:
- req_path = request_path(request)
- if ((cookie.version > 0 or
- (cookie.version == 0 and self.strict_ns_set_path)) and
- not startswith(req_path, cookie.path)):
- debug(" path attribute %s is not a prefix of request "
- "path %s" % (cookie.path, req_path))
- return False
- return True
-
- def set_ok_domain(self, cookie, request, unverifiable):
- if self.is_blocked(cookie.domain):
- debug(" domain %s is in user block-list" % cookie.domain)
- return False
- if self.is_not_allowed(cookie.domain):
- debug(" domain %s is not in user allow-list" % cookie.domain)
- return False
- if cookie.domain_specified:
- req_host, erhn = eff_request_host(request)
- domain = cookie.domain
- if self.strict_domain and (string.count(domain, ".") >= 2):
- i = string.rfind(domain, ".")
- j = string.rfind(domain, ".", 0, i)
- if j == 0: # domain like .foo.bar
- tld = domain[i+1:]
- sld = domain[j+1:i]
- if (string.lower(sld) in [
- "co", "ac",
- "com", "edu", "org", "net", "gov", "mil", "int"] and
- len(tld) == 2):
- # domain like .co.uk
- debug(" country-code second level domain %s" %
- domain)
- return False
- if startswith(domain, "."):
- undotted_domain = domain[1:]
- else:
- undotted_domain = domain
- embedded_dots = (string.find(undotted_domain, ".") >= 0)
- if not embedded_dots and domain != ".local":
- debug(" non-local domain %s contains no embedded dot" %
- domain)
- return False
- if cookie.version == 0:
- if (not endswith(erhn, domain) and
- (not startswith(erhn, ".") and
- not endswith("."+erhn, domain))):
- debug(" effective request-host %s (even with added "
- "initial dot) does not end end with %s" %
- (erhn, domain))
- return False
- if (cookie.version > 0 or
- (self.strict_ns_domain & self.DomainRFC2965Match)):
- if not domain_match(erhn, domain):
- debug(" effective request-host %s does not domain-match "
- "%s" % (erhn, domain))
- return False
- if (cookie.version > 0 or
- (self.strict_ns_domain & self.DomainStrictNoDots)):
- host_prefix = req_host[:-len(domain)]
- if (string.find(host_prefix, ".") >= 0 and
- not IPV4_RE.search(req_host)):
- debug(" host prefix %s for domain %s contains a dot" %
- (host_prefix, domain))
- return False
- return True
-
- def set_ok_port(self, cookie, request, unverifiable):
- if cookie.port_specified:
- req_port = request_port(request)
- if req_port is None:
- req_port = "80"
- else:
- req_port = str(req_port)
- for p in string.split(cookie.port, ","):
- try:
- int(p)
- except ValueError:
- debug(" bad port %s (not numeric)" % p)
- return False
- if p == req_port:
- break
- else:
- debug(" request port (%s) not found in %s" %
- (req_port, cookie.port))
- return False
- return True
-
- def return_ok(self, cookie, request, unverifiable):
- """
- If you override return_ok, be sure to call this method. If it returns
- false, so should your subclass.
-
- """
- # Path has already been checked by path_return_ok, and domain blocking
- # done by domain_return_ok.
- debug(" - checking cookie %s=%s" % (cookie.name, cookie.value))
-
- for n in "version", "verifiability", "secure", "expires", "port", "domain":
- fn_name = "return_ok_"+n
- fn = getattr(self, fn_name)
- if not fn(cookie, request, unverifiable):
- return False
- return True
-
- def return_ok_version(self, cookie, request, unverifiable):
- if cookie.version > 0 and not self.rfc2965:
- debug(" RFC 2965 cookies are switched off")
- return False
- elif cookie.version == 0 and not self.netscape:
- debug(" Netscape cookies are switched off")
- return False
- return True
-
- def return_ok_verifiability(self, cookie, request, unverifiable):
- if unverifiable and is_third_party(request):
- if cookie.version > 0 and self.strict_rfc2965_unverifiable:
- debug(" third-party RFC 2965 cookie during unverifiable "
- "transaction")
- return False
- elif cookie.version == 0 and self.strict_ns_unverifiable:
- debug(" third-party Netscape cookie during unverifiable "
- "transaction")
- return False
- return True
-
- def return_ok_secure(self, cookie, request, unverifiable):
- if cookie.secure and request.get_type() != "https":
- debug(" secure cookie with non-secure request")
- return False
- return True
-
- def return_ok_expires(self, cookie, request, unverifiable):
- if cookie.is_expired(self._now):
- debug(" cookie expired")
- return False
- return True
-
- def return_ok_port(self, cookie, request, unverifiable):
- if cookie.port:
- req_port = request_port(request)
- if req_port is None:
- req_port = "80"
- for p in string.split(cookie.port, ","):
- if p == req_port:
- break
- else:
- debug(" request port %s does not match cookie port %s" %
- (req_port, cookie.port))
- return False
- return True
-
- def return_ok_domain(self, cookie, request, unverifiable):
- req_host, erhn = eff_request_host(request)
- domain = cookie.domain
-
- # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
- if (cookie.version == 0 and
- (self.strict_ns_domain & self.DomainStrictNonDomain) and
- not cookie.domain_specified and domain != erhn):
- debug(" cookie with unspecified domain does not string-compare "
- "equal to request domain")
- return False
-
- if cookie.version > 0 and not domain_match(erhn, domain):
- debug(" effective request-host name %s does not domain-match "
- "RFC 2965 cookie domain %s" % (erhn, domain))
- return False
- if cookie.version == 0 and not endswith("."+req_host, domain):
- debug(" request-host %s does not match Netscape cookie domain "
- "%s" % (req_host, domain))
- return False
- return True
-
- def domain_return_ok(self, domain, request, unverifiable):
- if self.is_blocked(domain):
- debug(" domain %s is in user block-list" % domain)
- return False
- if self.is_not_allowed(domain):
- debug(" domain %s is not in user allow-list" % domain)
- return False
- return True
-
- def path_return_ok(self, path, request, unverifiable):
- debug("- checking cookie path=%s" % path)
- req_path = request_path(request)
- if not startswith(req_path, path):
- debug(" %s does not path-match %s" % (req_path, path))
- return False
- return True
-
-
-def lwp_cookie_str(cookie):
- """Return string representation of Cookie in an the LWP cookie file format.
-
- Actually, the format is slightly extended from that used by LWP's
- (libwww-perl's) HTTP::Cookies, to avoid losing some RFC 2965
- information not recorded by LWP.
-
- Used by the CookieJar base class for saving cookies to a file.
-
- """
- h = [(cookie.name, cookie.value),
- ("path", cookie.path),
- ("domain", cookie.domain)]
- if cookie.port is not None: h.append(("port", cookie.port))
- if cookie.path_specified: h.append(("path_spec", None))
- if cookie.port_specified: h.append(("port_spec", None))
- if cookie.domain_initial_dot: h.append(("domain_dot", None))
- if cookie.secure: h.append(("secure", None))
- if cookie.expires: h.append(("expires",
- time2isoz(float(cookie.expires))))
- if cookie.discard: h.append(("discard", None))
- if cookie.comment: h.append(("comment", cookie.comment))
- if cookie.comment_url: h.append(("commenturl", cookie.comment_url))
-
- keys = cookie.rest.keys()
- keys.sort()
- for k in keys:
- h.append((k, str(cookie.rest[k])))
-
- h.append(("version", str(cookie.version)))
-
- return join_header_words([h])
-
-def vals_sorted_by_key(adict):
- keys = adict.keys()
- keys.sort()
- return map(adict.get, keys)
-
-class MappingIterator:
- """Iterates over nested mapping, depth-first, in sorted order by key."""
- def __init__(self, mapping):
- self._s = [(vals_sorted_by_key(mapping), 0, None)] # LIFO stack
-
- def __iter__(self): return self
-
- def next(self):
- # this is hairy because of lack of generators
- while 1:
- try:
- vals, i, prev_item = self._s.pop()
- except IndexError:
- raise StopIteration()
- if i < len(vals):
- item = vals[i]
- i = i + 1
- self._s.append((vals, i, prev_item))
- try:
- item.items
- except AttributeError:
- # non-mapping
- break
- else:
- # mapping
- self._s.append((vals_sorted_by_key(item), 0, item))
- continue
- return item
-
-
-# Used as second parameter to dict.get method, to distinguish absent
-# dict key from one with a None value.
-class Absent: pass
-
-class CookieJar:
- """Collection of HTTP cookies.
-
- The major methods are extract_cookies and add_cookie_header; these are all
- you are likely to need. In fact, you probably don't even need to know
- about this class: use the cookie-aware extensions to the urllib2 callables
- provided by this module: urlopen in particular (and perhaps also
- build_opener, install_opener, HTTPCookieProcessor, HTTPRefererProcessor,
- HTTPRefreshHandler, HTTPEquivProcessor, SeekableProcessor, etc.).
-
- CookieJar supports the iterator protocol. Iteration also works in 1.5.2:
-
- for cookie in cookiejar:
- # do something with cookie
-
- Methods:
-
- CookieJar(filename=None, delayload=False, policy=None)
- add_cookie_header(request, unverifiable=False)
- extract_cookies(response, request, unverifiable=False)
- make_cookies(response, request)
- set_cookie_if_ok(cookie, request, unverifiable=False)
- set_cookie(cookie)
- save(filename=None, ignore_discard=False, ignore_expires=False)
- load(filename=None, ignore_discard=False, ignore_expires=False)
- revert(filename=None, ignore_discard=False, ignore_expires=False)
- clear(domain=None, path=None, key=None)
- clear_session_cookies()
- clear_expired_cookies()
- as_string(skip_discard=False) (str(cookies) also works)
-
-
- Public attributes
-
- filename: filename for loading and saving cookies
- policy: CookiePolicy object
-
- Public readable attributes
-
- delayload: request that cookies are lazily loaded from disk; this is only
- a hint since this only affects performance, not behaviour (unless the
- cookies on disk are changing); a CookieJar object may ignore it (in fact,
- only MSIECookieJar lazily loads cookies at the moment)
- cookies: a three-level dictionary [domain][path][key] containing Cookie
- instances; you almost certainly don't need to use this
-
- """
-
- non_word_re = re.compile(r"\W")
- quote_re = re.compile(r"([\"\\])")
- strict_domain_re = re.compile(r"\.?[^.]*")
- domain_re = re.compile(r"[^.]*")
- dots_re = re.compile(r"^\.+")
-
- magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
-
- def __init__(self, filename=None, delayload=False, policy=None):
- """
- See CookieJar.__doc__ for argument documentation.
-
- Cookies are NOT loaded from the named file until either the load or
- revert method is called.
-
- """
- self.filename = filename
- self.delayload = delayload
-
- if policy is None:
- policy = DefaultCookiePolicy()
- self.policy = policy
-
- self._cookies_lock = _threading.RLock()
- self.cookies = {}
-
- # for __getitem__ iteration in pre-2.2 Pythons
- self._prev_getitem_index = 0
-
- def _cookies_for_domain(self, domain, request, unverifiable):
- """Return a list of cookies to be returned to server."""
- debug("Checking %s for cookies to return" % domain)
- if not self.policy.domain_return_ok(domain, request, unverifiable):
- return []
-
- cookies_by_path = self.cookies.get(domain)
- if cookies_by_path is None:
- return []
-
- cookies = []
- for path in cookies_by_path.keys():
- if not self.policy.path_return_ok(path, request, unverifiable):
- continue
- for name, cookie in cookies_by_path[path].items():
- if not self.policy.return_ok(cookie, request, unverifiable):
- debug(" not returning cookie")
- continue
- debug(" it's a match")
- cookies.append(cookie)
-
- return cookies
-
- def _cookie_attrs(self, cookies):
- """Return a list of cookie-attributes to be returned to server.
-
- like ['foo="bar"; $Path="/"', ...]
-
- The $Version attribute is also added when appropriate (currently only
- once per request).
-
- """
- # add cookies in order of most specific (ie. longest) path first
- def decreasing_size(a, b): return cmp(len(b.path), len(a.path))
- cookies.sort(decreasing_size)
-
- version_set = False
-
- attrs = []
- for cookie in cookies:
- # set version of Cookie header
- # XXX
- # What should it be if multiple matching Set-Cookie headers have
- # different versions themselves?
- # Answer: there is no answer; was supposed to be settled by
- # RFC 2965 errata, but that may never appear...
- version = cookie.version
- if not version_set:
- version_set = True
- if version > 0:
- attrs.append("$Version=%s" % version)
-
- # quote cookie value if necessary
- # (not for Netscape protocol, which already has any quotes
- # intact, due to the poorly-specified Netscape Cookie: syntax)
- if self.non_word_re.search(cookie.value) and version > 0:
- value = self.quote_re.sub(r"\\\1", cookie.value)
- else:
- value = cookie.value
-
- # add cookie-attributes to be returned in Cookie header
- if cookie.name is None:
- attrs.append(value)
- else:
- attrs.append("%s=%s" % (cookie.name, value))
- if version > 0:
- if cookie.path_specified:
- attrs.append('$Path="%s"' % cookie.path)
- if startswith(cookie.domain, "."):
- domain = cookie.domain
- if (not cookie.domain_initial_dot and
- startswith(domain, ".")):
- domain = domain[1:]
- attrs.append('$Domain="%s"' % domain)
- if cookie.port is not None:
- p = "$Port"
- if cookie.port_specified:
- p = p + ('="%s"' % cookie.port)
- attrs.append(p)
-
- return attrs
-
- def add_cookie_header(self, request, unverifiable=False):
- """Add correct Cookie: header to request (urllib2.Request object).
-
- The Cookie2 header is also added unless policy.hide_cookie2 is true.
-
- The request object (usually a urllib2.Request instance) must support
- the methods get_full_url, get_host, get_type and add_header, as
- documented by urllib2, and the attributes headers (a mapping containing
- the request's HTTP headers) and port (the port number).
-
- If unverifiable is true, it will be assumed that the transaction is
- unverifiable as defined by RFC 2965, and appropriate action will be
- taken.
-
- """
- debug("add_cookie_header")
- self._cookies_lock.acquire()
-
- self.policy._now = self._now = int(time.time())
-
- req_host, erhn = eff_request_host(request)
- strict_non_domain = \
- self.policy.strict_ns_domain & self.policy.DomainStrictNonDomain
-
- cookies = []
-
- domain = erhn
- # First check origin server effective host name for an exact match.
- cookies.extend(self._cookies_for_domain(domain, request, unverifiable))
- # Then, start with effective request-host with initial dot prepended
- # (for Netscape cookies with explicitly-set Domain cookie-attributes)
- # -- eg. .foo.bar.baz.com and check all possible derived domain strings
- # (.bar.baz.com, bar.baz.com, .baz.com) for cookies.
- # This isn't too finicky about which domains to check, because we have
- # to cover both V0 and V1 cookies, and policy.return_ok will check the
- # domain in any case.
- if not IPV4_RE.search(req_host):
- # IP addresses must string-compare equal in order to domain-match
- # (IP address case will have been checked above as erhn == req_host
- # in that case).
- if domain != ".local":
- domain = "."+domain
- while string.find(domain, ".") >= 0:
- cookies.extend(self._cookies_for_domain(
- domain, request, unverifiable))
- if strict_non_domain:
- domain = self.strict_domain_re.sub("", domain, 1)
- else:
- # strip either initial dot only, or initial component only
- # .www.foo.com --> www.foo.com
- # www.foo.com --> .foo.com
- if startswith(domain, "."):
- domain = domain[1:]
- # we've already done the erhn
- if domain == erhn:
- domain = self.domain_re.sub("", domain, 1)
- else:
- domain = self.domain_re.sub("", domain, 1)
-
- attrs = self._cookie_attrs(cookies)
- if attrs:
- request.add_header("Cookie", string.join(attrs, "; "))
-
- # if necessary, advertise that we know RFC 2965
- if self.policy.rfc2965 and not self.policy.hide_cookie2:
- for cookie in cookies:
- if cookie.version != 1:
- request.add_header("Cookie2", '$Version="1"')
- break
-
- self._cookies_lock.release()
-
- self.clear_expired_cookies()
-
- def _normalized_cookie_tuples(self, attrs_set):
- """Return list of tuples containing normalised cookie information.
-
- attrs_set is the list of lists of key,value pairs extracted from
- the Set-Cookie or Set-Cookie2 headers.
-
- Tuples are name, value, standard, rest, where name and value are the
- cookie name and value, standard is a dictionary containing the standard
- cookie-attributes (discard, secure, version, expires or max-age,
- domain, path and port) and rest is a dictionary containing the rest of
- the cookie-attributes.
-
- """
- cookie_tuples = []
-
- boolean_attrs = "discard", "secure"
- value_attrs = ("version",
- "expires", "max-age",
- "domain", "path", "port",
- "comment", "commenturl")
-
- for cookie_attrs in attrs_set:
- name, value = cookie_attrs[0]
-
- # Build dictionary of standard cookie-attributes (standard) and
- # dictionary of other cookie-attributes (rest).
-
- # Note: expiry time is normalised to seconds since epoch. V0
- # cookies should have the Expires cookie-attribute, and V1 cookies
- # should have Max-Age, but since V1 includes RFC 2109 cookies (and
- # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
- # accept either (but prefer Max-Age).
- max_age_set = False
-
- bad_cookie = False
-
- standard = {}
- rest = {}
- for k, v in cookie_attrs[1:]:
- lc = string.lower(k)
- # don't lose case distinction for unknown fields
- if lc in value_attrs or lc in boolean_attrs:
- k = lc
- if k in boolean_attrs and v is None:
- # boolean cookie-attribute is present, but has no value
- # (like "discard", rather than "port=80")
- v = True
- if standard.has_key(k):
- # only first value is significant
- continue
- if k == "domain":
- if v is None:
- debug(" missing value for domain attribute")
- bad_cookie = True
- break
- # RFC 2965 section 3.3.3
- v = string.lower(v)
- if k == "expires":
- if max_age_set:
- # Prefer max-age to expires (like Mozilla)
- continue
- if v is None:
- debug(" missing or invalid value for expires "
- "attribute: treating as session cookie")
- continue
- if k == "max-age":
- max_age_set = True
- try:
- v = int(v)
- except ValueError:
- debug(" missing or invalid (non-numeric) value for "
- "max-age attribute")
- bad_cookie = True
- break
- # convert RFC 2965 Max-Age to seconds since epoch
- # XXX Strictly you're supposed to follow RFC 2616
- # age-calculation rules. Remember that zero Max-Age is a
- # is a request to discard (old and new) cookie, though.
- k = "expires"
- v = self._now + v
- if (k in value_attrs) or (k in boolean_attrs):
- if (v is None and
- k not in ["port", "comment", "commenturl"]):
- debug(" missing value for %s attribute" % k)
- bad_cookie = True
- break
- standard[k] = v
- else:
- rest[k] = v
-
- if bad_cookie:
- continue
-
- cookie_tuples.append((name, value, standard, rest))
-
- return cookie_tuples
-
- def _cookie_from_cookie_tuple(self, tup, request):
- # standard is dict of standard cookie-attributes, rest is dict of the
- # rest of them
- name, value, standard, rest = tup
-
- domain = standard.get("domain", Absent)
- path = standard.get("path", Absent)
- port = standard.get("port", Absent)
- expires = standard.get("expires", Absent)
-
- # set the easy defaults
- version = standard.get("version", None)
- if version is not None: version = int(version)
- secure = standard.get("secure", False)
- # (discard is also set if expires is Absent)
- discard = standard.get("discard", False)
- comment = standard.get("comment", None)
- comment_url = standard.get("commenturl", None)
-
- # set default path
- if path is not Absent and path != "":
- path_specified = True
- path = normalize_path(path)
- else:
- path_specified = False
- path = request_path(request)
- i = string.rfind(path, "/")
- if i != -1:
- if version == 0:
- # Netscape spec parts company from reality here
- path = path[:i]
- else:
- path = path[:i+1]
- if len(path) == 0: path = "/"
-
- # set default domain
- domain_specified = domain is not Absent
- # but first we have to remember whether it starts with a dot
- domain_initial_dot = False
- if domain_specified:
- domain_initial_dot = bool(startswith(domain, "."))
- if domain is Absent:
- req_host, erhn = eff_request_host(request)
- domain = erhn
- elif not startswith(domain, "."):
- domain = "."+domain
-
- # set default port
- port_specified = False
- if port is not Absent:
- if port is None:
- # Port attr present, but has no value: default to request port.
- # Cookie should then only be sent back on that port.
- port = request_port(request)
- else:
- port_specified = True
- port = re.sub(r"\s+", "", port)
- else:
- # No port attr present. Cookie can be sent back on any port.
- port = None
-
- # set default expires and discard
- if expires is Absent:
- expires = None
- discard = True
- elif expires <= self._now:
- # Expiry date in past is request to delete cookie. This can't be
- # in DefaultCookiePolicy, because can't delete cookies there.
- try:
- del self.cookies[domain][path][name]
- except KeyError:
- pass
- else:
- debug("Expiring cookie, domain='%s', path='%s', name='%s'" %
- (domain, path, name))
- return None
-
- return Cookie(version,
- name, value,
- port, port_specified,
- domain, domain_specified, domain_initial_dot,
- path, path_specified,
- secure,
- expires,
- discard,
- comment,
- comment_url,
- rest)
-
- def _cookies_from_attrs_set(self, attrs_set, request):
- cookie_tuples = self._normalized_cookie_tuples(attrs_set)
- cookies = []
- for tup in cookie_tuples:
- cookie = self._cookie_from_cookie_tuple(tup, request)
- if cookie: cookies.append(cookie)
- return cookies
-
- def make_cookies(self, response, request):
- """Return sequence of Cookie objects extracted from response object.
-
- See extract_cookies.__doc__ for the interfaces required of the
- response and request arguments.
-
- """
- # get cookie-attributes for RFC 2965 and Netscape protocols
- headers = response.info()
- rfc2965_hdrs = getheaders(headers, "Set-Cookie2")
- ns_hdrs = getheaders(headers, "Set-Cookie")
-
- rfc2965 = self.policy.rfc2965
- netscape = self.policy.netscape
-
- if ((not rfc2965_hdrs and not ns_hdrs) or
- (not ns_hdrs and not rfc2965) or
- (not rfc2965_hdrs and not netscape) or
- (not netscape and not rfc2965)):
- return [] # no relevant cookie headers: quick exit
-
- try:
- cookies = self._cookies_from_attrs_set(
- split_header_words(rfc2965_hdrs), request)
- except:
- reraise_unmasked_exceptions()
- cookies = []
-
- if ns_hdrs and netscape:
- try:
- ns_cookies = self._cookies_from_attrs_set(
- parse_ns_headers(ns_hdrs), request)
- except:
- reraise_unmasked_exceptions()
- ns_cookies = []
-
- # Look for Netscape cookies (from Set-Cookie headers) that match
- # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
- # For each match, keep the RFC 2965 cookie and ignore the Netscape
- # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
- # bundled in with the Netscape cookies for this purpose, which is
- # reasonable behaviour.
- if rfc2965:
- lookup = {}
- for cookie in cookies:
- lookup[(cookie.domain, cookie.path, cookie.name)] = None
-
- def no_matching_rfc2965(ns_cookie, lookup=lookup):
- key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
- return not lookup.has_key(key)
- ns_cookies = filter(no_matching_rfc2965, ns_cookies)
-
- if ns_cookies:
- cookies.extend(ns_cookies)
-
- return cookies
-
- def set_cookie_if_ok(self, cookie, request, unverifiable=False):
- """Set a cookie if policy says it's OK to do so.
-
- cookie: ClientCookie.Cookie instance
- request: see extract_cookies.__doc__ for the required interface
- unverifiable: see extract_cookies.__doc__
-
- """
- self._cookies_lock.acquire()
- self.policy._now = self._now = int(time.time())
-
- if self.policy.set_ok(cookie, request, unverifiable):
- self.set_cookie(cookie)
-
- self._cookies_lock.release()
-
- def set_cookie(self, cookie):
- """Set a cookie, without checking whether or not it should be set.
-
- cookie: ClientCookie.Cookie instance
- """
- c = self.cookies
- self._cookies_lock.acquire()
- try:
- if not c.has_key(cookie.domain): c[cookie.domain] = {}
- c2 = c[cookie.domain]
- if not c2.has_key(cookie.path): c2[cookie.path] = {}
- c3 = c2[cookie.path]
- c3[cookie.name] = cookie
- finally:
- self._cookies_lock.release()
-
- def extract_cookies(self, response, request, unverifiable=False):
- """Extract cookies from response, where allowable given the request.
-
- Look for allowable Set-Cookie: and Set-Cookie2: headers in the response
- object passed as argument. Any of these headers that are found are
- used to update the state of the object (subject to the policy.set_ok
- method's approval).
-
- The response object (usually be the result of a call to
- ClientCookie.urlopen, or similar) should support an info method, which
- returns a mimetools.Message object (in fact, the 'mimetools.Message
- object' may be any object that provides a getallmatchingheaders
- method).
-
- The request object (usually a urllib2.Request instance) must support
- the methods get_full_url and get_host, as documented by urllib2, and
- the attributes headers (a mapping containing the request's HTTP
- headers) and port (the port number). The request is used to set
- default values for cookie-attributes as well as for checking that the
- cookie is OK to be set.
-
- If unverifiable is true, it will be assumed that the transaction is
- unverifiable as defined by RFC 2965, and appropriate action will be
- taken.
-
- """
- debug("extract_cookies: %s" % response.info())
- self._cookies_lock.acquire()
- self.policy._now = self._now = int(time.time())
-
- for cookie in self.make_cookies(response, request):
- if self.policy.set_ok(cookie, request, unverifiable):
- debug(" setting cookie: "+str(cookie))
- self.set_cookie(cookie)
- self._cookies_lock.release()
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- """Save cookies to a file.
-
- filename: name of file in which to save cookies
- ignore_discard: save even cookies set to be discarded
- ignore_expires: save even cookies that have expired
-
- The file is overwritten if it already exists, thus wiping all its
- cookies. Saved cookies can be restored later using the load or revert
- methods. If filename is not specified, self.filename is used; if
- self.filename is None, ValueError is raised.
-
- The CookieJar base class saves a sequence of "Set-Cookie3" lines.
- "Set-Cookie3" is the format used by the libwww-perl libary, not known
- to be compatible with any browser. The MozillaCookieJar subclass can
- be used to save in a format compatible with the Netscape/Mozilla
- browsers.
-
- """
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename, "w")
- try:
- # There really isn't an LWP Cookies 2.0 format, but this indicates
- # that there is extra information in here (domain_dot and
- # port_spec) while still being compatible with libwww-perl, I hope.
- f.write("#LWP-Cookies-2.0\n")
- f.write(self.as_lwp_str(not ignore_discard, not ignore_expires))
- finally:
- f.close()
-
- def load(self, filename=None, ignore_discard=False, ignore_expires=False):
- """Load cookies from a file.
-
- Old cookies are kept unless overwritten by newly loaded ones.
-
- Cookies in the file will be loaded even if they have expired or are
- marked to be discarded.
-
- If filename is not specified, self.filename is used; if self.filename
- is None, ValueError is raised. The named file must be in the format
- understood by the class, or IOError will be raised. This format will
- be identical to that written by the save method, unless the load format
- is not sufficiently well understood (as is the case for MSIECookieJar).
-
- Note for subclassers: overridden versions of this method should not
- alter the object's state other than by calling self.set_cookie.
-
- """
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename)
- try:
- self._really_load(f, filename, ignore_discard, ignore_expires)
- finally:
- f.close()
-
- def _really_load(self, f, filename, ignore_discard, ignore_expires):
- magic = f.readline()
- if not re.search(self.magic_re, magic):
- msg = "%s does not seem to contain cookies" % filename
- raise IOError(msg)
-
- now = time.time()
-
- header = "Set-Cookie3:"
- boolean_attrs = ("port_spec", "path_spec", "domain_dot",
- "secure", "discard")
- value_attrs = ("version",
- "port", "path", "domain",
- "expires",
- "comment", "commenturl")
-
- try:
- while 1:
- line = f.readline()
- if line == "": break
- if not startswith(line, header):
- continue
- line = string.strip(line[len(header):])
-
- for data in split_header_words([line]):
- name, value = data[0]
- # name and value are an exception here, since a plain "foo"
- # (with no "=", unlike "bar=foo") means a cookie with no
- # name and value "foo". With all other cookie-attributes,
- # the situation is reversed: "foo" means an attribute named
- # "foo" with no value!
- if value is None:
- name, value = value, name
- standard = {}
- rest = {}
- for k in boolean_attrs:
- standard[k] = False
- for k, v in data[1:]:
- if k is not None:
- lc = string.lower(k)
- else:
- lc = None
- # don't lose case distinction for unknown fields
- if (lc in value_attrs) or (lc in boolean_attrs):
- k = lc
- if k in boolean_attrs:
- if v is None: v = True
- standard[k] = v
- elif k in value_attrs:
- standard[k] = v
- else:
- rest[k] = v
-
- h = standard.get
- expires = h("expires")
- discard = h("discard")
- if expires is not None:
- expires = iso2time(expires)
- if expires is None:
- discard = True
- domain = h("domain")
- domain_specified = startswith(domain, ".")
- c = Cookie(h("version"), name, value,
- h("port"), h("port_spec"),
- domain, domain_specified, h("domain_dot"),
- h("path"), h("path_spec"),
- h("secure"),
- expires,
- discard,
- h("comment"),
- h("commenturl"),
- rest)
- if not ignore_discard and c.discard:
- continue
- if not ignore_expires and c.is_expired(now):
- continue
- self.set_cookie(c)
- except:
- reraise_unmasked_exceptions((IOError,))
- raise IOError("invalid Set-Cookie3 format file %s" % filename)
-
- def revert(self, filename=None,
- ignore_discard=False, ignore_expires=False):
- """Clear all cookies and reload cookies from a saved file.
-
- Raises IOError if reversion is not successful; the object's state will
- not be altered if this happens.
-
- """
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- self._cookies_lock.acquire()
-
- old_state = copy.deepcopy(self.cookies)
- self.cookies = {}
- try:
- self.load(filename, ignore_discard, ignore_expires)
- except IOError:
- self.cookies = old_state
- raise
-
- self._cookies_lock.release()
-
- def clear(self, domain=None, path=None, name=None):
- """Clear some cookies.
-
- Invoking this method without arguments will clear all cookies. If
- given a single argument, only cookies belonging to that domain will be
- removed. If given two arguments, cookies belonging to the specified
- path within that domain are removed. If given three arguments, then
- the cookie with the specified name, path and domain is removed.
-
- Raises KeyError if no matching cookie exists.
-
- """
- if name is not None:
- if (domain is None) or (path is None):
- raise ValueError(
- "domain and path must be given to remove a cookie by name")
- del self.cookies[domain][path][name]
- elif path is not None:
- if domain is None:
- raise ValueError(
- "domain must be given to remove cookies by path")
- del self.cookies[domain][path]
- elif domain is not None:
- del self.cookies[domain]
- else:
- self.cookies = {}
-
- def clear_session_cookies(self):
- """Discard all session cookies.
-
- Discards all cookies held by object which had either no Max-Age or
- Expires cookie-attribute or an explicit Discard cookie-attribute, or
- which otherwise have ended up with a true discard attribute. For
- interactive browsers, the end of a session usually corresponds to
- closing the browser window.
-
- Note that the save method won't save session cookies anyway, unless you
- ask otherwise by passing a true ignore_discard argument.
-
- """
- self._cookies_lock.acquire()
- for cookie in self:
- if cookie.discard:
- del self.cookies[cookie.domain][cookie.path][cookie.name]
- self._cookies_lock.release()
-
- def clear_expired_cookies(self):
- """Discard all expired cookies.
-
- You probably don't need to call this method: expired cookies are never
- sent back to the server (provided you're using DefaultCookiePolicy),
- this method is called by CookieJar itself every so often, and the save
- method won't save expired cookies anyway (unless you ask otherwise by
- passing a true ignore_expires argument).
-
- """
- self._cookies_lock.acquire()
- now = time.time()
- for cookie in self:
- if cookie.is_expired(now):
- del self.cookies[cookie.domain][cookie.path][cookie.name]
- self._cookies_lock.release()
-
- def __getitem__(self, i):
- if i == 0:
- self._getitem_iterator = self.__iter__()
- elif self._prev_getitem_index != i-1: raise IndexError(
- "CookieJar.__getitem__ only supports sequential iteration")
- self._prev_getitem_index = i
- try:
- return self._getitem_iterator.next()
- except StopIteration:
- raise IndexError()
-
- def __iter__(self):
- return MappingIterator(self.cookies)
-
- def __len__(self):
- """Return number of contained cookies."""
- i = 0
- for cookie in self: i = i + 1
- return i
-
- def __repr__(self):
- r = []
- for cookie in self: r.append(repr(cookie))
- return "<%s[%s]>" % (self.__class__, string.join(r, ", "))
-
- def __str__(self):
- r = []
- for cookie in self: r.append(str(cookie))
- return "<%s[%s]>" % (self.__class__, string.join(r, ", "))
-
- def as_lwp_str(self, skip_discard=False, skip_expired=False):
- """Return cookies as a string of "\n"-separated "Set-Cookie3" headers.
-
- If skip_discard is true, it will not return lines for cookies with the
- Discard cookie-attribute.
-
- """
- now = time.time()
- r = []
- for cookie in self:
- if skip_discard and cookie.discard:
- continue
- if skip_expired and cookie.is_expired(now):
- continue
- r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
- return string.join(r+[""], "\n")
diff --git a/tools/bug_tool/ClientCookie/_Debug.py b/tools/bug_tool/ClientCookie/_Debug.py
deleted file mode 100644
index 17f050e252..0000000000
--- a/tools/bug_tool/ClientCookie/_Debug.py
+++ /dev/null
@@ -1,9 +0,0 @@
-import ClientCookie
-
-def debug(text):
- if ClientCookie.CLIENTCOOKIE_DEBUG: _debug(text)
-
-def _debug(text, *args):
- if args:
- text = text % args
- ClientCookie.DEBUG_STREAM.write(text+"\n")
diff --git a/tools/bug_tool/ClientCookie/_HeadersUtil.py b/tools/bug_tool/ClientCookie/_HeadersUtil.py
deleted file mode 100644
index da7852c4e0..0000000000
--- a/tools/bug_tool/ClientCookie/_HeadersUtil.py
+++ /dev/null
@@ -1,224 +0,0 @@
-"""HTTP header value parsing utility functions.
-
-from ClientCookie._HeadersUtil import split_header_words
-values = split_header_words(h.headers["Content-Type"])
-
-This module provides a few functions that help parsing and construction of
-valid HTTP header values.
-
-
-Copyright 1997-1998, Gisle Aas
-Copyright 2002-2003, John J. Lee
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD License (see the file COPYING included with the
-distribution).
-
-"""
-
-import re, string
-from types import StringType
-try:
- from types import UnicodeType
- STRING_TYPES = StringType, UnicodeType
-except:
- STRING_TYPES = StringType,
-
-from _Util import startswith, endswith, http2time
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-def unmatched(match):
- """Return unmatched part of re.Match object."""
- start, end = match.span(0)
- return match.string[:start]+match.string[end:]
-
-# XXX I really can't see what this =* was for (came from LWP, I guess)
-#token_re = re.compile(r"^\s*(=*[^\s=;,]+)")
-token_re = re.compile(r"^\s*([^=\s;,]+)")
-quoted_value_re = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
-value_re = re.compile(r"^\s*=\s*([^\s;,]*)")
-escape_re = re.compile(r"\\(.)")
-def split_header_words(header_values):
- r"""Parse header values into a list of lists containing key,value pairs.
-
- The function knows how to deal with ",", ";" and "=" as well as quoted
- values after "=". A list of space separated tokens are parsed as if they
- were separated by ";".
-
- If the header_values passed as argument contains multiple values, then they
- are treated as if they were a single value separated by comma ",".
-
- This means that this function is useful for parsing header fields that
- follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
- the requirement for tokens).
-
- headers = #header
- header = (token | parameter) *( [";"] (token | parameter))
-
- token = 1*<any CHAR except CTLs or separators>
- separators = "(" | ")" | "<" | ">" | "@"
- | "," | ";" | ":" | "\" | <">
- | "/" | "[" | "]" | "?" | "="
- | "{" | "}" | SP | HT
-
- quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
- qdtext = <any TEXT except <">>
- quoted-pair = "\" CHAR
-
- parameter = attribute "=" value
- attribute = token
- value = token | quoted-string
-
- Each header is represented by a list of key/value pairs. The value for a
- simple token (not part of a parameter) is None. Syntactically incorrect
- headers will not necessarily be parsed as you would want.
-
- This is easier to describe with some examples:
-
- >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
- [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
- >>> split_header_words(['text/html; charset="iso-8859-1"'])
- [[('text/html', None), ('charset', 'iso-8859-1')]]
- >>> split_header_words([r'Basic realm="\"foo\bar\""'])
- [[('Basic', None), ('realm', '"foobar"')]]
-
- """
- assert type(header_values) not in STRING_TYPES
- result = []
- for text in header_values:
- orig_text = text
- pairs = []
- while text:
- m = token_re.search(text)
- if m:
- text = unmatched(m)
- name = m.group(1)
- m = quoted_value_re.search(text)
- if m: # quoted value
- text = unmatched(m)
- value = m.group(1)
- value = escape_re.sub(r"\1", value)
- else:
- m = value_re.search(text)
- if m: # unquoted value
- text = unmatched(m)
- value = m.group(1)
- value = string.rstrip(value)
- else:
- # no value, a lone token
- value = None
- pairs.append((name, value))
- elif startswith(string.lstrip(text), ","):
- # concatenated headers, as per RFC 2616 section 4.2
- text = string.lstrip(text)[1:]
- if pairs: result.append(pairs)
- pairs = []
- else:
- # skip junk
- non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text)
- assert nr_junk_chars > 0, (
- "split_header_words bug: '%s', '%s', %s" %
- (orig_text, text, pairs))
- text = non_junk
- if pairs: result.append(pairs)
- return result
-
-join_escape_re = re.compile(r"([\"\\])")
-def join_header_words(lists):
- """Do the inverse of the conversion done by split_header_words.
-
- Takes a list of lists of (key, value) pairs and produces a single header
- value. Attribute values are quoted if needed.
-
- >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
- 'text/plain; charset="iso-8859/1"'
- >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
- 'text/plain, charset="iso-8859/1"'
-
- """
- headers = []
- for pairs in lists:
- attr = []
- for k, v in pairs:
- if v is not None:
- if not re.search(r"^\w+$", v):
- v = join_escape_re.sub(r"\\\1", v) # escape " and \
- v = '"%s"' % v
- if k is None: # Netscape cookies may have no name
- k = v
- else:
- k = "%s=%s" % (k, v)
- attr.append(k)
- if attr: headers.append(string.join(attr, "; "))
- return string.join(headers, ", ")
-
-def parse_ns_headers(ns_headers):
- """Ad-hoc parser for Netscape protocol cookie-attributes.
-
- The old Netscape cookie format for Set-Cookie can for instance contain
- an unquoted "," in the expires field, so we have to use this ad-hoc
- parser instead of split_header_words.
-
- XXX This may not make the best possible effort to parse all the crap
- that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient
- parser is probably better, so could do worse than following that if
- this ever gives any trouble.
-
- Currently, this is also used for parsing RFC 2109 cookies.
-
- """
- known_attrs = ("expires", "domain", "path", "secure",
- # RFC 2109 attrs (may turn up in Netscape cookies, too)
- "port", "max-age")
-
- result = []
- for ns_header in ns_headers:
- pairs = []
- version_set = False
- for param in re.split(r";\s*", ns_header):
- param = string.rstrip(param)
- if param == "": continue
- if "=" not in param:
- if string.lower(param) in known_attrs:
- k, v = param, None
- else:
- # cookie with missing name
- k, v = None, param
- else:
- k, v = re.split(r"\s*=\s*", param, 1)
- k = string.lstrip(k)
- if k is not None:
- lc = string.lower(k)
- if lc in known_attrs:
- k = lc
- if k == "version":
- # This is an RFC 2109 cookie. Will be treated as RFC 2965
- # cookie in rest of code.
- # Probably it should be parsed with split_header_words, but
- # that's too much hassle.
- version_set = True
- if k == "expires":
- # convert expires date to seconds since epoch
- if startswith(v, '"'): v = v[1:]
- if endswith(v, '"'): v = v[:-1]
- v = http2time(v) # None if invalid
- pairs.append((k, v))
-
- if pairs:
- if not version_set:
- pairs.append(("version", "0"))
- result.append(pairs)
-
- return result
-
-
-def _test():
- import doctest, _HeadersUtil
- return doctest.testmod(_HeadersUtil)
-
-if __name__ == "__main__":
- _test()
diff --git a/tools/bug_tool/ClientCookie/_MSIECookieJar.py b/tools/bug_tool/ClientCookie/_MSIECookieJar.py
deleted file mode 100644
index 5c2d3fcf24..0000000000
--- a/tools/bug_tool/ClientCookie/_MSIECookieJar.py
+++ /dev/null
@@ -1,377 +0,0 @@
-"""Mozilla / Netscape cookie loading / saving.
-
-Copyright 1997-1999 Gisle Aas (libwww-perl)
-Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
-Copyright 2002-2003 John J Lee <jjl@pobox.com> (The Python port)
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD License (see the file COPYING included with the
-distribution).
-
-"""
-
-import os, re, string, time, struct
-if os.name == "nt":
- import _winreg
-
-from _ClientCookie import CookieJar, Cookie, MISSING_FILENAME_TEXT
-from _Util import startswith
-from _Debug import debug
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-
-def regload(path, leaf):
- key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0, _winreg.KEY_ALL_ACCESS)
- try:
- value = _winreg.QueryValueEx(key, leaf)[0]
- except WindowsError:
- value = None
- return value
-
-WIN32_EPOCH = 0x019db1ded53e8000L # 1970 Jan 01 00:00:00 in Win32 FILETIME
-
-def epoch_time_offset_from_win32_filetime(filetime):
- """Convert from win32 filetime to seconds-since-epoch value.
-
- MSIE stores create and expire times as Win32 FILETIME, which is 64
- bits of 100 nanosecond intervals since Jan 01 1601.
-
- Cookies code expects time in 32-bit value expressed in seconds since
- the epoch (Jan 01 1970).
-
- """
- if filetime < WIN32_EPOCH:
- raise ValueError("filetime (%d) is before epoch (%d)" %
- (filetime, WIN32_EPOCH))
-
- return divmod((filetime - WIN32_EPOCH), 10000000L)[0]
-
-def binary_to_char(c): return "%02X" % ord(c)
-def binary_to_str(d): return string.join(map(binary_to_char, list(d)), "")
-
-class MSIECookieJar(CookieJar):
- """
- This class differs from CookieJar only in the format it uses to load cookies
- from a file.
-
- MSIECookieJar can read the cookie files of Microsoft Internet Explorer
- (MSIE) for Windows, versions 5 and 6, on Windows NT and XP respectively.
- Other configurations may also work, but are untested. Saving cookies in
- MSIE format is NOT supported. If you save cookies, they'll be in the usual
- Set-Cookie3 format, which you can read back in using an instance of the
- plain old CookieJar class. Don't save using the same filename that you
- loaded cookies from, because you may succeed in clobbering your MSIE
- cookies index file!
-
- You should be able to have LWP share Internet Explorer's cookies like
- this (note you need to supply a username to load_from_registry if you're on
- Windows 9x):
-
- cookies = MSIECookieJar(delayload=1)
- # find cookies index file in registry and load cookies from it
- cookies.load_from_registry()
- opener = ClientCookie.build_opener(ClientCookie.HTTPHandler(cookies))
- response = opener.open("http://foo.bar.com/")
-
- Iterating over a delayloaded MSIECookieJar instance will not cause any
- cookies to be read from disk. To force reading of all cookies from disk,
- call read_all_cookies. Note that the following methods iterate over self:
- clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
- and as_string.
-
- Additional methods:
-
- load_from_registry(ignore_discard=False, ignore_expires=False,
- username=None)
- load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
- read_all_cookies()
-
- """
- magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
- padding = "\x0d\xf0\xad\x0b"
-
- msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
- cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
- "(.+\@[\x21-\xFF]+\.txt)")
-
- # path under HKEY_CURRENT_USER from which to get location of index.dat
- reg_path = r"software\microsoft\windows" \
- r"\currentversion\explorer\shell folders"
- reg_key = "Cookies"
-
- def __init__(self, *args, **kwargs):
- apply(CookieJar.__init__, (self, args, kwargs))
- self._delayload_domains = {}
-
- def set_cookie(self, cookie):
- if self.delayload:
- self._delayload_domain(cookie.domain)
- CookieJar.set_cookie(self, cookie)
-
- def _cookies_for_domain(self, domain, request, unverifiable):
- debug("Checking %s for cookies to return" % domain)
- if not self.policy.domain_return_ok(domain, request, unverifiable):
- return []
-
- if self.delayload:
- self._delayload_domain(domain)
-
- return CookieJar._cookies_for_domain(
- self, domain, request, unverifiable)
-
- def read_all_cookies(self):
- """Eagerly read in all cookies."""
- if self.delayload:
- for domain in self._delayload_domains.keys():
- self._delayload_domain(domain)
-
- def _delayload_domain(self, domain):
- # if necessary, lazily load cookies for this domain
- delayload_info = self._delayload_domains.get(domain)
- if delayload_info is not None:
- cookie_file, ignore_discard, ignore_expires = delayload_info
- try:
- self.load_cookie_data(cookie_file,
- ignore_discard, ignore_expires)
- except IOError:
- debug("error reading cookie file, skipping: %s" % cookie_file)
- else:
- del self._delayload_domains[domain]
-
- def _load_cookies_from_file(self, filename):
- cookies = []
-
- cookies_fh = open(filename)
-
- try:
- while 1:
- key = cookies_fh.readline()
- if key == "": break
-
- rl = cookies_fh.readline
- def getlong(rl=rl): return long(rl().rstrip())
- def getstr(rl=rl): return rl().rstrip()
-
- key = key.rstrip()
- value = getstr()
- domain_path = getstr()
- flags = getlong() # 0x2000 bit is for secure I think
- lo_expire = getlong()
- hi_expire = getlong()
- lo_create = getlong()
- hi_create = getlong()
- sep = getstr()
-
- if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
- hi_create, lo_create, sep) or (sep != "*"):
- break
-
- m = self.msie_domain_re.search(domain_path)
- if m:
- domain = m.group(1)
- path = m.group(2)
-
- cookies.append({"KEY": key, "VALUE": value, "DOMAIN": domain,
- "PATH": path, "FLAGS": flags, "HIXP": hi_expire,
- "LOXP": lo_expire, "HICREATE": hi_create,
- "LOCREATE": lo_create})
- finally:
- cookies_fh.close()
-
- return cookies
-
- def load_cookie_data(self, filename,
- ignore_discard=False, ignore_expires=False):
- """Load cookies from file containing actual cookie data.
-
- Old cookies are kept unless overwritten by newly loaded ones.
-
- You should not call this method if the delayload attribute is set.
-
- I think each of these files contain all cookies for one user, domain,
- and path.
-
- filename: file containing cookies -- usually found in a file like
- C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
-
- """
- now = int(time.time())
-
- cookie_data = self._load_cookies_from_file(filename)
-
- for cookie in cookie_data:
- flags = cookie["FLAGS"]
- secure = ((flags & 0x2000) != 0)
- filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
- expires = epoch_time_offset_from_win32_filetime(filetime)
- if expires < now:
- discard = True
- else:
- discard = False
- domain = cookie["DOMAIN"]
- initial_dot = startswith(domain, ".")
- if initial_dot:
- domain_specified = True
- else:
- # MSIE 5 does not record whether the domain cookie-attribute
- # was specified.
- # Assuming it wasn't is conservative, because with strict
- # domain matching this will match less frequently; with regular
- # Netscape tail-matching, this will match at exactly the same
- # times that domain_specified = True would. It also means we
- # don't have to prepend a dot to achieve consistency with our
- # own & Mozilla's domain-munging scheme.
- domain_specified = False
-
- # assume path_specified is false
- # XXX is there other stuff in here? -- eg. comment, commentURL?
- c = Cookie(0,
- cookie["KEY"], cookie["VALUE"],
- None, False,
- domain, domain_specified, initial_dot,
- cookie["PATH"], False,
- secure,
- expires,
- discard,
- None,
- None,
- {"flags": flags})
- if not ignore_discard and c.discard:
- continue
- if not ignore_expires and c.is_expired(now):
- continue
- self.set_cookie(c)
-
- def load_from_registry(self, ignore_discard=False, ignore_expires=False,
- username=None):
- """
- username: only required on win9x
-
- """
- cookies_dir = regload(self.reg_path, self.reg_key)
- filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
- self.load(filename, ignore_discard, ignore_expires, username)
-
- def load(self, filename, ignore_discard=False, ignore_expires=False,
- username=None):
- """Load cookies from an MSIE 'index.dat' cookies index file.
-
- filename: full path to cookie index file
- username: only required on win9x
-
- """
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- index = open(filename, "rb")
-
- try:
- self._really_load(index, filename, ignore_discard, ignore_expires,
- username)
- finally:
- index.close()
-
- def _really_load(self, index, filename, ignore_discard, ignore_expires,
- username):
- now = int(time.time())
-
- if username is None:
- username = string.lower(os.environ['USERNAME'])
-
- cookie_dir = os.path.dirname(filename)
-
- data = index.read(256)
- if len(data) != 256:
- raise IOError("%s file is too short" % filename)
-
- # Cookies' index.dat file starts with 32 bytes of signature
- # followed by an offset to the first record, stored as a little-
- # endian DWORD.
- sig, size, data = data[:32], data[32:36], data[36:]
- size = struct.unpack("<L", size)[0]
-
- # check that sig is valid
- if not self.magic_re.match(sig) or size != 0x4000:
- raise IOError("%s ['%s' %s] does not seem to contain cookies" %
- (str(filename), sig, size))
-
- # skip to start of first record
- index.seek(size, 0)
-
- sector = 128 # size of sector in bytes
-
- while 1:
- data = ""
-
- # Cookies are usually in two contiguous sectors, so read in two
- # sectors and adjust if not a Cookie.
- to_read = 2 * sector
- d = index.read(to_read)
- if len(d) != to_read:
- break
- data = data + d
-
- # Each record starts with a 4-byte signature and a count
- # (little-endian DWORD) of sectors for the record.
- sig, size, data = data[:4], data[4:8], data[8:]
- size = struct.unpack("<L", size)[0]
-
- to_read = (size - 2) * sector
-
-## from urllib import quote
-## print "data", quote(data)
-## print "sig", quote(sig)
-## print "size in sectors", size
-## print "size in bytes", size*sector
-## print "size in units of 16 bytes", (size*sector) / 16
-## print "size to read in bytes", to_read
-## print
-
- if sig != "URL ":
- assert (sig in ("HASH", "LEAK",
- self.padding, "\x00\x00\x00\x00"),
- "unrecognized MSIE index.dat record: %s" %
- binary_to_str(sig))
- if sig == "\x00\x00\x00\x00":
- # assume we've got all the cookies, and stop
- break
- if sig == self.padding:
- continue
- # skip the rest of this record
- assert to_read >= 0
- if size != 2:
- assert to_read != 0
- index.seek(to_read, 1)
- continue
-
- # read in rest of record if necessary
- if size > 2:
- more_data = index.read(to_read)
- if len(more_data) != to_read: break
- data = data + more_data
-
- cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
- "(%s\@[\x21-\xFF]+\.txt)" % username)
- m = re.search(cookie_re, data, re.I)
- if m:
- cookie_file = os.path.join(cookie_dir, m.group(2))
- if not self.delayload:
- try:
- self.load_cookie_data(cookie_file,
- ignore_discard, ignore_expires)
- except IOError:
- debug("error reading cookie file, skipping: %s" %
- cookie_file)
- else:
- domain = m.group(1)
- i = domain.find("/")
- if i != -1:
- domain = domain[:i]
-
- self._delayload_domains[domain] = (
- cookie_file, ignore_discard, ignore_expires)
diff --git a/tools/bug_tool/ClientCookie/_MozillaCookieJar.py b/tools/bug_tool/ClientCookie/_MozillaCookieJar.py
deleted file mode 100644
index 13239c3c54..0000000000
--- a/tools/bug_tool/ClientCookie/_MozillaCookieJar.py
+++ /dev/null
@@ -1,171 +0,0 @@
-"""Mozilla / Netscape cookie loading / saving.
-
-Copyright 1997-1999 Gisle Aas (libwww-perl)
-Copyright 2002-2003 John J Lee <jjl@pobox.com> (The Python port)
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD License (see the file COPYING included with the
-distribution).
-
-"""
-
-import sys, re, string, time
-
-import ClientCookie
-from _ClientCookie import CookieJar, Cookie, MISSING_FILENAME_TEXT
-from _Util import startswith, endswith
-from _Debug import debug
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-try: issubclass(Exception(), (Exception,))
-except TypeError:
- real_issubclass = issubclass
- from _Util import compat_issubclass
- issubclass = compat_issubclass
- del compat_issubclass
-
-
-class MozillaCookieJar(CookieJar):
- """
-
- WARNING: you may want to backup your browser's cookies file if you use
- this class to save cookies. I *think* it works, but there have been
- bugs in the past!
-
- This class differs from CookieJar only in the format it uses to save and
- load cookies to and from a file. This class uses the Netscape/Mozilla
- `cookies.txt' format.
-
- Don't expect cookies saved while the browser is running to be noticed by
- the browser (in fact, Mozilla on unix will overwrite your saved cookies if
- you change them on disk while it's running; on Windows, you probably can't
- save at all while the browser is running).
-
- Note that the Netscape/Mozilla format will downgrade RFC2965 cookies to
- Netscape cookies on saving.
-
- In particular, the cookie version and port number information is lost,
- together with information about whether or not Path, Port and Discard were
- specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
- domain as set in the HTTP header started with a dot (yes, I'm aware some
- domains in Netscape files start with a dot and some don't -- trust me, you
- really don't want to know any more about this).
-
- Note that though Mozilla and Netscape use the same format, they use
- slightly different headers. The class saves cookies using the Netscape
- header by default (Mozilla can cope with that).
-
- """
- magic_re = "#( Netscape)? HTTP Cookie File"
- header = """\
- # Netscape HTTP Cookie File
- # http://www.netscape.com/newsref/std/cookie_spec.html
- # This is a generated file! Do not edit.
-
-"""
-
- def _really_load(self, f, filename, ignore_discard, ignore_expires):
- now = time.time()
-
- magic = f.readline()
- if not re.search(self.magic_re, magic):
- f.close()
- raise IOError(
- "%s does not look like a Netscape format cookies file" %
- filename)
-
- try:
- while 1:
- line = f.readline()
- if line == "": break
-
- # last field may be absent, so keep any trailing tab
- if endswith(line, "\n"): line = line[:-1]
-
- # skip comments and blank lines XXX what is $ for?
- if (startswith(string.strip(line), "#") or
- startswith(string.strip(line), "$") or
- string.strip(line) == ""):
- continue
-
- domain, domain_specified, path, secure, expires, name, value = \
- string.split(line, "\t")
- secure = (secure == "TRUE")
- domain_specified = (domain_specified == "TRUE")
- if name == "": name = None
-
- initial_dot = startswith(domain, ".")
- assert domain_specified == initial_dot
-
- discard = False
- if expires == "":
- expires = None
- discard = True
-
- # assume path_specified is false
- c = Cookie(0, name, value,
- None, False,
- domain, domain_specified, initial_dot,
- path, False,
- secure,
- expires,
- discard,
- None,
- None,
- {})
- if not ignore_discard and c.discard:
- continue
- if not ignore_expires and c.is_expired(now):
- continue
- self.set_cookie(c)
-
- except:
- unmasked = (KeyboardInterrupt, SystemExit)
- if ClientCookie.CLIENTCOOKIE_DEBUG:
- unmasked = (Exception,)
- etype = sys.exc_info()[0]
- if issubclass(etype, IOError) or \
- issubclass(etype, unmasked):
- raise
- raise IOError("invalid Netscape format file %s: %s" %
- (filename, line))
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- if filename is None:
- if self.filename is not None: filename = self.filename
- else: raise ValueError(MISSING_FILENAME_TEXT)
-
- f = open(filename, "w")
- try:
- f.write(self.header)
- now = time.time()
- debug("Saving Netscape cookies.txt file")
- for cookie in self:
- if not ignore_discard and cookie.discard:
- debug(" Not saving %s: marked for discard" % cookie.name)
- continue
- if not ignore_expires and cookie.is_expired(now):
- debug(" Not saving %s: expired" % cookie.name)
- continue
- if cookie.secure: secure = "TRUE"
- else: secure = "FALSE"
- if startswith(cookie.domain, "."): initial_dot = "TRUE"
- else: initial_dot = "FALSE"
- if cookie.expires is not None:
- expires = str(cookie.expires)
- else:
- expires = ""
- if cookie.name is not None:
- name = cookie.name
- else:
- name = ""
- f.write(
- string.join([cookie.domain, initial_dot, cookie.path,
- secure, expires, name, cookie.value], "\t")+
- "\n")
- finally:
- f.close()
diff --git a/tools/bug_tool/ClientCookie/_Util.py b/tools/bug_tool/ClientCookie/_Util.py
deleted file mode 100644
index f4c4e37ccf..0000000000
--- a/tools/bug_tool/ClientCookie/_Util.py
+++ /dev/null
@@ -1,459 +0,0 @@
-"""Python backwards-compat., date/time routines, seekable file object wrapper.
-
- Copyright 2002-2003 John J Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD License (see the file COPYING included with the
-distribution).
-
-"""
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-import re, string, time
-from types import TupleType
-from StringIO import StringIO
-
-try:
- from exceptions import StopIteration
-except ImportError:
- from ClientCookie._ClientCookie import StopIteration
-
-def startswith(string, initial):
- if len(initial) > len(string): return False
- return string[:len(initial)] == initial
-
-def endswith(string, final):
- if len(final) > len(string): return False
- return string[-len(final):] == final
-
-def compat_issubclass(obj, tuple_or_class):
- # for 2.1 and below
- if type(tuple_or_class) == TupleType:
- for klass in tuple_or_class:
- if issubclass(obj, klass):
- return True
- return False
- return issubclass(obj, tuple_or_class)
-
-def isstringlike(x):
- try: x+""
- except: return False
- else: return True
-
-
-try:
- from calendar import timegm
- timegm((2045, 1, 1, 22, 23, 32)) # overflows in 2.1
-except:
- # Number of days per month (except for February in leap years)
- mdays = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
-
- # Return 1 for leap years, 0 for non-leap years
- def isleap(year):
- return year % 4 == 0 and (year % 100 <> 0 or year % 400 == 0)
-
- # Return number of leap years in range [y1, y2)
- # Assume y1 <= y2 and no funny (non-leap century) years
- def leapdays(y1, y2):
- return (y2+3)/4 - (y1+3)/4
-
- EPOCH = 1970
- def timegm(tuple):
- """Unrelated but handy function to calculate Unix timestamp from GMT."""
- year, month, day, hour, minute, second = tuple[:6]
- assert year >= EPOCH
- assert 1 <= month <= 12
- days = 365*(year-EPOCH) + leapdays(EPOCH, year)
- for i in range(1, month):
- days = days + mdays[i]
- if month > 2 and isleap(year):
- days = days + 1
- days = days + day - 1
- hours = days*24 + hour
- minutes = hours*60 + minute
- seconds = minutes*60L + second
- return seconds
-
-
-# Date/time conversion routines for formats used by the HTTP protocol.
-
-EPOCH = 1970
-def my_timegm(tt):
- year, month, mday, hour, min, sec = tt[:6]
- if ((year >= EPOCH) and (1 <= month <= 12) and (1 <= mday <= 31) and
- (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):
- return timegm(tt)
- else:
- return None
-
-days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
-months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
- "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
-months_lower = []
-for month in months: months_lower.append(string.lower(month))
-
-
-def time2isoz(t=None):
- """Return a string representing time in seconds since epoch, t.
-
- If the function is called without an argument, it will use the current
- time.
-
- The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
- representing Universal Time (UTC, aka GMT). An example of this format is:
-
- 1994-11-24 08:49:37Z
-
- """
- if t is None: t = time.time()
- year, mon, mday, hour, min, sec = time.gmtime(t)[:6]
- return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
- year, mon, mday, hour, min, sec)
-
-def time2netscape(t=None):
- """Return a string representing time in seconds since epoch, t.
-
- If the function is called without an argument, it will use the current
- time.
-
- The format of the returned string is like this:
-
- Wdy, DD-Mon-YYYY HH:MM:SS GMT
-
- """
- if t is None: t = time.time()
- year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7]
- return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % (
- days[wday], mday, months[mon-1], year, hour, min, sec)
-
-
-UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}
-
-timezone_re = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
-def offset_from_tz_string(tz):
- offset = None
- if UTC_ZONES.has_key(tz):
- offset = 0
- else:
- m = timezone_re.search(tz)
- if m:
- offset = 3600 * int(m.group(2))
- if m.group(3):
- offset = offset + 60 * int(m.group(3))
- if m.group(1) == '-':
- offset = -offset
- return offset
-
-def _str2time(day, mon, yr, hr, min, sec, tz):
- # translate month name to number
- # month numbers start with 1 (January)
- try:
- mon = months_lower.index(string.lower(mon))+1
- except ValueError:
- # maybe it's already a number
- try:
- imon = int(mon)
- except ValueError:
- return None
- if 1 <= imon <= 12:
- mon = imon
- else:
- return None
-
- # make sure clock elements are defined
- if hr is None: hr = 0
- if min is None: min = 0
- if sec is None: sec = 0
-
- yr = int(yr)
- day = int(day)
- hr = int(hr)
- min = int(min)
- sec = int(sec)
-
- if yr < 1000:
- # find "obvious" year
- cur_yr = time.localtime(time.time())[0]
- m = cur_yr % 100
- tmp = yr
- yr = yr + cur_yr - m
- m = m - tmp
- if abs(m) > 50:
- if m > 0: yr = yr + 100
- else: yr = yr - 100
-
- # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
- t = my_timegm((yr, mon, day, hr, min, sec, tz))
-
- if t is not None:
- # adjust time using timezone string, to get absolute time since epoch
- if tz is None:
- tz = "UTC"
- tz = string.upper(tz)
- offset = offset_from_tz_string(tz)
- if offset is None:
- return None
- t = t - offset
-
- return t
-
-
-strict_re = re.compile(r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
-wkday_re = re.compile(
- r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
-loose_http_re = re.compile(
- r"""^
- (\d\d?) # day
- (?:\s+|[-\/])
- (\w+) # month
- (?:\s+|[-\/])
- (\d+) # year
- (?:
- (?:\s+|:) # separator before clock
- (\d\d?):(\d\d) # hour:min
- (?::(\d\d))? # optional seconds
- )? # optional clock
- \s*
- ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
- \s*
- (?:\(\w+\))? # ASCII representation of timezone in parens.
- \s*$""", re.X)
-def http2time(text):
- """Returns time in seconds since epoch of time represented by a string.
-
- Return value is an integer.
-
- None is returned if the format of str is unrecognized, the time is outside
- the representable range, or the timezone string is not recognized. The
- time formats recognized are the same as for parse_date. If the string
- contains no timezone, UTC is assumed.
-
- The timezone in the string may be numerical (like "-0800" or "+0100") or a
- string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
- timezone strings equivalent to UTC (zero offset) are known to the function.
-
- The function loosely parses the following formats:
-
- Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
- Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
- Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
- 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
- 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
- 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
-
- The parser ignores leading and trailing whitespace. The time may be
- absent.
-
- If the year is given with only 2 digits, then parse_date will select the
- century that makes the year closest to the current date.
-
- """
- # fast exit for strictly conforming string
- m = strict_re.search(text)
- if m:
- g = m.groups()
- mon = months_lower.index(string.lower(g[1])) + 1
- tt = (int(g[2]), mon, int(g[0]),
- int(g[3]), int(g[4]), float(g[5]))
- return my_timegm(tt)
-
- # No, we need some messy parsing...
-
- # clean up
- text = string.lstrip(text)
- text = wkday_re.sub("", text, 1) # Useless weekday
-
- # tz is time zone specifier string
- day, mon, yr, hr, min, sec, tz = [None]*7
-
- # loose regexp parse
- m = loose_http_re.search(text)
- if m is not None:
- day, mon, yr, hr, min, sec, tz = m.groups()
- else:
- return None # bad format
-
- return _str2time(day, mon, yr, hr, min, sec, tz)
-
-
-iso_re = re.compile(
- """^
- (\d{4}) # year
- [-\/]?
- (\d\d?) # numerical month
- [-\/]?
- (\d\d?) # day
- (?:
- (?:\s+|[-:Tt]) # separator before clock
- (\d\d?):?(\d\d) # hour:min
- (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
- )? # optional clock
- \s*
- ([-+]?\d\d?:?(:?\d\d)?
- |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
- \s*$""", re.X)
-def iso2time(text):
- """
- As for httpstr2time, but parses the ISO 8601 formats:
-
- 1994-02-03 14:15:29 -0100 -- ISO 8601 format
- 1994-02-03 14:15:29 -- zone is optional
- 1994-02-03 -- only date
- 1994-02-03T14:15:29 -- Use T as separator
- 19940203T141529Z -- ISO 8601 compact format
- 19940203 -- only date
-
- """
- # clean up
- text = string.lstrip(text)
-
- # tz is time zone specifier string
- day, mon, yr, hr, min, sec, tz = [None]*7
-
- # loose regexp parse
- m = iso_re.search(text)
- if m is not None:
- # XXX there's an extra bit of the timezone I'm ignoring here: is
- # this the right thing to do?
- yr, mon, day, hr, min, sec, tz, _ = m.groups()
- else:
- return None # bad format
-
- return _str2time(day, mon, yr, hr, min, sec, tz)
-
-
-
-# XXX Andrew Dalke kindly sent me a similar class in response to my request on
-# comp.lang.python, which I then proceeded to lose. I wrote this class
-# instead, but I think he's released his code publicly since, could pinch the
-# tests from it, at least...
-class seek_wrapper:
- """Adds a seek method to a file object.
-
- This is only designed for seeking on readonly file-like objects.
-
- Wrapped file-like object must have a read method. The readline method is
- only supported if that method is present on the wrapped object. The
- readlines method is always supported. xreadlines and iteration are
- supported only for Python 2.2 and above.
-
- Public attribute: wrapped (the wrapped file object).
-
- WARNING: All other attributes of the wrapped object (ie. those that are not
- one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
- are passed through unaltered, which may or may not make sense for your
- particular file object.
-
- """
- # General strategy is to check that cache is full enough, then delegate
- # everything to the cache (self._cache, which is a StringIO.StringIO
- # instance. Seems to be some cStringIO.StringIO problem on 1.5.2 -- I
- # get a StringOobject, with no readlines method.
-
- # Invariant: the end of the cache is always at the same place as the
- # end of the wrapped file:
- # self.wrapped.tell() == len(self._cache.getvalue())
-
- def __init__(self, wrapped):
- self.wrapped = wrapped
- self.__have_readline = hasattr(self.wrapped, "readline")
- self.__cache = StringIO()
-
- def __getattr__(self, name): return getattr(self.wrapped, name)
-
- def seek(self, offset, whence=0):
- # make sure we have read all data up to the point we are seeking to
- pos = self.__cache.tell()
- if whence == 0: # absolute
- to_read = offset - pos
- elif whence == 1: # relative to current position
- to_read = offset
- elif whence == 2: # relative to end of *wrapped* file
- # since we don't know yet where the end of that file is, we must
- # read everything
- to_read = None
- if to_read >= 0 or to_read is None:
- if to_read is None:
- self.__cache.write(self.wrapped.read())
- else:
- self.__cache.write(self.wrapped.read(to_read))
- self.__cache.seek(pos)
-
- return self.__cache.seek(offset, whence)
-
- def read(self, size=-1):
- pos = self.__cache.tell()
-
- self.__cache.seek(pos)
-
- end = len(self.__cache.getvalue())
- available = end - pos
-
- # enough data already cached?
- if size <= available and size != -1:
- return self.__cache.read(size)
-
- # no, so read sufficient data from wrapped file and cache it
- to_read = size - available
- assert to_read > 0 or size == -1
- self.__cache.seek(0, 2)
- if size == -1:
- self.__cache.write(self.wrapped.read())
- else:
- self.__cache.write(self.wrapped.read(to_read))
- self.__cache.seek(pos)
-
- return self.__cache.read(size)
-
- def readline(self, size=-1):
- if not self.__have_readline:
- raise NotImplementedError("no readline method on wrapped object")
-
- # line we're about to read might not be complete in the cache, so
- # read another line first
- pos = self.__cache.tell()
- self.__cache.seek(0, 2)
- self.__cache.write(self.wrapped.readline())
- self.__cache.seek(pos)
-
- data = self.__cache.readline()
- if size != -1:
- r = data[:size]
- self.__cache.seek(pos+size)
- else:
- r = data
- return r
-
- def readlines(self, sizehint=-1):
- pos = self.__cache.tell()
- self.__cache.seek(0, 2)
- self.__cache.write(self.wrapped.read())
- self.__cache.seek(pos)
- try:
- return self.__cache.readlines(sizehint)
- except TypeError: # 1.5.2 hack
- return self.__cache.readlines()
-
- def __iter__(self): return self
- def next(self):
- line = self.readline()
- if line == "": raise StopIteration
- return line
-
- xreadlines = __iter__
-
- def __repr__(self):
- return ("<%s at %s whose wrapped object = %s>" %
- (self.__class__.__name__, `id(self)`, `self.wrapped`))
-
- def close(self):
- self.read = None
- self.readline = None
- self.readlines = None
- self.seek = None
- if self.wrapped: self.wrapped.close()
- self.wrapped = None
diff --git a/tools/bug_tool/ClientCookie/__init__.py b/tools/bug_tool/ClientCookie/__init__.py
deleted file mode 100644
index a5d9c95f4b..0000000000
--- a/tools/bug_tool/ClientCookie/__init__.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Import names so that they can be imported directly from the package, like
-# this:
-#from ClientCookie import <whatever>
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-import sys
-
-# don't edit these here: do eg.
-# import ClientCookie; ClientCookie.HTTP_DEBUG = 1
-DEBUG_STREAM = sys.stderr
-CLIENTCOOKIE_DEBUG = False
-REDIRECT_DEBUG = False
-HTTP_DEBUG = False
-
-from _ClientCookie import VERSION, __doc__, \
- CookieJar, Cookie, \
- CookiePolicy, DefaultCookiePolicy, \
- lwp_cookie_str
-from _MozillaCookieJar import MozillaCookieJar
-from _MSIECookieJar import MSIECookieJar
-try:
- from urllib2 import AbstractHTTPHandler
-except ImportError:
- pass
-else:
- from ClientCookie._urllib2_support import \
- HTTPHandler, build_opener, install_opener, urlopen, \
- HTTPRedirectHandler
- from ClientCookie._urllib2_support import \
- OpenerDirector, BaseProcessor, \
- HTTPRequestUpgradeProcessor, \
- HTTPEquivProcessor, SeekableProcessor, HTTPCookieProcessor, \
- HTTPRefererProcessor, HTTPStandardHeadersProcessor, \
- HTTPRefreshProcessor, HTTPErrorProcessor, \
- HTTPResponseDebugProcessor
-
- import httplib
- if hasattr(httplib, 'HTTPS'):
- from ClientCookie._urllib2_support import HTTPSHandler
- del AbstractHTTPHandler, httplib
-from _Util import http2time
-str2time = http2time
-del http2time
-
-del sys
diff --git a/tools/bug_tool/ClientCookie/_urllib2_support.py b/tools/bug_tool/ClientCookie/_urllib2_support.py
deleted file mode 100644
index d767d08b25..0000000000
--- a/tools/bug_tool/ClientCookie/_urllib2_support.py
+++ /dev/null
@@ -1,713 +0,0 @@
-"""Integration with Python standard library module urllib2.
-
-Also includes a redirection bugfix, support for parsing HTML HEAD blocks for
-the META HTTP-EQUIV tag contents, and following Refresh header redirects.
-
-Copyright 2002-2003 John J Lee <jjl@pobox.com>
-
-This code is free software; you can redistribute it and/or modify it under
-the terms of the BSD License (see the file COPYING included with the
-distribution).
-
-"""
-
-import copy, time
-
-import ClientCookie
-from _ClientCookie import CookieJar, request_host
-from _Util import isstringlike
-from _Debug import _debug
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-CHUNK = 1024 # size of chunks fed to HTML HEAD parser, in bytes
-
-try:
- from urllib2 import AbstractHTTPHandler
-except ImportError:
- pass
-else:
- import urlparse, urllib2, urllib, httplib, htmllib, formatter, string
- from urllib2 import URLError, HTTPError
- import types, string, socket
- from cStringIO import StringIO
- from _Util import seek_wrapper
- try:
- import threading
- _threading = threading; del threading
- except ImportError:
- import dummy_threading
- _threading = dummy_threading; del dummy_threading
-
- # This fixes a bug in urllib2 as of Python 2.1.3 and 2.2.2
- # (http://www.python.org/sf/549151)
- # 2.2.3 is broken here (my fault!), 2.3 is fixed.
- class HTTPRedirectHandler(urllib2.BaseHandler):
- # maximum number of redirections before assuming we're in a loop
- max_redirections = 10
-
- # Implementation notes:
-
- # To avoid the server sending us into an infinite loop, the request
- # object needs to track what URLs we have already seen. Do this by
- # adding a handler-specific attribute to the Request object. The value
- # of the dict is used to count the number of times the same url has
- # been visited. This is needed because this isn't necessarily a loop:
- # there is more than one way to redirect (Refresh, 302, 303, 307).
-
- # Another handler-specific Request attribute, original_url, is used to
- # remember the URL of the original request so that it is possible to
- # decide whether or not RFC 2965 cookies should be turned on during
- # redirect.
-
- # Always unhandled redirection codes:
- # 300 Multiple Choices: should not handle this here.
- # 304 Not Modified: no need to handle here: only of interest to caches
- # that do conditional GETs
- # 305 Use Proxy: probably not worth dealing with here
- # 306 Unused: what was this for in the previous versions of protocol??
-
- def redirect_request(self, newurl, req, fp, code, msg, headers):
- """Return a Request or None in response to a redirect.
-
- This is called by the http_error_30x methods when a redirection
- response is received. If a redirection should take place, return a
- new Request to allow http_error_30x to perform the redirect;
- otherwise, return None to indicate that an HTTPError should be
- raised.
-
- """
- if code in (301, 302, 303) or (code == 307 and not req.has_data()):
- # Strictly (according to RFC 2616), 301 or 302 in response to
- # a POST MUST NOT cause a redirection without confirmation
- # from the user (of urllib2, in this case). In practice,
- # essentially all clients do redirect in this case, so we do
- # the same.
- return Request(newurl, headers=req.headers)
- else:
- raise HTTPError(req.get_full_url(), code, msg, headers, fp)
-
- def http_error_302(self, req, fp, code, msg, headers):
- if headers.has_key('location'):
- newurl = headers['location']
- elif headers.has_key('uri'):
- newurl = headers['uri']
- else:
- return
- newurl = urlparse.urljoin(req.get_full_url(), newurl)
-
- # XXX Probably want to forget about the state of the current
- # request, although that might interact poorly with other
- # handlers that also use handler-specific request attributes
- new = self.redirect_request(newurl, req, fp, code, msg, headers)
- if new is None:
- return
-
- # remember where we started from
- if hasattr(req, "original_url"):
- new.original_url = req.original_url
- else:
- new.original_url = req.get_full_url()
-
- # loop detection
- # .error_302_dict[(url, code)] is number of times url
- # previously visited as a result of a redirection with this
- # code (error_30x_dict would be a better name).
- new.origin_req_host = req.origin_req_host
- if not hasattr(req, 'error_302_dict'):
- new.error_302_dict = req.error_302_dict = {(newurl, code): 1}
- else:
- ed = new.error_302_dict = req.error_302_dict
- nr_visits = ed.get((newurl, code), 0)
- # Refreshes generate fake 302s, so we can hit the same URL as
- # a result of the same redirection code twice without
- # necessarily being in a loop! So, allow two visits to each
- # URL as a result of each redirection code.
- if len(ed) < self.max_redirections and nr_visits < 2:
- ed[(newurl, code)] = nr_visits + 1
- else:
- raise HTTPError(req.get_full_url(), code,
- self.inf_msg + msg, headers, fp)
-
- if ClientCookie.REDIRECT_DEBUG:
- _debug("redirecting to %s", newurl)
-
- # Don't close the fp until we are sure that we won't use it
- # with HTTPError.
- fp.read()
- fp.close()
-
- return self.parent.open(new)
-
- http_error_301 = http_error_303 = http_error_307 = http_error_302
-
- inf_msg = "The HTTP server returned a redirect error that would " \
- "lead to an infinite loop.\n" \
- "The last 30x error message was:\n"
-
-
- class Request(urllib2.Request):
- def __init__(self, url, data=None, headers={}):
- urllib2.Request.__init__(self, url, data, headers)
- self.unredirected_hdrs = {}
-
- def add_unredirected_header(self, key, val):
- # these headers do not persist from one request to the next in a chain
- # of requests
- self.unredirected_hdrs[string.capitalize(key)] = val
-
- def has_key(self, header_name):
- if (self.headers.has_key(header_name) or
- self.unredirected_hdrs.has_key(header_name)):
- return True
- return False
-
- def get(self, header_name, failobj=None):
- if self.headers.has_key(header_name):
- return self.headers[header_name]
- if self.unredirected_headers.has_key(header_name):
- return self.unredirected_headers[header_name]
- return failobj
-
-
- class BaseProcessor:
- processor_order = 500
-
- def add_parent(self, parent):
- self.parent = parent
- def close(self):
- self.parent = None
- def __lt__(self, other):
- if not hasattr(other, "processor_order"):
- return True
- return self.processor_order < other.processor_order
-
- class HTTPRequestUpgradeProcessor(BaseProcessor):
- # upgrade Request to class with support for headers that don't get
- # redirected
- processor_order = 0 # before anything else
-
- def http_request(self, request):
- if not hasattr(request, "add_unredirected_header"):
- request = Request(request._Request__original, request.data,
- request.headers)
- return request
-
- https_request = http_request
-
- class HTTPEquivProcessor(BaseProcessor):
- """Append META HTTP-EQUIV headers to regular HTTP headers."""
- def http_response(self, request, response):
- if not hasattr(response, "seek"):
- response = seek_wrapper(response)
- # grab HTTP-EQUIV headers and add them to the true HTTP headers
- headers = response.info()
- for hdr, val in parse_head(response):
- headers[hdr] = val
- response.seek(0)
- return response
-
- https_response = http_response
-
- # XXX ATM this only takes notice of http responses -- probably
- # should be independent of protocol scheme (http, ftp, etc.)
- class SeekableProcessor(BaseProcessor):
- """Make responses seekable."""
-
- def http_response(self, request, response):
- if not hasattr(response, "seek"):
- return seek_wrapper(response)
- return response
-
- https_response = http_response
-
- # XXX if this gets added to urllib2, unverifiable would end up as an
- # attribute on Request.
- class HTTPCookieProcessor(BaseProcessor):
- """Handle HTTP cookies."""
- def __init__(self, cookies=None):
- if cookies is None:
- cookies = CookieJar()
- self.cookies = cookies
-
- def _unverifiable(self, request):
- if hasattr(request, "error_302_dict") and request.error_302_dict:
- redirect = True
- else:
- redirect = False
- if (redirect or
- (hasattr(request, "unverifiable") and request.unverifiable)):
- unverifiable = True
- else:
- unverifiable = False
- return unverifiable
-
- def http_request(self, request):
- unverifiable = self._unverifiable(request)
- if not unverifiable:
- # Stuff request-host of this origin transaction into Request
- # object, because we need to know it to know whether cookies
- # should be in operation during derived requests (redirects,
- # specifically -- including refreshes).
- request.origin_req_host = request_host(request)
- self.cookies.add_cookie_header(request, unverifiable)
- return request
-
- def http_response(self, request, response):
- unverifiable = self._unverifiable(request)
- self.cookies.extract_cookies(response, request, unverifiable)
- return response
-
- https_request = http_request
- https_response = http_response
-
- class HTTPRefererProcessor(BaseProcessor):
- """Add Referer header to requests.
-
- This only makes sense if you use each RefererProcessor for a single
- chain of requests only (so, for example, if you use a single
- HTTPRefererProcessor to fetch a series of URLs extracted from a single
- page, this will break).
-
- """
- def __init__(self):
- self.referer = None
-
- def http_request(self, request):
- if ((self.referer is not None) and
- not request.has_key("Referer")):
- request.add_unredirected_header("Referer", self.referer)
- return request
-
- def http_response(self, request, response):
- self.referer = response.geturl()
- return response
-
- https_request = http_request
- https_response = http_response
-
- class HTTPStandardHeadersProcessor(BaseProcessor):
- def http_request(self, request):
- host = request.get_host()
- if not host:
- raise URLError('no host given')
-
- if request.has_data(): # POST
- data = request.get_data()
- if not request.has_key('Content-type'):
- request.add_unredirected_header(
- 'Content-type',
- 'application/x-www-form-urlencoded')
- if not request.has_key('Content-length'):
- request.add_unredirected_header(
- 'Content-length', '%d' % len(data))
-
- scheme, sel = urllib.splittype(request.get_selector())
- sel_host, sel_path = urllib.splithost(sel)
- if not request.has_key('Host'):
- request.add_unredirected_header('Host', sel_host or host)
- for name, value in self.parent.addheaders:
- name = string.capitalize(name)
- if not request.has_key(name):
- request.add_unredirected_header(name, value)
-
- return request
-
- https_request = http_request
-
- class HTTPResponseDebugProcessor(BaseProcessor):
- processor_order = 900 # before redirections, after everything else
-
- def http_response(self, request, response):
- if not hasattr(response, "seek"):
- response = seek_wrapper(response)
- _debug(response.read())
- _debug("*****************************************************")
- response.seek(0)
- return response
-
- https_response = http_response
-
- class HTTPRefreshProcessor(BaseProcessor):
- """Perform HTTP Refresh redirections.
-
- Note that if a non-200 HTTP code has occurred (for example, a 30x
- redirect), this processor will do nothing.
-
- By default, only zero-time Refresh headers are redirected. Use the
- max_time constructor argument to allow Refresh with longer pauses.
- Use the honor_time argument to control whether the requested pause
- is honoured (with a time.sleep()) or skipped in favour of immediate
- redirection.
-
- """
- processor_order = 1000
-
- def __init__(self, max_time=0, honor_time=True):
- self.max_time = max_time
- self.honor_time = honor_time
-
- def http_response(self, request, response):
- code, msg, hdrs = response.code, response.msg, response.info()
-
- if code == 200 and hdrs.has_key("refresh"):
- refresh = hdrs["refresh"]
- i = string.find(refresh, ";")
- if i != -1:
- pause, newurl_spec = refresh[:i], refresh[i+1:]
- i = string.find(newurl_spec, "=")
- if i != -1:
- pause = int(pause)
- if pause <= self.max_time:
- if pause != 0 and self.honor_time:
- time.sleep(pause)
- newurl = newurl_spec[i+1:]
- # fake a 302 response
- hdrs["location"] = newurl
- response = self.parent.error(
- 'http', request, response, 302, msg, hdrs)
-
- return response
-
- https_response = http_response
-
- class HTTPErrorProcessor(BaseProcessor):
- """Process non-200 HTTP error responses.
-
- This just passes the job on to the Handler.<proto>_error_<code>
- methods, via the OpenerDirector.error method.
-
- """
- processor_order = 1000
-
- def http_response(self, request, response):
- code, msg, hdrs = response.code, response.msg, response.info()
-
- if code != 200:
- response = self.parent.error(
- 'http', request, response, code, msg, hdrs)
-
- return response
-
- https_response = http_response
-
-
- class OpenerDirector(urllib2.OpenerDirector):
- # XXX might be useful to have remove_processor, too (say you want to
- # set a new RefererProcessor, but keep the old CookieProcessor --
- # could always just create everything anew, though (using old
- # CookieJar object to create CookieProcessor)
- def __init__(self):
- urllib2.OpenerDirector.__init__(self)
- #self.processors = []
- self.process_response = {}
- self.process_request = {}
-
- def add_handler(self, handler):
- # XXX
- # tidy me
- # the same handler could be added twice without detection
- added = 0
- for meth in dir(handler.__class__):
- if meth[-5:] == '_open':
- protocol = meth[:-5]
- if self.handle_open.has_key(protocol):
- self.handle_open[protocol].append(handler)
- self.handle_open[protocol].sort()
- else:
- self.handle_open[protocol] = [handler]
- added = 1
- continue
- i = string.find(meth, '_')
- j = string.find(meth[i+1:], '_') + i + 1
- if j != -1 and meth[i+1:j] == 'error':
- proto = meth[:i]
- kind = meth[j+1:]
- try:
- kind = int(kind)
- except ValueError:
- pass
- dict = self.handle_error.get(proto, {})
- if dict.has_key(kind):
- dict[kind].append(handler)
- dict[kind].sort()
- else:
- dict[kind] = [handler]
- self.handle_error[proto] = dict
- added = 1
- continue
- if meth[-9:] == "_response":
- protocol = meth[:-9]
- if self.process_response.has_key(protocol):
- self.process_response[protocol].append(handler)
- self.process_response[protocol].sort()
- else:
- self.process_response[protocol] = [handler]
- added = True
- continue
- elif meth[-8:] == "_request":
- protocol = meth[:-8]
- if self.process_request.has_key(protocol):
- self.process_request[protocol].append(handler)
- self.process_request[protocol].sort()
- else:
- self.process_request[protocol] = [handler]
- added = True
- continue
- if added:
- self.handlers.append(handler)
- self.handlers.sort()
- handler.add_parent(self)
-
-## def add_processor(self, processor):
-## added = False
-## for meth in dir(processor):
-## if meth[-9:] == "_response":
-## protocol = meth[:-9]
-## if self.process_response.has_key(protocol):
-## self.process_response[protocol].append(processor)
-## self.process_response[protocol].sort()
-## else:
-## self.process_response[protocol] = [processor]
-## added = True
-## continue
-## elif meth[-8:] == "_request":
-## protocol = meth[:-8]
-## if self.process_request.has_key(protocol):
-## self.process_request[protocol].append(processor)
-## self.process_request[protocol].sort()
-## else:
-## self.process_request[protocol] = [processor]
-## added = True
-## continue
-## if added:
-## self.processors.append(processor)
-## # XXX base class sorts .handlers, but I have no idea why
-## #self.processors.sort()
-## processor.add_parent(self)
-
- def _request(self, url_or_req, data):
- if isstringlike(url_or_req):
- req = Request(url_or_req, data)
- else:
- # already a urllib2.Request instance
- req = url_or_req
- if data is not None:
- req.add_data(data)
- return req
-
- def open(self, fullurl, data=None):
- req = self._request(fullurl, data)
- type = req.get_type()
-
- # pre-process request
- # XXX should we allow a Processor to change the type (URL
- # scheme) of the request?
- meth_name = type+"_request"
- for processor in self.process_request.get(type, []):
- meth = getattr(processor, meth_name)
- req = meth(req)
-
- response = urllib2.OpenerDirector.open(self, req, data)
-
- # post-process response
- meth_name = type+"_response"
- for processor in self.process_response.get(type, []):
- meth = getattr(processor, meth_name)
- response = meth(req, response)
-
- return response
-
-## def close(self):
-## urllib2.OpenerDirector.close(self)
-## for processor in self.processors:
-## processor.close()
-## self.processors = []
-
-
- # Note the absence of redirect and header-adding code here
- # (AbstractHTTPHandler), and the lack of other clutter that would be
- # here without Processors.
- class AbstractHTTPHandler(urllib2.BaseHandler):
- def do_open(self, http_class, req):
- host = req.get_host()
- if not host:
- raise URLError('no host given')
-
- h = http_class(host) # will parse host:port
- if ClientCookie.HTTP_DEBUG:
- h.set_debuglevel(1)
-
- if req.has_data():
- h.putrequest('POST', req.get_selector())
- else:
- h.putrequest('GET', req.get_selector())
-
- for k, v in req.headers.items():
- h.putheader(k, v)
- for k, v in req.unredirected_hdrs.items():
- h.putheader(k, v)
-
- # httplib will attempt to connect() here. be prepared
- # to convert a socket error to a URLError.
- try:
- h.endheaders()
- except socket.error, err:
- raise URLError(err)
- if req.has_data():
- h.send(req.get_data())
-
- code, msg, hdrs = h.getreply()
- fp = h.getfile()
-
- response = urllib.addinfourl(fp, hdrs, req.get_full_url())
- response.code = code
- response.msg = msg
-
- return response
-
-
- # XXX would self.reset() work, instead of raising this exception?
- class EndOfHeadError(Exception): pass
- class HeadParser(htmllib.HTMLParser):
- # only these elements are allowed in or before HEAD of document
- head_elems = ("html", "head",
- "title", "base",
- "script", "style", "meta", "link", "object")
- def __init__(self):
- htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
- self.http_equiv = []
-
- def start_meta(self, attrs):
- http_equiv = content = None
- for key, value in attrs:
- if key == "http-equiv":
- http_equiv = value
- elif key == "content":
- content = value
- if http_equiv is not None:
- self.http_equiv.append((http_equiv, content))
-
- def handle_starttag(self, tag, method, attrs):
- if tag in self.head_elems:
- method(attrs)
- else:
- raise EndOfHeadError()
-
- def handle_endtag(self, tag, method):
- if tag in self.head_elems:
- method()
- else:
- raise EndOfHeadError()
-
- def end_head(self):
- raise EndOfHeadError()
-
- def parse_head(file):
- """Return a list of key, value pairs."""
- hp = HeadParser()
- while 1:
- data = file.read(CHUNK)
- try:
- hp.feed(data)
- except EndOfHeadError:
- break
- if len(data) != CHUNK:
- # this should only happen if there is no HTML body, or if
- # CHUNK is big
- break
- return hp.http_equiv
-
-
- class HTTPHandler(AbstractHTTPHandler):
- def http_open(self, req):
- return self.do_open(httplib.HTTP, req)
-
- if hasattr(httplib, 'HTTPS'):
- class HTTPSHandler(AbstractHTTPHandler):
- def https_open(self, req):
- return self.do_open(httplib.HTTPS, req)
-
-
- def build_opener(*handlers):
- """Create an opener object from a list of handlers and processors.
-
- The opener will use several default handlers and processors, including
- support for HTTP and FTP. If there is a ProxyHandler, it must be at the
- front of the list of handlers. (Yuck. This is fixed in 2.3.)
-
- If any of the handlers passed as arguments are subclasses of the
- default handlers, the default handlers will not be used.
- """
- opener = OpenerDirector()
- default_classes = [
- # handlers
- urllib2.ProxyHandler,
- urllib2.UnknownHandler,
- HTTPHandler, # from this module (derived from new AbstractHTTPHandler)
- urllib2.HTTPDefaultErrorHandler,
- HTTPRedirectHandler, # from this module (bugfixed)
- urllib2.FTPHandler,
- urllib2.FileHandler,
- # processors
- HTTPRequestUpgradeProcessor,
- #HTTPEquivProcessor,
- #SeekableProcessor,
- HTTPCookieProcessor,
- #HTTPRefererProcessor,
- HTTPStandardHeadersProcessor,
- #HTTPRefreshProcessor,
- HTTPErrorProcessor
- ]
- if hasattr(httplib, 'HTTPS'):
- default_classes.append(HTTPSHandler)
- skip = []
- for klass in default_classes:
- for check in handlers:
- if type(check) == types.ClassType:
- if issubclass(check, klass):
- skip.append(klass)
- elif type(check) == types.InstanceType:
- if isinstance(check, klass):
- skip.append(klass)
- for klass in skip:
- default_classes.remove(klass)
-
- to_add = []
- for klass in default_classes:
- to_add.append(klass())
- for h in handlers:
- if type(h) == types.ClassType:
- h = h()
- to_add.append(h)
-
- for instance in to_add:
- opener.add_handler(instance)
-## # yuck
-## if hasattr(instance, "processor_order"):
-## opener.add_processor(instance)
-## else:
-## opener.add_handler(instance)
-
- return opener
-
-
- _opener = None
- urlopen_lock = _threading.Lock()
- def urlopen(url, data=None):
- global _opener
- if _opener is None:
- urlopen_lock.acquire()
- try:
- if _opener is None:
- _opener = build_opener()
- finally:
- urlopen_lock.release()
- return _opener.open(url, data)
-
- def install_opener(opener):
- global _opener
- _opener = opener
diff --git a/tools/bug_tool/ClientForm.py b/tools/bug_tool/ClientForm.py
deleted file mode 100644
index c42f65b313..0000000000
--- a/tools/bug_tool/ClientForm.py
+++ /dev/null
@@ -1,2699 +0,0 @@
-"""HTML form handling for web clients.
-
-ClientForm is a Python module for handling HTML forms on the client
-side, useful for parsing HTML forms, filling them in and returning the
-completed forms to the server. It has developed from a port of Gisle
-Aas' Perl module HTML::Form, from the libwww-perl library, but the
-interface is not the same.
-
-The most useful docstring is the one for HTMLForm.
-
-RFC 1866: HTML 2.0
-RFC 1867: Form-based File Upload in HTML
-RFC 2388: Returning Values from Forms: multipart/form-data
-HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
-HTML 4.01 Specification, W3C Recommendation 24 December 1999
-
-
-Copyright 2002-2003 John J. Lee <jjl@pobox.com>
-Copyright 1998-2000 Gisle Aas.
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD License (see the file COPYING included with
-the distribution).
-
-"""
-
-# XXX
-# Treat unknown controls as text controls? (this was a recent LWP
-# HTML::Form change) I guess this is INPUT with no TYPE? Check LWP
-# source and browser behaviour.
-# Support for list item ids. How to handle missing ids? (How do I deal
-# with duplicate OPTION labels ATM? Can't remember...)
-# Arrange things so can automatically PyPI-register with categories
-# without messing up 1.5.2 compatibility.
-# Tests need work.
-# Test single and multiple file upload some more on the web.
-# Does file upload work when name is missing? Sourceforge tracker form
-# doesn't like it. Check standards, and test with Apache. Test binary
-# upload with Apache.
-# Add label support for CHECKBOX and RADIO.
-# Better docs.
-# Deal with character sets properly. Not sure what the issues are here.
-# I don't *think* any encoding of control names, filenames or data is
-# necessary -- HTML spec. doesn't require it, and Mozilla Firebird 0.6
-# doesn't seem to do it.
-# Add charset parameter to Content-type headers? How to find value??
-# Get rid of MapBase, AList and MimeWriter.
-# I'm not going to fix this unless somebody tells me what real servers
-# that want this encoding actually expect: If enctype is
-# application/x-www-form-urlencoded and there's a FILE control present.
-# Strictly, it should be 'name=data' (see HTML 4.01 spec., section
-# 17.13.2), but I send "name=" ATM. What about multiple file upload??
-# Get rid of the two type-switches (for kind and click*).
-# Remove single-selection code: can be special case of multi-selection,
-# with a few variations, I think.
-# Factor out multiple-selection list code? May not be easy. Maybe like
-# this:
-
-# ListControl
-# ^
-# | MultipleListControlMixin
-# | ^
-# SelectControl /
-# ^ /
-# \ /
-# MultiSelectControl
-
-
-# Plan
-# ----
-# Maybe a 0.2.x, cleaned up a bit and with id support for list items?
-# Not sure it's worth it, really.
-# Remove toggle methods.
-# Replace by_label with choice between value / id / label /
-# element contents (see discussion with Gisle about labels on
-# libwww-perl list).
-# ...what else?
-# Work on DOMForm.
-# XForms? Don't know if there's a need here.
-
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-try: bool
-except NameError:
- def bool(expr):
- if expr: return True
- else: return False
-
-import sys, urllib, urllib2, types, string, mimetools, copy
-from urlparse import urljoin
-from cStringIO import StringIO
-try:
- import UnicodeType
-except ImportError:
- UNICODE = False
-else:
- UNICODE = True
-
-VERSION = "0.1.13"
-
-CHUNK = 1024 # size of chunks fed to parser, in bytes
-
-# This version of urlencode is from my Python 1.5.2 back-port of the
-# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
-# of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
-def urlencode(query,doseq=False,):
- """Encode a sequence of two-element tuples or dictionary into a URL query \
-string.
-
- If any values in the query arg are sequences and doseq is true, each
- sequence element is converted to a separate parameter.
-
- If the query arg is a sequence of two-element tuples, the order of the
- parameters in the output will match the order of parameters in the
- input.
- """
-
- if hasattr(query,"items"):
- # mapping objects
- query = query.items()
- else:
- # it's a bother at times that strings and string-like objects are
- # sequences...
- try:
- # non-sequence items should not work with len()
- x = len(query)
- # non-empty strings will fail this
- if len(query) and type(query[0]) != types.TupleType:
- raise TypeError()
- # zero-length sequences of all types will get here and succeed,
- # but that's a minor nit - since the original implementation
- # allowed empty dicts that type of behavior probably should be
- # preserved for consistency
- except TypeError:
- ty,va,tb = sys.exc_info()
- raise TypeError("not a valid non-string sequence or mapping "
- "object", tb)
-
- l = []
- if not doseq:
- # preserve old behavior
- for k, v in query:
- k = urllib.quote_plus(str(k))
- v = urllib.quote_plus(str(v))
- l.append(k + '=' + v)
- else:
- for k, v in query:
- k = urllib.quote_plus(str(k))
- if type(v) == types.StringType:
- v = urllib.quote_plus(v)
- l.append(k + '=' + v)
- elif UNICODE and type(v) == types.UnicodeType:
- # is there a reasonable way to convert to ASCII?
- # encode generates a string, but "replace" or "ignore"
- # lose information and "strict" can raise UnicodeError
- v = urllib.quote_plus(v.encode("ASCII","replace"))
- l.append(k + '=' + v)
- else:
- try:
- # is this a sufficient test for sequence-ness?
- x = len(v)
- except TypeError:
- # not a sequence
- v = urllib.quote_plus(str(v))
- l.append(k + '=' + v)
- else:
- # loop over the sequence
- for elt in v:
- l.append(k + '=' + urllib.quote_plus(str(elt)))
- return string.join(l, '&')
-
-def startswith(string, initial):
- if len(initial) > len(string): return False
- return string[:len(initial)] == initial
-
-def issequence(x):
- try:
- x[0]
- except (TypeError, KeyError):
- return False
- except IndexError:
- pass
- return True
-
-def isstringlike(x):
- try: x+""
- except: return False
- else: return True
-
-
-# XXX don't really want to drag this along (MapBase, AList, MimeWriter)
-
-class MapBase:
- """Mapping designed to be easily derived from.
-
- Subclass it and override __init__, __setitem__, __getitem__, __delitem__
- and keys. Nothing else should need to be overridden, unlike UserDict.
- This significantly simplifies dictionary-like classes.
-
- Also different from UserDict in that it has a redonly flag, and can be
- updated (and initialised) with a sequence of pairs (key, value).
-
- """
- def __init__(self, init=None):
- self._data = {}
- self.readonly = False
- if init is not None: self.update(init)
-
- def __getitem__(self, key):
- return self._data[key]
-
- def __setitem__(self, key, item):
- if not self.readonly:
- self._data[key] = item
- else:
- raise TypeError("object doesn't support item assignment")
-
- def __delitem__(self, key):
- if not self.readonly:
- del self._data[key]
- else:
- raise TypeError("object doesn't support item deletion")
-
- def keys(self):
- return self._data.keys()
-
- # now the internal workings, there should be no need to override these:
-
- def clear(self):
- for k in self.keys():
- del self[k]
-
- def __repr__(self):
- rep = []
- for k, v in self.items():
- rep.append("%s: %s" % (repr(k), repr(v)))
- return self.__class__.__name__+"{"+(string.join(rep, ", "))+"}"
-
- def copy(self):
- return copy.copy(self)
-
- def __cmp__(self, dict):
- # note: return value is *not* boolean
- for k, v in self.items():
- if not (dict.has_key(k) and dict[k] == v):
- return 1 # different
- return 0 # the same
-
- def __len__(self):
- return len(self.keys())
-
- def values(self):
- r = []
- for k in self.keys():
- r.append(self[k])
- return r
-
- def items(self):
- keys = self.keys()
- vals = self.values()
- r = []
- for i in len(self):
- r.append((keys[i], vals[i]))
- return r
-
- def has_key(self, key):
- return key in self.keys()
-
- def update(self, map):
- if issequence(map) and not isstringlike(map):
- items = map
- else:
- items = map.items()
- for tup in items:
- if not isinstance(tup, TupleType):
- raise TypeError(
- "MapBase.update requires a map or a sequence of pairs")
- k, v = tup
- self[k] = v
-
- def get(self, key, failobj=None):
- if key in self.keys():
- return self[key]
- else:
- return failobj
-
- def setdefault(self, key, failobj=None):
- if not self.has_key(key):
- self[key] = failobj
- return self[key]
-
-
-class AList(MapBase):
- """Read-only ordered mapping."""
- def __init__(self, seq=[]):
- self.readonly = True
- self._inverted = False
- self._data = list(seq[:])
- self._keys = []
- self._values = []
- for key, value in seq:
- self._keys.append(key)
- self._values.append(value)
-
- def set_inverted(self, inverted):
- if (inverted and not self._inverted) or (
- not inverted and self._inverted):
- self._keys, self._values = self._values, self._keys
- if inverted: self._inverted = True
- else: self._inverted = False
-
- def __getitem__(self, key):
- try:
- i = self._keys.index(key)
- except ValueError:
- raise KeyError(key)
- return self._values[i]
-
- def __delitem__(self, key):
- try:
- i = self._keys.index[key]
- except ValueError:
- raise KeyError(key)
- del self._values[i]
-
- def keys(self): return list(self._keys[:])
- def values(self): return list(self._values[:])
- def items(self):
- data = self._data[:]
- if not self._inverted:
- return data
- else:
- newdata = []
- for k, v in data:
- newdata.append((v, k))
- return newdata
-
-
-# This cut-n-pasted MimeWriter from standard library is here so can add
-# to HTTP headers rather than message body when appropriate. It also uses
-# \r\n in place of \n. This is nasty.
class MimeWriter:

    """Generic MIME writer that emits CRLF line endings.

    Methods:

    __init__()
    addheader()
    flushheaders()
    startbody()
    startmultipartbody()
    nextpart()
    lastpart()

    A MIME writer is much more primitive than a MIME parser.  It
    doesn't seek around on the output file, and it doesn't use large
    amounts of buffer space, so you have to write the parts in the
    order they should occur on the output file.  It does buffer the
    headers you add, allowing you to rearrange their order.

    General usage is:

    f = <open the output file>
    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    followed by either:

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or:

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter's methods to create the subpart...
    w.lastpart()

    The subwriter is another MimeWriter instance, and should be
    treated in the same way as the toplevel MimeWriter.  This way,
    writing recursive body parts is easy.

    Warning: don't forget to call lastpart()!

    XXX There should be more state so calls made in the wrong order
    are detected.

    Some special cases:

    - startbody() just returns the file passed to the constructor;
      but don't use this knowledge, as it may be changed.

    - startmultipartbody() actually returns a file as well;
      this can be used to write the initial 'if you can read this your
      mailer is not MIME-aware' message.

    - If you call flushheaders(), the headers accumulated so far are
      written out (and forgotten); this is useful if you don't need a
      body part at all, e.g. for a subpart of type message/rfc822
      that's (mis)used to store some header-like information.

    - Passing a keyword argument 'prefix=<flag>' to addheader(),
      start*body() affects where the header is inserted; 0 means
      append at the end, 1 means insert at the start; default is
      append for addheader(), but insert for start*body(), which use
      it to determine where the Content-type header goes.

    - Passing a true 'add_to_http_hdrs' sends the header to the http_hdrs
      list given to the constructor (as a (key, value) pair) instead of
      writing it to the output file.

    """

    def __init__(self, fp, http_hdrs=None):
        """
        fp: output file-like object (needs write() and writelines())
        http_hdrs: optional list that receives (key, value) pairs for headers
         added with add_to_http_hdrs true

        """
        self._http_hdrs = http_hdrs
        self._fp = fp
        self._headers = []
        self._boundary = []
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer a header, or append it to the HTTP header list.

        prefix is ignored if add_to_http_hdrs is true.

        """
        # String methods replace the deprecated string-module functions.
        lines = value.split("\r\n")
        # Drop empty lines at both ends of the value.
        while lines and not lines[-1]:
            del lines[-1]
        while lines and not lines[0]:
            del lines[0]
        if add_to_http_hdrs:
            # HTTP header values are stored unfolded, as a single line.
            self._http_hdrs.append((key, "".join(lines)))
            return
        # Continuation lines are re-indented so the header stays folded.
        for i in range(1, len(lines)):
            lines[i] = " " + lines[i].strip()
        line = key + ": " + "\r\n".join(lines) + "\r\n"
        if prefix:
            self._headers.insert(0, line)
        else:
            self._headers.append(line)

    def flushheaders(self):
        """Write out the buffered headers and forget them."""
        self._fp.writelines(self._headers)
        self._headers = []

    def startbody(self, ctype=None, plist=None, prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Emit the headers (plus Content-type, if wanted); return the file.

        ctype: content type; no Content-type header is added if it is false
        plist: sequence of (name, value) content-type parameters
        content_type: pass a false value to suppress the Content-type header

        prefix is ignored if add_to_http_hdrs is true.

        """
        if content_type and ctype:
            for name, value in plist or ():
                ctype = ctype + ';\r\n %s=\"%s\"' % (name, value)
            self.addheader("Content-type", ctype, prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        # The blank separator line belongs to the message body only.
        if not add_to_http_hdrs:
            self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=None, prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body and return the output file.

        A boundary is chosen with mimetools.choose_boundary() when none is
        given; it is remembered for nextpart() and lastpart().

        """
        boundary = boundary or mimetools.choose_boundary()
        self._boundary.append(boundary)
        return self.startbody("multipart/" + subtype,
                              [("boundary", boundary)] + list(plist or []),
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the next boundary line and return a writer for that part."""
        boundary = self._boundary[-1]
        if self._first_part:
            self._first_part = False
        else:
            # Terminate the previous part's data before the delimiter.
            self._fp.write("\r\n")
        self._fp.write("--" + boundary + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing boundary of the current multipart body."""
        if self._first_part:
            # No part was started: emit one (empty) part first.
            self.nextpart()
        boundary = self._boundary.pop()
        self._fp.write("\r\n--" + boundary + "--\r\n")
-
-
class ControlNotFoundError(ValueError):
    """ValueError subclass used when a control cannot be found."""


class ItemNotFoundError(ValueError):
    """ValueError subclass used when a list item cannot be found."""


class ItemCountError(ValueError):
    """ValueError subclass used when the number of items is unacceptable."""


class ParseError(Exception):
    """Reported by the form parser when the HTML confuses it."""
-
-
def ParseResponse(response, select_default=False, ignore_errors=False):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of urllib2.urlopen can be conveniently passed to this
    function as the response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the base URI of the HTTP response
    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    ignore_errors: don't raise ParseError, and carry on regardless if the
     parser gets confused

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    Precisely what ignore_errors does isn't well-defined yet, so don't rely too
    much on the current behaviour -- if you want robustness, you're better off
    fixing the HTML before passing it to this function.

    """
    # ignore_errors must be forwarded, otherwise the documented flag has no
    # effect and ParseFile always uses its own default.
    return ParseFile(response, response.geturl(), select_default,
                     ignore_errors)
-
def ParseFile(file, base_uri, select_default=False, ignore_errors=False):
    """Parse HTML and return a list of HTMLForm instances.

    ClientForm.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the base URI of the document

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    fp = _FORM_PARSER_CLASS(ignore_errors)
    while 1:
        data = file.read(CHUNK)
        # Only an empty read means end of input: a file-like object may hand
        # back fewer than CHUNK characters before the data is exhausted, so a
        # short read must not end the loop.
        if not data:
            break
        fp.feed(data)
    forms = []
    for (name, action, method, enctype), attrs, controls in fp.forms:
        # A FORM without an ACTION attribute submits to the document itself.
        if action is None:
            action = base_uri
        else:
            action = urljoin(base_uri, action)
        form = HTMLForm(action, method, enctype, name, attrs)
        for control_type, control_name, control_attrs in controls:
            form.new_control(control_type, control_name, control_attrs,
                             select_default=select_default)
        forms.append(form)
    # fixup() is called once every control of every form has been added.
    for form in forms:
        form.fixup()
    return forms
-
-
-class _AbstractFormParser:
- """forms attribute contains HTMLForm instances on completion."""
- # pinched (and modified) from Moshe Zadka
- def __init__(self, ignore_errors, entitydefs=None):
- if entitydefs is not None:
- self.entitydefs = entitydefs
- self._ignore_errors = ignore_errors
- self.forms = []
- self._current_form = None
- self._select = None
- self._optgroup = None
- self._option = None
- self._textarea = None
-
- def error(self, error):
- if not self._ignore_errors: raise error
-
- def start_form(self, attrs):
- if self._current_form is not None:
- self.error(ParseError("nested FORMs"))
- name = None
- action = None
- enctype = "application/x-www-form-urlencoded"
- method = "GET"
- d = {}
- for key, value in attrs:
- if key == "name":
- name = value
- elif key == "action":
- action = value
- elif key == "method":
- method = string.upper(value)
- elif key == "enctype":
- enctype = string.lower(value)
- else:
- d[key] = value
- controls = []
- self._current_form = (name, action, method, enctype), d, controls
-
- def end_form(self):
- if self._current_form is None:
- self.error(ParseError("end of FORM before start"))
- self.forms.append(self._current_form)
- self._current_form = None
-
- def start_select(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of SELECT before start of FORM"))
- if self._select is not None:
- self.error(ParseError("nested SELECTs"))
- if self._textarea is not None:
- self.error(ParseError("SELECT inside TEXTAREA"))
- d = {}
- for key, val in attrs:
- d[key] = val
-
- self._select = d
-
- self._append_select_control({"__select": d})
-
- def end_select(self):
- if self._current_form is None:
- self.error(ParseError("end of SELECT before start of FORM"))
- if self._select is None:
- self.error(ParseError("end of SELECT before start"))
-
- if self._option is not None:
- self._end_option()
-
- self._select = None
-
- def start_optgroup(self, attrs):
- if self._select is None:
- self.error(ParseError("OPTGROUP outside of SELECT"))
- d = {}
- for key, val in attrs:
- d[key] = val
-
- self._optgroup = d
-
- def end_optgroup(self):
- if self._optgroup is None:
- self.error(ParseError("end of OPTGROUP before start"))
- self._optgroup = None
-
- def _start_option(self, attrs):
- if self._select is None:
- self.error(ParseError("OPTION outside of SELECT"))
- if self._option is not None:
- self._end_option()
-
- d = {}
- for key, val in attrs:
- d[key] = val
-
- self._option = {}
- self._option.update(d)
- if (self._optgroup and self._optgroup.has_key("disabled") and
- not self._option.has_key("disabled")):
- self._option["disabled"] = None
-
- def _end_option(self):
- if self._option is None:
- self.error(ParseError("end of OPTION before start"))
-
- contents = string.strip(self._option.get("contents", ""))
- #contents = string.strip(self._option["contents"])
- self._option["contents"] = contents
- if not self._option.has_key("value"):
- self._option["value"] = contents
- if not self._option.has_key("label"):
- self._option["label"] = contents
- # stuff dict of SELECT HTML attrs into a special private key
- # (gets deleted again later)
- self._option["__select"] = self._select
- self._append_select_control(self._option)
- self._option = None
-
- def _append_select_control(self, attrs):
- controls = self._current_form[2]
- name = self._select.get("name")
- controls.append(("select", name, attrs))
-
-## def do_option(self, attrs):
-## if self._select is None:
-## self.error(ParseError("OPTION outside of SELECT"))
-## d = {}
-## for key, val in attrs:
-## d[key] = val
-
-## self._option = {}
-## self._option.update(d)
-## if (self._optgroup and self._optgroup.has_key("disabled") and
-## not self._option.has_key("disabled")):
-## self._option["disabled"] = None
-
- def start_textarea(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of TEXTAREA before start of FORM"))
- if self._textarea is not None:
- self.error(ParseError("nested TEXTAREAs"))
- if self._select is not None:
- self.error(ParseError("TEXTAREA inside SELECT"))
- d = {}
- for key, val in attrs:
- d[key] = val
-
- self._textarea = d
-
- def end_textarea(self):
- if self._current_form is None:
- self.error(ParseError("end of TEXTAREA before start of FORM"))
- if self._textarea is None:
- self.error(ParseError("end of TEXTAREA before start"))
- controls = self._current_form[2]
- name = self._textarea.get("name")
- controls.append(("textarea", name, self._textarea))
- self._textarea = None
-
- def handle_data(self, data):
- if self._option is not None:
- # self._option is a dictionary of the OPTION element's HTML
- # attributes, but it has two special keys, one of which is the
- # special "contents" key contains text between OPTION tags (the
- # other is the "__select" key: see the end_option method)
- map = self._option
- key = "contents"
- elif self._textarea is not None:
- map = self._textarea
- key = "value"
- else:
- return
-
- if not map.has_key(key):
- map[key] = data
- else:
- map[key] = map[key] + data
-
-## def handle_data(self, data):
-## if self._option is not None:
-## contents = string.strip(data)
-## controls = self._current_form[2]
-## if not self._option.has_key("value"):
-## self._option["value"] = contents
-## if not self._option.has_key("label"):
-## self._option["label"] = contents
-## # self._option is a dictionary of the OPTION element's HTML
-## # attributes, but it has two special keys:
-## # 1. special "contents" key contains text between OPTION tags
-## self._option["contents"] = contents
-## # 2. stuff dict of SELECT HTML attrs into a special private key
-## # (gets deleted again later)
-## self._option["__select"] = self._select
-## self._append_select_control(self._option)
-## self._option = None
-## elif self._textarea is not None:
-## #self._textarea["value"] = data
-## if self._textarea.get("value") is None:
-## self._textarea["value"] = data
-## else:
-## self._textarea["value"] = self._textarea["value"] + data
-
- def do_button(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of BUTTON before start of FORM"))
- d = {}
- d["type"] = "submit" # default
- for key, val in attrs:
- d[key] = val
- controls = self._current_form[2]
-
- type = d["type"]
- name = d.get("name")
- # we don't want to lose information, so use a type string that
- # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
- # eg. type for BUTTON/RESET is "resetbutton"
- # (type for INPUT/RESET is "reset")
- type = type+"button"
- controls.append((type, name, d))
-
- def do_input(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of INPUT before start of FORM"))
- d = {}
- d["type"] = "text" # default
- for key, val in attrs:
- d[key] = val
- controls = self._current_form[2]
-
- type = d["type"]
- name = d.get("name")
- controls.append((type, name, d))
-
- def do_isindex(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of ISINDEX before start of FORM"))
- d = {}
- for key, val in attrs:
- d[key] = val
- controls = self._current_form[2]
-
- # isindex doesn't have type or name HTML attributes
- controls.append(("isindex", None, d))
-
-# use HTMLParser if we have it (it does XHTML), htmllib otherwise
try:
    import HTMLParser
except ImportError:
    import htmllib
    import formatter

    class _FormParser(_AbstractFormParser, htmllib.HTMLParser):
        # This is still here for compatibility with Python 1.5.2.
        # It doesn't do the right thing with XHTML.
        def __init__(self, ignore_errors, entitydefs=None):
            htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
            _AbstractFormParser.__init__(self, ignore_errors, entitydefs)

        def do_option(self, attrs):
            # OPTION is handled as a start-only tag here; the abstract
            # parser closes it when the next OPTION or </select> arrives.
            _AbstractFormParser._start_option(self, attrs)

    _FORM_PARSER_CLASS = _FormParser
else:
    class _XHTMLCompatibleFormParser(_AbstractFormParser,
                                     HTMLParser.HTMLParser):
        # thanks to Michael Howitz for this!
        def __init__(self, ignore_errors, entitydefs=None):
            HTMLParser.HTMLParser.__init__(self)
            _AbstractFormParser.__init__(self, ignore_errors, entitydefs)

        def start_option(self, attrs):
            _AbstractFormParser._start_option(self, attrs)

        def end_option(self):
            _AbstractFormParser._end_option(self)

        def handle_starttag(self, tag, attrs):
            # Dispatch to start_<tag> if present, else do_<tag>; tags with
            # neither handler are unknown and ignored.
            for prefix in ('start_', 'do_'):
                handler = getattr(self, prefix + tag, None)
                if handler is not None:
                    handler(attrs)
                    return

        def handle_endtag(self, tag):
            handler = getattr(self, 'end_' + tag, None)
            if handler is not None:
                handler()
            # otherwise: unknown tag

        # handle_charref, handle_entityref and default entitydefs are taken
        # from sgmllib
        def handle_charref(self, name):
            try:
                code = int(name)
            except ValueError:
                code = -1  # not a decimal number: falls out of range below
            if 0 <= code <= 255:
                self.handle_data(chr(code))
            else:
                self.unknown_charref(name)

        # Definition of entities -- derived classes may override
        entitydefs = \
                {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}

        def handle_entityref(self, name):
            known = self.entitydefs
            if name not in known:
                self.unknown_entityref(name)
                return
            self.handle_data(known[name])

        # These methods would have passed through the ref intact if I'd thought
        # of it earlier, but since the old parser silently swallows unknown
        # refs, so does this new parser.
        def unknown_entityref(self, ref): pass
        def unknown_charref(self, ref): pass

    _FORM_PARSER_CLASS = _XHTMLCompatibleFormParser
-
-
class Control:
    """An HTML form control.

    An HTMLForm contains a sequence of Controls.  HTMLForm delegates lots of
    things to Control objects, and most of Control's methods are, in effect,
    documented by the HTMLForm docstrings.

    The Controls in an HTMLForm can be got at via the HTMLForm.find_control
    method or the HTMLForm.controls attribute.

    Control instances are usually constructed using the ParseFile /
    ParseResponse functions, so you can probably ignore the rest of this
    paragraph.  A Control is only properly initialised after the fixup method
    has been called.  In fact, this is only strictly necessary for ListControl
    instances.  This is necessary because ListControls are built up from
    ListControls each containing only a single item, and their initial value(s)
    can only be known after the sequence is complete.

    The types and values that are acceptable for assignment to the value
    attribute are defined by subclasses.

    If the disabled attribute is true, this represents the state typically
    represented by browsers by `greying out' a control.  If the disabled
    attribute is true, the Control will raise AttributeError if an attempt is
    made to change its value.  In addition, the control will not be considered
    `successful' as defined by the W3C HTML 4 standard -- ie. it will
    contribute no data to the return value of the HTMLForm.click* methods.  To
    enable a control, set the disabled attribute to a false value.

    If the readonly attribute is true, the Control will raise AttributeError if
    an attempt is made to change its value.  To make a control writable, set
    the readonly attribute to a false value.

    All controls have the disabled and readonly attributes, not only those that
    may have the HTML attributes of the same names.

    On assignment to the value attribute, the following exceptions are raised:
    TypeError, AttributeError (if the value attribute should not be assigned
    to, because the control is disabled, for example) and ValueError.

    If the name or value attributes are None, or the value is an empty list, or
    if the control is disabled, the control is not successful.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs):
        """
        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Append this control to the form's list of controls."""
        form.controls.append(self)

    def fixup(self):
        """Hook for work to be done after all controls are added; no-op."""
        pass

    # Attribute access is left to subclasses.
    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        raise NotImplementedError()

    def _write_mime_data(self, mw):
        """Write data for this control to a MimeWriter."""
        # called by HTMLForm
        # One MIME part is written per (name, value) pair.
        for field_name, field_value in self.pairs():
            part = mw.nextpart()
            disposition = 'form-data; name="%s"' % field_name
            part.addheader("Content-disposition", disposition, 1)
            body = part.startbody(prefix=0)
            body.write(field_value)

    def __str__(self):
        raise NotImplementedError()
-
-
-#---------------------------------------------------
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls don't accept any value attribute.  Otherwise, takes a
    single value, which must be string-like.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs):
        # type and name are read-only through __setattr__, so they are
        # written straight into the instance dictionary.
        self.__dict__["type"] = type.lower()
        self.__dict__["name"] = name
        self._value = attrs.get("value")
        # Presence of the HTML attribute is what counts, not its value.
        self.disabled = "disabled" in attrs
        self.readonly = "readonly" in attrs
        self.id = attrs.get("id")

        self.attrs = attrs.copy()

        self._clicked = False

    def __getattr__(self, name):
        # Only reached for names not found by normal lookup; "value" is
        # served from the privately stored _value.
        if name == "value":
            return self.__dict__["_value"]
        else:
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """Set an attribute, guarding value, name and type.

        Raises TypeError if value is assigned something not string-like, and
        AttributeError if the control is readonly or disabled, or if name or
        type is assigned to.

        """
        if name == "value":
            if not isstringlike(value):
                raise TypeError("must assign a string")
            elif self.readonly:
                raise AttributeError("control '%s' is readonly" % self.name)
            elif self.disabled:
                raise AttributeError("control '%s' is disabled" % self.name)
            self.__dict__["_value"] = value
        elif name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        else:
            self.__dict__[name] = value

    def pairs(self):
        """Return [(name, value)], or [] if the control is not successful."""
        name = self.name
        value = self.value
        if name is None or value is None or self.disabled:
            return []
        return [(name, value)]

    def __str__(self):
        name = self.name
        value = self.value
        if name is None: name = "<None>"
        if value is None: value = "<None>"

        infos = []
        if self.disabled: infos.append("disabled")
        if self.readonly: infos.append("readonly")
        info = ", ".join(infos)
        if info: info = " (%s)" % info

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
-
-
-#---------------------------------------------------
class TextControl(ScalarControl):
    """Textual input control.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/FILE
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        # Hidden fields are made read-only.
        if self.type == "hidden":
            self.readonly = True
        # A missing value is stored as the empty string.
        if self._value is None:
            self._value = ""
-
-
-#---------------------------------------------------
class FileControl(ScalarControl):
    """File upload with INPUT TYPE=FILE.

    The value attribute of a FileControl is always None.

    Additional public method: add_file

    """
    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        self._value = None
        # list of (file_object, content_type, filename) tuples
        self._upload_data = []

    def __setattr__(self, name, value):
        if name in ("value", "name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        else:
            self.__dict__[name] = value

    def add_file(self, file_object, content_type=None, filename=None):
        """Queue a file for upload.

        file_object: object with a read method
        content_type: None (meaning "application/octet-stream") or string-like
        filename: None or string-like

        Raises TypeError if an argument has the wrong type.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        if content_type is not None and not isstringlike(content_type):
            raise TypeError("content type must be None or string-like")
        if filename is not None and not isstringlike(filename):
            raise TypeError("filename must be None or string-like")
        if content_type is None:
            content_type = "application/octet-stream"
        self._upload_data.append((file_object, content_type, filename))

    def pairs(self):
        # XXX should it be successful even if unnamed?
        if self.name is None or self.disabled:
            return []
        return [(self.name, "")]

    def _filename_param(self, filename):
        # Content-disposition filename parameter, or "" if there is no name.
        if filename:
            return '; filename="%s"' % filename
        return ''

    def _write_mime_data(self, mw):
        # called by HTMLForm
        if len(self._upload_data) == 1:
            # single file
            file_object, content_type, filename = self._upload_data[0]
            mw2 = mw.nextpart()
            disp = 'form-data; name="%s"%s' % (
                self.name, self._filename_param(filename))
            mw2.addheader("Content-disposition", disp, prefix=1)
            fh = mw2.startbody(content_type, prefix=0)
            fh.write(file_object.read())
        elif len(self._upload_data) != 0:
            # multiple files: one multipart/mixed part holding a subpart
            # per file
            mw2 = mw.nextpart()
            disp = 'form-data; name="%s"' % self.name
            mw2.addheader("Content-disposition", disp, prefix=1)
            mw2.startmultipartbody("mixed", prefix=0)
            for file_object, content_type, filename in self._upload_data:
                mw3 = mw2.nextpart()
                disp = 'file%s' % self._filename_param(filename)
                mw3.addheader("Content-disposition", disp, prefix=1)
                fh2 = mw3.startbody(content_type, prefix=0)
                fh2.write(file_object.read())
            mw2.lastpart()

    def __str__(self):
        name = self.name
        if name is None: name = "<None>"

        if not self._upload_data:
            value = "<No files added>"
        else:
            names = []
            for file_object, ctype, filename in self._upload_data:
                if filename is None:
                    names.append("<Unnamed file>")
                else:
                    names.append(filename)
            value = ", ".join(names)

        info = []
        if self.disabled: info.append("disabled")
        if self.readonly: info.append("readonly")
        info = ", ".join(info)
        if info: info = " (%s)" % info

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
-
-
-#---------------------------------------------------
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd-one-out of HTML form controls.  In fact, it isn't really
    part of regular HTML forms at all, and predates it.  You're only allowed
    one ISINDEX per HTML document.  ISINDEX and regular form submission are
    mutually exclusive -- either submit a form, or the ISINDEX.

    Having said this, since ISINDEX controls may appear in forms (which is
    probably bad HTML), ParseFile / ParseResponse will include them in the
    HTMLForm instances it returns.  You can set the ISINDEX's value, as with
    any other control (but note that ISINDEX controls have no name, so you'll
    need to use the type argument of set_value!).  When you submit the form,
    the ISINDEX will not be successful (ie., no data will get returned to the
    server as a result of its presence), unless you click on the ISINDEX
    control, in which case the ISINDEX gets submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    urllib2.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  If you want to submit one
    by hand, do it like so:

    url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
    result = urllib2.urlopen(url)

    """
    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        # A missing value is stored as the empty string.
        if self._value is None:
            self._value = ""

    def pairs(self):
        # ISINDEX never contributes data to a regular form submission.
        return []

    def _click(self, form, coord, return_type):
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
        # deprecated in 4.01, but it should still say how to submit it).
        # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
        query = "?" + urllib.quote_plus(self.value)
        url = urljoin(form.action, query)

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return url, None, []
        return urllib2.Request(url)

    def __str__(self):
        shown = self.value
        if shown is None:
            shown = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s)%s>" % (self.__class__.__name__, shown, suffix)
-
-
-#---------------------------------------------------
class IgnoreControl(ScalarControl):
    """Control that we're not interested in.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    These controls are always unsuccessful, in the terminology of HTML 4 (ie.
    they never require any information to be returned to the server).

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs):
        ScalarControl.__init__(self, type, name, attrs)
        # Whatever value the HTML gave is discarded.
        self._value = None

    def __setattr__(self, name, value):
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        self.__dict__[name] = value
-
-
-#---------------------------------------------------
-class ListControl(Control):
- """Control representing a sequence of items.
-
- The value attribute of a ListControl represents the selected list items in
- the control.
-
- ListControl implements both list controls that take a single value and
- those that take multiple values.
-
- ListControls accept sequence values only. Some controls only accept
- sequences of length 0 or 1 (RADIO, and single-selection SELECT).
- In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
- and multiple-selection SELECTs (those having the "multiple" HTML attribute)
- accept sequences of any length.
-
- Note the following mistake:
-
- control.value = some_value
- assert control.value == some_value # not necessarily true
-
- The reason for this is that the value attribute always gives the list items
- in the order they were listed in the HTML.
-
- ListControl items can also be referred to by their labels instead of names.
- Use the by_label argument, and the set_value_by_label, get_value_by_label
- methods.
-
- XXX RadioControl and CheckboxControl don't implement by_label yet.
-
- Note that, rather confusingly, though SELECT controls are represented in
- HTML by SELECT elements (which contain OPTION elements, representing
- individual list items), CHECKBOXes and RADIOs are not represented by *any*
- element. Instead, those controls are represented by a collection of INPUT
- elements. For example, this is a SELECT control, named "control1":
-
- <select name="control1">
- <option>foo</option>
- <option value="1">bar</option>
- </select>
-
- and this is a CHECKBOX control, named "control2":
-
- <input type="checkbox" name="control2" value="foo" id="cbe1">
- <input type="checkbox" name="control2" value="bar" id="cbe2">
-
- The id attribute of a CHECKBOX or RADIO ListControl is always that of its
- first element (for example, "cbe1" above).
-
-
- Additional read-only public attribute: multiple.
-
-
- ListControls are built up by the parser from their component items by
- creating one ListControl per item, consolidating them into a single master
- ListControl held by the HTMLForm:
-
- -User calls form.new_control(...)
- -Form creates Control, and calls control.add_to_form(self).
- -Control looks for a Control with the same name and type in the form, and
- if it finds one, merges itself with that control by calling
- control.merge_control(self). The first Control added to the form, of a
- particular name and type, is the only one that survives in the form.
- -Form calls control.fixup for all its controls. ListControls in the form
- know they can now safely pick their default values.
-
- To create a ListControl without an HTMLForm, use:
-
- control.merge_control(new_control)
-
- """
- def __init__(self, type, name, attrs={}, select_default=False,
- called_as_base_class=False):
- """
- select_default: for RADIO and multiple-selection SELECT controls, pick
- the first item as the default if no 'selected' HTML attribute is
- present
-
- """
- if not called_as_base_class:
- raise NotImplementedError()
-
- self.__dict__["type"] = string.lower(type)
- self.__dict__["name"] = name
- self._value = attrs.get("value")
- self.disabled = False
- self.readonly = False
- self.id = attrs.get("id")
-
- self._attrs = attrs.copy()
- # As Controls are merged in with .merge_control(), self._attrs will
- # refer to each Control in turn -- always the most recently merged
- # control. Each merged-in Control instance corresponds to a single
- # list item: see ListControl.__doc__.
- if attrs:
- self._attrs_list = [self._attrs] # extended by .merge_control()
- self._disabled_list = [self._attrs.has_key("disabled")] # ditto
- else:
- self._attrs_list = [] # extended by .merge_control()
- self._disabled_list = [] # ditto
-
- self._select_default = select_default
- self._clicked = False
- # Some list controls can have their default set only after all items
- # are known. If so, self._value_is_set is false, and the self.fixup
- # method, called after all items have been added, sets the default.
- self._value_is_set = False
-
- def _value_from_label(self, label):
- raise NotImplementedError("control '%s' does not yet support "
- "by_label" % self.name)
-
- def toggle(self, name, by_label=False):
- return self._set_selected_state(name, 2, by_label)
- def set(self, selected, name, by_label=False):
- action = int(bool(selected))
- return self._set_selected_state(name, action, by_label)
-
- def _set_selected_state(self, name, action, by_label):
- """
- name: item name
- action:
- 0: clear
- 1: set
- 2: toggle
-
- """
- if not isstringlike(name):
- raise TypeError("item name must be string-like")
- if self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- if by_label:
- name = self._value_from_label(name)
- try:
- i = self._menu.index(name)
- except ValueError:
- raise ItemNotFoundError("no item named '%s'" % name)
-
- if self.multiple:
- if action == 2:
- action = not self._selected[i]
- if action and self._disabled_list[i]:
- raise AttributeError("item '%s' is disabled" % name)
- self._selected[i] = bool(action)
- else:
- if action == 2:
- if self._selected == name:
- action = 0
- else:
- action = 1
- if action == 0 and self._selected == name:
- self._selected = None
- elif action == 1:
- if self._disabled_list[i]:
- raise AttributeError("item '%s' is disabled" % name)
- self._selected = name
-
- def toggle_single(self, by_label=False):
- self._set_single_selected_state(2, by_label)
- def set_single(self, selected, by_label=False):
- action = int(bool(selected))
- self._set_single_selected_state(action, by_label)
-
- def _set_single_selected_state(self, action, by_label):
- if len(self._menu) != 1:
- raise ItemCountError("'%s' is not a single-item control" %
- self.name)
-
- name = self._menu[0]
- if by_label:
- name = self._value_from_label(name)
- self._set_selected_state(name, action, by_label)
-
- def get_item_disabled(self, name, by_label=False):
- """Get disabled state of named list item in a ListControl."""
- if by_label:
- name = self._value_from_label(name)
- try:
- i = self._menu.index(name)
- except ValueError:
- raise ItemNotFoundError()
- else:
- return self._disabled_list[i]
-
- def set_item_disabled(self, disabled, name, by_label=False):
- """Set disabled state of named list item in a ListControl.
-
- disabled: boolean disabled state
-
- """
- if by_label:
- name = self._value_from_label(name)
- try:
- i = self._menu.index(name)
- except ValueError:
- raise ItemNotFoundError()
- else:
- self._disabled_list[i] = bool(disabled)
-
- def set_all_items_disabled(self, disabled):
- """Set disabled state of all list items in a ListControl.
-
- disabled: boolean disabled state
-
- """
- for i in range(len(self._disabled_list)):
- self._disabled_list[i] = bool(disabled)
-
- def get_item_attrs(self, name, by_label=False):
- """Return dictionary of HTML attributes for a single ListControl item.
-
- The HTML element types that describe list items are: OPTION for SELECT
- controls, INPUT for the rest. These elements have HTML attributes that
- you may occasionally want to know about -- for example, the "alt" HTML
- attribute gives a text string describing the item (graphical browsers
- usually display this as a tooltip).
-
- The returned dictionary maps HTML attribute names to values. The names
- and values are taken from the original HTML.
-
- Note that for SELECT controls, the returned dictionary contains a
- special key "contents" -- see SelectControl.__doc__.
-
- """
- if by_label:
- name = self._value_from_label(name)
- try:
- i = self._menu.index(name)
- except ValueError:
- raise ItemNotFoundError()
- return self._attrs_list[i]
-
- def add_to_form(self, form):
- try:
- control = form.find_control(self.name, self.type)
- except ControlNotFoundError:
- Control.add_to_form(self, form)
- else:
- control.merge_control(self)
-
- def merge_control(self, control):
- assert bool(control.multiple) == bool(self.multiple)
- assert isinstance(control, self.__class__)
- self._menu.extend(control._menu)
- self._attrs_list.extend(control._attrs_list)
- self._disabled_list.extend(control._disabled_list)
- if control.multiple:
- self._selected.extend(control._selected)
- else:
- if control._value_is_set:
- self._selected = control._selected
- if control._value_is_set:
- self._value_is_set = True
-
- def fixup(self):
- """
- ListControls are built up from component list items (which are also
- ListControls) during parsing. This method should be called after all
- items have been added. See ListControl.__doc__ for the reason this is
- required.
-
- """
- # Need to set default selection where no item was indicated as being
- # selected by the HTML:
-
- # CHECKBOX:
- # Nothing should be selected.
- # SELECT/single, SELECT/multiple and RADIO:
- # RFC 1866 (HTML 2.0): says first item should be selected.
- # W3C HTML 4.01 Specification: says that client behaviour is
- # undefined in this case. For RADIO, exactly one must be selected,
- # though which one is undefined.
- # Both Netscape and Microsoft Internet Explorer (IE) choose first
- # item for SELECT/single. However, both IE5 and Mozilla (both 1.0
- # and Firebird 0.6) leave all items unselected for RADIO and
- # SELECT/multiple.
-
- # Since both Netscape and IE all choose the first item for
- # SELECT/single, we do the same. OTOH, both Netscape and IE
- # leave SELECT/multiple with nothing selected, in violation of RFC 1866
- # (but not in violation of the W3C HTML 4 standard); the same is true
- # of RADIO (which *is* in violation of the HTML 4 standard). We follow
- # RFC 1866 if the select_default attribute is set, and Netscape and IE
- # otherwise. RFC 1866 and HTML 4 are always violated insofar as you
- # can deselect all items in a RadioControl.
-
- raise NotImplementedError()
-
- def __getattr__(self, name):
- if name == "value":
- menu = self._menu
- if self.multiple:
- values = []
- for i in range(len(menu)):
- if self._selected[i]: values.append(menu[i])
- return values
- else:
- if self._selected is None: return []
- else: return [self._selected]
- else:
- raise AttributeError("%s instance has no attribute '%s'" %
- (self.__class__.__name__, name))
-
- def __setattr__(self, name, value):
- if name == "value":
- if self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- self._set_value(value)
- elif name in ("name", "type", "multiple"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def _set_value(self, value):
- if self.multiple:
- self._multiple_set_value(value)
- else:
- self._single_set_value(value)
-
- def _single_set_value(self, value):
- if value is None or isstringlike(value):
- raise TypeError("ListControl, must set a sequence")
- nr = len(value)
- if not (0 <= nr <= 1):
- raise ItemCountError("single selection list, must set sequence of "
- "length 0 or 1")
-
- if nr == 0:
- self._selected = None
- else:
- value = value[0]
- try:
- i = self._menu.index(value)
- except ValueError:
- raise ItemNotFoundError("no item named '%s'" %
- repr(value))
- if self._disabled_list[i]:
- raise AttributeError("item '%s' is disabled" % value)
- self._selected = value
-
- def _multiple_set_value(self, value):
- if value is None or isstringlike(value):
- raise TypeError("ListControl, must set a sequence")
-
- selected = [False]*len(self._selected)
- menu = self._menu
- disabled_list = self._disabled_list
-
- for v in value:
- found = False
- for i in range(len(menu)):
- item_name = menu[i]
- if v == item_name:
- if disabled_list[i]:
- raise AttributeError("item '%s' is disabled" % value)
- selected[i] = True
- found = True
- break
- if not found:
- raise ItemNotFoundError("no item named '%s'" % repr(v))
- self._selected = selected
-
- def set_value_by_label(self, value):
- raise NotImplementedError("control '%s' does not yet support "
- "by_label" % self.name)
- def get_value_by_label(self):
- raise NotImplementedError("control '%s' does not yet support "
- "by_label" % self.name)
-
- def possible_items(self, by_label=False):
- if by_label:
- raise NotImplementedError(
- "control '%s' does not yet support by_label" % self.name)
- return copy.copy(self._menu)
-
- def pairs(self):
- if self.disabled:
- return []
-
- if not self.multiple:
- name = self.name
- value = self._selected
- if name is None or value is None:
- return []
- return [(name, value)]
- else:
- control_name = self.name # usually the name HTML attribute
- pairs = []
- for i in range(len(self._menu)):
- item_name = self._menu[i] # usually the value HTML attribute
- if self._selected[i]:
- pairs.append((control_name, item_name))
- return pairs
-
- def _item_str(self, i):
- item_name = self._menu[i]
- if self.multiple:
- if self._selected[i]:
- item_name = "*"+item_name
- else:
- if self._selected == item_name:
- item_name = "*"+item_name
- if self._disabled_list[i]:
- item_name = "(%s)" % item_name
- return item_name
-
- def __str__(self):
- name = self.name
- if name is None: name = "<None>"
-
- display = []
- for i in range(len(self._menu)):
- s = self._item_str(i)
- display.append(s)
-
- infos = []
- if self.disabled: infos.append("disabled")
- if self.readonly: infos.append("readonly")
- info = string.join(infos, ", ")
- if info: info = " (%s)" % info
-
- return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
- name, string.join(display, ", "), info)
-
-
-class RadioControl(ListControl):
- """
- Covers:
-
- INPUT/RADIO
-
- """
- def __init__(self, type, name, attrs, select_default=False):
- ListControl.__init__(self, type, name, attrs, select_default,
- called_as_base_class=True)
- self.__dict__["multiple"] = False
- value = attrs.get("value", "on")
- self._menu = [value]
- checked = attrs.has_key("checked")
- if checked:
- self._value_is_set = True
- self._selected = value
- else:
- self._selected = None
-
- def fixup(self):
- if not self._value_is_set:
- # no item explicitly selected
- assert self._selected is None
- if self._select_default:
- self._selected = self._menu[0]
- self._value_is_set = True
-
-
-class CheckboxControl(ListControl):
- """
- Covers:
-
- INPUT/CHECKBOX
-
- """
- def __init__(self, type, name, attrs, select_default=False):
- ListControl.__init__(self, type, name, attrs, select_default,
- called_as_base_class=True)
- self.__dict__["multiple"] = True
- value = attrs.get("value", "on")
- self._menu = [value]
- checked = attrs.has_key("checked")
- self._selected = [checked]
- self._value_is_set = True
-
- def fixup(self):
- # If no items were explicitly checked in HTML, that's how we must
- # leave it, so we have nothing to do here.
- assert self._value_is_set
-
-
-class SelectControl(ListControl):
- """
- Covers:
-
- SELECT (and OPTION)
-
- SELECT control values and labels are subject to some messy defaulting
- rules. For example, if the HTML repreentation of the control is:
-
- <SELECT name=year>
- <OPTION value=0 label="2002">current year</OPTION>
- <OPTION value=1>2001</OPTION>
- <OPTION>2000</OPTION>
- </SELECT>
-
- The items, in order, have labels "2002", "2001" and "2000", whereas their
- values are "0", "1" and "2000" respectively. Note that the value of the
- last OPTION in this example defaults to its contents, as specified by RFC
- 1866, as do the labels of the second and third OPTIONs.
-
- The purpose of these methods is that the OPTION labels are sometimes much
- more meaningful, than are the OPTION values, which can make for more
- maintainable code.
-
- Additional read-only public attribute: attrs
-
- The attrs attribute is a dictionary of the original HTML attributes of the
- SELECT element. Other ListControls do not have this attribute, because in
- other cases the control as a whole does not correspond to any single HTML
- element. The get_item_attrs method may be used as usual to get at the
- HTML attributes of the HTML elements corresponding to individual list items
- (for SELECT controls, these are OPTION elements).
-
- Another special case is that the attributes dictionaries returned by
- get_item_attrs have a special key "contents" which does not correspond to
- any real HTML attribute, but rather contains the contents of the OPTION
- element:
-
- <OPTION>this bit</OPTION>
-
- """
- # HTML attributes here are treated slightly from other list controls:
- # -The SELECT HTML attributes dictionary is stuffed into the OPTION
- # HTML attributes dictionary under the "__select" key.
- # -The content of each OPTION element is stored under the special
- # "contents" key of the dictionary.
- # After all this, the dictionary is passed to the SelectControl constructor
- # as the attrs argument, as usual. However:
- # -The first SelectControl constructed when building up a SELECT control
- # has a constructor attrs argument containing only the __select key -- so
- # this SelectControl represents an empty SELECT control.
- # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
- # the __select dictionary containing the SELECT HTML-attributes.
- def __init__(self, type, name, attrs, select_default=False):
- # fish out the SELECT HTML attributes from the OPTION HTML attributes
- # dictionary
- self.attrs = attrs["__select"].copy()
- attrs = attrs.copy()
- del attrs["__select"]
-
- ListControl.__init__(self, type, name, attrs, select_default,
- called_as_base_class=True)
-
- self._label_map = None
- self.disabled = self.attrs.has_key("disabled")
- self.id = self.attrs.get("id")
-
- self._menu = []
- self._selected = []
- self._value_is_set = False
- if self.attrs.has_key("multiple"):
- self.__dict__["multiple"] = True
- self._selected = []
- else:
- self.__dict__["multiple"] = False
- self._selected = None
-
- if attrs: # OPTION item data was provided
- value = attrs["value"]
- self._menu.append(value)
- selected = attrs.has_key("selected")
- if selected:
- self._value_is_set = True
- if self.attrs.has_key("multiple"):
- self._selected.append(selected)
- elif selected:
- self._selected = value
-
- def _build_select_label_map(self):
- """Return an ordered mapping of labels to values.
-
- For example, if the HTML repreentation of the control is as given in
- SelectControl.__doc__, this function will return a mapping like:
-
- {"2002": "0", "2001": "1", "2000": "2000"}
-
- """
- alist = []
- for val in self._menu:
- attrs = self.get_item_attrs(val)
- alist.append((attrs["label"], val))
- return AList(alist)
-
- def _value_from_label(self, label):
- try:
- return self._label_map[label]
- except KeyError:
- raise ItemNotFoundError("no item has label '%s'" % label)
-
- def fixup(self):
- if not self._value_is_set:
- # No item explicitly selected.
- if len(self._menu) > 0:
- if self.multiple:
- if self._select_default:
- self._selected[0] = True
- else:
- assert self._selected is None
- self._selected = self._menu[0]
- self._value_is_set = True
- self._label_map = self._build_select_label_map()
-
- def possible_items(self, by_label=False):
- if not by_label:
- return copy.copy(self._menu)
- else:
- self._label_map.set_inverted(True)
- try:
- r = map(lambda v, self=self: self._label_map[v], self._menu)
- finally:
- self._label_map.set_inverted(False)
- return r
-
- def set_value_by_label(self, value):
- if isstringlike(value):
- raise TypeError("ListControl, must set a sequence, not a string")
- if self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
-
- try:
- value = map(lambda v, self=self: self._label_map[v], value)
- except KeyError, e:
- raise ItemNotFoundError("no item has label '%s'" % e.args[0])
- self._set_value(value)
-
- def get_value_by_label(self):
- menu = self._menu
- self._label_map.set_inverted(True)
- try:
- if self.multiple:
- values = []
- for i in range(len(menu)):
- if self._selected[i]:
- values.append(self._label_map[menu[i]])
- return values
- else:
- return [self._label_map[self._selected]]
- finally:
- self._label_map.set_inverted(False)
-
-
-#---------------------------------------------------
-class SubmitControl(ScalarControl):
- """
- Covers:
-
- INPUT/SUBMIT
- BUTTON/SUBMIT
-
- """
- def __init__(self, type, name, attrs):
- ScalarControl.__init__(self, type, name, attrs)
- # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
- # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem
- # to define this.
- if self.value is None: self.value = ""
- self.readonly = True
-
- def _click(self, form, coord, return_type):
- self._clicked = coord
- r = form._switch_click(return_type)
- self._clicked = False
- return r
-
- def pairs(self):
- if not self._clicked:
- return []
- return ScalarControl.pairs(self)
-
-
-#---------------------------------------------------
-class ImageControl(SubmitControl):
- """
- Covers:
-
- INPUT/IMAGE
-
- The value attribute of an ImageControl is always None. Coordinates are
- specified using one of the HTMLForm.click* methods.
-
- """
- def __init__(self, type, name, attrs):
- ScalarControl.__init__(self, type, name, attrs)
- self.__dict__["value"] = None
-
- def __setattr__(self, name, value):
- if name in ("value", "name", "type"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def pairs(self):
- clicked = self._clicked
- if self.disabled or not clicked:
- return []
- name = self.name
- if name is None: return []
- return [("%s.x" % name, str(clicked[0])),
- ("%s.y" % name, str(clicked[1]))]
-
-
-# aliases, just to make str(control) and str(form) clearer
-class PasswordControl(TextControl): pass
-class HiddenControl(TextControl): pass
-class TextareaControl(TextControl): pass
-class SubmitButtonControl(SubmitControl): pass
-
-
-def is_listcontrol(control): return isinstance(control, ListControl)
-
-
-class HTMLForm:
- """Represents a single HTML <form> ... </form> element.
-
- A form consists of a sequence of controls that usually have names, and
- which can take on various values. The values of the various types of
- controls represent variously: text, zero-, one- or many-of-many choices,
- and files to be uploaded.
-
- Forms can be filled in with data to be returned to the server, and then
- submitted, using the click method to generate a request object suitable for
- passing to urllib2.urlopen (or the click_request_data or click_pairs
- methods if you're not using urllib2).
-
- import ClientForm
- forms = ClientForm.ParseFile(html, base_uri)
- form = forms[0]
-
- form["query"] = "Python"
- form.set("lots", "nr_results")
-
- response = urllib2.urlopen(form.click())
-
- Usually, HTMLForm instances are not created directly. Instead, the
- ParseFile or ParseResponse factory functions are used. If you do construct
- HTMLForm objects yourself, however, note that an HTMLForm instance is only
- properly initialised after the fixup method has been called (ParseFile and
- ParseResponse do this for you). See ListControl.__doc__ for the reason
- this is required.
-
- Indexing a form (form["control_name"]) returns the named Control's value
- attribute. Assignment to a form index (form["control_name"] = something)
- is equivalent to assignment to the named Control's value attribute. If you
- need to be more specific than just supplying the control's name, use the
- set_value and get_value methods.
-
- ListControl values are lists of item names. The list item's name is the
- value of the corresponding HTML element's "value" attribute.
-
- Example:
-
- <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
- <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
-
- defines a CHECKBOX control with name "cheeses" which has two items, named
- "leicester" and "cheddar".
-
- Another example:
-
- <SELECT name="more_cheeses">
- <OPTION>1</OPTION>
- <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
- </SELECT>
-
- defines a SELECT control with name "more_cheeses" which has two items,
- named "1" and "2".
-
- To set, clear or toggle individual list items, use the set and toggle
- methods. To set the whole value, do as for any other control:use indexing
- or the set_/get_value methods.
-
- Example:
-
- # select *only* the item named "cheddar"
- form["cheeses"] = ["cheddar"]
- # select "cheddar", leave other items unaffected
- form.set("cheddar", "cheeses")
-
- Some controls (RADIO and SELECT without the multiple attribute) can only
- have zero or one items selected at a time. Some controls (CHECKBOX and
- SELECT with the multiple attribute) can have multiple items selected at a
- time. To set the whole value of a multiple-selection ListControl, assign a
- sequence to a form index:
-
- form["cheeses"] = ["cheddar", "leicester"]
-
- To check whether a control has an item, or whether an item is selected,
- respectively:
-
- "cheddar" in form.possible_items("cheeses")
- "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
-
- Note that some items may be disabled (see below).
-
- Note the following mistake:
-
- form[control_name] = control_value
- assert form[control_name] == control_value # not necessarily true
-
- The reason for this is that form[control_name] always gives the list items
- in the order they were listed in the HTML.
-
- List items (hence list values, too) can be referred to in terms of list
- item labels rather than list item names. Currently, this is only possible
- for SELECT controls (this is a bug). To use this feature, use the by_label
- arguments to the various HTMLForm methods. Note that it is *item* names
- (hence ListControl values also), not *control* names, that can be referred
- to by label.
-
- The question of default values of OPTION contents, labels and values is
- somewhat complicated: see SelectControl.__doc__ and
- ListControl.get_item_attrs.__doc__ if you think you need to know.
-
- Controls can be disabled or readonly. In either case, the control's value
- cannot be changed until you clear those flags (using the methods on
- HTMLForm). Disabled is the state typically represented by browsers by
- `greying out' a control. Disabled controls are not `successful' -- they
- don't cause data to get returned to the server. Readonly controls usually
- appear in browsers as read-only text boxes. Readonly controls are
- successful. List items can also be disabled. Attempts to select disabled
- items (with form[name] = value, or using the ListControl.set method, for
- example) fail. Attempts to clear disabled items are allowed.
-
- If a lot of controls are readonly, it can be useful to do this:
-
- form.set_all_readonly(False)
-
- When you want to do several things with a single control, or want to do
- less common things, like changing which controls and items are disabled,
- you can get at a particular control:
-
- control = form.find_control("cheeses")
- control.set_item_disabled(False, "gruyere")
- control.set("gruyere")
-
- Most methods on HTMLForm just delegate to the contained controls, so see
- the docstrings of the various Control classes for further documentation.
- Most of these delegating methods take name, type, kind, id and nr arguments
- to specify the control to be operated on: see
- HTMLForm.find_control.__doc__.
-
- ControlNotFoundError (subclass of ValueError) is raised if the specified
- control can't be found. This includes occasions where a non-ListControl
- is found, but the method (set, for example) requires a ListControl.
- ItemNotFoundError (subclass of ValueError) is raised if a list item can't
- be found. ItemCountError (subclass of ValueError) is raised if an attempt
- is made to select more than one item and the control doesn't allow that, or
- set/get_single are called and the control contains more than one item.
- AttributeError is raised if a control or item is readonly or disabled and
- an attempt is made to alter its value.
-
- XXX CheckBoxControl and RadioControl don't yet support item access by label
-
- Security note: Remember that any passwords you store in HTMLForm instances
- will be saved to disk in the clear if you pickle them (directly or
- indirectly). The simplest solution to this is to avoid pickling HTMLForm
- objects. You could also pickle before filling in any password, or just set
- the password to "" before pickling.
-
-
- Public attributes:
-
- action: full (absolute URI) form action
- method: "GET" or "POST"
- enctype: form transfer encoding MIME type
- name: name of form (None if no name was specified)
- attrs: dictionary mapping original HTML form attributes to their values
-
- controls: list of Control instances; do not alter this list
- (instead, call form.new_control to make a Control and add it to the
- form, or control.add_to_form if you already have a Control instance)
-
-
-
- Methods for form filling:
- -------------------------
-
- Most of the these methods have very similar arguments. See
- HTMLForm.find_control.__doc__ for details of the name, type, kind and nr
- arguments. See above for a description of by_label.
-
- def find_control(self,
- name=None, type=None, kind=None, id=None, predicate=None,
- nr=None)
-
- get_value(name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
- set_value(value,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
-
- set_all_readonly(readonly)
-
-
- Methods applying only to ListControls:
-
- possible_items(name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
-
- set(selected, item_name,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
- toggle(item_name,
- name=None, type=None, id=None, nr=None,
- by_label=False)
-
- set_single(selected,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
- toggle_single(name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
-
-
- Method applying only to FileControls:
-
- add_file(file_object,
- content_type="application/octet-stream", filename=None,
- name=None, id=None, nr=None)
-
-
- Methods applying only to clickable controls:
-
- click(name=None, type=None, id=None, nr=0, coord=(1,1))
- click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1))
- click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1))
-
- """
-
- type2class = {
- "text": TextControl,
- "password": PasswordControl,
- "hidden": HiddenControl,
- "textarea": TextareaControl,
-
- "isindex": IsindexControl,
-
- "file": FileControl,
-
- "button": IgnoreControl,
- "buttonbutton": IgnoreControl,
- "reset": IgnoreControl,
- "resetbutton": IgnoreControl,
-
- "submit": SubmitControl,
- "submitbutton": SubmitButtonControl,
- "image": ImageControl,
-
- "radio": RadioControl,
- "checkbox": CheckboxControl,
- "select": SelectControl,
- }
-
-#---------------------------------------------------
-# Initialisation. Use ParseResponse / ParseFile instead.
-
- def __init__(self, action, method="GET",
- enctype="application/x-www-form-urlencoded",
- name=None, attrs=None):
- """
- In the usual case, use ParseResponse (or ParseFile) to create new
- HTMLForm objects.
-
- action: full (absolute URI) form action
- method: "GET" or "POST"
- enctype: form transfer encoding MIME type
- name: name of form
- attrs: dictionary mapping original HTML form attributes to their values
-
- """
- self.action = action
- self.method = method
- self.enctype = enctype
- self.name = name
- if attrs is not None:
- self.attrs = attrs.copy()
- else:
- self.attrs = {}
- self.controls = []
-
- def new_control(self, type, name, attrs,
- ignore_unknown=False, select_default=False):
- """Adds a new control to the form.
-
- This is usually called by ParseFile and ParseResponse. Don't call it
- youself unless you're building your own Control instances.
-
- Note that controls representing lists of items are built up from
- controls holding only a single list item. See ListControl.__doc__ for
- further information.
-
- type: type of control (see Control.__doc__ for a list)
- attrs: HTML attributes of control
- ignore_unknown: if true, use a dummy Control instance for controls of
- unknown type; otherwise, raise ValueError
- select_default: for RADIO and multiple-selection SELECT controls, pick
- the first item as the default if no 'selected' HTML attribute is
- present (this defaulting happens when the HTMLForm.fixup method is
- called)
-
- """
- type = string.lower(type)
- klass = self.type2class.get(type)
- if klass is None:
- if ignore_unknown:
- klass = IgnoreControl
- else:
- raise ValueError("Unknown control type '%s'" % type)
-
- a = attrs.copy()
- if issubclass(klass, ListControl):
- control = klass(type, name, a, select_default)
- else:
- control = klass(type, name, a)
- control.add_to_form(self)
-
- def fixup(self):
- """Normalise form after all controls have been added.
-
- This is usually called by ParseFile and ParseResponse. Don't call it
- youself unless you're building your own Control instances.
-
- This method should only be called once, after all controls have been
- added to the form.
-
- """
- for control in self.controls:
- control.fixup()
-
-#---------------------------------------------------
- def __str__(self):
- header = "%s %s %s" % (self.method, self.action, self.enctype)
- rep = [header]
- for control in self.controls:
- rep.append(" %s" % str(control))
- return "<%s>" % string.join(rep, "\n")
-
-#---------------------------------------------------
-# Form-filling methods.
-
- def __getitem__(self, name):
- return self.find_control(name).value
- def __setitem__(self, name, value):
- control = self.find_control(name)
- try:
- control.value = value
- except AttributeError, e:
- raise ValueError(str(e))
-
- def get_value(self,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Return value of control.
-
- If only name and value arguments are supplied, equivalent to
-
- form[name]
-
- """
- c = self.find_control(name, type, kind, id, nr=nr)
- if by_label:
- try:
- meth = c.get_value_by_label
- except AttributeError:
- raise NotImplementedError(
- "control '%s' does not yet support by_label" % c.name)
- else:
- return meth()
- else:
- return c.value
- def set_value(self, value,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Set value of control.
-
- If only name and value arguments are supplied, equivalent to
-
- form[name] = value
-
- """
- c = self.find_control(name, type, kind, id, nr=nr)
- if by_label:
- try:
- meth = c.set_value_by_label
- except AttributeError:
- raise NotImplementedError(
- "control '%s' does not yet support by_label" % c.name)
- else:
- meth(value)
- else:
- c.value = value
-
- def set_all_readonly(self, readonly):
- for control in self.controls:
- control.readonly = bool(readonly)
-
-
-#---------------------------------------------------
-# Form-filling methods applying only to ListControls.
-
- def possible_items(self,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Return a list of all values that the specified control can take."""
- c = self._find_list_control(name, type, kind, id, nr)
- return c.possible_items(by_label)
-
- def set(self, selected, item_name,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Select / deselect named list item.
-
- selected: boolean selected state
-
- """
- self._find_list_control(name, type, kind, id, nr).set(
- selected, item_name, by_label)
- def toggle(self, item_name,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Toggle selected state of named list item."""
- self._find_list_control(name, type, kind, id, nr).toggle(
- item_name, by_label)
-
- def set_single(self, selected,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Select / deselect list item in a control having only one item.
-
- If the control has multiple list items, ItemCountError is raised.
-
- This is just a convenience method, so you don't need to know the item's
- name -- the item name in these single-item controls is usually
- something meaningless like "1" or "on".
-
- For example, if a checkbox has a single item named "on", the following
- two calls are equivalent:
-
- control.toggle("on")
- control.toggle_single()
-
- """
- self._find_list_control(name, type, kind, id, nr).set_single(
- selected, by_label)
- def toggle_single(self, name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Toggle selected state of list item in control having only one item.
-
- The rest is as for HTMLForm.set_single.__doc__.
-
- """
- self._find_list_control(name, type, kind, id, nr).toggle_single(
- by_label)
-
-#---------------------------------------------------
-# Form-filling method applying only to FileControls.
-
- def add_file(self, file_object, content_type=None, filename=None,
- name=None, id=None, nr=None):
- """Add a file to be uploaded.
-
- file_object: file-like object (with read method) from which to read
- data to upload
- content_type: MIME content type of data to upload
- filename: filename to pass to server
-
- If filename is None, no filename is sent to the server.
-
- If content_type is None, the content type is guessed based on the
- filename and the data from read from the file object.
-
- XXX
- At the moment, guessed content type is always application/octet-stream.
- Use sndhdr, imghdr modules. Should also try to guess HTML, XML, and
- plain text.
-
- """
- self.find_control(name, "file", id=id, nr=nr).add_file(
- file_object, content_type, filename)
-
-#---------------------------------------------------
-# Form submission methods, applying only to clickable controls.
-
- def click(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
- """Return request that would result from clicking on a control.
-
- The request object is a urllib2.Request instance, which you can pass to
- urllib2.urlopen (or ClientCookie.urlopen).
-
- Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
- IMAGEs) can be clicked.
-
- Will click on the first clickable control, subject to the name, type
- and nr arguments (as for find_control). If no name, type, id or number
- is specified and there are no clickable controls, a request will be
- returned for the form in its current, un-clicked, state.
-
- IndexError is raised if any of name, type, id or nr is specified but no
- matching control is found. ValueError is raised if the HTMLForm has an
- enctype attribute that is not recognised.
-
- You can optionally specify a coordinate to click at, which only makes a
- difference if you clicked on an image.
-
- """
- return self._click(name, type, id, nr, coord, "request")
-
- def click_request_data(self,
- name=None, type=None, id=None, nr=0, coord=(1,1)):
- """As for click method, but return a tuple (url, data, headers).
-
- You can use this data to send a request to the server. This is useful
- if you're using httplib or urllib rather than urllib2. Otherwise, use
- the click method.
-
- # Untested. Have to subclass to add headers, I think -- so use urllib2
- # instead!
- import urllib
- url, data, hdrs = form.click_request_data()
- r = urllib.urlopen(url, data)
-
- # Untested. I don't know of any reason to use httplib -- you can get
- # just as much control with urllib2.
- import httplib, urlparse
- url, data, hdrs = form.click_request_data()
- tup = urlparse(url)
- host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
- conn = httplib.HTTPConnection(host)
- if data:
- httplib.request("POST", path, data, hdrs)
- else:
- httplib.request("GET", path, headers=hdrs)
- r = conn.getresponse()
-
- """
- return self._click(name, type, id, nr, coord, "request_data")
-
- def click_pairs(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
- """As for click_request_data, but returns a list of (key, value) pairs.
-
- You can use this list as an argument to ClientForm.urlencode. This is
- usually only useful if you're using httplib or urllib rather than
- urllib2 or ClientCookie. It may also be useful if you want to manually
- tweak the keys and/or values, but this should not be necessary.
- Otherwise, use the click method.
-
- Note that this method is only useful for forms of MIME type
- x-www-form-urlencoded. In particular, it does not return the
- information required for file upload. If you need file upload and are
- not using urllib2, use click_request_data.
-
- Also note that Python 2.0's urllib.urlencode is slightly broken: it
- only accepts a mapping, not a sequence of pairs, as an argument. This
- messes up any ordering in the argument. Use ClientForm.urlencode
- instead.
-
- """
- return self._click(name, type, id, nr, coord, "pairs")
-
-#---------------------------------------------------
-
- def find_control(self,
- name=None, type=None, kind=None, id=None, predicate=None,
- nr=None):
- """Locate some specific control within the form.
-
- At least one of the name, type, kind, predicate and nr arguments must
- be supplied. If no matching control is found, ControlNotFoundError is
- raised.
-
- If name is specified, then the control must have the indicated name.
-
- If type is specified then the control must have the specified type (in
- addition to the types possible for <input> HTML tags: "text",
- "password", "hidden", "submit", "image", "button", "radio", "checkbox",
- "file" we also have "reset", "buttonbutton", "submitbutton",
- "resetbutton", "textarea", "select" and "isindex").
-
- If kind is specified, then the control must fall into the specified
- group, each of which satisfies a particular interface. The types are
- "text", "list", "multilist", "singlelist", "clickable" and "file".
-
- If id is specified, then the control must have the indicated id.
-
- If predicate is specified, then the control must match that function.
- The predicate function is passed the control as its single argument,
- and should return a boolean value indicating whether the control
- matched.
-
- nr, if supplied, is the sequence number of the control (where 0 is the
- first). Note that control 0 is the first control matching all the
- other arguments (if supplied); it is not necessarily the first control
- in the form.
-
- """
- if ((name is None) and (type is None) and (kind is None) and
- (id is None) and (predicate is None) and (nr is None)):
- raise ValueError(
- "at least one argument must be supplied to specify control")
- if nr is None: nr = 0
-
- return self._find_control(name, type, kind, id, predicate, nr)
-
-#---------------------------------------------------
-# Private methods.
-
- def _find_list_control(self,
- name=None, type=None, kind=None, id=None, nr=None):
- if ((name is None) and (type is None) and (kind is None) and
- (id is None) and (nr is None)):
- raise ValueError(
- "at least one argument must be supplied to specify control")
- if nr is None: nr = 0
-
- return self._find_control(name, type, kind, id, is_listcontrol, nr)
-
- def _find_control(self, name, type, kind, id, predicate, nr):
- if (name is not None) and not isstringlike(name):
- raise TypeError("control name must be string-like")
- if (type is not None) and not isstringlike(type):
- raise TypeError("control type must be string-like")
- if (kind is not None) and not isstringlike(kind):
- raise TypeError("control kind must be string-like")
- if (id is not None) and not isstringlike(id):
- raise TypeError("control id must be string-like")
- if (predicate is not None) and not callable(predicate):
- raise TypeError("control predicate must be callable")
- if nr < 0: raise ValueError("control number must be a positive "
- "integer")
-
- orig_nr = nr
-
- for control in self.controls:
- if name is not None and name != control.name:
- continue
- if type is not None and type != control.type:
- continue
- if (kind is not None and
- not self._is_control_in_kind(control, kind)):
- continue
- if id is not None and id != control.id:
- continue
- if predicate and not predicate(control):
- continue
- if nr:
- nr = nr - 1
- continue
- return control
-
- description = []
- if name is not None: description.append("name '%s'" % name)
- if type is not None: description.append("type '%s'" % type)
- if kind is not None: description.append("kind '%s'" % kind)
- if id is not None: description.append("id '%s'" % id)
- if predicate is not None:
- description.append("matching predicate %s" % predicate)
- if orig_nr: description.append("nr %d" % orig_nr)
- description = string.join(description, ", ")
- raise ControlNotFoundError("no control with "+description)
-
- def _is_control_in_kind(self, control, kind):
- # XXX not OO
- if kind == "list":
- return isinstance(control, ListControl)
- elif kind == "multilist":
- return bool(isinstance(control, ListControl) and control.multiple)
- elif kind == "singlelist":
- return bool(isinstance(control, ListControl) and
- not control.multiple)
- elif kind == "file":
- return isinstance(control, FileControl)
- elif kind == "text":
- return isinstance(control, TextControl)
- elif kind == "clickable":
- return (isinstance(control, SubmitControl) or
- isinstance(control, IsindexControl))
- else:
- raise ValueError("no such control kind '%s'" % kind)
-
- def _click(self, name, type, id, nr, coord, return_type):
- try:
- control = self._find_control(name, type, "clickable", id, None, nr)
- except ControlNotFoundError:
- if ((name is not None) or (type is not None) or (id is not None) or
- (nr != 0)):
- raise
- # no clickable controls, but no control was explicitly requested,
- # so return state without clicking any control
- return self._switch_click(return_type)
- else:
- return control._click(self, coord, return_type)
-
- def _pairs(self):
- """Return sequence of (key, value) pairs suitable for urlencoding."""
- pairs = []
- for control in self.controls:
- pairs.extend(control.pairs())
- return pairs
-
- def _request_data(self):
- """Return a tuple (url, data, headers)."""
- method = string.upper(self.method)
- if method == "GET":
- if self.enctype != "application/x-www-form-urlencoded":
- raise ValueError(
- "unknown GET form encoding type '%s'" % self.enctype)
- uri = "%s?%s" % (self.action, urlencode(self._pairs()))
- return uri, None, []
- elif method == "POST":
- if self.enctype == "application/x-www-form-urlencoded":
- return (self.action, urlencode(self._pairs()),
- [("Content-type", self.enctype)])
- elif self.enctype == "multipart/form-data":
- data = StringIO()
- http_hdrs = []
- mw = MimeWriter(data, http_hdrs)
- f = mw.startmultipartbody("form-data", add_to_http_hdrs=True,
- prefix=0)
- for control in self.controls:
- control._write_mime_data(mw)
- mw.lastpart()
- return self.action, data.getvalue(), http_hdrs
- else:
- raise ValueError(
- "unknown POST form encoding type '%s'" % self.enctype)
- else:
- raise ValueError("Unknown method '%s'" % method)
-
- def _switch_click(self, return_type):
- # This is called by HTMLForm and clickable Controls to hide switching
- # on return_type.
- # XXX
- # not OO
- # duplicated in IsindexControl._click
- if return_type == "pairs":
- return self._pairs()
- elif return_type == "request_data":
- return self._request_data()
- else:
- req_data = self._request_data()
- req = urllib2.Request(req_data[0], req_data[1])
- for key, val in req_data[2]:
- req.add_header(key, val)
- return req
diff --git a/tools/bug_tool/ardour_bugs b/tools/bug_tool/ardour_bugs
deleted file mode 100755
index c855c21513..0000000000
--- a/tools/bug_tool/ardour_bugs
+++ /dev/null
@@ -1,349 +0,0 @@
-#! /usr/bin/python
-
-# By Taybin Rutkin
-#
-# TODO
-# look along $PATH to find binary.
-# Use ardour binary to get version info
-# hide file information from reporters
-
-# standard
-import os
-import Queue
-import re
-import shutil
-import string
-import sys
-import tempfile
-import threading
-import warnings
-
-# probably installed
-import pygtk
-pygtk.require("2.0")
-import gtk, gnome.ui
-
-# we provide ClientForm and ClientCookie in $prefix/share/ardour/
-sys.path.append('/usr/local/share/ardour/')
-sys.path.append('/opt/local/share/ardour/')
-sys.path.append('/opt/share/ardour/')
-sys.path.append('/usr/share/ardour/')
-
-# probably not installed
-import ClientForm
-import ClientCookie
-
-warnings.filterwarnings('ignore', message="tempnam is a potential security risk to your program")
-g_name = os.tempnam('/tmp', 'bugrp')
-os.mkdir(g_name)
-
-class NoBlock(threading.Thread):
- def __init__(self, data, queue):
- threading.Thread.__init__(self)
- self.data = data
- self.queue = queue
-
- def zip(self):
- self.queue.put('zipping')
- os.system('tar cvfz ' + g_name + '.tar.gz ' + g_name + ' > /dev/null')
-
- def login(self):
- self.queue.put('logging in')
- response = ClientCookie.urlopen('http://ardour.org/mantis/login.php?username=bug_tool&password=bug_tool')
- print response
- response.close()
-
- def get_form(self):
- self.queue.put('forming data')
- response = ClientCookie.urlopen('http://ardour.org/mantis/bug_report_page.php')
- print response
- forms = ClientForm.ParseResponse(response)
- self.form = forms[2]
-
- def upload(self):
- self.queue.put('uploading')
- self.form.add_file(open(g_name+'.tar.gz'), 'application/x-gzip', 'system-info.tar.gz')
- self.form['description'] = self.data['long']
- self.form['summary'] = self.data['short']
- self.form['custom_field_3'] = self.data['email']
- self.form['custom_field_4'] = self.data['name']
- request = self.form.click()
- response2 = ClientCookie.urlopen(request)
- response2.close()
-
- def run(self):
- print "1"
- self.zip()
- print "2"
- self.login()
- print "3"
- self.get_form()
- print "4"
- self.upload()
- print "5"
- self.queue.put('done')
-
-class ProgressWin(object):
- def __init__(self, parent_window, no_block, queue):
- self.no_block = no_block
- self.queue = queue
-
- self.win = gtk.Window()
- self.win.set_type_hint('dialog')
- self.win.set_title('Progress')
- self.win.set_resizable(False)
- self.win.set_transient_for(parent_window)
-
- vbox = gtk.VBox()
- self.text = gtk.Label('')
- self.progress = gtk.ProgressBar()
- self.progress.set_pulse_step(0.25)
-
- vbox.pack_start(self.text)
- vbox.pack_start(self.progress)
- self.win.add(vbox)
-
- self.win.show_all()
-
- gtk.timeout_add(100, self.check)
-
- def check(self):
- try:
- text = self.queue.get_nowait()
- print text
- if text == 'done':
- gtk.main_quit()
- self.text.set_text(text)
- except Queue.Empty:
- pass
-
- self.progress.pulse()
- return True
-
-class ReportWin(object):
- def start_page(self):
- start = gnome.ui.DruidPageEdge(gnome.ui.EDGE_START)
- start.set_text(
-"""So, you want to report a bug in ardour. Excellent.
-
-This program will help you to submit a bug report that will be useful to the programmers.
-
-We are collecting this information so that we don't have to ask you to research very detailed aspects of your system configuration. The information this tool collects is stored in the Ardour bug tracking system, and is not used for any other purpose. We will not intentionally sell or disclose the information to any parties besides those authorized to view it on the Ardour bug tracking system.
-""")
- start.connect('cancel', lambda w, d: gtk.main_quit())
- self.druid.append_page(start)
-
- def end_page_finish(self, page, data):
- print "page_finish"
- if self.first_time:
- self.first_time = False
- self.druid.set_buttons_sensitive(False, False, False, False)
- print "build queue"
- self.queue = Queue.Queue(0)
- print "build no_block"
- self.no_block = NoBlock(self.data, self.queue)
- print "build progress window"
- self.progress = ProgressWin(self.win, self.no_block, self.queue)
- print "start no block"
- self.no_block.start()
- print "exit end_page_finish"
-
- def end_page(self):
- end = gnome.ui.DruidPageEdge(gnome.ui.EDGE_FINISH)
- end.set_text(
-"""Thank you for helping Ardour.
-
-When you click the "Apply" button, we will connect to the web and upload the information.
-
-Please let the Bug Tool finish. It will exit on its own.""")
- end.connect('cancel', lambda w, d: gtk.main_quit())
- end.connect('finish', self.end_page_finish)
- self.druid.append_page(end)
-
- def build_tools_page_next(self, page, data):
- if self.tools_radio.get_active():
- os.system("g++ --version >> " +g_name+"/build-tools")
- os.system("pkg-config --version >> " +g_name+"/build-tools")
- os.system("autoconf --version >> " +g_name+"/build-tools")
- os.system("automake --version >> " +g_name+"/build-tools")
- os.system("aclocal --version >> " +g_name+"/build-tools")
- os.system("libtool --version >> " +g_name+"/build-tools")
- os.system("gettext --version >> " +g_name+"/build-tools")
- os.system("autopoint --version >> " +g_name+"/build-tools")
-
- def build_tools_page(self):
- tools = gnome.ui.DruidPageStandard()
- self.tools_radio = gtk.RadioButton(None, "Yes")
- radio_btn2 = gtk.RadioButton(self.tools_radio, "No")
- radio_btn2.set_active(True)
- tools.append_item("Are you using a version of Ardour that you compiled yourself?", self.tools_radio, "")
- tools.append_item("", radio_btn2, "")
- tools.connect('cancel', lambda w, d: gtk.main_quit())
- tools.connect('next', self.build_tools_page_next)
- self.druid.append_page(tools)
-
- def binary_page_next(self, page, data):
- path = self.binary_path.get_text()
- if len(path) > 0 and os.path.exists(path):
- os.system("ldd "+path+" > "+g_name+"/linker-info")
-
- def binary_page(self):
- binary = gnome.ui.DruidPageStandard()
- self.binary_path = gtk.Entry()
- binary.append_item("Where is Ardour's binary located?", self.binary_path, "")
- binary.connect('cancel', lambda w, d: gtk.main_quit())
- binary.connect('next', self.binary_page_next)
- self.druid.append_page(binary)
-
- def versions_page_next(self, page, data):
- os.system('echo "gtk-ardour version: '+self.gtkardour_version.get_text()+'" >>'+g_name+'/ardour-version')
- os.system('echo "libardour version: '+self.libardour_version.get_text()+'" >>'+g_name+'/ardour-version')
-
- def versions_page(self):
- versions = gnome.ui.DruidPageStandard()
- self.gtkardour_version = gtk.Entry()
- self.libardour_version = gtk.Entry()
- versions.append_item("What is gtk-ardour's version?", self.gtkardour_version, "")
- versions.append_item("What is libardour's version?", self.libardour_version, "")
- versions.connect('cancel', lambda w, d: gtk.main_quit())
- versions.connect('next', self.versions_page_next)
- self.druid.append_page(versions)
-
- def session_check_toggled(self, data):
- self.session_path.set_sensitive(self.session_check.get_active())
-
- def sessions_page_next(self, page, data):
- session = self.session_path.get_text()
- if self.session_check.get_active() and session > 0:
- if os.path.exists(session) and os.path.isfile(session):
- shutil.copy(session, g_name)
-
- def sessions_page(self):
- sessions = gnome.ui.DruidPageStandard()
- self.session_check = gtk.CheckButton("Yes")
- self.session_check.set_active(True)
- self.session_path = gtk.Entry()
- sessions.append_item("Is the problem one you've noticed while trying to run Ardour?", self.session_check, "")
- sessions.append_item("What is the session file you've been using?", self.session_path, "")
- self.session_check.connect('toggled', self.session_check_toggled)
- sessions.connect('cancel', lambda w, d:gtk.main_quit())
- sessions.connect('next', self.sessions_page_next)
- self.druid.append_page(sessions)
-
- def description_page_next(self, page, data):
- self.data['short'] = self.short_description.get_text()
- buffer = self.long_description.get_buffer()
- self.data['long'] = buffer.get_text(buffer.get_start_iter(), buffer.get_end_iter())
-
- def description_page(self):
- description = gnome.ui.DruidPageStandard()
- self.long_description = gtk.TextView()
- self.short_description = gtk.Entry()
- self.long_description.set_size_request(-1, 70)
- description.append_item(
-"""OK, we've collected the system information. Now its time for you
-to explain the problem.
-
-Please note: you do not need to include any information about versions
-of any software - that has been taken care of.
-
-If you are reporting an operational problem, please carefully describe
-the actions you took, any messages that you noticed, and in as much
-detail as possible describe what went wrong.""", self.long_description, "")
- description.append_item("Please give a one line summary of your problem", self.short_description, "")
- description.connect('cancel', lambda w, d:gtk.main_quit())
- description.connect('next', self.description_page_next)
- self.druid.append_page(description)
-
- def info_page_next(self, page, data):
- self.data['name'] = self.name.get_text()
- self.data['email'] = self.email.get_text()
-
- def info_page(self):
- info = gnome.ui.DruidPageStandard()
- self.name = gtk.Entry()
- self.email = gtk.Entry()
- info.append_item("Name", self.name, "Optional")
- info.append_item("Email", self.email, "")
- info.connect('cancel', lambda w, d:gtk.main_quit())
- info.connect('next', self.info_page_next)
- self.druid.append_page(info)
-
- def __init__(self):
- self.first_time = True
-
- self.win = gtk.Window()
- self.win.set_title("Ardour Bug Tool")
- self.win.connect('destroy', lambda w: gtk.main_quit())
-
- self.druid = gnome.ui.Druid()
-
- self.start_page()
- self.build_tools_page()
- self.binary_page()
- self.versions_page()
- self.sessions_page()
- self.description_page()
- self.info_page()
- self.end_page()
-
- self.win.add(self.druid)
- self.win.show_all()
-
- self.data = {}
-
-def main(*args):
- os.mkdir(g_name+"/proc/")
- proclist = ['asound', 'cpuinfo', 'devices', 'dma', 'filesystems', 'irq', 'isapnp', 'meminfo', 'modules', 'mounts', 'partition', 'pci', 'slabinfo', 'sysvipc/shm', 'version']
- for item in proclist:
- if os.path.exists('/proc/'+item):
- try:
- if os.path.isdir('/proc/'+item):
- shutil.copytree('/proc/'+item, g_name+'/proc/'+item)
- else:
- shutil.copy('/proc/'+item, g_name+'/proc/')
- except shutil.Error:
- pass #should this be reported?
- else:
- f = open(g_name+'/proc/'+item, 'w')
- f.write(item+' missing in /proc')
- f.close
-
- liblist = ['asound', 'c', 'gdbm', 'gdk', 'gmodule', 'gtk', 'intl', 'jack', 'm', 'pthreads', 'sndfile', 'X11', 'Xext']
- for lib in liblist:
- for libdir in ['/lib/', '/usr/lib/', '/usr/X11R6/lib/', '/usr/local/lib/', '/opt/lib/']:
- if os.path.exists(libdir+"lib"+lib+".so"):
- os.system('echo "'+lib+ ' is `ls -l '+libdir+'lib'+lib+'.so`" >> '+g_name+'/libraries')
-
- if os.path.exists('/proc/sys/kernel/lowlatency'):
- shutil.copy('/proc/sys/kernel/lowlatency', g_name+'/lowlatency-status')
- else:
- f = open(g_name+'/lowlatency-status', 'w')
- f.write('-1')
- f.close()
-
- scsi = re.compile(r'sd[a-z][0-9]')
- if scsi.search(open('/proc/mounts').read()):
- shutil.copytree('/proc/scsi', g_name+'/scsi')
-
- ide = re.compile(r'hd[a-z][0-9]')
- if ide.search(open('/proc/mounts').read()):
- pass
-
- os.system("xmodmap >" +g_name+"/xmodmap")
- os.system("/sbin/lspci -vv >" +g_name+"/lspci.out")
-
- if os.path.exists(os.path.expandvars('$HOME')+'/.ardour/ardour.rc'):
- shutil.copy(os.path.expandvars('$HOME')+'/.ardour/ardour.rc', g_name+'/ardour.rc')
-
- bug_win = ReportWin()
-
- gtk.main()
-
-if __name__ == '__main__':
- main()
-
-shutil.rmtree(g_name)
-if os.path.exists(g_name+'.tar.gz'):
- os.remove(g_name+'.tar.gz')
-