summaryrefslogtreecommitdiff
path: root/tools/bug_tool/ClientForm.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/bug_tool/ClientForm.py')
-rw-r--r--tools/bug_tool/ClientForm.py2699
1 files changed, 0 insertions, 2699 deletions
diff --git a/tools/bug_tool/ClientForm.py b/tools/bug_tool/ClientForm.py
deleted file mode 100644
index c42f65b313..0000000000
--- a/tools/bug_tool/ClientForm.py
+++ /dev/null
@@ -1,2699 +0,0 @@
-"""HTML form handling for web clients.
-
-ClientForm is a Python module for handling HTML forms on the client
-side, useful for parsing HTML forms, filling them in and returning the
-completed forms to the server. It has developed from a port of Gisle
-Aas' Perl module HTML::Form, from the libwww-perl library, but the
-interface is not the same.
-
-The most useful docstring is the one for HTMLForm.
-
-RFC 1866: HTML 2.0
-RFC 1867: Form-based File Upload in HTML
-RFC 2388: Returning Values from Forms: multipart/form-data
-HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
-HTML 4.01 Specification, W3C Recommendation 24 December 1999
-
-
-Copyright 2002-2003 John J. Lee <jjl@pobox.com>
-Copyright 1998-2000 Gisle Aas.
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the BSD License (see the file COPYING included with
-the distribution).
-
-"""
-
-# XXX
-# Treat unknown controls as text controls? (this was a recent LWP
-# HTML::Form change) I guess this is INPUT with no TYPE? Check LWP
-# source and browser behaviour.
-# Support for list item ids. How to handle missing ids? (How do I deal
-# with duplicate OPTION labels ATM? Can't remember...)
-# Arrange things so can automatically PyPI-register with categories
-# without messing up 1.5.2 compatibility.
-# Tests need work.
-# Test single and multiple file upload some more on the web.
-# Does file upload work when name is missing? Sourceforge tracker form
-# doesn't like it. Check standards, and test with Apache. Test binary
-# upload with Apache.
-# Add label support for CHECKBOX and RADIO.
-# Better docs.
-# Deal with character sets properly. Not sure what the issues are here.
-# I don't *think* any encoding of control names, filenames or data is
-# necessary -- HTML spec. doesn't require it, and Mozilla Firebird 0.6
-# doesn't seem to do it.
-# Add charset parameter to Content-type headers? How to find value??
-# Get rid of MapBase, AList and MimeWriter.
-# I'm not going to fix this unless somebody tells me what real servers
-# that want this encoding actually expect: If enctype is
-# application/x-www-form-urlencoded and there's a FILE control present.
-# Strictly, it should be 'name=data' (see HTML 4.01 spec., section
-# 17.13.2), but I send "name=" ATM. What about multiple file upload??
-# Get rid of the two type-switches (for kind and click*).
-# Remove single-selection code: can be special case of multi-selection,
-# with a few variations, I think.
-# Factor out multiple-selection list code? May not be easy. Maybe like
-# this:
-
-# ListControl
-# ^
-# | MultipleListControlMixin
-# | ^
-# SelectControl /
-# ^ /
-# \ /
-# MultiSelectControl
-
-
-# Plan
-# ----
-# Maybe a 0.2.x, cleaned up a bit and with id support for list items?
-# Not sure it's worth it, really.
-# Remove toggle methods.
-# Replace by_label with choice between value / id / label /
-# element contents (see discussion with Gisle about labels on
-# libwww-perl list).
-# ...what else?
-# Work on DOMForm.
-# XForms? Don't know if there's a need here.
-
-
-try: True
-except NameError:
- True = 1
- False = 0
-
-try: bool
-except NameError:
- def bool(expr):
- if expr: return True
- else: return False
-
-import sys, urllib, urllib2, types, string, mimetools, copy
-from urlparse import urljoin
-from cStringIO import StringIO
-try:
- import UnicodeType
-except ImportError:
- UNICODE = False
-else:
- UNICODE = True
-
-VERSION = "0.1.13"
-
-CHUNK = 1024 # size of chunks fed to parser, in bytes
-
-# This version of urlencode is from my Python 1.5.2 back-port of the
-# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
-# of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
-def urlencode(query,doseq=False,):
- """Encode a sequence of two-element tuples or dictionary into a URL query \
-string.
-
- If any values in the query arg are sequences and doseq is true, each
- sequence element is converted to a separate parameter.
-
- If the query arg is a sequence of two-element tuples, the order of the
- parameters in the output will match the order of parameters in the
- input.
- """
-
- if hasattr(query,"items"):
- # mapping objects
- query = query.items()
- else:
- # it's a bother at times that strings and string-like objects are
- # sequences...
- try:
- # non-sequence items should not work with len()
- x = len(query)
- # non-empty strings will fail this
- if len(query) and type(query[0]) != types.TupleType:
- raise TypeError()
- # zero-length sequences of all types will get here and succeed,
- # but that's a minor nit - since the original implementation
- # allowed empty dicts that type of behavior probably should be
- # preserved for consistency
- except TypeError:
- ty,va,tb = sys.exc_info()
- raise TypeError("not a valid non-string sequence or mapping "
- "object", tb)
-
- l = []
- if not doseq:
- # preserve old behavior
- for k, v in query:
- k = urllib.quote_plus(str(k))
- v = urllib.quote_plus(str(v))
- l.append(k + '=' + v)
- else:
- for k, v in query:
- k = urllib.quote_plus(str(k))
- if type(v) == types.StringType:
- v = urllib.quote_plus(v)
- l.append(k + '=' + v)
- elif UNICODE and type(v) == types.UnicodeType:
- # is there a reasonable way to convert to ASCII?
- # encode generates a string, but "replace" or "ignore"
- # lose information and "strict" can raise UnicodeError
- v = urllib.quote_plus(v.encode("ASCII","replace"))
- l.append(k + '=' + v)
- else:
- try:
- # is this a sufficient test for sequence-ness?
- x = len(v)
- except TypeError:
- # not a sequence
- v = urllib.quote_plus(str(v))
- l.append(k + '=' + v)
- else:
- # loop over the sequence
- for elt in v:
- l.append(k + '=' + urllib.quote_plus(str(elt)))
- return string.join(l, '&')
-
-def startswith(string, initial):
- if len(initial) > len(string): return False
- return string[:len(initial)] == initial
-
-def issequence(x):
- try:
- x[0]
- except (TypeError, KeyError):
- return False
- except IndexError:
- pass
- return True
-
-def isstringlike(x):
- try: x+""
- except: return False
- else: return True
-
-
-# XXX don't really want to drag this along (MapBase, AList, MimeWriter)
-
-class MapBase:
- """Mapping designed to be easily derived from.
-
- Subclass it and override __init__, __setitem__, __getitem__, __delitem__
- and keys. Nothing else should need to be overridden, unlike UserDict.
- This significantly simplifies dictionary-like classes.
-
- Also different from UserDict in that it has a redonly flag, and can be
- updated (and initialised) with a sequence of pairs (key, value).
-
- """
- def __init__(self, init=None):
- self._data = {}
- self.readonly = False
- if init is not None: self.update(init)
-
- def __getitem__(self, key):
- return self._data[key]
-
- def __setitem__(self, key, item):
- if not self.readonly:
- self._data[key] = item
- else:
- raise TypeError("object doesn't support item assignment")
-
- def __delitem__(self, key):
- if not self.readonly:
- del self._data[key]
- else:
- raise TypeError("object doesn't support item deletion")
-
- def keys(self):
- return self._data.keys()
-
- # now the internal workings, there should be no need to override these:
-
- def clear(self):
- for k in self.keys():
- del self[k]
-
- def __repr__(self):
- rep = []
- for k, v in self.items():
- rep.append("%s: %s" % (repr(k), repr(v)))
- return self.__class__.__name__+"{"+(string.join(rep, ", "))+"}"
-
- def copy(self):
- return copy.copy(self)
-
- def __cmp__(self, dict):
- # note: return value is *not* boolean
- for k, v in self.items():
- if not (dict.has_key(k) and dict[k] == v):
- return 1 # different
- return 0 # the same
-
- def __len__(self):
- return len(self.keys())
-
- def values(self):
- r = []
- for k in self.keys():
- r.append(self[k])
- return r
-
- def items(self):
- keys = self.keys()
- vals = self.values()
- r = []
- for i in len(self):
- r.append((keys[i], vals[i]))
- return r
-
- def has_key(self, key):
- return key in self.keys()
-
- def update(self, map):
- if issequence(map) and not isstringlike(map):
- items = map
- else:
- items = map.items()
- for tup in items:
- if not isinstance(tup, TupleType):
- raise TypeError(
- "MapBase.update requires a map or a sequence of pairs")
- k, v = tup
- self[k] = v
-
- def get(self, key, failobj=None):
- if key in self.keys():
- return self[key]
- else:
- return failobj
-
- def setdefault(self, key, failobj=None):
- if not self.has_key(key):
- self[key] = failobj
- return self[key]
-
-
-class AList(MapBase):
- """Read-only ordered mapping."""
- def __init__(self, seq=[]):
- self.readonly = True
- self._inverted = False
- self._data = list(seq[:])
- self._keys = []
- self._values = []
- for key, value in seq:
- self._keys.append(key)
- self._values.append(value)
-
- def set_inverted(self, inverted):
- if (inverted and not self._inverted) or (
- not inverted and self._inverted):
- self._keys, self._values = self._values, self._keys
- if inverted: self._inverted = True
- else: self._inverted = False
-
- def __getitem__(self, key):
- try:
- i = self._keys.index(key)
- except ValueError:
- raise KeyError(key)
- return self._values[i]
-
- def __delitem__(self, key):
- try:
- i = self._keys.index[key]
- except ValueError:
- raise KeyError(key)
- del self._values[i]
-
- def keys(self): return list(self._keys[:])
- def values(self): return list(self._values[:])
- def items(self):
- data = self._data[:]
- if not self._inverted:
- return data
- else:
- newdata = []
- for k, v in data:
- newdata.append((v, k))
- return newdata
-
-
-# This cut-n-pasted MimeWriter from standard library is here so can add
-# to HTTP headers rather than message body when appropriate. It also uses
-# \r\n in place of \n. This is nasty.
-class MimeWriter:
-
- """Generic MIME writer.
-
- Methods:
-
- __init__()
- addheader()
- flushheaders()
- startbody()
- startmultipartbody()
- nextpart()
- lastpart()
-
- A MIME writer is much more primitive than a MIME parser. It
- doesn't seek around on the output file, and it doesn't use large
- amounts of buffer space, so you have to write the parts in the
- order they should occur on the output file. It does buffer the
- headers you add, allowing you to rearrange their order.
-
- General usage is:
-
- f = <open the output file>
- w = MimeWriter(f)
- ...call w.addheader(key, value) 0 or more times...
-
- followed by either:
-
- f = w.startbody(content_type)
- ...call f.write(data) for body data...
-
- or:
-
- w.startmultipartbody(subtype)
- for each part:
- subwriter = w.nextpart()
- ...use the subwriter's methods to create the subpart...
- w.lastpart()
-
- The subwriter is another MimeWriter instance, and should be
- treated in the same way as the toplevel MimeWriter. This way,
- writing recursive body parts is easy.
-
- Warning: don't forget to call lastpart()!
-
- XXX There should be more state so calls made in the wrong order
- are detected.
-
- Some special cases:
-
- - startbody() just returns the file passed to the constructor;
- but don't use this knowledge, as it may be changed.
-
- - startmultipartbody() actually returns a file as well;
- this can be used to write the initial 'if you can read this your
- mailer is not MIME-aware' message.
-
- - If you call flushheaders(), the headers accumulated so far are
- written out (and forgotten); this is useful if you don't need a
- body part at all, e.g. for a subpart of type message/rfc822
- that's (mis)used to store some header-like information.
-
- - Passing a keyword argument 'prefix=<flag>' to addheader(),
- start*body() affects where the header is inserted; 0 means
- append at the end, 1 means insert at the start; default is
- append for addheader(), but insert for start*body(), which use
- it to determine where the Content-type header goes.
-
- """
-
- def __init__(self, fp, http_hdrs=None):
- self._http_hdrs = http_hdrs
- self._fp = fp
- self._headers = []
- self._boundary = []
- self._first_part = True
-
- def addheader(self, key, value, prefix=0,
- add_to_http_hdrs=0):
- """
- prefix is ignored if add_to_http_hdrs is true.
- """
- lines = string.split(value, "\r\n")
- while lines and not lines[-1]: del lines[-1]
- while lines and not lines[0]: del lines[0]
- if add_to_http_hdrs:
- value = string.join(lines, "")
- self._http_hdrs.append((key, value))
- else:
- for i in range(1, len(lines)):
- lines[i] = " " + string.strip(lines[i])
- value = string.join(lines, "\r\n") + "\r\n"
- line = key + ": " + value
- if prefix:
- self._headers.insert(0, line)
- else:
- self._headers.append(line)
-
- def flushheaders(self):
- self._fp.writelines(self._headers)
- self._headers = []
-
- def startbody(self, ctype=None, plist=[], prefix=1,
- add_to_http_hdrs=0, content_type=1):
- """
- prefix is ignored if add_to_http_hdrs is true.
- """
- if content_type and ctype:
- for name, value in plist:
- ctype = ctype + ';\r\n %s=\"%s\"' % (name, value)
- self.addheader("Content-type", ctype, prefix=prefix,
- add_to_http_hdrs=add_to_http_hdrs)
- self.flushheaders()
- if not add_to_http_hdrs: self._fp.write("\r\n")
- self._first_part = True
- return self._fp
-
- def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
- add_to_http_hdrs=0, content_type=1):
- boundary = boundary or mimetools.choose_boundary()
- self._boundary.append(boundary)
- return self.startbody("multipart/" + subtype,
- [("boundary", boundary)] + plist,
- prefix=prefix,
- add_to_http_hdrs=add_to_http_hdrs,
- content_type=content_type)
-
- def nextpart(self):
- boundary = self._boundary[-1]
- if self._first_part:
- self._first_part = False
- else:
- self._fp.write("\r\n")
- self._fp.write("--" + boundary + "\r\n")
- return self.__class__(self._fp)
-
- def lastpart(self):
- if self._first_part:
- self.nextpart()
- boundary = self._boundary.pop()
- self._fp.write("\r\n--" + boundary + "--\r\n")
-
-
-class ControlNotFoundError(ValueError): pass
-class ItemNotFoundError(ValueError): pass
-class ItemCountError(ValueError): pass
-
-class ParseError(Exception): pass
-
-
-def ParseResponse(response, select_default=False, ignore_errors=False):
- """Parse HTTP response and return a list of HTMLForm instances.
-
- The return value of urllib2.urlopen can be conveniently passed to this
- function as the response parameter.
-
- ClientForm.ParseError is raised on parse errors.
-
- response: file-like object (supporting read() method) with a method
- geturl(), returning the base URI of the HTTP response
- select_default: for multiple-selection SELECT controls and RADIO controls,
- pick the first item as the default if none are selected in the HTML
- ignore_errors: don't raise ParseError, and carry on regardless if the
- parser gets confused
-
- Pass a true value for select_default if you want the behaviour specified by
- RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
- RADIO or multiple-selection SELECT control if none were selected in the
- HTML. Most browsers (including Microsoft Internet Explorer (IE) and
- Netscape Navigator) instead leave all items unselected in these cases. The
- W3C HTML 4.0 standard leaves this behaviour undefined in the case of
- multiple-selection SELECT controls, but insists that at least one RADIO
- button should be checked at all times, in contradiction to browser
- behaviour.
-
- Precisely what ignore_errors does isn't well-defined yet, so don't rely too
- much on the current behaviour -- if you want robustness, you're better off
- fixing the HTML before passing it to this function.
-
- """
- return ParseFile(response, response.geturl(), select_default)
-
-def ParseFile(file, base_uri, select_default=False, ignore_errors=False):
- """Parse HTML and return a list of HTMLForm instances.
-
- ClientForm.ParseError is raised on parse errors.
-
- file: file-like object (supporting read() method) containing HTML with zero
- or more forms to be parsed
- base_uri: the base URI of the document
-
- For the other arguments and further details, see ParseResponse.__doc__.
-
- """
- fp = _FORM_PARSER_CLASS(ignore_errors)
- while 1:
- data = file.read(CHUNK)
- fp.feed(data)
- if len(data) != CHUNK: break
- forms = []
- for (name, action, method, enctype), attrs, controls in fp.forms:
- if action is None:
- action = base_uri
- else:
- action = urljoin(base_uri, action)
- form = HTMLForm(action, method, enctype, name, attrs)
- for type, name, attr in controls:
- form.new_control(type, name, attr, select_default=select_default)
- forms.append(form)
- for form in forms:
- form.fixup()
- return forms
-
-
-class _AbstractFormParser:
- """forms attribute contains HTMLForm instances on completion."""
- # pinched (and modified) from Moshe Zadka
- def __init__(self, ignore_errors, entitydefs=None):
- if entitydefs is not None:
- self.entitydefs = entitydefs
- self._ignore_errors = ignore_errors
- self.forms = []
- self._current_form = None
- self._select = None
- self._optgroup = None
- self._option = None
- self._textarea = None
-
- def error(self, error):
- if not self._ignore_errors: raise error
-
- def start_form(self, attrs):
- if self._current_form is not None:
- self.error(ParseError("nested FORMs"))
- name = None
- action = None
- enctype = "application/x-www-form-urlencoded"
- method = "GET"
- d = {}
- for key, value in attrs:
- if key == "name":
- name = value
- elif key == "action":
- action = value
- elif key == "method":
- method = string.upper(value)
- elif key == "enctype":
- enctype = string.lower(value)
- else:
- d[key] = value
- controls = []
- self._current_form = (name, action, method, enctype), d, controls
-
- def end_form(self):
- if self._current_form is None:
- self.error(ParseError("end of FORM before start"))
- self.forms.append(self._current_form)
- self._current_form = None
-
- def start_select(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of SELECT before start of FORM"))
- if self._select is not None:
- self.error(ParseError("nested SELECTs"))
- if self._textarea is not None:
- self.error(ParseError("SELECT inside TEXTAREA"))
- d = {}
- for key, val in attrs:
- d[key] = val
-
- self._select = d
-
- self._append_select_control({"__select": d})
-
- def end_select(self):
- if self._current_form is None:
- self.error(ParseError("end of SELECT before start of FORM"))
- if self._select is None:
- self.error(ParseError("end of SELECT before start"))
-
- if self._option is not None:
- self._end_option()
-
- self._select = None
-
- def start_optgroup(self, attrs):
- if self._select is None:
- self.error(ParseError("OPTGROUP outside of SELECT"))
- d = {}
- for key, val in attrs:
- d[key] = val
-
- self._optgroup = d
-
- def end_optgroup(self):
- if self._optgroup is None:
- self.error(ParseError("end of OPTGROUP before start"))
- self._optgroup = None
-
- def _start_option(self, attrs):
- if self._select is None:
- self.error(ParseError("OPTION outside of SELECT"))
- if self._option is not None:
- self._end_option()
-
- d = {}
- for key, val in attrs:
- d[key] = val
-
- self._option = {}
- self._option.update(d)
- if (self._optgroup and self._optgroup.has_key("disabled") and
- not self._option.has_key("disabled")):
- self._option["disabled"] = None
-
- def _end_option(self):
- if self._option is None:
- self.error(ParseError("end of OPTION before start"))
-
- contents = string.strip(self._option.get("contents", ""))
- #contents = string.strip(self._option["contents"])
- self._option["contents"] = contents
- if not self._option.has_key("value"):
- self._option["value"] = contents
- if not self._option.has_key("label"):
- self._option["label"] = contents
- # stuff dict of SELECT HTML attrs into a special private key
- # (gets deleted again later)
- self._option["__select"] = self._select
- self._append_select_control(self._option)
- self._option = None
-
- def _append_select_control(self, attrs):
- controls = self._current_form[2]
- name = self._select.get("name")
- controls.append(("select", name, attrs))
-
-## def do_option(self, attrs):
-## if self._select is None:
-## self.error(ParseError("OPTION outside of SELECT"))
-## d = {}
-## for key, val in attrs:
-## d[key] = val
-
-## self._option = {}
-## self._option.update(d)
-## if (self._optgroup and self._optgroup.has_key("disabled") and
-## not self._option.has_key("disabled")):
-## self._option["disabled"] = None
-
- def start_textarea(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of TEXTAREA before start of FORM"))
- if self._textarea is not None:
- self.error(ParseError("nested TEXTAREAs"))
- if self._select is not None:
- self.error(ParseError("TEXTAREA inside SELECT"))
- d = {}
- for key, val in attrs:
- d[key] = val
-
- self._textarea = d
-
- def end_textarea(self):
- if self._current_form is None:
- self.error(ParseError("end of TEXTAREA before start of FORM"))
- if self._textarea is None:
- self.error(ParseError("end of TEXTAREA before start"))
- controls = self._current_form[2]
- name = self._textarea.get("name")
- controls.append(("textarea", name, self._textarea))
- self._textarea = None
-
- def handle_data(self, data):
- if self._option is not None:
- # self._option is a dictionary of the OPTION element's HTML
- # attributes, but it has two special keys, one of which is the
- # special "contents" key contains text between OPTION tags (the
- # other is the "__select" key: see the end_option method)
- map = self._option
- key = "contents"
- elif self._textarea is not None:
- map = self._textarea
- key = "value"
- else:
- return
-
- if not map.has_key(key):
- map[key] = data
- else:
- map[key] = map[key] + data
-
-## def handle_data(self, data):
-## if self._option is not None:
-## contents = string.strip(data)
-## controls = self._current_form[2]
-## if not self._option.has_key("value"):
-## self._option["value"] = contents
-## if not self._option.has_key("label"):
-## self._option["label"] = contents
-## # self._option is a dictionary of the OPTION element's HTML
-## # attributes, but it has two special keys:
-## # 1. special "contents" key contains text between OPTION tags
-## self._option["contents"] = contents
-## # 2. stuff dict of SELECT HTML attrs into a special private key
-## # (gets deleted again later)
-## self._option["__select"] = self._select
-## self._append_select_control(self._option)
-## self._option = None
-## elif self._textarea is not None:
-## #self._textarea["value"] = data
-## if self._textarea.get("value") is None:
-## self._textarea["value"] = data
-## else:
-## self._textarea["value"] = self._textarea["value"] + data
-
- def do_button(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of BUTTON before start of FORM"))
- d = {}
- d["type"] = "submit" # default
- for key, val in attrs:
- d[key] = val
- controls = self._current_form[2]
-
- type = d["type"]
- name = d.get("name")
- # we don't want to lose information, so use a type string that
- # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
- # eg. type for BUTTON/RESET is "resetbutton"
- # (type for INPUT/RESET is "reset")
- type = type+"button"
- controls.append((type, name, d))
-
- def do_input(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of INPUT before start of FORM"))
- d = {}
- d["type"] = "text" # default
- for key, val in attrs:
- d[key] = val
- controls = self._current_form[2]
-
- type = d["type"]
- name = d.get("name")
- controls.append((type, name, d))
-
- def do_isindex(self, attrs):
- if self._current_form is None:
- self.error(ParseError("start of ISINDEX before start of FORM"))
- d = {}
- for key, val in attrs:
- d[key] = val
- controls = self._current_form[2]
-
- # isindex doesn't have type or name HTML attributes
- controls.append(("isindex", None, d))
-
-# use HTMLParser if we have it (it does XHTML), htmllib otherwise
-try:
- import HTMLParser
-except ImportError:
- import htmllib, formatter
- class _FormParser(_AbstractFormParser, htmllib.HTMLParser):
- # This is still here for compatibility with Python 1.5.2.
- # It doesn't do the right thing with XHTML.
- def __init__(self, ignore_errors, entitydefs=None):
- htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
- _AbstractFormParser.__init__(self, ignore_errors, entitydefs)
-
- def do_option(self, attrs):
- _AbstractFormParser._start_option(self, attrs)
-
- _FORM_PARSER_CLASS = _FormParser
-else:
- class _XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
- # thanks to Michael Howitz for this!
- def __init__(self, ignore_errors, entitydefs=None):
- HTMLParser.HTMLParser.__init__(self)
- _AbstractFormParser.__init__(self, ignore_errors, entitydefs)
-
- def start_option(self, attrs):
- _AbstractFormParser._start_option(self, attrs)
-
- def end_option(self):
- _AbstractFormParser._end_option(self)
-
- def handle_starttag(self, tag, attrs):
- try:
- method = getattr(self, 'start_' + tag)
- except AttributeError:
- try:
- method = getattr(self, 'do_' + tag)
- except AttributeError:
- pass # unknown tag
- else:
- method(attrs)
- else:
- method(attrs)
-
- def handle_endtag(self, tag):
- try:
- method = getattr(self, 'end_' + tag)
- except AttributeError:
- pass # unknown tag
- else:
- method()
-
- # handle_charref, handle_entityref and default entitydefs are taken
- # from sgmllib
- def handle_charref(self, name):
- try:
- n = int(name)
- except ValueError:
- self.unknown_charref(name)
- return
- if not 0 <= n <= 255:
- self.unknown_charref(name)
- return
- self.handle_data(chr(n))
-
- # Definition of entities -- derived classes may override
- entitydefs = \
- {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
-
- def handle_entityref(self, name):
- table = self.entitydefs
- if name in table:
- self.handle_data(table[name])
- else:
- self.unknown_entityref(name)
- return
-
- # These methods would have passed through the ref intact if I'd thought
- # of it earlier, but since the old parser silently swallows unknown
- # refs, so does this new parser.
- def unknown_entityref(self, ref): pass
- def unknown_charref(self, ref): pass
-
- _FORM_PARSER_CLASS = _XHTMLCompatibleFormParser
-
-
-class Control:
- """An HTML form control.
-
- An HTMLForm contains a sequence of Controls. HTMLForm delegates lots of
- things to Control objects, and most of Control's methods are, in effect,
- documented by the HTMLForm docstrings.
-
- The Controls in an HTMLForm can be got at via the HTMLForm.find_control
- method or the HTMLForm.controls attribute.
-
- Control instances are usually constructed using the ParseFile /
- ParseResponse functions, so you can probably ignore the rest of this
- paragraph. A Control is only properly initialised after the fixup method
- has been called. In fact, this is only strictly necessary for ListControl
- instances. This is necessary because ListControls are built up from
- ListControls each containing only a single item, and their initial value(s)
- can only be known after the sequence is complete.
-
- The types and values that are acceptable for assignment to the value
- attribute are defined by subclasses.
-
- If the disabled attribute is true, this represents the state typically
- represented by browsers by `greying out' a control. If the disabled
- attribute is true, the Control will raise AttributeError if an attempt is
- made to change its value. In addition, the control will not be considered
- `successful' as defined by the W3C HTML 4 standard -- ie. it will
- contribute no data to the return value of the HTMLForm.click* methods. To
- enable a control, set the disabled attribute to a false value.
-
- If the readonly attribute is true, the Control will raise AttributeError if
- an attempt is made to change its value. To make a control writable, set
- the readonly attribute to a false value.
-
- All controls have the disabled and readonly attributes, not only those that
- may have the HTML attributes of the same names.
-
- On assignment to the value attribute, the following exceptions are raised:
- TypeError, AttributeError (if the value attribute should not be assigned
- to, because the control is disabled, for example) and ValueError.
-
- If the name or value attributes are None, or the value is an empty list, or
- if the control is disabled, the control is not successful.
-
- Public attributes:
-
- type: string describing type of control (see the keys of the
- HTMLForm.type2class dictionary for the allowable values) (readonly)
- name: name of control (readonly)
- value: current value of control (subclasses may allow a single value, a
- sequence of values, or either)
- disabled: disabled state
- readonly: readonly state
- id: value of id HTML attribute
-
- """
- def __init__(self, type, name, attrs):
- """
- type: string describing type of control (see the keys of the
- HTMLForm.type2class dictionary for the allowable values)
- name: control name
- attrs: HTML attributes of control's HTML element
-
- """
- raise NotImplementedError()
-
- def add_to_form(self, form):
- form.controls.append(self)
-
- def fixup(self):
- pass
-
- def __getattr__(self, name): raise NotImplementedError()
- def __setattr__(self, name, value): raise NotImplementedError()
-
- def pairs(self):
- """Return list of (key, value) pairs suitable for passing to urlencode.
- """
- raise NotImplementedError()
-
- def _write_mime_data(self, mw):
- """Write data for this control to a MimeWriter."""
- # called by HTMLForm
- for name, value in self.pairs():
- mw2 = mw.nextpart()
- mw2.addheader("Content-disposition",
- 'form-data; name="%s"' % name, 1)
- f = mw2.startbody(prefix=0)
- f.write(value)
-
- def __str__(self):
- raise NotImplementedError()
-
-
-#---------------------------------------------------
-class ScalarControl(Control):
- """Control whose value is not restricted to one of a prescribed set.
-
- Some ScalarControls don't accept any value attribute. Otherwise, takes a
- single value, which must be string-like.
-
- Additional read-only public attribute:
-
- attrs: dictionary mapping the names of original HTML attributes of the
- control to their values
-
- """
- def __init__(self, type, name, attrs):
- self.__dict__["type"] = string.lower(type)
- self.__dict__["name"] = name
- self._value = attrs.get("value")
- self.disabled = attrs.has_key("disabled")
- self.readonly = attrs.has_key("readonly")
- self.id = attrs.get("id")
-
- self.attrs = attrs.copy()
-
- self._clicked = False
-
- def __getattr__(self, name):
- if name == "value":
- return self.__dict__["_value"]
- else:
- raise AttributeError("%s instance has no attribute '%s'" %
- (self.__class__.__name__, name))
-
- def __setattr__(self, name, value):
- if name == "value":
- if not isstringlike(value):
- raise TypeError("must assign a string")
- elif self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- elif self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- self.__dict__["_value"] = value
- elif name in ("name", "type"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def pairs(self):
- name = self.name
- value = self.value
- if name is None or value is None or self.disabled:
- return []
- return [(name, value)]
-
- def __str__(self):
- name = self.name
- value = self.value
- if name is None: name = "<None>"
- if value is None: value = "<None>"
-
- infos = []
- if self.disabled: infos.append("disabled")
- if self.readonly: infos.append("readonly")
- info = string.join(infos, ", ")
- if info: info = " (%s)" % info
-
- return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
-
-
-#---------------------------------------------------
-class TextControl(ScalarControl):
- """Textual input control.
-
- Covers:
-
- INPUT/TEXT
- INPUT/PASSWORD
- INPUT/FILE
- INPUT/HIDDEN
- TEXTAREA
-
- """
- def __init__(self, type, name, attrs):
- ScalarControl.__init__(self, type, name, attrs)
- if self.type == "hidden": self.readonly = True
- if self._value is None:
- self._value = ""
-
-
-#---------------------------------------------------
-class FileControl(ScalarControl):
- """File upload with INPUT TYPE=FILE.
-
- The value attribute of a FileControl is always None.
-
- Additional public method: add_file
-
- """
- def __init__(self, type, name, attrs):
- ScalarControl.__init__(self, type, name, attrs)
- self._value = None
- self._upload_data = []
-
- def __setattr__(self, name, value):
- if name in ("value", "name", "type"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def add_file(self, file_object, content_type=None, filename=None):
- if not hasattr(file_object, "read"):
- raise TypeError("file-like object must have read method")
- if content_type is not None and not isstringlike(content_type):
- raise TypeError("content type must be None or string-like")
- if filename is not None and not isstringlike(filename):
- raise TypeError("filename must be None or string-like")
- if content_type is None:
- content_type = "application/octet-stream"
- self._upload_data.append((file_object, content_type, filename))
-
- def pairs(self):
- # XXX should it be successful even if unnamed?
- if self.name is None or self.disabled:
- return []
- return [(self.name, "")]
-
- def _write_mime_data(self, mw):
- # called by HTMLForm
- if len(self._upload_data) == 1:
- # single file
- file_object, content_type, filename = self._upload_data[0]
- mw2 = mw.nextpart()
- fn_part = filename and ('; filename="%s"' % filename) or ''
- disp = 'form-data; name="%s"%s' % (self.name, fn_part)
- mw2.addheader("Content-disposition", disp, prefix=1)
- fh = mw2.startbody(content_type, prefix=0)
- fh.write(file_object.read())
- elif len(self._upload_data) != 0:
- # multiple files
- mw2 = mw.nextpart()
- disp = 'form-data; name="%s"' % self.name
- mw2.addheader("Content-disposition", disp, prefix=1)
- fh = mw2.startmultipartbody("mixed", prefix=0)
- for file_object, content_type, filename in self._upload_data:
- mw3 = mw2.nextpart()
- fn_part = filename and ('; filename="%s"' % filename) or ''
- disp = 'file%s' % fn_part
- mw3.addheader("Content-disposition", disp, prefix=1)
- fh2 = mw3.startbody(content_type, prefix=0)
- fh2.write(file_object.read())
- mw2.lastpart()
-
- def __str__(self):
- name = self.name
- if name is None: name = "<None>"
-
- if not self._upload_data:
- value = "<No files added>"
- else:
- value = []
- for file, ctype, filename in self._upload_data:
- if filename is None:
- value.append("<Unnamed file>")
- else:
- value.append(filename)
- value = string.join(value, ", ")
-
- info = []
- if self.disabled: info.append("disabled")
- if self.readonly: info.append("readonly")
- info = string.join(info, ", ")
- if info: info = " (%s)" % info
-
- return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
-
-
-#---------------------------------------------------
-class IsindexControl(ScalarControl):
- """ISINDEX control.
-
- ISINDEX is the odd-one-out of HTML form controls. In fact, it isn't really
- part of regular HTML forms at all, and predates it. You're only allowed
- one ISINDEX per HTML document. ISINDEX and regular form submission are
- mutually exclusive -- either submit a form, or the ISINDEX.
-
- Having said this, since ISINDEX controls may appear in forms (which is
- probably bad HTML), ParseFile / ParseResponse will include them in the
- HTMLForm instances it returns. You can set the ISINDEX's value, as with
- any other control (but note that ISINDEX controls have no name, so you'll
- need to use the type argument of set_value!). When you submit the form,
- the ISINDEX will not be successful (ie., no data will get returned to the
- server as a result of its presence), unless you click on the ISINDEX
- control, in which case the ISINDEX gets submitted instead of the form:
-
- form.set_value("my isindex value", type="isindex")
- urllib2.urlopen(form.click(type="isindex"))
-
- ISINDEX elements outside of FORMs are ignored. If you want to submit one
- by hand, do it like so:
-
- url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
- result = urllib2.urlopen(url)
-
- """
- def __init__(self, type, name, attrs):
- ScalarControl.__init__(self, type, name, attrs)
- if self._value is None:
- self._value = ""
-
- def pairs(self):
- return []
-
- def _click(self, form, coord, return_type):
- # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
- # want "bar+baz".
- # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
- # deprecated in 4.01, but it should still say how to submit it).
- # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
- url = urljoin(form.action, "?"+urllib.quote_plus(self.value))
- req_data = url, None, []
-
- if return_type == "pairs":
- return []
- elif return_type == "request_data":
- return req_data
- else:
- return urllib2.Request(url)
-
- def __str__(self):
- value = self.value
- if value is None: value = "<None>"
-
- infos = []
- if self.disabled: infos.append("disabled")
- if self.readonly: infos.append("readonly")
- info = string.join(infos, ", ")
- if info: info = " (%s)" % info
-
- return "<%s(%s)%s>" % (self.__class__.__name__, value, info)
-
-
-#---------------------------------------------------
-class IgnoreControl(ScalarControl):
- """Control that we're not interested in.
-
- Covers:
-
- INPUT/RESET
- BUTTON/RESET
- INPUT/BUTTON
- BUTTON/BUTTON
-
- These controls are always unsuccessful, in the terminology of HTML 4 (ie.
- they never require any information to be returned to the server).
-
- BUTTON/BUTTON is used to generate events for script embedded in HTML.
-
- The value attribute of IgnoreControl is always None.
-
- """
- def __init__(self, type, name, attrs):
- ScalarControl.__init__(self, type, name, attrs)
- self._value = None
-
- def __setattr__(self, name, value):
- if name == "value":
- raise AttributeError(
- "control '%s' is ignored, hence read-only" % self.name)
- elif name in ("name", "type"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
-
-#---------------------------------------------------
-class ListControl(Control):
- """Control representing a sequence of items.
-
- The value attribute of a ListControl represents the selected list items in
- the control.
-
- ListControl implements both list controls that take a single value and
- those that take multiple values.
-
- ListControls accept sequence values only. Some controls only accept
- sequences of length 0 or 1 (RADIO, and single-selection SELECT).
- In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
- and multiple-selection SELECTs (those having the "multiple" HTML attribute)
- accept sequences of any length.
-
- Note the following mistake:
-
- control.value = some_value
- assert control.value == some_value # not necessarily true
-
- The reason for this is that the value attribute always gives the list items
- in the order they were listed in the HTML.
-
- ListControl items can also be referred to by their labels instead of names.
- Use the by_label argument, and the set_value_by_label, get_value_by_label
- methods.
-
- XXX RadioControl and CheckboxControl don't implement by_label yet.
-
- Note that, rather confusingly, though SELECT controls are represented in
- HTML by SELECT elements (which contain OPTION elements, representing
- individual list items), CHECKBOXes and RADIOs are not represented by *any*
- element. Instead, those controls are represented by a collection of INPUT
- elements. For example, this is a SELECT control, named "control1":
-
- <select name="control1">
- <option>foo</option>
- <option value="1">bar</option>
- </select>
-
- and this is a CHECKBOX control, named "control2":
-
- <input type="checkbox" name="control2" value="foo" id="cbe1">
- <input type="checkbox" name="control2" value="bar" id="cbe2">
-
- The id attribute of a CHECKBOX or RADIO ListControl is always that of its
- first element (for example, "cbe1" above).
-
-
- Additional read-only public attribute: multiple.
-
-
- ListControls are built up by the parser from their component items by
- creating one ListControl per item, consolidating them into a single master
- ListControl held by the HTMLForm:
-
- -User calls form.new_control(...)
- -Form creates Control, and calls control.add_to_form(self).
- -Control looks for a Control with the same name and type in the form, and
- if it finds one, merges itself with that control by calling
- control.merge_control(self). The first Control added to the form, of a
- particular name and type, is the only one that survives in the form.
- -Form calls control.fixup for all its controls. ListControls in the form
- know they can now safely pick their default values.
-
- To create a ListControl without an HTMLForm, use:
-
- control.merge_control(new_control)
-
- """
- def __init__(self, type, name, attrs={}, select_default=False,
- called_as_base_class=False):
- """
- select_default: for RADIO and multiple-selection SELECT controls, pick
- the first item as the default if no 'selected' HTML attribute is
- present
-
- """
- if not called_as_base_class:
- raise NotImplementedError()
-
- self.__dict__["type"] = string.lower(type)
- self.__dict__["name"] = name
- self._value = attrs.get("value")
- self.disabled = False
- self.readonly = False
- self.id = attrs.get("id")
-
- self._attrs = attrs.copy()
- # As Controls are merged in with .merge_control(), self._attrs will
- # refer to each Control in turn -- always the most recently merged
- # control. Each merged-in Control instance corresponds to a single
- # list item: see ListControl.__doc__.
- if attrs:
- self._attrs_list = [self._attrs] # extended by .merge_control()
- self._disabled_list = [self._attrs.has_key("disabled")] # ditto
- else:
- self._attrs_list = [] # extended by .merge_control()
- self._disabled_list = [] # ditto
-
- self._select_default = select_default
- self._clicked = False
- # Some list controls can have their default set only after all items
- # are known. If so, self._value_is_set is false, and the self.fixup
- # method, called after all items have been added, sets the default.
- self._value_is_set = False
-
- def _value_from_label(self, label):
- raise NotImplementedError("control '%s' does not yet support "
- "by_label" % self.name)
-
- def toggle(self, name, by_label=False):
- return self._set_selected_state(name, 2, by_label)
- def set(self, selected, name, by_label=False):
- action = int(bool(selected))
- return self._set_selected_state(name, action, by_label)
-
- def _set_selected_state(self, name, action, by_label):
- """
- name: item name
- action:
- 0: clear
- 1: set
- 2: toggle
-
- """
- if not isstringlike(name):
- raise TypeError("item name must be string-like")
- if self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- if by_label:
- name = self._value_from_label(name)
- try:
- i = self._menu.index(name)
- except ValueError:
- raise ItemNotFoundError("no item named '%s'" % name)
-
- if self.multiple:
- if action == 2:
- action = not self._selected[i]
- if action and self._disabled_list[i]:
- raise AttributeError("item '%s' is disabled" % name)
- self._selected[i] = bool(action)
- else:
- if action == 2:
- if self._selected == name:
- action = 0
- else:
- action = 1
- if action == 0 and self._selected == name:
- self._selected = None
- elif action == 1:
- if self._disabled_list[i]:
- raise AttributeError("item '%s' is disabled" % name)
- self._selected = name
-
- def toggle_single(self, by_label=False):
- self._set_single_selected_state(2, by_label)
- def set_single(self, selected, by_label=False):
- action = int(bool(selected))
- self._set_single_selected_state(action, by_label)
-
- def _set_single_selected_state(self, action, by_label):
- if len(self._menu) != 1:
- raise ItemCountError("'%s' is not a single-item control" %
- self.name)
-
- name = self._menu[0]
- if by_label:
- name = self._value_from_label(name)
- self._set_selected_state(name, action, by_label)
-
- def get_item_disabled(self, name, by_label=False):
- """Get disabled state of named list item in a ListControl."""
- if by_label:
- name = self._value_from_label(name)
- try:
- i = self._menu.index(name)
- except ValueError:
- raise ItemNotFoundError()
- else:
- return self._disabled_list[i]
-
- def set_item_disabled(self, disabled, name, by_label=False):
- """Set disabled state of named list item in a ListControl.
-
- disabled: boolean disabled state
-
- """
- if by_label:
- name = self._value_from_label(name)
- try:
- i = self._menu.index(name)
- except ValueError:
- raise ItemNotFoundError()
- else:
- self._disabled_list[i] = bool(disabled)
-
- def set_all_items_disabled(self, disabled):
- """Set disabled state of all list items in a ListControl.
-
- disabled: boolean disabled state
-
- """
- for i in range(len(self._disabled_list)):
- self._disabled_list[i] = bool(disabled)
-
- def get_item_attrs(self, name, by_label=False):
- """Return dictionary of HTML attributes for a single ListControl item.
-
- The HTML element types that describe list items are: OPTION for SELECT
- controls, INPUT for the rest. These elements have HTML attributes that
- you may occasionally want to know about -- for example, the "alt" HTML
- attribute gives a text string describing the item (graphical browsers
- usually display this as a tooltip).
-
- The returned dictionary maps HTML attribute names to values. The names
- and values are taken from the original HTML.
-
- Note that for SELECT controls, the returned dictionary contains a
- special key "contents" -- see SelectControl.__doc__.
-
- """
- if by_label:
- name = self._value_from_label(name)
- try:
- i = self._menu.index(name)
- except ValueError:
- raise ItemNotFoundError()
- return self._attrs_list[i]
-
- def add_to_form(self, form):
- try:
- control = form.find_control(self.name, self.type)
- except ControlNotFoundError:
- Control.add_to_form(self, form)
- else:
- control.merge_control(self)
-
- def merge_control(self, control):
- assert bool(control.multiple) == bool(self.multiple)
- assert isinstance(control, self.__class__)
- self._menu.extend(control._menu)
- self._attrs_list.extend(control._attrs_list)
- self._disabled_list.extend(control._disabled_list)
- if control.multiple:
- self._selected.extend(control._selected)
- else:
- if control._value_is_set:
- self._selected = control._selected
- if control._value_is_set:
- self._value_is_set = True
-
- def fixup(self):
- """
- ListControls are built up from component list items (which are also
- ListControls) during parsing. This method should be called after all
- items have been added. See ListControl.__doc__ for the reason this is
- required.
-
- """
- # Need to set default selection where no item was indicated as being
- # selected by the HTML:
-
- # CHECKBOX:
- # Nothing should be selected.
- # SELECT/single, SELECT/multiple and RADIO:
- # RFC 1866 (HTML 2.0): says first item should be selected.
- # W3C HTML 4.01 Specification: says that client behaviour is
- # undefined in this case. For RADIO, exactly one must be selected,
- # though which one is undefined.
- # Both Netscape and Microsoft Internet Explorer (IE) choose first
- # item for SELECT/single. However, both IE5 and Mozilla (both 1.0
- # and Firebird 0.6) leave all items unselected for RADIO and
- # SELECT/multiple.
-
- # Since both Netscape and IE all choose the first item for
- # SELECT/single, we do the same. OTOH, both Netscape and IE
- # leave SELECT/multiple with nothing selected, in violation of RFC 1866
- # (but not in violation of the W3C HTML 4 standard); the same is true
- # of RADIO (which *is* in violation of the HTML 4 standard). We follow
- # RFC 1866 if the select_default attribute is set, and Netscape and IE
- # otherwise. RFC 1866 and HTML 4 are always violated insofar as you
- # can deselect all items in a RadioControl.
-
- raise NotImplementedError()
-
- def __getattr__(self, name):
- if name == "value":
- menu = self._menu
- if self.multiple:
- values = []
- for i in range(len(menu)):
- if self._selected[i]: values.append(menu[i])
- return values
- else:
- if self._selected is None: return []
- else: return [self._selected]
- else:
- raise AttributeError("%s instance has no attribute '%s'" %
- (self.__class__.__name__, name))
-
- def __setattr__(self, name, value):
- if name == "value":
- if self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
- self._set_value(value)
- elif name in ("name", "type", "multiple"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def _set_value(self, value):
- if self.multiple:
- self._multiple_set_value(value)
- else:
- self._single_set_value(value)
-
- def _single_set_value(self, value):
- if value is None or isstringlike(value):
- raise TypeError("ListControl, must set a sequence")
- nr = len(value)
- if not (0 <= nr <= 1):
- raise ItemCountError("single selection list, must set sequence of "
- "length 0 or 1")
-
- if nr == 0:
- self._selected = None
- else:
- value = value[0]
- try:
- i = self._menu.index(value)
- except ValueError:
- raise ItemNotFoundError("no item named '%s'" %
- repr(value))
- if self._disabled_list[i]:
- raise AttributeError("item '%s' is disabled" % value)
- self._selected = value
-
- def _multiple_set_value(self, value):
- if value is None or isstringlike(value):
- raise TypeError("ListControl, must set a sequence")
-
- selected = [False]*len(self._selected)
- menu = self._menu
- disabled_list = self._disabled_list
-
- for v in value:
- found = False
- for i in range(len(menu)):
- item_name = menu[i]
- if v == item_name:
- if disabled_list[i]:
- raise AttributeError("item '%s' is disabled" % value)
- selected[i] = True
- found = True
- break
- if not found:
- raise ItemNotFoundError("no item named '%s'" % repr(v))
- self._selected = selected
-
- def set_value_by_label(self, value):
- raise NotImplementedError("control '%s' does not yet support "
- "by_label" % self.name)
- def get_value_by_label(self):
- raise NotImplementedError("control '%s' does not yet support "
- "by_label" % self.name)
-
- def possible_items(self, by_label=False):
- if by_label:
- raise NotImplementedError(
- "control '%s' does not yet support by_label" % self.name)
- return copy.copy(self._menu)
-
- def pairs(self):
- if self.disabled:
- return []
-
- if not self.multiple:
- name = self.name
- value = self._selected
- if name is None or value is None:
- return []
- return [(name, value)]
- else:
- control_name = self.name # usually the name HTML attribute
- pairs = []
- for i in range(len(self._menu)):
- item_name = self._menu[i] # usually the value HTML attribute
- if self._selected[i]:
- pairs.append((control_name, item_name))
- return pairs
-
- def _item_str(self, i):
- item_name = self._menu[i]
- if self.multiple:
- if self._selected[i]:
- item_name = "*"+item_name
- else:
- if self._selected == item_name:
- item_name = "*"+item_name
- if self._disabled_list[i]:
- item_name = "(%s)" % item_name
- return item_name
-
- def __str__(self):
- name = self.name
- if name is None: name = "<None>"
-
- display = []
- for i in range(len(self._menu)):
- s = self._item_str(i)
- display.append(s)
-
- infos = []
- if self.disabled: infos.append("disabled")
- if self.readonly: infos.append("readonly")
- info = string.join(infos, ", ")
- if info: info = " (%s)" % info
-
- return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
- name, string.join(display, ", "), info)
-
-
-class RadioControl(ListControl):
- """
- Covers:
-
- INPUT/RADIO
-
- """
- def __init__(self, type, name, attrs, select_default=False):
- ListControl.__init__(self, type, name, attrs, select_default,
- called_as_base_class=True)
- self.__dict__["multiple"] = False
- value = attrs.get("value", "on")
- self._menu = [value]
- checked = attrs.has_key("checked")
- if checked:
- self._value_is_set = True
- self._selected = value
- else:
- self._selected = None
-
- def fixup(self):
- if not self._value_is_set:
- # no item explicitly selected
- assert self._selected is None
- if self._select_default:
- self._selected = self._menu[0]
- self._value_is_set = True
-
-
-class CheckboxControl(ListControl):
- """
- Covers:
-
- INPUT/CHECKBOX
-
- """
- def __init__(self, type, name, attrs, select_default=False):
- ListControl.__init__(self, type, name, attrs, select_default,
- called_as_base_class=True)
- self.__dict__["multiple"] = True
- value = attrs.get("value", "on")
- self._menu = [value]
- checked = attrs.has_key("checked")
- self._selected = [checked]
- self._value_is_set = True
-
- def fixup(self):
- # If no items were explicitly checked in HTML, that's how we must
- # leave it, so we have nothing to do here.
- assert self._value_is_set
-
-
-class SelectControl(ListControl):
- """
- Covers:
-
- SELECT (and OPTION)
-
- SELECT control values and labels are subject to some messy defaulting
- rules. For example, if the HTML repreentation of the control is:
-
- <SELECT name=year>
- <OPTION value=0 label="2002">current year</OPTION>
- <OPTION value=1>2001</OPTION>
- <OPTION>2000</OPTION>
- </SELECT>
-
- The items, in order, have labels "2002", "2001" and "2000", whereas their
- values are "0", "1" and "2000" respectively. Note that the value of the
- last OPTION in this example defaults to its contents, as specified by RFC
- 1866, as do the labels of the second and third OPTIONs.
-
- The purpose of these methods is that the OPTION labels are sometimes much
- more meaningful, than are the OPTION values, which can make for more
- maintainable code.
-
- Additional read-only public attribute: attrs
-
- The attrs attribute is a dictionary of the original HTML attributes of the
- SELECT element. Other ListControls do not have this attribute, because in
- other cases the control as a whole does not correspond to any single HTML
- element. The get_item_attrs method may be used as usual to get at the
- HTML attributes of the HTML elements corresponding to individual list items
- (for SELECT controls, these are OPTION elements).
-
- Another special case is that the attributes dictionaries returned by
- get_item_attrs have a special key "contents" which does not correspond to
- any real HTML attribute, but rather contains the contents of the OPTION
- element:
-
- <OPTION>this bit</OPTION>
-
- """
- # HTML attributes here are treated slightly from other list controls:
- # -The SELECT HTML attributes dictionary is stuffed into the OPTION
- # HTML attributes dictionary under the "__select" key.
- # -The content of each OPTION element is stored under the special
- # "contents" key of the dictionary.
- # After all this, the dictionary is passed to the SelectControl constructor
- # as the attrs argument, as usual. However:
- # -The first SelectControl constructed when building up a SELECT control
- # has a constructor attrs argument containing only the __select key -- so
- # this SelectControl represents an empty SELECT control.
- # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
- # the __select dictionary containing the SELECT HTML-attributes.
- def __init__(self, type, name, attrs, select_default=False):
- # fish out the SELECT HTML attributes from the OPTION HTML attributes
- # dictionary
- self.attrs = attrs["__select"].copy()
- attrs = attrs.copy()
- del attrs["__select"]
-
- ListControl.__init__(self, type, name, attrs, select_default,
- called_as_base_class=True)
-
- self._label_map = None
- self.disabled = self.attrs.has_key("disabled")
- self.id = self.attrs.get("id")
-
- self._menu = []
- self._selected = []
- self._value_is_set = False
- if self.attrs.has_key("multiple"):
- self.__dict__["multiple"] = True
- self._selected = []
- else:
- self.__dict__["multiple"] = False
- self._selected = None
-
- if attrs: # OPTION item data was provided
- value = attrs["value"]
- self._menu.append(value)
- selected = attrs.has_key("selected")
- if selected:
- self._value_is_set = True
- if self.attrs.has_key("multiple"):
- self._selected.append(selected)
- elif selected:
- self._selected = value
-
- def _build_select_label_map(self):
- """Return an ordered mapping of labels to values.
-
- For example, if the HTML repreentation of the control is as given in
- SelectControl.__doc__, this function will return a mapping like:
-
- {"2002": "0", "2001": "1", "2000": "2000"}
-
- """
- alist = []
- for val in self._menu:
- attrs = self.get_item_attrs(val)
- alist.append((attrs["label"], val))
- return AList(alist)
-
- def _value_from_label(self, label):
- try:
- return self._label_map[label]
- except KeyError:
- raise ItemNotFoundError("no item has label '%s'" % label)
-
- def fixup(self):
- if not self._value_is_set:
- # No item explicitly selected.
- if len(self._menu) > 0:
- if self.multiple:
- if self._select_default:
- self._selected[0] = True
- else:
- assert self._selected is None
- self._selected = self._menu[0]
- self._value_is_set = True
- self._label_map = self._build_select_label_map()
-
- def possible_items(self, by_label=False):
- if not by_label:
- return copy.copy(self._menu)
- else:
- self._label_map.set_inverted(True)
- try:
- r = map(lambda v, self=self: self._label_map[v], self._menu)
- finally:
- self._label_map.set_inverted(False)
- return r
-
- def set_value_by_label(self, value):
- if isstringlike(value):
- raise TypeError("ListControl, must set a sequence, not a string")
- if self.disabled:
- raise AttributeError("control '%s' is disabled" % self.name)
- if self.readonly:
- raise AttributeError("control '%s' is readonly" % self.name)
-
- try:
- value = map(lambda v, self=self: self._label_map[v], value)
- except KeyError, e:
- raise ItemNotFoundError("no item has label '%s'" % e.args[0])
- self._set_value(value)
-
- def get_value_by_label(self):
- menu = self._menu
- self._label_map.set_inverted(True)
- try:
- if self.multiple:
- values = []
- for i in range(len(menu)):
- if self._selected[i]:
- values.append(self._label_map[menu[i]])
- return values
- else:
- return [self._label_map[self._selected]]
- finally:
- self._label_map.set_inverted(False)
-
-
-#---------------------------------------------------
-class SubmitControl(ScalarControl):
- """
- Covers:
-
- INPUT/SUBMIT
- BUTTON/SUBMIT
-
- """
- def __init__(self, type, name, attrs):
- ScalarControl.__init__(self, type, name, attrs)
- # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
- # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem
- # to define this.
- if self.value is None: self.value = ""
- self.readonly = True
-
- def _click(self, form, coord, return_type):
- self._clicked = coord
- r = form._switch_click(return_type)
- self._clicked = False
- return r
-
- def pairs(self):
- if not self._clicked:
- return []
- return ScalarControl.pairs(self)
-
-
-#---------------------------------------------------
-class ImageControl(SubmitControl):
- """
- Covers:
-
- INPUT/IMAGE
-
- The value attribute of an ImageControl is always None. Coordinates are
- specified using one of the HTMLForm.click* methods.
-
- """
- def __init__(self, type, name, attrs):
- ScalarControl.__init__(self, type, name, attrs)
- self.__dict__["value"] = None
-
- def __setattr__(self, name, value):
- if name in ("value", "name", "type"):
- raise AttributeError("%s attribute is readonly" % name)
- else:
- self.__dict__[name] = value
-
- def pairs(self):
- clicked = self._clicked
- if self.disabled or not clicked:
- return []
- name = self.name
- if name is None: return []
- return [("%s.x" % name, str(clicked[0])),
- ("%s.y" % name, str(clicked[1]))]
-
-
-# aliases, just to make str(control) and str(form) clearer
-class PasswordControl(TextControl): pass
-class HiddenControl(TextControl): pass
-class TextareaControl(TextControl): pass
-class SubmitButtonControl(SubmitControl): pass
-
-
-def is_listcontrol(control): return isinstance(control, ListControl)
-
-
-class HTMLForm:
- """Represents a single HTML <form> ... </form> element.
-
- A form consists of a sequence of controls that usually have names, and
- which can take on various values. The values of the various types of
- controls represent variously: text, zero-, one- or many-of-many choices,
- and files to be uploaded.
-
- Forms can be filled in with data to be returned to the server, and then
- submitted, using the click method to generate a request object suitable for
- passing to urllib2.urlopen (or the click_request_data or click_pairs
- methods if you're not using urllib2).
-
- import ClientForm
- forms = ClientForm.ParseFile(html, base_uri)
- form = forms[0]
-
- form["query"] = "Python"
- form.set("lots", "nr_results")
-
- response = urllib2.urlopen(form.click())
-
- Usually, HTMLForm instances are not created directly. Instead, the
- ParseFile or ParseResponse factory functions are used. If you do construct
- HTMLForm objects yourself, however, note that an HTMLForm instance is only
- properly initialised after the fixup method has been called (ParseFile and
- ParseResponse do this for you). See ListControl.__doc__ for the reason
- this is required.
-
- Indexing a form (form["control_name"]) returns the named Control's value
- attribute. Assignment to a form index (form["control_name"] = something)
- is equivalent to assignment to the named Control's value attribute. If you
- need to be more specific than just supplying the control's name, use the
- set_value and get_value methods.
-
- ListControl values are lists of item names. The list item's name is the
- value of the corresponding HTML element's "value" attribute.
-
- Example:
-
- <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
- <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
-
- defines a CHECKBOX control with name "cheeses" which has two items, named
- "leicester" and "cheddar".
-
- Another example:
-
- <SELECT name="more_cheeses">
- <OPTION>1</OPTION>
- <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
- </SELECT>
-
- defines a SELECT control with name "more_cheeses" which has two items,
- named "1" and "2".
-
- To set, clear or toggle individual list items, use the set and toggle
- methods. To set the whole value, do as for any other control:use indexing
- or the set_/get_value methods.
-
- Example:
-
- # select *only* the item named "cheddar"
- form["cheeses"] = ["cheddar"]
- # select "cheddar", leave other items unaffected
- form.set("cheddar", "cheeses")
-
- Some controls (RADIO and SELECT without the multiple attribute) can only
- have zero or one items selected at a time. Some controls (CHECKBOX and
- SELECT with the multiple attribute) can have multiple items selected at a
- time. To set the whole value of a multiple-selection ListControl, assign a
- sequence to a form index:
-
- form["cheeses"] = ["cheddar", "leicester"]
-
- To check whether a control has an item, or whether an item is selected,
- respectively:
-
- "cheddar" in form.possible_items("cheeses")
- "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
-
- Note that some items may be disabled (see below).
-
- Note the following mistake:
-
- form[control_name] = control_value
- assert form[control_name] == control_value # not necessarily true
-
- The reason for this is that form[control_name] always gives the list items
- in the order they were listed in the HTML.
-
- List items (hence list values, too) can be referred to in terms of list
- item labels rather than list item names. Currently, this is only possible
- for SELECT controls (this is a bug). To use this feature, use the by_label
- arguments to the various HTMLForm methods. Note that it is *item* names
- (hence ListControl values also), not *control* names, that can be referred
- to by label.
-
- The question of default values of OPTION contents, labels and values is
- somewhat complicated: see SelectControl.__doc__ and
- ListControl.get_item_attrs.__doc__ if you think you need to know.
-
- Controls can be disabled or readonly. In either case, the control's value
- cannot be changed until you clear those flags (using the methods on
- HTMLForm). Disabled is the state typically represented by browsers by
- `greying out' a control. Disabled controls are not `successful' -- they
- don't cause data to get returned to the server. Readonly controls usually
- appear in browsers as read-only text boxes. Readonly controls are
- successful. List items can also be disabled. Attempts to select disabled
- items (with form[name] = value, or using the ListControl.set method, for
- example) fail. Attempts to clear disabled items are allowed.
-
- If a lot of controls are readonly, it can be useful to do this:
-
- form.set_all_readonly(False)
-
- When you want to do several things with a single control, or want to do
- less common things, like changing which controls and items are disabled,
- you can get at a particular control:
-
- control = form.find_control("cheeses")
- control.set_item_disabled(False, "gruyere")
- control.set("gruyere")
-
- Most methods on HTMLForm just delegate to the contained controls, so see
- the docstrings of the various Control classes for further documentation.
- Most of these delegating methods take name, type, kind, id and nr arguments
- to specify the control to be operated on: see
- HTMLForm.find_control.__doc__.
-
- ControlNotFoundError (subclass of ValueError) is raised if the specified
- control can't be found. This includes occasions where a non-ListControl
- is found, but the method (set, for example) requires a ListControl.
- ItemNotFoundError (subclass of ValueError) is raised if a list item can't
- be found. ItemCountError (subclass of ValueError) is raised if an attempt
- is made to select more than one item and the control doesn't allow that, or
- set/get_single are called and the control contains more than one item.
- AttributeError is raised if a control or item is readonly or disabled and
- an attempt is made to alter its value.
-
- XXX CheckBoxControl and RadioControl don't yet support item access by label
-
- Security note: Remember that any passwords you store in HTMLForm instances
- will be saved to disk in the clear if you pickle them (directly or
- indirectly). The simplest solution to this is to avoid pickling HTMLForm
- objects. You could also pickle before filling in any password, or just set
- the password to "" before pickling.
-
-
- Public attributes:
-
- action: full (absolute URI) form action
- method: "GET" or "POST"
- enctype: form transfer encoding MIME type
- name: name of form (None if no name was specified)
- attrs: dictionary mapping original HTML form attributes to their values
-
- controls: list of Control instances; do not alter this list
- (instead, call form.new_control to make a Control and add it to the
- form, or control.add_to_form if you already have a Control instance)
-
-
-
- Methods for form filling:
- -------------------------
-
- Most of the these methods have very similar arguments. See
- HTMLForm.find_control.__doc__ for details of the name, type, kind and nr
- arguments. See above for a description of by_label.
-
- def find_control(self,
- name=None, type=None, kind=None, id=None, predicate=None,
- nr=None)
-
- get_value(name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
- set_value(value,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
-
- set_all_readonly(readonly)
-
-
- Methods applying only to ListControls:
-
- possible_items(name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
-
- set(selected, item_name,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
- toggle(item_name,
- name=None, type=None, id=None, nr=None,
- by_label=False)
-
- set_single(selected,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
- toggle_single(name=None, type=None, kind=None, id=None, nr=None,
- by_label=False)
-
-
- Method applying only to FileControls:
-
- add_file(file_object,
- content_type="application/octet-stream", filename=None,
- name=None, id=None, nr=None)
-
-
- Methods applying only to clickable controls:
-
- click(name=None, type=None, id=None, nr=0, coord=(1,1))
- click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1))
- click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1))
-
- """
-
- type2class = {
- "text": TextControl,
- "password": PasswordControl,
- "hidden": HiddenControl,
- "textarea": TextareaControl,
-
- "isindex": IsindexControl,
-
- "file": FileControl,
-
- "button": IgnoreControl,
- "buttonbutton": IgnoreControl,
- "reset": IgnoreControl,
- "resetbutton": IgnoreControl,
-
- "submit": SubmitControl,
- "submitbutton": SubmitButtonControl,
- "image": ImageControl,
-
- "radio": RadioControl,
- "checkbox": CheckboxControl,
- "select": SelectControl,
- }
-
-#---------------------------------------------------
-# Initialisation. Use ParseResponse / ParseFile instead.
-
- def __init__(self, action, method="GET",
- enctype="application/x-www-form-urlencoded",
- name=None, attrs=None):
- """
- In the usual case, use ParseResponse (or ParseFile) to create new
- HTMLForm objects.
-
- action: full (absolute URI) form action
- method: "GET" or "POST"
- enctype: form transfer encoding MIME type
- name: name of form
- attrs: dictionary mapping original HTML form attributes to their values
-
- """
- self.action = action
- self.method = method
- self.enctype = enctype
- self.name = name
- if attrs is not None:
- self.attrs = attrs.copy()
- else:
- self.attrs = {}
- self.controls = []
-
- def new_control(self, type, name, attrs,
- ignore_unknown=False, select_default=False):
- """Adds a new control to the form.
-
- This is usually called by ParseFile and ParseResponse. Don't call it
- youself unless you're building your own Control instances.
-
- Note that controls representing lists of items are built up from
- controls holding only a single list item. See ListControl.__doc__ for
- further information.
-
- type: type of control (see Control.__doc__ for a list)
- attrs: HTML attributes of control
- ignore_unknown: if true, use a dummy Control instance for controls of
- unknown type; otherwise, raise ValueError
- select_default: for RADIO and multiple-selection SELECT controls, pick
- the first item as the default if no 'selected' HTML attribute is
- present (this defaulting happens when the HTMLForm.fixup method is
- called)
-
- """
- type = string.lower(type)
- klass = self.type2class.get(type)
- if klass is None:
- if ignore_unknown:
- klass = IgnoreControl
- else:
- raise ValueError("Unknown control type '%s'" % type)
-
- a = attrs.copy()
- if issubclass(klass, ListControl):
- control = klass(type, name, a, select_default)
- else:
- control = klass(type, name, a)
- control.add_to_form(self)
-
- def fixup(self):
- """Normalise form after all controls have been added.
-
- This is usually called by ParseFile and ParseResponse. Don't call it
- youself unless you're building your own Control instances.
-
- This method should only be called once, after all controls have been
- added to the form.
-
- """
- for control in self.controls:
- control.fixup()
-
-#---------------------------------------------------
- def __str__(self):
- header = "%s %s %s" % (self.method, self.action, self.enctype)
- rep = [header]
- for control in self.controls:
- rep.append(" %s" % str(control))
- return "<%s>" % string.join(rep, "\n")
-
-#---------------------------------------------------
-# Form-filling methods.
-
- def __getitem__(self, name):
- return self.find_control(name).value
- def __setitem__(self, name, value):
- control = self.find_control(name)
- try:
- control.value = value
- except AttributeError, e:
- raise ValueError(str(e))
-
- def get_value(self,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Return value of control.
-
- If only name and value arguments are supplied, equivalent to
-
- form[name]
-
- """
- c = self.find_control(name, type, kind, id, nr=nr)
- if by_label:
- try:
- meth = c.get_value_by_label
- except AttributeError:
- raise NotImplementedError(
- "control '%s' does not yet support by_label" % c.name)
- else:
- return meth()
- else:
- return c.value
- def set_value(self, value,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Set value of control.
-
- If only name and value arguments are supplied, equivalent to
-
- form[name] = value
-
- """
- c = self.find_control(name, type, kind, id, nr=nr)
- if by_label:
- try:
- meth = c.set_value_by_label
- except AttributeError:
- raise NotImplementedError(
- "control '%s' does not yet support by_label" % c.name)
- else:
- meth(value)
- else:
- c.value = value
-
- def set_all_readonly(self, readonly):
- for control in self.controls:
- control.readonly = bool(readonly)
-
-
-#---------------------------------------------------
-# Form-filling methods applying only to ListControls.
-
- def possible_items(self,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Return a list of all values that the specified control can take."""
- c = self._find_list_control(name, type, kind, id, nr)
- return c.possible_items(by_label)
-
- def set(self, selected, item_name,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Select / deselect named list item.
-
- selected: boolean selected state
-
- """
- self._find_list_control(name, type, kind, id, nr).set(
- selected, item_name, by_label)
- def toggle(self, item_name,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Toggle selected state of named list item."""
- self._find_list_control(name, type, kind, id, nr).toggle(
- item_name, by_label)
-
- def set_single(self, selected,
- name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Select / deselect list item in a control having only one item.
-
- If the control has multiple list items, ItemCountError is raised.
-
- This is just a convenience method, so you don't need to know the item's
- name -- the item name in these single-item controls is usually
- something meaningless like "1" or "on".
-
- For example, if a checkbox has a single item named "on", the following
- two calls are equivalent:
-
- control.toggle("on")
- control.toggle_single()
-
- """
- self._find_list_control(name, type, kind, id, nr).set_single(
- selected, by_label)
- def toggle_single(self, name=None, type=None, kind=None, id=None, nr=None,
- by_label=False):
- """Toggle selected state of list item in control having only one item.
-
- The rest is as for HTMLForm.set_single.__doc__.
-
- """
- self._find_list_control(name, type, kind, id, nr).toggle_single(
- by_label)
-
-#---------------------------------------------------
-# Form-filling method applying only to FileControls.
-
- def add_file(self, file_object, content_type=None, filename=None,
- name=None, id=None, nr=None):
- """Add a file to be uploaded.
-
- file_object: file-like object (with read method) from which to read
- data to upload
- content_type: MIME content type of data to upload
- filename: filename to pass to server
-
- If filename is None, no filename is sent to the server.
-
- If content_type is None, the content type is guessed based on the
- filename and the data from read from the file object.
-
- XXX
- At the moment, guessed content type is always application/octet-stream.
- Use sndhdr, imghdr modules. Should also try to guess HTML, XML, and
- plain text.
-
- """
- self.find_control(name, "file", id=id, nr=nr).add_file(
- file_object, content_type, filename)
-
-#---------------------------------------------------
-# Form submission methods, applying only to clickable controls.
-
- def click(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
- """Return request that would result from clicking on a control.
-
- The request object is a urllib2.Request instance, which you can pass to
- urllib2.urlopen (or ClientCookie.urlopen).
-
- Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
- IMAGEs) can be clicked.
-
- Will click on the first clickable control, subject to the name, type
- and nr arguments (as for find_control). If no name, type, id or number
- is specified and there are no clickable controls, a request will be
- returned for the form in its current, un-clicked, state.
-
- IndexError is raised if any of name, type, id or nr is specified but no
- matching control is found. ValueError is raised if the HTMLForm has an
- enctype attribute that is not recognised.
-
- You can optionally specify a coordinate to click at, which only makes a
- difference if you clicked on an image.
-
- """
- return self._click(name, type, id, nr, coord, "request")
-
- def click_request_data(self,
- name=None, type=None, id=None, nr=0, coord=(1,1)):
- """As for click method, but return a tuple (url, data, headers).
-
- You can use this data to send a request to the server. This is useful
- if you're using httplib or urllib rather than urllib2. Otherwise, use
- the click method.
-
- # Untested. Have to subclass to add headers, I think -- so use urllib2
- # instead!
- import urllib
- url, data, hdrs = form.click_request_data()
- r = urllib.urlopen(url, data)
-
- # Untested. I don't know of any reason to use httplib -- you can get
- # just as much control with urllib2.
- import httplib, urlparse
- url, data, hdrs = form.click_request_data()
- tup = urlparse(url)
- host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
- conn = httplib.HTTPConnection(host)
- if data:
- httplib.request("POST", path, data, hdrs)
- else:
- httplib.request("GET", path, headers=hdrs)
- r = conn.getresponse()
-
- """
- return self._click(name, type, id, nr, coord, "request_data")
-
- def click_pairs(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
- """As for click_request_data, but returns a list of (key, value) pairs.
-
- You can use this list as an argument to ClientForm.urlencode. This is
- usually only useful if you're using httplib or urllib rather than
- urllib2 or ClientCookie. It may also be useful if you want to manually
- tweak the keys and/or values, but this should not be necessary.
- Otherwise, use the click method.
-
- Note that this method is only useful for forms of MIME type
- x-www-form-urlencoded. In particular, it does not return the
- information required for file upload. If you need file upload and are
- not using urllib2, use click_request_data.
-
- Also note that Python 2.0's urllib.urlencode is slightly broken: it
- only accepts a mapping, not a sequence of pairs, as an argument. This
- messes up any ordering in the argument. Use ClientForm.urlencode
- instead.
-
- """
- return self._click(name, type, id, nr, coord, "pairs")
-
-#---------------------------------------------------
-
- def find_control(self,
- name=None, type=None, kind=None, id=None, predicate=None,
- nr=None):
- """Locate some specific control within the form.
-
- At least one of the name, type, kind, predicate and nr arguments must
- be supplied. If no matching control is found, ControlNotFoundError is
- raised.
-
- If name is specified, then the control must have the indicated name.
-
- If type is specified then the control must have the specified type (in
- addition to the types possible for <input> HTML tags: "text",
- "password", "hidden", "submit", "image", "button", "radio", "checkbox",
- "file" we also have "reset", "buttonbutton", "submitbutton",
- "resetbutton", "textarea", "select" and "isindex").
-
- If kind is specified, then the control must fall into the specified
- group, each of which satisfies a particular interface. The types are
- "text", "list", "multilist", "singlelist", "clickable" and "file".
-
- If id is specified, then the control must have the indicated id.
-
- If predicate is specified, then the control must match that function.
- The predicate function is passed the control as its single argument,
- and should return a boolean value indicating whether the control
- matched.
-
- nr, if supplied, is the sequence number of the control (where 0 is the
- first). Note that control 0 is the first control matching all the
- other arguments (if supplied); it is not necessarily the first control
- in the form.
-
- """
- if ((name is None) and (type is None) and (kind is None) and
- (id is None) and (predicate is None) and (nr is None)):
- raise ValueError(
- "at least one argument must be supplied to specify control")
- if nr is None: nr = 0
-
- return self._find_control(name, type, kind, id, predicate, nr)
-
-#---------------------------------------------------
-# Private methods.
-
- def _find_list_control(self,
- name=None, type=None, kind=None, id=None, nr=None):
- if ((name is None) and (type is None) and (kind is None) and
- (id is None) and (nr is None)):
- raise ValueError(
- "at least one argument must be supplied to specify control")
- if nr is None: nr = 0
-
- return self._find_control(name, type, kind, id, is_listcontrol, nr)
-
- def _find_control(self, name, type, kind, id, predicate, nr):
- if (name is not None) and not isstringlike(name):
- raise TypeError("control name must be string-like")
- if (type is not None) and not isstringlike(type):
- raise TypeError("control type must be string-like")
- if (kind is not None) and not isstringlike(kind):
- raise TypeError("control kind must be string-like")
- if (id is not None) and not isstringlike(id):
- raise TypeError("control id must be string-like")
- if (predicate is not None) and not callable(predicate):
- raise TypeError("control predicate must be callable")
- if nr < 0: raise ValueError("control number must be a positive "
- "integer")
-
- orig_nr = nr
-
- for control in self.controls:
- if name is not None and name != control.name:
- continue
- if type is not None and type != control.type:
- continue
- if (kind is not None and
- not self._is_control_in_kind(control, kind)):
- continue
- if id is not None and id != control.id:
- continue
- if predicate and not predicate(control):
- continue
- if nr:
- nr = nr - 1
- continue
- return control
-
- description = []
- if name is not None: description.append("name '%s'" % name)
- if type is not None: description.append("type '%s'" % type)
- if kind is not None: description.append("kind '%s'" % kind)
- if id is not None: description.append("id '%s'" % id)
- if predicate is not None:
- description.append("matching predicate %s" % predicate)
- if orig_nr: description.append("nr %d" % orig_nr)
- description = string.join(description, ", ")
- raise ControlNotFoundError("no control with "+description)
-
- def _is_control_in_kind(self, control, kind):
- # XXX not OO
- if kind == "list":
- return isinstance(control, ListControl)
- elif kind == "multilist":
- return bool(isinstance(control, ListControl) and control.multiple)
- elif kind == "singlelist":
- return bool(isinstance(control, ListControl) and
- not control.multiple)
- elif kind == "file":
- return isinstance(control, FileControl)
- elif kind == "text":
- return isinstance(control, TextControl)
- elif kind == "clickable":
- return (isinstance(control, SubmitControl) or
- isinstance(control, IsindexControl))
- else:
- raise ValueError("no such control kind '%s'" % kind)
-
- def _click(self, name, type, id, nr, coord, return_type):
- try:
- control = self._find_control(name, type, "clickable", id, None, nr)
- except ControlNotFoundError:
- if ((name is not None) or (type is not None) or (id is not None) or
- (nr != 0)):
- raise
- # no clickable controls, but no control was explicitly requested,
- # so return state without clicking any control
- return self._switch_click(return_type)
- else:
- return control._click(self, coord, return_type)
-
- def _pairs(self):
- """Return sequence of (key, value) pairs suitable for urlencoding."""
- pairs = []
- for control in self.controls:
- pairs.extend(control.pairs())
- return pairs
-
- def _request_data(self):
- """Return a tuple (url, data, headers)."""
- method = string.upper(self.method)
- if method == "GET":
- if self.enctype != "application/x-www-form-urlencoded":
- raise ValueError(
- "unknown GET form encoding type '%s'" % self.enctype)
- uri = "%s?%s" % (self.action, urlencode(self._pairs()))
- return uri, None, []
- elif method == "POST":
- if self.enctype == "application/x-www-form-urlencoded":
- return (self.action, urlencode(self._pairs()),
- [("Content-type", self.enctype)])
- elif self.enctype == "multipart/form-data":
- data = StringIO()
- http_hdrs = []
- mw = MimeWriter(data, http_hdrs)
- f = mw.startmultipartbody("form-data", add_to_http_hdrs=True,
- prefix=0)
- for control in self.controls:
- control._write_mime_data(mw)
- mw.lastpart()
- return self.action, data.getvalue(), http_hdrs
- else:
- raise ValueError(
- "unknown POST form encoding type '%s'" % self.enctype)
- else:
- raise ValueError("Unknown method '%s'" % method)
-
- def _switch_click(self, return_type):
- # This is called by HTMLForm and clickable Controls to hide switching
- # on return_type.
- # XXX
- # not OO
- # duplicated in IsindexControl._click
- if return_type == "pairs":
- return self._pairs()
- elif return_type == "request_data":
- return self._request_data()
- else:
- req_data = self._request_data()
- req = urllib2.Request(req_data[0], req_data[1])
- for key, val in req_data[2]:
- req.add_header(key, val)
- return req