summaryrefslogtreecommitdiff
path: root/tools/bug_tool/ClientForm.py
diff options
context:
space:
mode:
authorTaybin Rutkin <taybin@taybin.com>2005-05-13 20:47:18 +0000
committerTaybin Rutkin <taybin@taybin.com>2005-05-13 20:47:18 +0000
commitd09f6b3016bacbc2871a8946cbb24ad705076509 (patch)
treef27312839c2a772cb2ce068a4f28b2449ad869df /tools/bug_tool/ClientForm.py
Initial revision
git-svn-id: svn://localhost/trunk/ardour2@4 d708f5d6-7413-0410-9779-e7cbd77b26cf
Diffstat (limited to 'tools/bug_tool/ClientForm.py')
-rw-r--r--tools/bug_tool/ClientForm.py2699
1 files changed, 2699 insertions, 0 deletions
diff --git a/tools/bug_tool/ClientForm.py b/tools/bug_tool/ClientForm.py
new file mode 100644
index 0000000000..c42f65b313
--- /dev/null
+++ b/tools/bug_tool/ClientForm.py
@@ -0,0 +1,2699 @@
+"""HTML form handling for web clients.
+
+ClientForm is a Python module for handling HTML forms on the client
+side, useful for parsing HTML forms, filling them in and returning the
+completed forms to the server. It has developed from a port of Gisle
+Aas' Perl module HTML::Form, from the libwww-perl library, but the
+interface is not the same.
+
+The most useful docstring is the one for HTMLForm.
+
+RFC 1866: HTML 2.0
+RFC 1867: Form-based File Upload in HTML
+RFC 2388: Returning Values from Forms: multipart/form-data
+HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
+HTML 4.01 Specification, W3C Recommendation 24 December 1999
+
+
+Copyright 2002-2003 John J. Lee <jjl@pobox.com>
+Copyright 1998-2000 Gisle Aas.
+
+This code is free software; you can redistribute it and/or modify it
+under the terms of the BSD License (see the file COPYING included with
+the distribution).
+
+"""
+
+# XXX
+# Treat unknown controls as text controls? (this was a recent LWP
+# HTML::Form change) I guess this is INPUT with no TYPE? Check LWP
+# source and browser behaviour.
+# Support for list item ids. How to handle missing ids? (How do I deal
+# with duplicate OPTION labels ATM? Can't remember...)
+# Arrange things so can automatically PyPI-register with categories
+# without messing up 1.5.2 compatibility.
+# Tests need work.
+# Test single and multiple file upload some more on the web.
+# Does file upload work when name is missing? Sourceforge tracker form
+# doesn't like it. Check standards, and test with Apache. Test binary
+# upload with Apache.
+# Add label support for CHECKBOX and RADIO.
+# Better docs.
+# Deal with character sets properly. Not sure what the issues are here.
+# I don't *think* any encoding of control names, filenames or data is
+# necessary -- HTML spec. doesn't require it, and Mozilla Firebird 0.6
+# doesn't seem to do it.
+# Add charset parameter to Content-type headers? How to find value??
+# Get rid of MapBase, AList and MimeWriter.
+# I'm not going to fix this unless somebody tells me what real servers
+# that want this encoding actually expect: If enctype is
+# application/x-www-form-urlencoded and there's a FILE control present.
+# Strictly, it should be 'name=data' (see HTML 4.01 spec., section
+# 17.13.2), but I send "name=" ATM. What about multiple file upload??
+# Get rid of the two type-switches (for kind and click*).
+# Remove single-selection code: can be special case of multi-selection,
+# with a few variations, I think.
+# Factor out multiple-selection list code? May not be easy. Maybe like
+# this:
+
+# ListControl
+# ^
+# | MultipleListControlMixin
+# | ^
+# SelectControl /
+# ^ /
+# \ /
+# MultiSelectControl
+
+
+# Plan
+# ----
+# Maybe a 0.2.x, cleaned up a bit and with id support for list items?
+# Not sure it's worth it, really.
+# Remove toggle methods.
+# Replace by_label with choice between value / id / label /
+# element contents (see discussion with Gisle about labels on
+# libwww-perl list).
+# ...what else?
+# Work on DOMForm.
+# XForms? Don't know if there's a need here.
+
+
+try: True
+except NameError:
+ True = 1
+ False = 0
+
+try: bool
+except NameError:
+ def bool(expr):
+ if expr: return True
+ else: return False
+
+import sys, urllib, urllib2, types, string, mimetools, copy
+from urlparse import urljoin
+from cStringIO import StringIO
+try:
+ import UnicodeType
+except ImportError:
+ UNICODE = False
+else:
+ UNICODE = True
+
+VERSION = "0.1.13"
+
+CHUNK = 1024 # size of chunks fed to parser, in bytes
+
+# This version of urlencode is from my Python 1.5.2 back-port of the
+# Python 2.1 CVS maintenance branch of urllib. It will accept a sequence
+# of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
+def urlencode(query,doseq=False,):
+ """Encode a sequence of two-element tuples or dictionary into a URL query \
+string.
+
+ If any values in the query arg are sequences and doseq is true, each
+ sequence element is converted to a separate parameter.
+
+ If the query arg is a sequence of two-element tuples, the order of the
+ parameters in the output will match the order of parameters in the
+ input.
+ """
+
+ if hasattr(query,"items"):
+ # mapping objects
+ query = query.items()
+ else:
+ # it's a bother at times that strings and string-like objects are
+ # sequences...
+ try:
+ # non-sequence items should not work with len()
+ x = len(query)
+ # non-empty strings will fail this
+ if len(query) and type(query[0]) != types.TupleType:
+ raise TypeError()
+ # zero-length sequences of all types will get here and succeed,
+ # but that's a minor nit - since the original implementation
+ # allowed empty dicts that type of behavior probably should be
+ # preserved for consistency
+ except TypeError:
+ ty,va,tb = sys.exc_info()
+ raise TypeError("not a valid non-string sequence or mapping "
+ "object", tb)
+
+ l = []
+ if not doseq:
+ # preserve old behavior
+ for k, v in query:
+ k = urllib.quote_plus(str(k))
+ v = urllib.quote_plus(str(v))
+ l.append(k + '=' + v)
+ else:
+ for k, v in query:
+ k = urllib.quote_plus(str(k))
+ if type(v) == types.StringType:
+ v = urllib.quote_plus(v)
+ l.append(k + '=' + v)
+ elif UNICODE and type(v) == types.UnicodeType:
+ # is there a reasonable way to convert to ASCII?
+ # encode generates a string, but "replace" or "ignore"
+ # lose information and "strict" can raise UnicodeError
+ v = urllib.quote_plus(v.encode("ASCII","replace"))
+ l.append(k + '=' + v)
+ else:
+ try:
+ # is this a sufficient test for sequence-ness?
+ x = len(v)
+ except TypeError:
+ # not a sequence
+ v = urllib.quote_plus(str(v))
+ l.append(k + '=' + v)
+ else:
+ # loop over the sequence
+ for elt in v:
+ l.append(k + '=' + urllib.quote_plus(str(elt)))
+ return string.join(l, '&')
+
+def startswith(string, initial):
+ if len(initial) > len(string): return False
+ return string[:len(initial)] == initial
+
+def issequence(x):
+ try:
+ x[0]
+ except (TypeError, KeyError):
+ return False
+ except IndexError:
+ pass
+ return True
+
+def isstringlike(x):
+ try: x+""
+ except: return False
+ else: return True
+
+
+# XXX don't really want to drag this along (MapBase, AList, MimeWriter)
+
+class MapBase:
+ """Mapping designed to be easily derived from.
+
+ Subclass it and override __init__, __setitem__, __getitem__, __delitem__
+ and keys. Nothing else should need to be overridden, unlike UserDict.
+ This significantly simplifies dictionary-like classes.
+
+ Also different from UserDict in that it has a redonly flag, and can be
+ updated (and initialised) with a sequence of pairs (key, value).
+
+ """
+ def __init__(self, init=None):
+ self._data = {}
+ self.readonly = False
+ if init is not None: self.update(init)
+
+ def __getitem__(self, key):
+ return self._data[key]
+
+ def __setitem__(self, key, item):
+ if not self.readonly:
+ self._data[key] = item
+ else:
+ raise TypeError("object doesn't support item assignment")
+
+ def __delitem__(self, key):
+ if not self.readonly:
+ del self._data[key]
+ else:
+ raise TypeError("object doesn't support item deletion")
+
+ def keys(self):
+ return self._data.keys()
+
+ # now the internal workings, there should be no need to override these:
+
+ def clear(self):
+ for k in self.keys():
+ del self[k]
+
+ def __repr__(self):
+ rep = []
+ for k, v in self.items():
+ rep.append("%s: %s" % (repr(k), repr(v)))
+ return self.__class__.__name__+"{"+(string.join(rep, ", "))+"}"
+
+ def copy(self):
+ return copy.copy(self)
+
+ def __cmp__(self, dict):
+ # note: return value is *not* boolean
+ for k, v in self.items():
+ if not (dict.has_key(k) and dict[k] == v):
+ return 1 # different
+ return 0 # the same
+
+ def __len__(self):
+ return len(self.keys())
+
+ def values(self):
+ r = []
+ for k in self.keys():
+ r.append(self[k])
+ return r
+
+ def items(self):
+ keys = self.keys()
+ vals = self.values()
+ r = []
+ for i in len(self):
+ r.append((keys[i], vals[i]))
+ return r
+
+ def has_key(self, key):
+ return key in self.keys()
+
+ def update(self, map):
+ if issequence(map) and not isstringlike(map):
+ items = map
+ else:
+ items = map.items()
+ for tup in items:
+ if not isinstance(tup, TupleType):
+ raise TypeError(
+ "MapBase.update requires a map or a sequence of pairs")
+ k, v = tup
+ self[k] = v
+
+ def get(self, key, failobj=None):
+ if key in self.keys():
+ return self[key]
+ else:
+ return failobj
+
+ def setdefault(self, key, failobj=None):
+ if not self.has_key(key):
+ self[key] = failobj
+ return self[key]
+
+
+class AList(MapBase):
+ """Read-only ordered mapping."""
+ def __init__(self, seq=[]):
+ self.readonly = True
+ self._inverted = False
+ self._data = list(seq[:])
+ self._keys = []
+ self._values = []
+ for key, value in seq:
+ self._keys.append(key)
+ self._values.append(value)
+
+ def set_inverted(self, inverted):
+ if (inverted and not self._inverted) or (
+ not inverted and self._inverted):
+ self._keys, self._values = self._values, self._keys
+ if inverted: self._inverted = True
+ else: self._inverted = False
+
+ def __getitem__(self, key):
+ try:
+ i = self._keys.index(key)
+ except ValueError:
+ raise KeyError(key)
+ return self._values[i]
+
+ def __delitem__(self, key):
+ try:
+ i = self._keys.index[key]
+ except ValueError:
+ raise KeyError(key)
+ del self._values[i]
+
+ def keys(self): return list(self._keys[:])
+ def values(self): return list(self._values[:])
+ def items(self):
+ data = self._data[:]
+ if not self._inverted:
+ return data
+ else:
+ newdata = []
+ for k, v in data:
+ newdata.append((v, k))
+ return newdata
+
+
+# This cut-n-pasted MimeWriter from standard library is here so can add
+# to HTTP headers rather than message body when appropriate. It also uses
+# \r\n in place of \n. This is nasty.
+class MimeWriter:
+
+ """Generic MIME writer.
+
+ Methods:
+
+ __init__()
+ addheader()
+ flushheaders()
+ startbody()
+ startmultipartbody()
+ nextpart()
+ lastpart()
+
+ A MIME writer is much more primitive than a MIME parser. It
+ doesn't seek around on the output file, and it doesn't use large
+ amounts of buffer space, so you have to write the parts in the
+ order they should occur on the output file. It does buffer the
+ headers you add, allowing you to rearrange their order.
+
+ General usage is:
+
+ f = <open the output file>
+ w = MimeWriter(f)
+ ...call w.addheader(key, value) 0 or more times...
+
+ followed by either:
+
+ f = w.startbody(content_type)
+ ...call f.write(data) for body data...
+
+ or:
+
+ w.startmultipartbody(subtype)
+ for each part:
+ subwriter = w.nextpart()
+ ...use the subwriter's methods to create the subpart...
+ w.lastpart()
+
+ The subwriter is another MimeWriter instance, and should be
+ treated in the same way as the toplevel MimeWriter. This way,
+ writing recursive body parts is easy.
+
+ Warning: don't forget to call lastpart()!
+
+ XXX There should be more state so calls made in the wrong order
+ are detected.
+
+ Some special cases:
+
+ - startbody() just returns the file passed to the constructor;
+ but don't use this knowledge, as it may be changed.
+
+ - startmultipartbody() actually returns a file as well;
+ this can be used to write the initial 'if you can read this your
+ mailer is not MIME-aware' message.
+
+ - If you call flushheaders(), the headers accumulated so far are
+ written out (and forgotten); this is useful if you don't need a
+ body part at all, e.g. for a subpart of type message/rfc822
+ that's (mis)used to store some header-like information.
+
+ - Passing a keyword argument 'prefix=<flag>' to addheader(),
+ start*body() affects where the header is inserted; 0 means
+ append at the end, 1 means insert at the start; default is
+ append for addheader(), but insert for start*body(), which use
+ it to determine where the Content-type header goes.
+
+ """
+
+ def __init__(self, fp, http_hdrs=None):
+ self._http_hdrs = http_hdrs
+ self._fp = fp
+ self._headers = []
+ self._boundary = []
+ self._first_part = True
+
+ def addheader(self, key, value, prefix=0,
+ add_to_http_hdrs=0):
+ """
+ prefix is ignored if add_to_http_hdrs is true.
+ """
+ lines = string.split(value, "\r\n")
+ while lines and not lines[-1]: del lines[-1]
+ while lines and not lines[0]: del lines[0]
+ if add_to_http_hdrs:
+ value = string.join(lines, "")
+ self._http_hdrs.append((key, value))
+ else:
+ for i in range(1, len(lines)):
+ lines[i] = " " + string.strip(lines[i])
+ value = string.join(lines, "\r\n") + "\r\n"
+ line = key + ": " + value
+ if prefix:
+ self._headers.insert(0, line)
+ else:
+ self._headers.append(line)
+
+ def flushheaders(self):
+ self._fp.writelines(self._headers)
+ self._headers = []
+
+ def startbody(self, ctype=None, plist=[], prefix=1,
+ add_to_http_hdrs=0, content_type=1):
+ """
+ prefix is ignored if add_to_http_hdrs is true.
+ """
+ if content_type and ctype:
+ for name, value in plist:
+ ctype = ctype + ';\r\n %s=\"%s\"' % (name, value)
+ self.addheader("Content-type", ctype, prefix=prefix,
+ add_to_http_hdrs=add_to_http_hdrs)
+ self.flushheaders()
+ if not add_to_http_hdrs: self._fp.write("\r\n")
+ self._first_part = True
+ return self._fp
+
+ def startmultipartbody(self, subtype, boundary=None, plist=[], prefix=1,
+ add_to_http_hdrs=0, content_type=1):
+ boundary = boundary or mimetools.choose_boundary()
+ self._boundary.append(boundary)
+ return self.startbody("multipart/" + subtype,
+ [("boundary", boundary)] + plist,
+ prefix=prefix,
+ add_to_http_hdrs=add_to_http_hdrs,
+ content_type=content_type)
+
+ def nextpart(self):
+ boundary = self._boundary[-1]
+ if self._first_part:
+ self._first_part = False
+ else:
+ self._fp.write("\r\n")
+ self._fp.write("--" + boundary + "\r\n")
+ return self.__class__(self._fp)
+
+ def lastpart(self):
+ if self._first_part:
+ self.nextpart()
+ boundary = self._boundary.pop()
+ self._fp.write("\r\n--" + boundary + "--\r\n")
+
+
+class ControlNotFoundError(ValueError): pass
+class ItemNotFoundError(ValueError): pass
+class ItemCountError(ValueError): pass
+
+class ParseError(Exception): pass
+
+
+def ParseResponse(response, select_default=False, ignore_errors=False):
+ """Parse HTTP response and return a list of HTMLForm instances.
+
+ The return value of urllib2.urlopen can be conveniently passed to this
+ function as the response parameter.
+
+ ClientForm.ParseError is raised on parse errors.
+
+ response: file-like object (supporting read() method) with a method
+ geturl(), returning the base URI of the HTTP response
+ select_default: for multiple-selection SELECT controls and RADIO controls,
+ pick the first item as the default if none are selected in the HTML
+ ignore_errors: don't raise ParseError, and carry on regardless if the
+ parser gets confused
+
+ Pass a true value for select_default if you want the behaviour specified by
+ RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
+ RADIO or multiple-selection SELECT control if none were selected in the
+ HTML. Most browsers (including Microsoft Internet Explorer (IE) and
+ Netscape Navigator) instead leave all items unselected in these cases. The
+ W3C HTML 4.0 standard leaves this behaviour undefined in the case of
+ multiple-selection SELECT controls, but insists that at least one RADIO
+ button should be checked at all times, in contradiction to browser
+ behaviour.
+
+ Precisely what ignore_errors does isn't well-defined yet, so don't rely too
+ much on the current behaviour -- if you want robustness, you're better off
+ fixing the HTML before passing it to this function.
+
+ """
+ return ParseFile(response, response.geturl(), select_default)
+
+def ParseFile(file, base_uri, select_default=False, ignore_errors=False):
+ """Parse HTML and return a list of HTMLForm instances.
+
+ ClientForm.ParseError is raised on parse errors.
+
+ file: file-like object (supporting read() method) containing HTML with zero
+ or more forms to be parsed
+ base_uri: the base URI of the document
+
+ For the other arguments and further details, see ParseResponse.__doc__.
+
+ """
+ fp = _FORM_PARSER_CLASS(ignore_errors)
+ while 1:
+ data = file.read(CHUNK)
+ fp.feed(data)
+ if len(data) != CHUNK: break
+ forms = []
+ for (name, action, method, enctype), attrs, controls in fp.forms:
+ if action is None:
+ action = base_uri
+ else:
+ action = urljoin(base_uri, action)
+ form = HTMLForm(action, method, enctype, name, attrs)
+ for type, name, attr in controls:
+ form.new_control(type, name, attr, select_default=select_default)
+ forms.append(form)
+ for form in forms:
+ form.fixup()
+ return forms
+
+
+class _AbstractFormParser:
+ """forms attribute contains HTMLForm instances on completion."""
+ # pinched (and modified) from Moshe Zadka
+ def __init__(self, ignore_errors, entitydefs=None):
+ if entitydefs is not None:
+ self.entitydefs = entitydefs
+ self._ignore_errors = ignore_errors
+ self.forms = []
+ self._current_form = None
+ self._select = None
+ self._optgroup = None
+ self._option = None
+ self._textarea = None
+
+ def error(self, error):
+ if not self._ignore_errors: raise error
+
+ def start_form(self, attrs):
+ if self._current_form is not None:
+ self.error(ParseError("nested FORMs"))
+ name = None
+ action = None
+ enctype = "application/x-www-form-urlencoded"
+ method = "GET"
+ d = {}
+ for key, value in attrs:
+ if key == "name":
+ name = value
+ elif key == "action":
+ action = value
+ elif key == "method":
+ method = string.upper(value)
+ elif key == "enctype":
+ enctype = string.lower(value)
+ else:
+ d[key] = value
+ controls = []
+ self._current_form = (name, action, method, enctype), d, controls
+
+ def end_form(self):
+ if self._current_form is None:
+ self.error(ParseError("end of FORM before start"))
+ self.forms.append(self._current_form)
+ self._current_form = None
+
+ def start_select(self, attrs):
+ if self._current_form is None:
+ self.error(ParseError("start of SELECT before start of FORM"))
+ if self._select is not None:
+ self.error(ParseError("nested SELECTs"))
+ if self._textarea is not None:
+ self.error(ParseError("SELECT inside TEXTAREA"))
+ d = {}
+ for key, val in attrs:
+ d[key] = val
+
+ self._select = d
+
+ self._append_select_control({"__select": d})
+
+ def end_select(self):
+ if self._current_form is None:
+ self.error(ParseError("end of SELECT before start of FORM"))
+ if self._select is None:
+ self.error(ParseError("end of SELECT before start"))
+
+ if self._option is not None:
+ self._end_option()
+
+ self._select = None
+
+ def start_optgroup(self, attrs):
+ if self._select is None:
+ self.error(ParseError("OPTGROUP outside of SELECT"))
+ d = {}
+ for key, val in attrs:
+ d[key] = val
+
+ self._optgroup = d
+
+ def end_optgroup(self):
+ if self._optgroup is None:
+ self.error(ParseError("end of OPTGROUP before start"))
+ self._optgroup = None
+
+ def _start_option(self, attrs):
+ if self._select is None:
+ self.error(ParseError("OPTION outside of SELECT"))
+ if self._option is not None:
+ self._end_option()
+
+ d = {}
+ for key, val in attrs:
+ d[key] = val
+
+ self._option = {}
+ self._option.update(d)
+ if (self._optgroup and self._optgroup.has_key("disabled") and
+ not self._option.has_key("disabled")):
+ self._option["disabled"] = None
+
+ def _end_option(self):
+ if self._option is None:
+ self.error(ParseError("end of OPTION before start"))
+
+ contents = string.strip(self._option.get("contents", ""))
+ #contents = string.strip(self._option["contents"])
+ self._option["contents"] = contents
+ if not self._option.has_key("value"):
+ self._option["value"] = contents
+ if not self._option.has_key("label"):
+ self._option["label"] = contents
+ # stuff dict of SELECT HTML attrs into a special private key
+ # (gets deleted again later)
+ self._option["__select"] = self._select
+ self._append_select_control(self._option)
+ self._option = None
+
+ def _append_select_control(self, attrs):
+ controls = self._current_form[2]
+ name = self._select.get("name")
+ controls.append(("select", name, attrs))
+
+## def do_option(self, attrs):
+## if self._select is None:
+## self.error(ParseError("OPTION outside of SELECT"))
+## d = {}
+## for key, val in attrs:
+## d[key] = val
+
+## self._option = {}
+## self._option.update(d)
+## if (self._optgroup and self._optgroup.has_key("disabled") and
+## not self._option.has_key("disabled")):
+## self._option["disabled"] = None
+
+ def start_textarea(self, attrs):
+ if self._current_form is None:
+ self.error(ParseError("start of TEXTAREA before start of FORM"))
+ if self._textarea is not None:
+ self.error(ParseError("nested TEXTAREAs"))
+ if self._select is not None:
+ self.error(ParseError("TEXTAREA inside SELECT"))
+ d = {}
+ for key, val in attrs:
+ d[key] = val
+
+ self._textarea = d
+
+ def end_textarea(self):
+ if self._current_form is None:
+ self.error(ParseError("end of TEXTAREA before start of FORM"))
+ if self._textarea is None:
+ self.error(ParseError("end of TEXTAREA before start"))
+ controls = self._current_form[2]
+ name = self._textarea.get("name")
+ controls.append(("textarea", name, self._textarea))
+ self._textarea = None
+
+ def handle_data(self, data):
+ if self._option is not None:
+ # self._option is a dictionary of the OPTION element's HTML
+ # attributes, but it has two special keys, one of which is the
+ # special "contents" key contains text between OPTION tags (the
+ # other is the "__select" key: see the end_option method)
+ map = self._option
+ key = "contents"
+ elif self._textarea is not None:
+ map = self._textarea
+ key = "value"
+ else:
+ return
+
+ if not map.has_key(key):
+ map[key] = data
+ else:
+ map[key] = map[key] + data
+
+## def handle_data(self, data):
+## if self._option is not None:
+## contents = string.strip(data)
+## controls = self._current_form[2]
+## if not self._option.has_key("value"):
+## self._option["value"] = contents
+## if not self._option.has_key("label"):
+## self._option["label"] = contents
+## # self._option is a dictionary of the OPTION element's HTML
+## # attributes, but it has two special keys:
+## # 1. special "contents" key contains text between OPTION tags
+## self._option["contents"] = contents
+## # 2. stuff dict of SELECT HTML attrs into a special private key
+## # (gets deleted again later)
+## self._option["__select"] = self._select
+## self._append_select_control(self._option)
+## self._option = None
+## elif self._textarea is not None:
+## #self._textarea["value"] = data
+## if self._textarea.get("value") is None:
+## self._textarea["value"] = data
+## else:
+## self._textarea["value"] = self._textarea["value"] + data
+
+ def do_button(self, attrs):
+ if self._current_form is None:
+ self.error(ParseError("start of BUTTON before start of FORM"))
+ d = {}
+ d["type"] = "submit" # default
+ for key, val in attrs:
+ d[key] = val
+ controls = self._current_form[2]
+
+ type = d["type"]
+ name = d.get("name")
+ # we don't want to lose information, so use a type string that
+ # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
+ # eg. type for BUTTON/RESET is "resetbutton"
+ # (type for INPUT/RESET is "reset")
+ type = type+"button"
+ controls.append((type, name, d))
+
+ def do_input(self, attrs):
+ if self._current_form is None:
+ self.error(ParseError("start of INPUT before start of FORM"))
+ d = {}
+ d["type"] = "text" # default
+ for key, val in attrs:
+ d[key] = val
+ controls = self._current_form[2]
+
+ type = d["type"]
+ name = d.get("name")
+ controls.append((type, name, d))
+
+ def do_isindex(self, attrs):
+ if self._current_form is None:
+ self.error(ParseError("start of ISINDEX before start of FORM"))
+ d = {}
+ for key, val in attrs:
+ d[key] = val
+ controls = self._current_form[2]
+
+ # isindex doesn't have type or name HTML attributes
+ controls.append(("isindex", None, d))
+
+# use HTMLParser if we have it (it does XHTML), htmllib otherwise
+try:
+ import HTMLParser
+except ImportError:
+ import htmllib, formatter
+ class _FormParser(_AbstractFormParser, htmllib.HTMLParser):
+ # This is still here for compatibility with Python 1.5.2.
+ # It doesn't do the right thing with XHTML.
+ def __init__(self, ignore_errors, entitydefs=None):
+ htmllib.HTMLParser.__init__(self, formatter.NullFormatter())
+ _AbstractFormParser.__init__(self, ignore_errors, entitydefs)
+
+ def do_option(self, attrs):
+ _AbstractFormParser._start_option(self, attrs)
+
+ _FORM_PARSER_CLASS = _FormParser
+else:
+ class _XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
+ # thanks to Michael Howitz for this!
+ def __init__(self, ignore_errors, entitydefs=None):
+ HTMLParser.HTMLParser.__init__(self)
+ _AbstractFormParser.__init__(self, ignore_errors, entitydefs)
+
+ def start_option(self, attrs):
+ _AbstractFormParser._start_option(self, attrs)
+
+ def end_option(self):
+ _AbstractFormParser._end_option(self)
+
+ def handle_starttag(self, tag, attrs):
+ try:
+ method = getattr(self, 'start_' + tag)
+ except AttributeError:
+ try:
+ method = getattr(self, 'do_' + tag)
+ except AttributeError:
+ pass # unknown tag
+ else:
+ method(attrs)
+ else:
+ method(attrs)
+
+ def handle_endtag(self, tag):
+ try:
+ method = getattr(self, 'end_' + tag)
+ except AttributeError:
+ pass # unknown tag
+ else:
+ method()
+
+ # handle_charref, handle_entityref and default entitydefs are taken
+ # from sgmllib
+ def handle_charref(self, name):
+ try:
+ n = int(name)
+ except ValueError:
+ self.unknown_charref(name)
+ return
+ if not 0 <= n <= 255:
+ self.unknown_charref(name)
+ return
+ self.handle_data(chr(n))
+
+ # Definition of entities -- derived classes may override
+ entitydefs = \
+ {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
+
+ def handle_entityref(self, name):
+ table = self.entitydefs
+ if name in table:
+ self.handle_data(table[name])
+ else:
+ self.unknown_entityref(name)
+ return
+
+ # These methods would have passed through the ref intact if I'd thought
+ # of it earlier, but since the old parser silently swallows unknown
+ # refs, so does this new parser.
+ def unknown_entityref(self, ref): pass
+ def unknown_charref(self, ref): pass
+
+ _FORM_PARSER_CLASS = _XHTMLCompatibleFormParser
+
+
+class Control:
+ """An HTML form control.
+
+ An HTMLForm contains a sequence of Controls. HTMLForm delegates lots of
+ things to Control objects, and most of Control's methods are, in effect,
+ documented by the HTMLForm docstrings.
+
+ The Controls in an HTMLForm can be got at via the HTMLForm.find_control
+ method or the HTMLForm.controls attribute.
+
+ Control instances are usually constructed using the ParseFile /
+ ParseResponse functions, so you can probably ignore the rest of this
+ paragraph. A Control is only properly initialised after the fixup method
+ has been called. In fact, this is only strictly necessary for ListControl
+ instances. This is necessary because ListControls are built up from
+ ListControls each containing only a single item, and their initial value(s)
+ can only be known after the sequence is complete.
+
+ The types and values that are acceptable for assignment to the value
+ attribute are defined by subclasses.
+
+ If the disabled attribute is true, this represents the state typically
+ represented by browsers by `greying out' a control. If the disabled
+ attribute is true, the Control will raise AttributeError if an attempt is
+ made to change its value. In addition, the control will not be considered
+ `successful' as defined by the W3C HTML 4 standard -- ie. it will
+ contribute no data to the return value of the HTMLForm.click* methods. To
+ enable a control, set the disabled attribute to a false value.
+
+ If the readonly attribute is true, the Control will raise AttributeError if
+ an attempt is made to change its value. To make a control writable, set
+ the readonly attribute to a false value.
+
+ All controls have the disabled and readonly attributes, not only those that
+ may have the HTML attributes of the same names.
+
+ On assignment to the value attribute, the following exceptions are raised:
+ TypeError, AttributeError (if the value attribute should not be assigned
+ to, because the control is disabled, for example) and ValueError.
+
+ If the name or value attributes are None, or the value is an empty list, or
+ if the control is disabled, the control is not successful.
+
+ Public attributes:
+
+ type: string describing type of control (see the keys of the
+ HTMLForm.type2class dictionary for the allowable values) (readonly)
+ name: name of control (readonly)
+ value: current value of control (subclasses may allow a single value, a
+ sequence of values, or either)
+ disabled: disabled state
+ readonly: readonly state
+ id: value of id HTML attribute
+
+ """
+ def __init__(self, type, name, attrs):
+ """
+ type: string describing type of control (see the keys of the
+ HTMLForm.type2class dictionary for the allowable values)
+ name: control name
+ attrs: HTML attributes of control's HTML element
+
+ """
+ raise NotImplementedError()
+
+ def add_to_form(self, form):
+ form.controls.append(self)
+
+ def fixup(self):
+ pass
+
+ def __getattr__(self, name): raise NotImplementedError()
+ def __setattr__(self, name, value): raise NotImplementedError()
+
+ def pairs(self):
+ """Return list of (key, value) pairs suitable for passing to urlencode.
+ """
+ raise NotImplementedError()
+
+ def _write_mime_data(self, mw):
+ """Write data for this control to a MimeWriter."""
+ # called by HTMLForm
+ for name, value in self.pairs():
+ mw2 = mw.nextpart()
+ mw2.addheader("Content-disposition",
+ 'form-data; name="%s"' % name, 1)
+ f = mw2.startbody(prefix=0)
+ f.write(value)
+
+ def __str__(self):
+ raise NotImplementedError()
+
+
+#---------------------------------------------------
+class ScalarControl(Control):
+ """Control whose value is not restricted to one of a prescribed set.
+
+ Some ScalarControls don't accept any value attribute. Otherwise, takes a
+ single value, which must be string-like.
+
+ Additional read-only public attribute:
+
+ attrs: dictionary mapping the names of original HTML attributes of the
+ control to their values
+
+ """
+ def __init__(self, type, name, attrs):
+ self.__dict__["type"] = string.lower(type)
+ self.__dict__["name"] = name
+ self._value = attrs.get("value")
+ self.disabled = attrs.has_key("disabled")
+ self.readonly = attrs.has_key("readonly")
+ self.id = attrs.get("id")
+
+ self.attrs = attrs.copy()
+
+ self._clicked = False
+
+ def __getattr__(self, name):
+ if name == "value":
+ return self.__dict__["_value"]
+ else:
+ raise AttributeError("%s instance has no attribute '%s'" %
+ (self.__class__.__name__, name))
+
+ def __setattr__(self, name, value):
+ if name == "value":
+ if not isstringlike(value):
+ raise TypeError("must assign a string")
+ elif self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+ elif self.disabled:
+ raise AttributeError("control '%s' is disabled" % self.name)
+ self.__dict__["_value"] = value
+ elif name in ("name", "type"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+ def pairs(self):
+ name = self.name
+ value = self.value
+ if name is None or value is None or self.disabled:
+ return []
+ return [(name, value)]
+
+ def __str__(self):
+ name = self.name
+ value = self.value
+ if name is None: name = "<None>"
+ if value is None: value = "<None>"
+
+ infos = []
+ if self.disabled: infos.append("disabled")
+ if self.readonly: infos.append("readonly")
+ info = string.join(infos, ", ")
+ if info: info = " (%s)" % info
+
+ return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
+
+
+#---------------------------------------------------
+class TextControl(ScalarControl):
+ """Textual input control.
+
+ Covers:
+
+ INPUT/TEXT
+ INPUT/PASSWORD
+ INPUT/FILE
+ INPUT/HIDDEN
+ TEXTAREA
+
+ """
+ def __init__(self, type, name, attrs):
+ ScalarControl.__init__(self, type, name, attrs)
+ if self.type == "hidden": self.readonly = True
+ if self._value is None:
+ self._value = ""
+
+
+#---------------------------------------------------
+class FileControl(ScalarControl):
+ """File upload with INPUT TYPE=FILE.
+
+ The value attribute of a FileControl is always None.
+
+ Additional public method: add_file
+
+ """
+ def __init__(self, type, name, attrs):
+ ScalarControl.__init__(self, type, name, attrs)
+ self._value = None
+ self._upload_data = []
+
+ def __setattr__(self, name, value):
+ if name in ("value", "name", "type"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+ def add_file(self, file_object, content_type=None, filename=None):
+ if not hasattr(file_object, "read"):
+ raise TypeError("file-like object must have read method")
+ if content_type is not None and not isstringlike(content_type):
+ raise TypeError("content type must be None or string-like")
+ if filename is not None and not isstringlike(filename):
+ raise TypeError("filename must be None or string-like")
+ if content_type is None:
+ content_type = "application/octet-stream"
+ self._upload_data.append((file_object, content_type, filename))
+
+ def pairs(self):
+ # XXX should it be successful even if unnamed?
+ if self.name is None or self.disabled:
+ return []
+ return [(self.name, "")]
+
+ def _write_mime_data(self, mw):
+ # called by HTMLForm
+ if len(self._upload_data) == 1:
+ # single file
+ file_object, content_type, filename = self._upload_data[0]
+ mw2 = mw.nextpart()
+ fn_part = filename and ('; filename="%s"' % filename) or ''
+ disp = 'form-data; name="%s"%s' % (self.name, fn_part)
+ mw2.addheader("Content-disposition", disp, prefix=1)
+ fh = mw2.startbody(content_type, prefix=0)
+ fh.write(file_object.read())
+ elif len(self._upload_data) != 0:
+ # multiple files
+ mw2 = mw.nextpart()
+ disp = 'form-data; name="%s"' % self.name
+ mw2.addheader("Content-disposition", disp, prefix=1)
+ fh = mw2.startmultipartbody("mixed", prefix=0)
+ for file_object, content_type, filename in self._upload_data:
+ mw3 = mw2.nextpart()
+ fn_part = filename and ('; filename="%s"' % filename) or ''
+ disp = 'file%s' % fn_part
+ mw3.addheader("Content-disposition", disp, prefix=1)
+ fh2 = mw3.startbody(content_type, prefix=0)
+ fh2.write(file_object.read())
+ mw2.lastpart()
+
+ def __str__(self):
+ name = self.name
+ if name is None: name = "<None>"
+
+ if not self._upload_data:
+ value = "<No files added>"
+ else:
+ value = []
+ for file, ctype, filename in self._upload_data:
+ if filename is None:
+ value.append("<Unnamed file>")
+ else:
+ value.append(filename)
+ value = string.join(value, ", ")
+
+ info = []
+ if self.disabled: info.append("disabled")
+ if self.readonly: info.append("readonly")
+ info = string.join(info, ", ")
+ if info: info = " (%s)" % info
+
+ return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
+
+
+#---------------------------------------------------
+class IsindexControl(ScalarControl):
+ """ISINDEX control.
+
+ ISINDEX is the odd-one-out of HTML form controls. In fact, it isn't really
+ part of regular HTML forms at all, and predates it. You're only allowed
+ one ISINDEX per HTML document. ISINDEX and regular form submission are
+ mutually exclusive -- either submit a form, or the ISINDEX.
+
+ Having said this, since ISINDEX controls may appear in forms (which is
+ probably bad HTML), ParseFile / ParseResponse will include them in the
+ HTMLForm instances it returns. You can set the ISINDEX's value, as with
+ any other control (but note that ISINDEX controls have no name, so you'll
+ need to use the type argument of set_value!). When you submit the form,
+ the ISINDEX will not be successful (ie., no data will get returned to the
+ server as a result of its presence), unless you click on the ISINDEX
+ control, in which case the ISINDEX gets submitted instead of the form:
+
+ form.set_value("my isindex value", type="isindex")
+ urllib2.urlopen(form.click(type="isindex"))
+
+ ISINDEX elements outside of FORMs are ignored. If you want to submit one
+ by hand, do it like so:
+
+ url = urlparse.urljoin(page_uri, "?"+urllib.quote_plus("my isindex value"))
+ result = urllib2.urlopen(url)
+
+ """
+ def __init__(self, type, name, attrs):
+ ScalarControl.__init__(self, type, name, attrs)
+ if self._value is None:
+ self._value = ""
+
+ def pairs(self):
+ return []
+
+ def _click(self, form, coord, return_type):
+ # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
+ # want "bar+baz".
+ # This doesn't seem to be specified in HTML 4.01 spec. (ISINDEX is
+ # deprecated in 4.01, but it should still say how to submit it).
+ # Submission of ISINDEX is explained in the HTML 3.2 spec, though.
+ url = urljoin(form.action, "?"+urllib.quote_plus(self.value))
+ req_data = url, None, []
+
+ if return_type == "pairs":
+ return []
+ elif return_type == "request_data":
+ return req_data
+ else:
+ return urllib2.Request(url)
+
+ def __str__(self):
+ value = self.value
+ if value is None: value = "<None>"
+
+ infos = []
+ if self.disabled: infos.append("disabled")
+ if self.readonly: infos.append("readonly")
+ info = string.join(infos, ", ")
+ if info: info = " (%s)" % info
+
+ return "<%s(%s)%s>" % (self.__class__.__name__, value, info)
+
+
+#---------------------------------------------------
+class IgnoreControl(ScalarControl):
+ """Control that we're not interested in.
+
+ Covers:
+
+ INPUT/RESET
+ BUTTON/RESET
+ INPUT/BUTTON
+ BUTTON/BUTTON
+
+ These controls are always unsuccessful, in the terminology of HTML 4 (ie.
+ they never require any information to be returned to the server).
+
+ BUTTON/BUTTON is used to generate events for script embedded in HTML.
+
+ The value attribute of IgnoreControl is always None.
+
+ """
+ def __init__(self, type, name, attrs):
+ ScalarControl.__init__(self, type, name, attrs)
+ self._value = None
+
+ def __setattr__(self, name, value):
+ if name == "value":
+ raise AttributeError(
+ "control '%s' is ignored, hence read-only" % self.name)
+ elif name in ("name", "type"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+
+#---------------------------------------------------
+class ListControl(Control):
+ """Control representing a sequence of items.
+
+ The value attribute of a ListControl represents the selected list items in
+ the control.
+
+ ListControl implements both list controls that take a single value and
+ those that take multiple values.
+
+ ListControls accept sequence values only. Some controls only accept
+ sequences of length 0 or 1 (RADIO, and single-selection SELECT).
+ In those cases, ItemCountError is raised if len(sequence) > 1. CHECKBOXes
+ and multiple-selection SELECTs (those having the "multiple" HTML attribute)
+ accept sequences of any length.
+
+ Note the following mistake:
+
+ control.value = some_value
+ assert control.value == some_value # not necessarily true
+
+ The reason for this is that the value attribute always gives the list items
+ in the order they were listed in the HTML.
+
+ ListControl items can also be referred to by their labels instead of names.
+ Use the by_label argument, and the set_value_by_label, get_value_by_label
+ methods.
+
+ XXX RadioControl and CheckboxControl don't implement by_label yet.
+
+ Note that, rather confusingly, though SELECT controls are represented in
+ HTML by SELECT elements (which contain OPTION elements, representing
+ individual list items), CHECKBOXes and RADIOs are not represented by *any*
+ element. Instead, those controls are represented by a collection of INPUT
+ elements. For example, this is a SELECT control, named "control1":
+
+ <select name="control1">
+ <option>foo</option>
+ <option value="1">bar</option>
+ </select>
+
+ and this is a CHECKBOX control, named "control2":
+
+ <input type="checkbox" name="control2" value="foo" id="cbe1">
+ <input type="checkbox" name="control2" value="bar" id="cbe2">
+
+ The id attribute of a CHECKBOX or RADIO ListControl is always that of its
+ first element (for example, "cbe1" above).
+
+
+ Additional read-only public attribute: multiple.
+
+
+ ListControls are built up by the parser from their component items by
+ creating one ListControl per item, consolidating them into a single master
+ ListControl held by the HTMLForm:
+
+ -User calls form.new_control(...)
+ -Form creates Control, and calls control.add_to_form(self).
+ -Control looks for a Control with the same name and type in the form, and
+ if it finds one, merges itself with that control by calling
+ control.merge_control(self). The first Control added to the form, of a
+ particular name and type, is the only one that survives in the form.
+ -Form calls control.fixup for all its controls. ListControls in the form
+ know they can now safely pick their default values.
+
+ To create a ListControl without an HTMLForm, use:
+
+ control.merge_control(new_control)
+
+ """
+ def __init__(self, type, name, attrs={}, select_default=False,
+ called_as_base_class=False):
+ """
+ select_default: for RADIO and multiple-selection SELECT controls, pick
+ the first item as the default if no 'selected' HTML attribute is
+ present
+
+ """
+ if not called_as_base_class:
+ raise NotImplementedError()
+
+ self.__dict__["type"] = string.lower(type)
+ self.__dict__["name"] = name
+ self._value = attrs.get("value")
+ self.disabled = False
+ self.readonly = False
+ self.id = attrs.get("id")
+
+ self._attrs = attrs.copy()
+ # As Controls are merged in with .merge_control(), self._attrs will
+ # refer to each Control in turn -- always the most recently merged
+ # control. Each merged-in Control instance corresponds to a single
+ # list item: see ListControl.__doc__.
+ if attrs:
+ self._attrs_list = [self._attrs] # extended by .merge_control()
+ self._disabled_list = [self._attrs.has_key("disabled")] # ditto
+ else:
+ self._attrs_list = [] # extended by .merge_control()
+ self._disabled_list = [] # ditto
+
+ self._select_default = select_default
+ self._clicked = False
+ # Some list controls can have their default set only after all items
+ # are known. If so, self._value_is_set is false, and the self.fixup
+ # method, called after all items have been added, sets the default.
+ self._value_is_set = False
+
+ def _value_from_label(self, label):
+ raise NotImplementedError("control '%s' does not yet support "
+ "by_label" % self.name)
+
+ def toggle(self, name, by_label=False):
+ return self._set_selected_state(name, 2, by_label)
+ def set(self, selected, name, by_label=False):
+ action = int(bool(selected))
+ return self._set_selected_state(name, action, by_label)
+
+ def _set_selected_state(self, name, action, by_label):
+ """
+ name: item name
+ action:
+ 0: clear
+ 1: set
+ 2: toggle
+
+ """
+ if not isstringlike(name):
+ raise TypeError("item name must be string-like")
+ if self.disabled:
+ raise AttributeError("control '%s' is disabled" % self.name)
+ if self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+ if by_label:
+ name = self._value_from_label(name)
+ try:
+ i = self._menu.index(name)
+ except ValueError:
+ raise ItemNotFoundError("no item named '%s'" % name)
+
+ if self.multiple:
+ if action == 2:
+ action = not self._selected[i]
+ if action and self._disabled_list[i]:
+ raise AttributeError("item '%s' is disabled" % name)
+ self._selected[i] = bool(action)
+ else:
+ if action == 2:
+ if self._selected == name:
+ action = 0
+ else:
+ action = 1
+ if action == 0 and self._selected == name:
+ self._selected = None
+ elif action == 1:
+ if self._disabled_list[i]:
+ raise AttributeError("item '%s' is disabled" % name)
+ self._selected = name
+
+ def toggle_single(self, by_label=False):
+ self._set_single_selected_state(2, by_label)
+ def set_single(self, selected, by_label=False):
+ action = int(bool(selected))
+ self._set_single_selected_state(action, by_label)
+
+ def _set_single_selected_state(self, action, by_label):
+ if len(self._menu) != 1:
+ raise ItemCountError("'%s' is not a single-item control" %
+ self.name)
+
+ name = self._menu[0]
+ if by_label:
+ name = self._value_from_label(name)
+ self._set_selected_state(name, action, by_label)
+
+ def get_item_disabled(self, name, by_label=False):
+ """Get disabled state of named list item in a ListControl."""
+ if by_label:
+ name = self._value_from_label(name)
+ try:
+ i = self._menu.index(name)
+ except ValueError:
+ raise ItemNotFoundError()
+ else:
+ return self._disabled_list[i]
+
+ def set_item_disabled(self, disabled, name, by_label=False):
+ """Set disabled state of named list item in a ListControl.
+
+ disabled: boolean disabled state
+
+ """
+ if by_label:
+ name = self._value_from_label(name)
+ try:
+ i = self._menu.index(name)
+ except ValueError:
+ raise ItemNotFoundError()
+ else:
+ self._disabled_list[i] = bool(disabled)
+
+ def set_all_items_disabled(self, disabled):
+ """Set disabled state of all list items in a ListControl.
+
+ disabled: boolean disabled state
+
+ """
+ for i in range(len(self._disabled_list)):
+ self._disabled_list[i] = bool(disabled)
+
+ def get_item_attrs(self, name, by_label=False):
+ """Return dictionary of HTML attributes for a single ListControl item.
+
+ The HTML element types that describe list items are: OPTION for SELECT
+ controls, INPUT for the rest. These elements have HTML attributes that
+ you may occasionally want to know about -- for example, the "alt" HTML
+ attribute gives a text string describing the item (graphical browsers
+ usually display this as a tooltip).
+
+ The returned dictionary maps HTML attribute names to values. The names
+ and values are taken from the original HTML.
+
+ Note that for SELECT controls, the returned dictionary contains a
+ special key "contents" -- see SelectControl.__doc__.
+
+ """
+ if by_label:
+ name = self._value_from_label(name)
+ try:
+ i = self._menu.index(name)
+ except ValueError:
+ raise ItemNotFoundError()
+ return self._attrs_list[i]
+
+ def add_to_form(self, form):
+ try:
+ control = form.find_control(self.name, self.type)
+ except ControlNotFoundError:
+ Control.add_to_form(self, form)
+ else:
+ control.merge_control(self)
+
+ def merge_control(self, control):
+ assert bool(control.multiple) == bool(self.multiple)
+ assert isinstance(control, self.__class__)
+ self._menu.extend(control._menu)
+ self._attrs_list.extend(control._attrs_list)
+ self._disabled_list.extend(control._disabled_list)
+ if control.multiple:
+ self._selected.extend(control._selected)
+ else:
+ if control._value_is_set:
+ self._selected = control._selected
+ if control._value_is_set:
+ self._value_is_set = True
+
+ def fixup(self):
+ """
+ ListControls are built up from component list items (which are also
+ ListControls) during parsing. This method should be called after all
+ items have been added. See ListControl.__doc__ for the reason this is
+ required.
+
+ """
+ # Need to set default selection where no item was indicated as being
+ # selected by the HTML:
+
+ # CHECKBOX:
+ # Nothing should be selected.
+ # SELECT/single, SELECT/multiple and RADIO:
+ # RFC 1866 (HTML 2.0): says first item should be selected.
+ # W3C HTML 4.01 Specification: says that client behaviour is
+ # undefined in this case. For RADIO, exactly one must be selected,
+ # though which one is undefined.
+ # Both Netscape and Microsoft Internet Explorer (IE) choose first
+ # item for SELECT/single. However, both IE5 and Mozilla (both 1.0
+ # and Firebird 0.6) leave all items unselected for RADIO and
+ # SELECT/multiple.
+
+ # Since both Netscape and IE all choose the first item for
+ # SELECT/single, we do the same. OTOH, both Netscape and IE
+ # leave SELECT/multiple with nothing selected, in violation of RFC 1866
+ # (but not in violation of the W3C HTML 4 standard); the same is true
+ # of RADIO (which *is* in violation of the HTML 4 standard). We follow
+ # RFC 1866 if the select_default attribute is set, and Netscape and IE
+ # otherwise. RFC 1866 and HTML 4 are always violated insofar as you
+ # can deselect all items in a RadioControl.
+
+ raise NotImplementedError()
+
+ def __getattr__(self, name):
+ if name == "value":
+ menu = self._menu
+ if self.multiple:
+ values = []
+ for i in range(len(menu)):
+ if self._selected[i]: values.append(menu[i])
+ return values
+ else:
+ if self._selected is None: return []
+ else: return [self._selected]
+ else:
+ raise AttributeError("%s instance has no attribute '%s'" %
+ (self.__class__.__name__, name))
+
+ def __setattr__(self, name, value):
+ if name == "value":
+ if self.disabled:
+ raise AttributeError("control '%s' is disabled" % self.name)
+ if self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+ self._set_value(value)
+ elif name in ("name", "type", "multiple"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+ def _set_value(self, value):
+ if self.multiple:
+ self._multiple_set_value(value)
+ else:
+ self._single_set_value(value)
+
+ def _single_set_value(self, value):
+ if value is None or isstringlike(value):
+ raise TypeError("ListControl, must set a sequence")
+ nr = len(value)
+ if not (0 <= nr <= 1):
+ raise ItemCountError("single selection list, must set sequence of "
+ "length 0 or 1")
+
+ if nr == 0:
+ self._selected = None
+ else:
+ value = value[0]
+ try:
+ i = self._menu.index(value)
+ except ValueError:
+ raise ItemNotFoundError("no item named '%s'" %
+ repr(value))
+ if self._disabled_list[i]:
+ raise AttributeError("item '%s' is disabled" % value)
+ self._selected = value
+
+ def _multiple_set_value(self, value):
+ if value is None or isstringlike(value):
+ raise TypeError("ListControl, must set a sequence")
+
+ selected = [False]*len(self._selected)
+ menu = self._menu
+ disabled_list = self._disabled_list
+
+ for v in value:
+ found = False
+ for i in range(len(menu)):
+ item_name = menu[i]
+ if v == item_name:
+ if disabled_list[i]:
+ raise AttributeError("item '%s' is disabled" % value)
+ selected[i] = True
+ found = True
+ break
+ if not found:
+ raise ItemNotFoundError("no item named '%s'" % repr(v))
+ self._selected = selected
+
+ def set_value_by_label(self, value):
+ raise NotImplementedError("control '%s' does not yet support "
+ "by_label" % self.name)
+ def get_value_by_label(self):
+ raise NotImplementedError("control '%s' does not yet support "
+ "by_label" % self.name)
+
+ def possible_items(self, by_label=False):
+ if by_label:
+ raise NotImplementedError(
+ "control '%s' does not yet support by_label" % self.name)
+ return copy.copy(self._menu)
+
+ def pairs(self):
+ if self.disabled:
+ return []
+
+ if not self.multiple:
+ name = self.name
+ value = self._selected
+ if name is None or value is None:
+ return []
+ return [(name, value)]
+ else:
+ control_name = self.name # usually the name HTML attribute
+ pairs = []
+ for i in range(len(self._menu)):
+ item_name = self._menu[i] # usually the value HTML attribute
+ if self._selected[i]:
+ pairs.append((control_name, item_name))
+ return pairs
+
+ def _item_str(self, i):
+ item_name = self._menu[i]
+ if self.multiple:
+ if self._selected[i]:
+ item_name = "*"+item_name
+ else:
+ if self._selected == item_name:
+ item_name = "*"+item_name
+ if self._disabled_list[i]:
+ item_name = "(%s)" % item_name
+ return item_name
+
+ def __str__(self):
+ name = self.name
+ if name is None: name = "<None>"
+
+ display = []
+ for i in range(len(self._menu)):
+ s = self._item_str(i)
+ display.append(s)
+
+ infos = []
+ if self.disabled: infos.append("disabled")
+ if self.readonly: infos.append("readonly")
+ info = string.join(infos, ", ")
+ if info: info = " (%s)" % info
+
+ return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
+ name, string.join(display, ", "), info)
+
+
+class RadioControl(ListControl):
+ """
+ Covers:
+
+ INPUT/RADIO
+
+ """
+ def __init__(self, type, name, attrs, select_default=False):
+ ListControl.__init__(self, type, name, attrs, select_default,
+ called_as_base_class=True)
+ self.__dict__["multiple"] = False
+ value = attrs.get("value", "on")
+ self._menu = [value]
+ checked = attrs.has_key("checked")
+ if checked:
+ self._value_is_set = True
+ self._selected = value
+ else:
+ self._selected = None
+
+ def fixup(self):
+ if not self._value_is_set:
+ # no item explicitly selected
+ assert self._selected is None
+ if self._select_default:
+ self._selected = self._menu[0]
+ self._value_is_set = True
+
+
+class CheckboxControl(ListControl):
+ """
+ Covers:
+
+ INPUT/CHECKBOX
+
+ """
+ def __init__(self, type, name, attrs, select_default=False):
+ ListControl.__init__(self, type, name, attrs, select_default,
+ called_as_base_class=True)
+ self.__dict__["multiple"] = True
+ value = attrs.get("value", "on")
+ self._menu = [value]
+ checked = attrs.has_key("checked")
+ self._selected = [checked]
+ self._value_is_set = True
+
+ def fixup(self):
+ # If no items were explicitly checked in HTML, that's how we must
+ # leave it, so we have nothing to do here.
+ assert self._value_is_set
+
+
+class SelectControl(ListControl):
+ """
+ Covers:
+
+ SELECT (and OPTION)
+
+ SELECT control values and labels are subject to some messy defaulting
+ rules. For example, if the HTML repreentation of the control is:
+
+ <SELECT name=year>
+ <OPTION value=0 label="2002">current year</OPTION>
+ <OPTION value=1>2001</OPTION>
+ <OPTION>2000</OPTION>
+ </SELECT>
+
+ The items, in order, have labels "2002", "2001" and "2000", whereas their
+ values are "0", "1" and "2000" respectively. Note that the value of the
+ last OPTION in this example defaults to its contents, as specified by RFC
+ 1866, as do the labels of the second and third OPTIONs.
+
+ The purpose of these methods is that the OPTION labels are sometimes much
+ more meaningful, than are the OPTION values, which can make for more
+ maintainable code.
+
+ Additional read-only public attribute: attrs
+
+ The attrs attribute is a dictionary of the original HTML attributes of the
+ SELECT element. Other ListControls do not have this attribute, because in
+ other cases the control as a whole does not correspond to any single HTML
+ element. The get_item_attrs method may be used as usual to get at the
+ HTML attributes of the HTML elements corresponding to individual list items
+ (for SELECT controls, these are OPTION elements).
+
+ Another special case is that the attributes dictionaries returned by
+ get_item_attrs have a special key "contents" which does not correspond to
+ any real HTML attribute, but rather contains the contents of the OPTION
+ element:
+
+ <OPTION>this bit</OPTION>
+
+ """
+ # HTML attributes here are treated slightly from other list controls:
+ # -The SELECT HTML attributes dictionary is stuffed into the OPTION
+ # HTML attributes dictionary under the "__select" key.
+ # -The content of each OPTION element is stored under the special
+ # "contents" key of the dictionary.
+ # After all this, the dictionary is passed to the SelectControl constructor
+ # as the attrs argument, as usual. However:
+ # -The first SelectControl constructed when building up a SELECT control
+ # has a constructor attrs argument containing only the __select key -- so
+ # this SelectControl represents an empty SELECT control.
+ # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
+ # the __select dictionary containing the SELECT HTML-attributes.
+ def __init__(self, type, name, attrs, select_default=False):
+ # fish out the SELECT HTML attributes from the OPTION HTML attributes
+ # dictionary
+ self.attrs = attrs["__select"].copy()
+ attrs = attrs.copy()
+ del attrs["__select"]
+
+ ListControl.__init__(self, type, name, attrs, select_default,
+ called_as_base_class=True)
+
+ self._label_map = None
+ self.disabled = self.attrs.has_key("disabled")
+ self.id = self.attrs.get("id")
+
+ self._menu = []
+ self._selected = []
+ self._value_is_set = False
+ if self.attrs.has_key("multiple"):
+ self.__dict__["multiple"] = True
+ self._selected = []
+ else:
+ self.__dict__["multiple"] = False
+ self._selected = None
+
+ if attrs: # OPTION item data was provided
+ value = attrs["value"]
+ self._menu.append(value)
+ selected = attrs.has_key("selected")
+ if selected:
+ self._value_is_set = True
+ if self.attrs.has_key("multiple"):
+ self._selected.append(selected)
+ elif selected:
+ self._selected = value
+
+ def _build_select_label_map(self):
+ """Return an ordered mapping of labels to values.
+
+ For example, if the HTML repreentation of the control is as given in
+ SelectControl.__doc__, this function will return a mapping like:
+
+ {"2002": "0", "2001": "1", "2000": "2000"}
+
+ """
+ alist = []
+ for val in self._menu:
+ attrs = self.get_item_attrs(val)
+ alist.append((attrs["label"], val))
+ return AList(alist)
+
+ def _value_from_label(self, label):
+ try:
+ return self._label_map[label]
+ except KeyError:
+ raise ItemNotFoundError("no item has label '%s'" % label)
+
+ def fixup(self):
+ if not self._value_is_set:
+ # No item explicitly selected.
+ if len(self._menu) > 0:
+ if self.multiple:
+ if self._select_default:
+ self._selected[0] = True
+ else:
+ assert self._selected is None
+ self._selected = self._menu[0]
+ self._value_is_set = True
+ self._label_map = self._build_select_label_map()
+
+ def possible_items(self, by_label=False):
+ if not by_label:
+ return copy.copy(self._menu)
+ else:
+ self._label_map.set_inverted(True)
+ try:
+ r = map(lambda v, self=self: self._label_map[v], self._menu)
+ finally:
+ self._label_map.set_inverted(False)
+ return r
+
+ def set_value_by_label(self, value):
+ if isstringlike(value):
+ raise TypeError("ListControl, must set a sequence, not a string")
+ if self.disabled:
+ raise AttributeError("control '%s' is disabled" % self.name)
+ if self.readonly:
+ raise AttributeError("control '%s' is readonly" % self.name)
+
+ try:
+ value = map(lambda v, self=self: self._label_map[v], value)
+ except KeyError, e:
+ raise ItemNotFoundError("no item has label '%s'" % e.args[0])
+ self._set_value(value)
+
+ def get_value_by_label(self):
+ menu = self._menu
+ self._label_map.set_inverted(True)
+ try:
+ if self.multiple:
+ values = []
+ for i in range(len(menu)):
+ if self._selected[i]:
+ values.append(self._label_map[menu[i]])
+ return values
+ else:
+ return [self._label_map[self._selected]]
+ finally:
+ self._label_map.set_inverted(False)
+
+
+#---------------------------------------------------
+class SubmitControl(ScalarControl):
+ """
+ Covers:
+
+ INPUT/SUBMIT
+ BUTTON/SUBMIT
+
+ """
+ def __init__(self, type, name, attrs):
+ ScalarControl.__init__(self, type, name, attrs)
+ # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
+ # blank, Konqueror 3.1 defaults to "Submit". HTML spec. doesn't seem
+ # to define this.
+ if self.value is None: self.value = ""
+ self.readonly = True
+
+ def _click(self, form, coord, return_type):
+ self._clicked = coord
+ r = form._switch_click(return_type)
+ self._clicked = False
+ return r
+
+ def pairs(self):
+ if not self._clicked:
+ return []
+ return ScalarControl.pairs(self)
+
+
+#---------------------------------------------------
+class ImageControl(SubmitControl):
+ """
+ Covers:
+
+ INPUT/IMAGE
+
+ The value attribute of an ImageControl is always None. Coordinates are
+ specified using one of the HTMLForm.click* methods.
+
+ """
+ def __init__(self, type, name, attrs):
+ ScalarControl.__init__(self, type, name, attrs)
+ self.__dict__["value"] = None
+
+ def __setattr__(self, name, value):
+ if name in ("value", "name", "type"):
+ raise AttributeError("%s attribute is readonly" % name)
+ else:
+ self.__dict__[name] = value
+
+ def pairs(self):
+ clicked = self._clicked
+ if self.disabled or not clicked:
+ return []
+ name = self.name
+ if name is None: return []
+ return [("%s.x" % name, str(clicked[0])),
+ ("%s.y" % name, str(clicked[1]))]
+
+
+# aliases, just to make str(control) and str(form) clearer
+class PasswordControl(TextControl): pass
+class HiddenControl(TextControl): pass
+class TextareaControl(TextControl): pass
+class SubmitButtonControl(SubmitControl): pass
+
+
+def is_listcontrol(control): return isinstance(control, ListControl)
+
+
+class HTMLForm:
+ """Represents a single HTML <form> ... </form> element.
+
+ A form consists of a sequence of controls that usually have names, and
+ which can take on various values. The values of the various types of
+ controls represent variously: text, zero-, one- or many-of-many choices,
+ and files to be uploaded.
+
+ Forms can be filled in with data to be returned to the server, and then
+ submitted, using the click method to generate a request object suitable for
+ passing to urllib2.urlopen (or the click_request_data or click_pairs
+ methods if you're not using urllib2).
+
+ import ClientForm
+ forms = ClientForm.ParseFile(html, base_uri)
+ form = forms[0]
+
+ form["query"] = "Python"
+ form.set("lots", "nr_results")
+
+ response = urllib2.urlopen(form.click())
+
+ Usually, HTMLForm instances are not created directly. Instead, the
+ ParseFile or ParseResponse factory functions are used. If you do construct
+ HTMLForm objects yourself, however, note that an HTMLForm instance is only
+ properly initialised after the fixup method has been called (ParseFile and
+ ParseResponse do this for you). See ListControl.__doc__ for the reason
+ this is required.
+
+ Indexing a form (form["control_name"]) returns the named Control's value
+ attribute. Assignment to a form index (form["control_name"] = something)
+ is equivalent to assignment to the named Control's value attribute. If you
+ need to be more specific than just supplying the control's name, use the
+ set_value and get_value methods.
+
+ ListControl values are lists of item names. The list item's name is the
+ value of the corresponding HTML element's "value" attribute.
+
+ Example:
+
+ <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
+ <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
+
+ defines a CHECKBOX control with name "cheeses" which has two items, named
+ "leicester" and "cheddar".
+
+ Another example:
+
+ <SELECT name="more_cheeses">
+ <OPTION>1</OPTION>
+ <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
+ </SELECT>
+
+ defines a SELECT control with name "more_cheeses" which has two items,
+ named "1" and "2".
+
+ To set, clear or toggle individual list items, use the set and toggle
+ methods. To set the whole value, do as for any other control:use indexing
+ or the set_/get_value methods.
+
+ Example:
+
+ # select *only* the item named "cheddar"
+ form["cheeses"] = ["cheddar"]
+ # select "cheddar", leave other items unaffected
+ form.set("cheddar", "cheeses")
+
+ Some controls (RADIO and SELECT without the multiple attribute) can only
+ have zero or one items selected at a time. Some controls (CHECKBOX and
+ SELECT with the multiple attribute) can have multiple items selected at a
+ time. To set the whole value of a multiple-selection ListControl, assign a
+ sequence to a form index:
+
+ form["cheeses"] = ["cheddar", "leicester"]
+
+ To check whether a control has an item, or whether an item is selected,
+ respectively:
+
+ "cheddar" in form.possible_items("cheeses")
+ "cheddar" in form["cheeses"] # (or "cheddar" in form.get_value("cheeses"))
+
+ Note that some items may be disabled (see below).
+
+ Note the following mistake:
+
+ form[control_name] = control_value
+ assert form[control_name] == control_value # not necessarily true
+
+ The reason for this is that form[control_name] always gives the list items
+ in the order they were listed in the HTML.
+
+ List items (hence list values, too) can be referred to in terms of list
+ item labels rather than list item names. Currently, this is only possible
+ for SELECT controls (this is a bug). To use this feature, use the by_label
+ arguments to the various HTMLForm methods. Note that it is *item* names
+ (hence ListControl values also), not *control* names, that can be referred
+ to by label.
+
+ The question of default values of OPTION contents, labels and values is
+ somewhat complicated: see SelectControl.__doc__ and
+ ListControl.get_item_attrs.__doc__ if you think you need to know.
+
+ Controls can be disabled or readonly. In either case, the control's value
+ cannot be changed until you clear those flags (using the methods on
+ HTMLForm). Disabled is the state typically represented by browsers by
+ `greying out' a control. Disabled controls are not `successful' -- they
+ don't cause data to get returned to the server. Readonly controls usually
+ appear in browsers as read-only text boxes. Readonly controls are
+ successful. List items can also be disabled. Attempts to select disabled
+ items (with form[name] = value, or using the ListControl.set method, for
+ example) fail. Attempts to clear disabled items are allowed.
+
+ If a lot of controls are readonly, it can be useful to do this:
+
+ form.set_all_readonly(False)
+
+ When you want to do several things with a single control, or want to do
+ less common things, like changing which controls and items are disabled,
+ you can get at a particular control:
+
+ control = form.find_control("cheeses")
+ control.set_item_disabled(False, "gruyere")
+ control.set("gruyere")
+
+ Most methods on HTMLForm just delegate to the contained controls, so see
+ the docstrings of the various Control classes for further documentation.
+ Most of these delegating methods take name, type, kind, id and nr arguments
+ to specify the control to be operated on: see
+ HTMLForm.find_control.__doc__.
+
+ ControlNotFoundError (subclass of ValueError) is raised if the specified
+ control can't be found. This includes occasions where a non-ListControl
+ is found, but the method (set, for example) requires a ListControl.
+ ItemNotFoundError (subclass of ValueError) is raised if a list item can't
+ be found. ItemCountError (subclass of ValueError) is raised if an attempt
+ is made to select more than one item and the control doesn't allow that, or
+ set/get_single are called and the control contains more than one item.
+ AttributeError is raised if a control or item is readonly or disabled and
+ an attempt is made to alter its value.
+
+ XXX CheckBoxControl and RadioControl don't yet support item access by label
+
+ Security note: Remember that any passwords you store in HTMLForm instances
+ will be saved to disk in the clear if you pickle them (directly or
+ indirectly). The simplest solution to this is to avoid pickling HTMLForm
+ objects. You could also pickle before filling in any password, or just set
+ the password to "" before pickling.
+
+
+ Public attributes:
+
+ action: full (absolute URI) form action
+ method: "GET" or "POST"
+ enctype: form transfer encoding MIME type
+ name: name of form (None if no name was specified)
+ attrs: dictionary mapping original HTML form attributes to their values
+
+ controls: list of Control instances; do not alter this list
+ (instead, call form.new_control to make a Control and add it to the
+ form, or control.add_to_form if you already have a Control instance)
+
+
+
+ Methods for form filling:
+ -------------------------
+
+ Most of the these methods have very similar arguments. See
+ HTMLForm.find_control.__doc__ for details of the name, type, kind and nr
+ arguments. See above for a description of by_label.
+
+ def find_control(self,
+ name=None, type=None, kind=None, id=None, predicate=None,
+ nr=None)
+
+ get_value(name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False)
+ set_value(value,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False)
+
+ set_all_readonly(readonly)
+
+
+ Methods applying only to ListControls:
+
+ possible_items(name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False)
+
+ set(selected, item_name,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False)
+ toggle(item_name,
+ name=None, type=None, id=None, nr=None,
+ by_label=False)
+
+ set_single(selected,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False)
+ toggle_single(name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False)
+
+
+ Method applying only to FileControls:
+
+ add_file(file_object,
+ content_type="application/octet-stream", filename=None,
+ name=None, id=None, nr=None)
+
+
+ Methods applying only to clickable controls:
+
+ click(name=None, type=None, id=None, nr=0, coord=(1,1))
+ click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1))
+ click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1))
+
+ """
+
+ type2class = {
+ "text": TextControl,
+ "password": PasswordControl,
+ "hidden": HiddenControl,
+ "textarea": TextareaControl,
+
+ "isindex": IsindexControl,
+
+ "file": FileControl,
+
+ "button": IgnoreControl,
+ "buttonbutton": IgnoreControl,
+ "reset": IgnoreControl,
+ "resetbutton": IgnoreControl,
+
+ "submit": SubmitControl,
+ "submitbutton": SubmitButtonControl,
+ "image": ImageControl,
+
+ "radio": RadioControl,
+ "checkbox": CheckboxControl,
+ "select": SelectControl,
+ }
+
+#---------------------------------------------------
+# Initialisation. Use ParseResponse / ParseFile instead.
+
+ def __init__(self, action, method="GET",
+ enctype="application/x-www-form-urlencoded",
+ name=None, attrs=None):
+ """
+ In the usual case, use ParseResponse (or ParseFile) to create new
+ HTMLForm objects.
+
+ action: full (absolute URI) form action
+ method: "GET" or "POST"
+ enctype: form transfer encoding MIME type
+ name: name of form
+ attrs: dictionary mapping original HTML form attributes to their values
+
+ """
+ self.action = action
+ self.method = method
+ self.enctype = enctype
+ self.name = name
+ if attrs is not None:
+ self.attrs = attrs.copy()
+ else:
+ self.attrs = {}
+ self.controls = []
+
+ def new_control(self, type, name, attrs,
+ ignore_unknown=False, select_default=False):
+ """Adds a new control to the form.
+
+ This is usually called by ParseFile and ParseResponse. Don't call it
+ youself unless you're building your own Control instances.
+
+ Note that controls representing lists of items are built up from
+ controls holding only a single list item. See ListControl.__doc__ for
+ further information.
+
+ type: type of control (see Control.__doc__ for a list)
+ attrs: HTML attributes of control
+ ignore_unknown: if true, use a dummy Control instance for controls of
+ unknown type; otherwise, raise ValueError
+ select_default: for RADIO and multiple-selection SELECT controls, pick
+ the first item as the default if no 'selected' HTML attribute is
+ present (this defaulting happens when the HTMLForm.fixup method is
+ called)
+
+ """
+ type = string.lower(type)
+ klass = self.type2class.get(type)
+ if klass is None:
+ if ignore_unknown:
+ klass = IgnoreControl
+ else:
+ raise ValueError("Unknown control type '%s'" % type)
+
+ a = attrs.copy()
+ if issubclass(klass, ListControl):
+ control = klass(type, name, a, select_default)
+ else:
+ control = klass(type, name, a)
+ control.add_to_form(self)
+
+ def fixup(self):
+ """Normalise form after all controls have been added.
+
+ This is usually called by ParseFile and ParseResponse. Don't call it
+ youself unless you're building your own Control instances.
+
+ This method should only be called once, after all controls have been
+ added to the form.
+
+ """
+ for control in self.controls:
+ control.fixup()
+
+#---------------------------------------------------
+ def __str__(self):
+ header = "%s %s %s" % (self.method, self.action, self.enctype)
+ rep = [header]
+ for control in self.controls:
+ rep.append(" %s" % str(control))
+ return "<%s>" % string.join(rep, "\n")
+
+#---------------------------------------------------
+# Form-filling methods.
+
+ def __getitem__(self, name):
+ return self.find_control(name).value
+ def __setitem__(self, name, value):
+ control = self.find_control(name)
+ try:
+ control.value = value
+ except AttributeError, e:
+ raise ValueError(str(e))
+
+ def get_value(self,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False):
+ """Return value of control.
+
+ If only name and value arguments are supplied, equivalent to
+
+ form[name]
+
+ """
+ c = self.find_control(name, type, kind, id, nr=nr)
+ if by_label:
+ try:
+ meth = c.get_value_by_label
+ except AttributeError:
+ raise NotImplementedError(
+ "control '%s' does not yet support by_label" % c.name)
+ else:
+ return meth()
+ else:
+ return c.value
+ def set_value(self, value,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False):
+ """Set value of control.
+
+ If only name and value arguments are supplied, equivalent to
+
+ form[name] = value
+
+ """
+ c = self.find_control(name, type, kind, id, nr=nr)
+ if by_label:
+ try:
+ meth = c.set_value_by_label
+ except AttributeError:
+ raise NotImplementedError(
+ "control '%s' does not yet support by_label" % c.name)
+ else:
+ meth(value)
+ else:
+ c.value = value
+
+ def set_all_readonly(self, readonly):
+ for control in self.controls:
+ control.readonly = bool(readonly)
+
+
+#---------------------------------------------------
+# Form-filling methods applying only to ListControls.
+
+ def possible_items(self,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False):
+ """Return a list of all values that the specified control can take."""
+ c = self._find_list_control(name, type, kind, id, nr)
+ return c.possible_items(by_label)
+
+ def set(self, selected, item_name,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False):
+ """Select / deselect named list item.
+
+ selected: boolean selected state
+
+ """
+ self._find_list_control(name, type, kind, id, nr).set(
+ selected, item_name, by_label)
+ def toggle(self, item_name,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False):
+ """Toggle selected state of named list item."""
+ self._find_list_control(name, type, kind, id, nr).toggle(
+ item_name, by_label)
+
+ def set_single(self, selected,
+ name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False):
+ """Select / deselect list item in a control having only one item.
+
+ If the control has multiple list items, ItemCountError is raised.
+
+ This is just a convenience method, so you don't need to know the item's
+ name -- the item name in these single-item controls is usually
+ something meaningless like "1" or "on".
+
+ For example, if a checkbox has a single item named "on", the following
+ two calls are equivalent:
+
+ control.toggle("on")
+ control.toggle_single()
+
+ """
+ self._find_list_control(name, type, kind, id, nr).set_single(
+ selected, by_label)
+ def toggle_single(self, name=None, type=None, kind=None, id=None, nr=None,
+ by_label=False):
+ """Toggle selected state of list item in control having only one item.
+
+ The rest is as for HTMLForm.set_single.__doc__.
+
+ """
+ self._find_list_control(name, type, kind, id, nr).toggle_single(
+ by_label)
+
+#---------------------------------------------------
+# Form-filling method applying only to FileControls.
+
+ def add_file(self, file_object, content_type=None, filename=None,
+ name=None, id=None, nr=None):
+ """Add a file to be uploaded.
+
+ file_object: file-like object (with read method) from which to read
+ data to upload
+ content_type: MIME content type of data to upload
+ filename: filename to pass to server
+
+ If filename is None, no filename is sent to the server.
+
+ If content_type is None, the content type is guessed based on the
+ filename and the data from read from the file object.
+
+ XXX
+ At the moment, guessed content type is always application/octet-stream.
+ Use sndhdr, imghdr modules. Should also try to guess HTML, XML, and
+ plain text.
+
+ """
+ self.find_control(name, "file", id=id, nr=nr).add_file(
+ file_object, content_type, filename)
+
+#---------------------------------------------------
+# Form submission methods, applying only to clickable controls.
+
+ def click(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
+ """Return request that would result from clicking on a control.
+
+ The request object is a urllib2.Request instance, which you can pass to
+ urllib2.urlopen (or ClientCookie.urlopen).
+
+ Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
+ IMAGEs) can be clicked.
+
+ Will click on the first clickable control, subject to the name, type
+ and nr arguments (as for find_control). If no name, type, id or number
+ is specified and there are no clickable controls, a request will be
+ returned for the form in its current, un-clicked, state.
+
+ IndexError is raised if any of name, type, id or nr is specified but no
+ matching control is found. ValueError is raised if the HTMLForm has an
+ enctype attribute that is not recognised.
+
+ You can optionally specify a coordinate to click at, which only makes a
+ difference if you clicked on an image.
+
+ """
+ return self._click(name, type, id, nr, coord, "request")
+
+ def click_request_data(self,
+ name=None, type=None, id=None, nr=0, coord=(1,1)):
+ """As for click method, but return a tuple (url, data, headers).
+
+ You can use this data to send a request to the server. This is useful
+ if you're using httplib or urllib rather than urllib2. Otherwise, use
+ the click method.
+
+ # Untested. Have to subclass to add headers, I think -- so use urllib2
+ # instead!
+ import urllib
+ url, data, hdrs = form.click_request_data()
+ r = urllib.urlopen(url, data)
+
+ # Untested. I don't know of any reason to use httplib -- you can get
+ # just as much control with urllib2.
+ import httplib, urlparse
+ url, data, hdrs = form.click_request_data()
+ tup = urlparse(url)
+ host, path = tup[1], urlparse.urlunparse((None, None)+tup[2:])
+ conn = httplib.HTTPConnection(host)
+ if data:
+ httplib.request("POST", path, data, hdrs)
+ else:
+ httplib.request("GET", path, headers=hdrs)
+ r = conn.getresponse()
+
+ """
+ return self._click(name, type, id, nr, coord, "request_data")
+
+ def click_pairs(self, name=None, type=None, id=None, nr=0, coord=(1,1)):
+ """As for click_request_data, but returns a list of (key, value) pairs.
+
+ You can use this list as an argument to ClientForm.urlencode. This is
+ usually only useful if you're using httplib or urllib rather than
+ urllib2 or ClientCookie. It may also be useful if you want to manually
+ tweak the keys and/or values, but this should not be necessary.
+ Otherwise, use the click method.
+
+ Note that this method is only useful for forms of MIME type
+ x-www-form-urlencoded. In particular, it does not return the
+ information required for file upload. If you need file upload and are
+ not using urllib2, use click_request_data.
+
+ Also note that Python 2.0's urllib.urlencode is slightly broken: it
+ only accepts a mapping, not a sequence of pairs, as an argument. This
+ messes up any ordering in the argument. Use ClientForm.urlencode
+ instead.
+
+ """
+ return self._click(name, type, id, nr, coord, "pairs")
+
+#---------------------------------------------------
+
+ def find_control(self,
+ name=None, type=None, kind=None, id=None, predicate=None,
+ nr=None):
+ """Locate some specific control within the form.
+
+ At least one of the name, type, kind, predicate and nr arguments must
+ be supplied. If no matching control is found, ControlNotFoundError is
+ raised.
+
+ If name is specified, then the control must have the indicated name.
+
+ If type is specified then the control must have the specified type (in
+ addition to the types possible for <input> HTML tags: "text",
+ "password", "hidden", "submit", "image", "button", "radio", "checkbox",
+ "file" we also have "reset", "buttonbutton", "submitbutton",
+ "resetbutton", "textarea", "select" and "isindex").
+
+ If kind is specified, then the control must fall into the specified
+ group, each of which satisfies a particular interface. The types are
+ "text", "list", "multilist", "singlelist", "clickable" and "file".
+
+ If id is specified, then the control must have the indicated id.
+
+ If predicate is specified, then the control must match that function.
+ The predicate function is passed the control as its single argument,
+ and should return a boolean value indicating whether the control
+ matched.
+
+ nr, if supplied, is the sequence number of the control (where 0 is the
+ first). Note that control 0 is the first control matching all the
+ other arguments (if supplied); it is not necessarily the first control
+ in the form.
+
+ """
+ if ((name is None) and (type is None) and (kind is None) and
+ (id is None) and (predicate is None) and (nr is None)):
+ raise ValueError(
+ "at least one argument must be supplied to specify control")
+ if nr is None: nr = 0
+
+ return self._find_control(name, type, kind, id, predicate, nr)
+
+#---------------------------------------------------
+# Private methods.
+
+ def _find_list_control(self,
+ name=None, type=None, kind=None, id=None, nr=None):
+ if ((name is None) and (type is None) and (kind is None) and
+ (id is None) and (nr is None)):
+ raise ValueError(
+ "at least one argument must be supplied to specify control")
+ if nr is None: nr = 0
+
+ return self._find_control(name, type, kind, id, is_listcontrol, nr)
+
+ def _find_control(self, name, type, kind, id, predicate, nr):
+ if (name is not None) and not isstringlike(name):
+ raise TypeError("control name must be string-like")
+ if (type is not None) and not isstringlike(type):
+ raise TypeError("control type must be string-like")
+ if (kind is not None) and not isstringlike(kind):
+ raise TypeError("control kind must be string-like")
+ if (id is not None) and not isstringlike(id):
+ raise TypeError("control id must be string-like")
+ if (predicate is not None) and not callable(predicate):
+ raise TypeError("control predicate must be callable")
+ if nr < 0: raise ValueError("control number must be a positive "
+ "integer")
+
+ orig_nr = nr
+
+ for control in self.controls:
+ if name is not None and name != control.name:
+ continue
+ if type is not None and type != control.type:
+ continue
+ if (kind is not None and
+ not self._is_control_in_kind(control, kind)):
+ continue
+ if id is not None and id != control.id:
+ continue
+ if predicate and not predicate(control):
+ continue
+ if nr:
+ nr = nr - 1
+ continue
+ return control
+
+ description = []
+ if name is not None: description.append("name '%s'" % name)
+ if type is not None: description.append("type '%s'" % type)
+ if kind is not None: description.append("kind '%s'" % kind)
+ if id is not None: description.append("id '%s'" % id)
+ if predicate is not None:
+ description.append("matching predicate %s" % predicate)
+ if orig_nr: description.append("nr %d" % orig_nr)
+ description = string.join(description, ", ")
+ raise ControlNotFoundError("no control with "+description)
+
+ def _is_control_in_kind(self, control, kind):
+ # XXX not OO
+ if kind == "list":
+ return isinstance(control, ListControl)
+ elif kind == "multilist":
+ return bool(isinstance(control, ListControl) and control.multiple)
+ elif kind == "singlelist":
+ return bool(isinstance(control, ListControl) and
+ not control.multiple)
+ elif kind == "file":
+ return isinstance(control, FileControl)
+ elif kind == "text":
+ return isinstance(control, TextControl)
+ elif kind == "clickable":
+ return (isinstance(control, SubmitControl) or
+ isinstance(control, IsindexControl))
+ else:
+ raise ValueError("no such control kind '%s'" % kind)
+
+ def _click(self, name, type, id, nr, coord, return_type):
+ try:
+ control = self._find_control(name, type, "clickable", id, None, nr)
+ except ControlNotFoundError:
+ if ((name is not None) or (type is not None) or (id is not None) or
+ (nr != 0)):
+ raise
+ # no clickable controls, but no control was explicitly requested,
+ # so return state without clicking any control
+ return self._switch_click(return_type)
+ else:
+ return control._click(self, coord, return_type)
+
+ def _pairs(self):
+ """Return sequence of (key, value) pairs suitable for urlencoding."""
+ pairs = []
+ for control in self.controls:
+ pairs.extend(control.pairs())
+ return pairs
+
+ def _request_data(self):
+ """Return a tuple (url, data, headers)."""
+ method = string.upper(self.method)
+ if method == "GET":
+ if self.enctype != "application/x-www-form-urlencoded":
+ raise ValueError(
+ "unknown GET form encoding type '%s'" % self.enctype)
+ uri = "%s?%s" % (self.action, urlencode(self._pairs()))
+ return uri, None, []
+ elif method == "POST":
+ if self.enctype == "application/x-www-form-urlencoded":
+ return (self.action, urlencode(self._pairs()),
+ [("Content-type", self.enctype)])
+ elif self.enctype == "multipart/form-data":
+ data = StringIO()
+ http_hdrs = []
+ mw = MimeWriter(data, http_hdrs)
+ f = mw.startmultipartbody("form-data", add_to_http_hdrs=True,
+ prefix=0)
+ for control in self.controls:
+ control._write_mime_data(mw)
+ mw.lastpart()
+ return self.action, data.getvalue(), http_hdrs
+ else:
+ raise ValueError(
+ "unknown POST form encoding type '%s'" % self.enctype)
+ else:
+ raise ValueError("Unknown method '%s'" % method)
+
+ def _switch_click(self, return_type):
+ # This is called by HTMLForm and clickable Controls to hide switching
+ # on return_type.
+ # XXX
+ # not OO
+ # duplicated in IsindexControl._click
+ if return_type == "pairs":
+ return self._pairs()
+ elif return_type == "request_data":
+ return self._request_data()
+ else:
+ req_data = self._request_data()
+ req = urllib2.Request(req_data[0], req_data[1])
+ for key, val in req_data[2]:
+ req.add_header(key, val)
+ return req