Spynner is a stateful, programmatic web browser module for Python, based on PyQt and WebKit.
How to install Spynner – easy 🙂
> cat spynner-test.py
# -*- coding: utf-8 -*- import spynner import pyquery import os os.environ['DISPLAY'] = ':0.0' def main(): browse_via_jquery() browse_using_xpath() browse_via_webkit() download_image() def browse_via_jquery(): browser = spynner.Browser(debug_level=spynner.DEBUG) browser.create_webview() browser.show() #browser.hide() browser.load("http://www.wordreference.com") browser.load_jquery(True) browser.select("input[id=id=deen]") browser.fill("input[id=si]", "spass") browser.click("input[type=submit]", wait_load=True) d = pyquery.PyQuery(browser.html) d.make_links_absolute(base_url=browser.url) href = d('a:last').attr('href') print href print len(browser.download(href)) #browser.browse() def get_tree(h): from spynner import browser from lxml.html import document_fromstring """h can be either a zc.testbrowser.Browser or a string.""" if isinstance(h, file): h = h.read() if isinstance(h, browser.Browser): h = h.html if not isinstance(h, basestring): h = h.contents return document_fromstring(h) def browse_using_xpath(): browser = spynner.Browser(debug_level=spynner.INFO) browser.create_webview() browser.webview.show() #browser.webview.setWindowState(QtCore.Qt.WindowMaximized) browser.load('http://www.google.com') browser.load_jquery(True) def can_continuea(abrowser): t = get_tree(abrowser) print "can_continuea",len(t.xpath("//input[@name='q']")) return len(t.xpath("//input[@name='q']")) > 0 #id=gbqfq class=gbqfif name=q browser.wait_for_content(can_continuea, 5, u'Timeout while loading account data') browser.fill('input[name="q"]', 'kiorky') t = get_tree(browser) name = [a.attrib['name'] for a in t.xpath('//input[@type="submit"]') if 'google' in a.value.lower()][0] # search for the search input control which can change id input_sel = "input[name='%s']" % name # remove the search live query ... 
browser.native_click('input[name="q"]') browser.click(input_sel) def can_continueb(abrowser): t = get_tree(abrowser) print "can_continueb", len( t.xpath('//*[@id="ires"]')) return len( t.xpath('//*[@id="ires"]')) > 0 browser.wait_for_content(can_continueb, 10, u'Timeout while loading account data') # XPATH Tester/Evaluator/Query browser.wk_click('h3:first-child > a', wait_load=True) # click on first google result assert 'twitter.com/kiorky' in browser.html print browser.url print len(browser.html) browser.close() print 'done' def browse_via_webkit(): def custom_wait_load(browser): return 'id="footer"' in browser.html browser = spynner.Browser(debug_level=spynner.DEBUG) browser.create_webview() browser.show() browser.load("http://www.wordreference.com") browser.runjs("console.log('I can run Javascript')") browser.wk_select("#deen") browser.wk_fill("input[id=si]", "spass") browser.click("input[type=submit]", wait_load=custom_wait_load) print browser.url, len(browser.html), browser.html browser.close() def download_image(): browser = spynner.Browser(debug_level=spynner.INFO) browser.create_webview() browser.set_html_parser(pyquery.PyQuery) browser.show() browser.load("http://www.wordreference.com") browser.load_jquery(True) browser.select("#deen") browser.fill("input[id=si]", "spass") browser.click("input[type=submit]", wait_load=True) print "url:", browser.url # Soup is a PyQuery object browser.soup.make_links_absolute(base_url=browser.url) print "html:", len(browser.soup("#article").html()) # Demonstrate how to download a resource using PyQuery soup imagedata = browser.download(browser.soup("img:first").attr('src')) print "image length:", len(imagedata) browser.close() if __name__ == "__main__": main()
Run spynner test script
> python spynner-test.py
Installation
1. spynner Installation: /2733/python-spynner-installation-in-ubuntu
2. spynner Documentation: https://pypi.python.org/pypi/spynner
3. X-Server Bugfixing: http://stackoverflow.com/questions/646930/cannot-connect-to-x-server-0-0-with-a-qt-application