summary | refs | log | tree | commit | diff
path: root/other-code/gsearch.py
blob: 6408f1e4c93aa5363e851658686ce68a835f00b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#!/usr/bin/python
from lxml import etree as et
from urllib import quote_plus,urlopen

def gsearch(q='', num=10, datelimit='', links_only=False):
    """Fetch a Google search result page and parse it with lxml.

    Arguments:
    q          -- query text; URL-encoded with quote_plus before sending.
    num        -- sent as the ``num`` query parameter (presumably the number
                  of results requested -- confirm against Google's URL API).
    datelimit  -- sent as the ``as_qdr`` query parameter; inserted into the
                  URL as-is, without any quoting.
    links_only -- when False (the default) the parsed HTML tree is returned,
                  which is what this function has always returned.  When
                  True, the result anchors are extracted and returned as a
                  list of ``{'href': ..., 'text': ...}`` dicts.

    Any error raised by urlopen or by the lxml parser propagates to the
    caller.
    """
    searchurl = 'http://google.com/search?hl=en&as_q=%s&num=%s&as_qdr=%s' % (
        quote_plus(q), str(num), datelimit)

    # Python 2's urlopen result is not a context manager, so close it
    # explicitly; the original never released the connection.
    response = urlopen(searchurl)
    try:
        results = response.read()
    finally:
        response.close()

    tree = et.fromstring(results, et.HTMLParser())
    if not links_only:
        # Historical behaviour: hand back the whole document.  The link
        # extraction below used to sit, unreachable, after this return.
        return tree

    links = tree.xpath('/html/body[@id="gsr"]/div[@id="res"]/div[1]/h3/a')
    # Look the href up by name: values()[0] is merely the first attribute in
    # document order, which need not be the href.
    return [{'href': a.get('href'), 'text': a.text} for a in links]

def gs(xpath, path="tests/science-direct-search-results.html"):
    """Evaluate an XPath expression against a saved HTML page.

    Arguments:
    xpath -- XPath expression, passed unchanged to the parsed tree's
             ``xpath`` method.
    path  -- HTML file to read; defaults to the saved search-result fixture
             this function has always used (resolved relative to the
             current working directory).

    Returns whatever ``tree.xpath(xpath)`` yields for the parsed document.
    Errors from opening the file or from lxml propagate to the caller.
    """
    # Use a context manager so the file handle is closed even if read() fails.
    with open(path, "r") as handle:
        content = handle.read()
    tree = et.fromstring(content, et.HTMLParser())
    return tree.xpath(xpath)