summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author kanzure <kanzure@gmail.com> 2009-07-24 21:26:39 -0500
committer kanzure <kanzure@gmail.com> 2009-07-24 21:26:39 -0500
commit 8a45f872aa57c30d08022da498ec6dbdac082f01 (patch)
tree ca3e2cc1f68f3220af9ea2c5b386774c5843a7d6
parent c72303b99a64afc22a6d39e28352e99e69fe93ea (diff)
download pyscholar-8a45f872aa57c30d08022da498ec6dbdac082f01.tar.gz
pyscholar-8a45f872aa57c30d08022da498ec6dbdac082f01.zip
cleaned up some comments, prints out yaml, accepts an argument (query string)
-rw-r--r-- pyscholar.py 32
1 files changed, 16 insertions, 16 deletions
diff --git a/pyscholar.py b/pyscholar.py
index 43edb28..c118468 100644
--- a/pyscholar.py
+++ b/pyscholar.py
@@ -2,11 +2,14 @@
import copy
import re
+import sys
import yaml
import httplib
import urllib
from BeautifulSoup import BeautifulSoup
+if len(sys.argv) < 2: sys.exit("usage: pyscholar.py QUERY") # argv[0] is the script name, so a query needs len >= 2
+
class Paper(yaml.YAMLObject):
yaml_tag='!paper'
def __init__(self, title="paper title goes here", journal_href="http://google.com/", potential_PDF_link="", cites_link="", cites_link_name="", diff_versions_link="", diff_versions_link_name="", related_papers_link="", view_as_html_link="", authors=[], publication="", pub_year="0001", server=""):
@@ -18,23 +21,23 @@ class Paper(yaml.YAMLObject):
SEARCH_HOST = "scholar.google.com"
SEARCH_BASE_URL = "/scholar"
-terms = ["PDMS"]
-limit = 10 #100
+terms = [sys.argv[1]] # query string taken from the command line
+limit = 100 # sent as the 'num' query parameter (was 10 in the previous revision)
params = urllib.urlencode({'q': "+".join(terms), 'num':limit})
headers = {'User-Agent': 'Mozilla/5.0 (X11; U; Linux i686; en-US ; rv:1.9.0.9) Gecko/2009050519 Iceweasel/3.0.6 (Debian-3.0.6-1)'}
url = SEARCH_BASE_URL + "?" + params
-#conn = httplib.HTTPConnection(SEARCH_HOST)
-#conn.request("GET", url, {}, headers)
-#resp = conn.getresponse()
-status = 200#resp.status
+conn = httplib.HTTPConnection(SEARCH_HOST)
+conn.request("GET", url, None, headers) # a GET has no body: pass None, not a dict
+resp = conn.getresponse()
+status = resp.status # real HTTP status; the previous revision hard-coded 200
papers = []
if status==200:
- #html = resp.read()
- file2 = open("scholar.htm","r")
- html = file2.read()
- file2.close()
+ html = resp.read()
+ #file2 = open("scholar.htm","r")
+ #html = file2.read()
+ #file2.close()
results = []
html = html.decode('ascii', 'ignore')
soup = BeautifulSoup(html)
@@ -64,7 +67,6 @@ if status==200:
link_title = stuff.renderContents()
if not link_title == "BL Direct": #then it might be useful.
if href[-3:] == "pdf":
- #TODO: add to record object as potential PDF link
potential_PDF_link = href
pass
else:
@@ -76,19 +78,17 @@ if status==200:
#ISI Web of Knowledge integration? Nah.
cites_link = href
cites_link_name = stuff.renderContents()
- #TODO: add to record object
elif href.count("cluster") > 0: #All x versions
cluster_link = href
cluster_link_name = stuff.renderContents()
- #TODO: parse number of papers that have cited this paper
- #TODO: add to record object
elif href.count("related") > 0: #related articles
related_link = href
related_link_name = stuff.renderContents()
- #TODO: add to record object
elif stuff.renderContents() == "View as HTML":
view_as_html_link = href
- #TODO: add to record object
+ elif stuff.renderContents() == "Cached":
+ pass
+ #TODO: add the "Cached" link (href) to the record object
else:
print "ERROR: was not a citation nor a 'see all versions' nor a related-papers link"
print "title was = ", stuff.renderContents()