summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author kanzure <kanzure@gmail.com> 2009-07-25 14:02:16 -0500
committer kanzure <kanzure@gmail.com> 2009-07-25 14:02:16 -0500
commit 5d41cd8a4b07e2b44950456b4bec9b16822bd07b (patch)
tree ec0b4ac421a12dda4978e2b7034f5300a0f73f4a
parent 569ee998331cf612c95c0193e4182afb4d0790b4 (diff)
download pyscholar-5d41cd8a4b07e2b44950456b4bec9b16822bd07b.tar.gz
pyscholar-5d41cd8a4b07e2b44950456b4bec9b16822bd07b.zip
working on doWeb
-rw-r--r-- packages/classes.py 6
-rw-r--r-- packages/tests.py 9
-rw-r--r-- tests/science-direct.html 6
3 files changed, 13 insertions, 8 deletions
diff --git a/packages/classes.py b/packages/classes.py
index 1f98afb..99807aa 100644
--- a/packages/classes.py
+++ b/packages/classes.py
@@ -34,10 +34,10 @@ class ScienceDirect(yaml.YAMLObject):
return "journalArticle"
return False
def doWeb(self, doc, url):
- if type(doc) == type(""): #then it's not BeautifulSoup
+ if type(doc) == type("huh"): #then it's not BeautifulSoup
document = BSXPathEvaluator(doc)
else: document = doc
- if doc.evaluate('//*[contains(@src, "exportarticle_a.gif")]', doc, None, XPathResult.ANY_TYPE, None).iterateNext():
+ if document.evaluate("//*[contains(@src, \"exportarticle_a.gif\")]", doc, None, XPathResult.ANY_TYPE, None).iterateNext():
articles = []
if (self.detectWeb(doc, url) == "multiple"):
#search page
@@ -66,5 +66,7 @@ class ScienceDirect(yaml.YAMLObject):
if len(articles) == 0:
print "ERROR: no items were found"
return
+ print "articles = ", articles
+ print "result_sets = ", result_sets
return
diff --git a/packages/tests.py b/packages/tests.py
index 42b2d8c..abf05ac 100644
--- a/packages/tests.py
+++ b/packages/tests.py
@@ -20,6 +20,15 @@ class TestScienceDirect(unittest.TestCase):
doc = contents
self.assertTrue(sci.detectWeb(doc, url)== "journalArticle")
return
+ def test_doWeb(self):
+ sci = classes.ScienceDirect()
+ some_file = open("../tests/science-direct-search-results.html", "r")
+ contents = some_file.read()
+ some_file.close()
+ url = "http://www.sciencedirect.com/science?_ob=ArticleListURL&_method=list&_ArticleListID=966440345&_sort=r&view=c&_acct=C000059713&_version=1&_urlVersion=0&_userid=108429&md5=68c788df065c832e7749a7ae42d0261e"
+ doc = contents
+ print sci.doWeb(doc, url)
+ return
if __name__ == '__main__':
unittest.main()
diff --git a/tests/science-direct.html b/tests/science-direct.html
index 6892c4f..af1f02c 100644
--- a/tests/science-direct.html
+++ b/tests/science-direct.html
@@ -64,12 +64,6 @@ function getStyleObj(elem,parent) {
<a href="http://www.sciencedirect.com.ezproxy.lib.utexas.edu/science?_ob=ArticleURL&amp;_udi=B6TWB-4CGM8MB-1&amp;_user=10&amp;_rdoc=1&amp;_fmt=&amp;_orig=search&amp;_sort=d&amp;view=c&amp;_acct=C000050221&amp;_version=1&amp;_urlVersion=0&amp;_userid=10&amp;md5=332a6427cb7f4df3e22a83f91c859c04#Skip%20Buttons" target="_top"><img src="science-direct_files/clear.gif" alt="Skip Main Navigation Links" title="Skip Main Navigation Links" border="0" width="10" height="1"></a>
</td>
<td align="center" bgcolor="#ffffff" valign="middle" nowrap="nowrap">
-
-
-
-
-
-
</td>
<td style="padding-right: 0.2em;" align="right" width="37%" nowrap="nowrap">
<span style="font-size: 75%; font-family: arial,verdana,helvetica,sans-serif;">