summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bryan Bishop <kanzure@gmail.com> 2015-02-02 00:39:20 -0600
committer: Bryan Bishop <kanzure@gmail.com> 2015-02-02 00:39:20 -0600
commit: 9312c05d2e39dbe246929560182f7e184ada3f45 (patch)
tree: 994d0c6707e8ab7e2325b69e04e307026b3f4f75
parent: b7f8407db6abeb634274de668fd0d032b46d2f79 (diff)
parent: 713e978faf3244eb322f9e32d6eb6bce1b03acdd (diff)
download: paperbot-master.tar.gz
paperbot-master.zip
Merge remote-tracking branch 'diyhplus/master' (HEAD, master)
-rw-r--r-- paperbot/__init__.py 10
-rw-r--r-- paperbot/htmltools.py 2
-rw-r--r-- paperbot/orchestrate.py 4
-rw-r--r-- paperbot/plugins/__init__.py 4
-rw-r--r-- paperbot/storage.py 2
5 files changed, 17 insertions, 5 deletions
diff --git a/paperbot/__init__.py b/paperbot/__init__.py
index 7708e37..1564df4 100644
--- a/paperbot/__init__.py
+++ b/paperbot/__init__.py
@@ -1,3 +1,13 @@
# _prefixed to avoid cluttering namespace
from .logstuff import setup_logging as _setup_logging
_setup_logging()
+
+import ezproxy
+import htmltools
+import httptools
+import libgen
+import logstuff
+import orchestrate
+import paper
+import storage
+import plugins
diff --git a/paperbot/htmltools.py b/paperbot/htmltools.py
index 73e1cb8..316b043 100644
--- a/paperbot/htmltools.py
+++ b/paperbot/htmltools.py
@@ -61,7 +61,7 @@ def extract_metadata(tree, meta_attribute_mapping=meta_attribute_mapping):
except:
log.debug("Couldn't find {metakey} in the html.".format(metakey=metakey))
else:
- log.debug("Found {metakey} with value {value}".format(value=value))
+ log.debug("Found {metakey} with value {value}".format(metakey=metakey, value=value))
output[paperkey] = value
return output
diff --git a/paperbot/orchestrate.py b/paperbot/orchestrate.py
index 2753e5f..c74a226 100644
--- a/paperbot/orchestrate.py
+++ b/paperbot/orchestrate.py
@@ -60,7 +60,7 @@ def remove_watermarks(pdfcontent):
Use pdfparanoia to remove watermarks from the pdf.
"""
log.debug("Removing pdf watermarks.")
- pdfcontent = pdfparanoia.scrub(StringIO(pdfontent))
+ pdfcontent = pdfparanoia.scrub(StringIO(pdfcontent))
return pdfcontent
@@ -206,7 +206,7 @@ def download(url, paper=None):
log.debug("HTTP GET {}".format(urldoi))
response = requests.get(urldoi, headers=DEFAULT_HEADERS)
- if is_pdf_response(response):
+ if is_response_pdf(response):
log.debug("Got pdf from libgen.")
# skip pdfparanoia because it's from libgen
diff --git a/paperbot/plugins/__init__.py b/paperbot/plugins/__init__.py
index 165ef64..410460d 100644
--- a/paperbot/plugins/__init__.py
+++ b/paperbot/plugins/__init__.py
@@ -1,2 +1,4 @@
from plugin import Plugin
-from sciencedirect import ScienceDirectPlugin
+
+#seems to be missing
+#from sciencedirect import ScienceDirectPlugin
diff --git a/paperbot/storage.py b/paperbot/storage.py
index 86bebc3..7c98856 100644
--- a/paperbot/storage.py
+++ b/paperbot/storage.py
@@ -12,7 +12,7 @@ import logging
log = logging.getLogger("paperbot.storage")
DEFAULT_STORAGE_PATH = "/home/bryan/public_html/papers2/paperbot/"
-if not os.path.exists(DEFAULT_STORAGE_PATH):
+if not (os.path.exists(DEFAULT_STORAGE_PATH) and os.access(DEFAULT_STORAGE_PATH, os.W_OK)):
DEFAULT_STORAGE_PATH = "/tmp/"
STORAGE_PATH = os.environ.get("STORAGE_PATH", DEFAULT_STORAGE_PATH)