diff options
author | Bryan Bishop <kanzure@gmail.com> | 2015-02-02 00:39:20 -0600 |
---|---|---|
committer | Bryan Bishop <kanzure@gmail.com> | 2015-02-02 00:39:20 -0600 |
commit | 9312c05d2e39dbe246929560182f7e184ada3f45 (patch) | |
tree | 994d0c6707e8ab7e2325b69e04e307026b3f4f75 | |
parent | b7f8407db6abeb634274de668fd0d032b46d2f79 (diff) | |
parent | 713e978faf3244eb322f9e32d6eb6bce1b03acdd (diff) | |
download | paperbot-master.tar.gz paperbot-master.zip |
-rw-r--r-- | paperbot/__init__.py | 10 | ||||
-rw-r--r-- | paperbot/htmltools.py | 2 | ||||
-rw-r--r-- | paperbot/orchestrate.py | 4 | ||||
-rw-r--r-- | paperbot/plugins/__init__.py | 4 | ||||
-rw-r--r-- | paperbot/storage.py | 2 |
5 files changed, 17 insertions, 5 deletions
```diff
diff --git a/paperbot/__init__.py b/paperbot/__init__.py
index 7708e37..1564df4 100644
--- a/paperbot/__init__.py
+++ b/paperbot/__init__.py
@@ -1,3 +1,13 @@
 # _prefixed to avoid cluttering namespace
 from .logstuff import setup_logging as _setup_logging
 _setup_logging()
+
+import ezproxy
+import htmltools
+import httptools
+import libgen
+import logstuff
+import orchestrate
+import paper
+import storage
+import plugins
diff --git a/paperbot/htmltools.py b/paperbot/htmltools.py
index 73e1cb8..316b043 100644
--- a/paperbot/htmltools.py
+++ b/paperbot/htmltools.py
@@ -61,7 +61,7 @@ def extract_metadata(tree, meta_attribute_mapping=meta_attribute_mapping):
         except:
             log.debug("Couldn't find {metakey} in the html.".format(metakey=metakey))
         else:
-            log.debug("Found {metakey} with value {value}".format(value=value))
+            log.debug("Found {metakey} with value {value}".format(metakey=metakey, value=value))
             output[paperkey] = value
 
     return output
diff --git a/paperbot/orchestrate.py b/paperbot/orchestrate.py
index 2753e5f..c74a226 100644
--- a/paperbot/orchestrate.py
+++ b/paperbot/orchestrate.py
@@ -60,7 +60,7 @@ def remove_watermarks(pdfcontent):
     Use pdfparanoia to remove watermarks from the pdf.
     """
     log.debug("Removing pdf watermarks.")
-    pdfcontent = pdfparanoia.scrub(StringIO(pdfontent))
+    pdfcontent = pdfparanoia.scrub(StringIO(pdfcontent))
     return pdfcontent
 
 
@@ -206,7 +206,7 @@ def download(url, paper=None):
         log.debug("HTTP GET {}".format(urldoi))
         response = requests.get(urldoi, headers=DEFAULT_HEADERS)
 
-        if is_pdf_response(response):
+        if is_response_pdf(response):
             log.debug("Got pdf from libgen.")
 
             # skip pdfparanoia because it's from libgen
diff --git a/paperbot/plugins/__init__.py b/paperbot/plugins/__init__.py
index 165ef64..410460d 100644
--- a/paperbot/plugins/__init__.py
+++ b/paperbot/plugins/__init__.py
@@ -1,2 +1,4 @@
 from plugin import Plugin
-from sciencedirect import ScienceDirectPlugin
+
+#seems to be missing
+#from sciencedirect import ScienceDirectPlugin
diff --git a/paperbot/storage.py b/paperbot/storage.py
index 86bebc3..7c98856 100644
--- a/paperbot/storage.py
+++ b/paperbot/storage.py
@@ -12,7 +12,7 @@ import logging
 log = logging.getLogger("paperbot.storage")
 
 DEFAULT_STORAGE_PATH = "/home/bryan/public_html/papers2/paperbot/"
-if not os.path.exists(DEFAULT_STORAGE_PATH):
+if not (os.path.exists(DEFAULT_STORAGE_PATH) and os.access(DEFAULT_STORAGE_PATH, os.W_OK)):
     DEFAULT_STORAGE_PATH = "/tmp/"
 STORAGE_PATH = os.environ.get("STORAGE_PATH", DEFAULT_STORAGE_PATH)
 
```