author     Bryan Bishop <kanzure@gmail.com>    2013-01-07 22:27:46 -0800
committer  Bryan Bishop <kanzure@gmail.com>    2013-01-07 22:27:46 -0800
commit     8d930c95d3e65c4362ee0351948f88c6f6d19f07 (patch)
tree       d73f5cc3cf436b58e70b4f140bb86aa91d959977
download   paperbot-8d930c95d3e65c4362ee0351948f88c6f6d19f07.tar.gz
           paperbot-8d930c95d3e65c4362ee0351948f88c6f6d19f07.zip
initial commit
-rw-r--r--  .gitignore           7
-rw-r--r--  modules/papers.py  127
2 files changed, 134 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9d8a08b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+# precompiled python
+*.pyc
+
+# editor leftovers
+*~
+.*.sw*
+
diff --git a/modules/papers.py b/modules/papers.py
new file mode 100644
index 0000000..3009dd0
--- /dev/null
+++ b/modules/papers.py
@@ -0,0 +1,127 @@
+"""
+Fetches papers.
+"""
+
+import os
+import json
+import requests
+
+def download(phenny, input, verbose=True):
+    """
+    Downloads a paper.
+    """
+    # only accept requests in a channel
+    if not input.sender.startswith('#'):
+        # unless the user is an admin, of course
+        if not input.admin:
+            phenny.say("i only take requests in the ##hplusroadmap channel.")
+            return
+        else:
+            # just give a warning message to the admin.. not a big deal.
+            phenny.say("okay i'll try, but please send me requests in ##hplusroadmap in the future.")
+
+    # get the input
+    line = input.group()
+
+    # was this an explicit command?
+    explicit = False
+    if line.startswith(phenny.nick):
+        explicit = True
+        line = line[len(phenny.nick):]
+
+        if line.startswith(",") or line.startswith(":"):
+            line = line[1:]
+
+        if line.startswith(" "):
+            line = line.strip()
+
+    # don't bother if there's nothing there
+    if len(line) < 5 or (not "http://" in line and not "https://" in line) or not line.startswith("http"):
+        return
+
+    translation_url = "http://localhost:1969/web"
+
+    headers = {
+        "Content-Type": "application/json",
+    }
+
+    data = {
+        "url": line,
+        "sessionid": "what"
+    }
+
+    data = json.dumps(data)
+
+    response = requests.post(translation_url, data=data, headers=headers)
+
+    if response.status_code == 200:
+        # see if there are any attachments
+        content = json.loads(response.content)
+        item = content[0]
+        title = item["title"]
+
+        if item.has_key("attachments"):
+            pdf_url = None
+            for attachment in item["attachments"]:
+                if attachment.has_key("mimeType") and "application/pdf" in attachment["mimeType"]:
+                    pdf_url = attachment["url"]
+                    break
+
+            if pdf_url:
+                user_agent = "Mozilla/5.0 (X11; Linux i686 (x86_64)) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11"
+
+                headers = {
+                    "User-Agent": user_agent,
+                }
+
+                response = None
+                if pdf_url.startswith("https://"):
+                    response = requests.get(pdf_url, headers=headers, verify=False)
+                else:
+                    response = requests.get(pdf_url, headers=headers)
+
+                # detect failure
+                if response.status_code == 401:
+                    phenny.say("HTTP 401 unauthorized " + str(pdf_url))
+                    return
+                elif response.status_code != 200:
+                    phenny.say("HTTP " + str(response.status_code) + " " + str(pdf_url))
+                    return
+
+                data = response.content
+
+                path = os.path.join("/home/bryan/public_html/papers2/paperbot/", title + ".pdf")
+
+                file_handler = open(path, "w")
+                file_handler.write(data)
+                file_handler.close()
+
+                # grr..
+                title = title.encode("ascii", "ignore")
+
+                filename = requests.utils.quote(title)
+                url = "http://diyhpl.us/~bryan/papers2/paperbot/" + filename + ".pdf"
+
+                phenny.say(url)
+                return
+            elif verbose and explicit:
+                phenny.say("error: didn't find any pdfs on " + line)
+                return
+        elif verbose and explicit:
+            phenny.say("error: dunno how to find the pdf on " + line)
+            return
+    elif verbose and explicit:
+        if response.status_code == 501:
+            if verbose:
+                phenny.say("error: HTTP " + str(response.status_code) + " " + line + " (battle station not fully operational)")
+                return
+        else:
+            if verbose:
+                phenny.say("error: HTTP " + str(response.status_code) + " " + line)
+                return
+    else:
+        return
+download.commands = ["fetch", "get", "download"]
+download.priority = "high"
+download.rule = r'(.*)'
+
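The core of the new module is the POST to a local Zotero translation-server at http://localhost:1969/web and the scan of the returned item's attachments for an application/pdf URL. The sketch below isolates that round trip so it can be tried outside the bot. It is a minimal sketch that assumes a translation-server is actually listening on that port, as the module expects; the find_pdf_attachment helper name and the example landing-page URL are placeholders for illustration, not part of the commit.

    # Minimal sketch of the translation-server round trip performed by
    # modules/papers.py above. Assumes a Zotero translation-server is
    # listening on http://localhost:1969/web; the function name and the
    # example URL are placeholders.
    import json
    import requests

    TRANSLATION_URL = "http://localhost:1969/web"

    def find_pdf_attachment(page_url):
        """Return the first application/pdf attachment URL the translation
        server reports for page_url, or None if there is none."""
        headers = {"Content-Type": "application/json"}
        payload = json.dumps({"url": page_url, "sessionid": "what"})
        response = requests.post(TRANSLATION_URL, data=payload, headers=headers)
        if response.status_code != 200:
            return None
        items = json.loads(response.content)
        if not items:
            return None
        # each translated item may carry attachments with a mimeType and a url
        for attachment in items[0].get("attachments", []):
            if "application/pdf" in attachment.get("mimeType", ""):
                return attachment["url"]
        return None

    if __name__ == "__main__":
        # hypothetical landing page; substitute any article URL
        print(find_pdf_attachment("http://example.com/some-article"))

Inside the bot, the same logic is wired to IRC by the attributes set at the bottom of the module: download.commands registers the fetch/get/download command names, and download.rule = r'(.*)' lets phenny offer every channel line to download(), which then filters for URLs itself.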