# summary | refs | log | tree | commit | diff
# path: root/paperbot/storage.py
# blob: 7c98856722d61f205607694272172c2ee4730251 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
Store a paper to the file system.
"""

import os
import json
import random
import hashlib
import shutil

import logging
log = logging.getLogger("paperbot.storage")

# Preferred storage directory; swapped for /tmp/ right below when it does not
# exist or cannot be written to.
DEFAULT_STORAGE_PATH = "/home/bryan/public_html/papers2/paperbot/"
if not os.path.exists(DEFAULT_STORAGE_PATH) or not os.access(DEFAULT_STORAGE_PATH, os.W_OK):
    DEFAULT_STORAGE_PATH = "/tmp/"

# The STORAGE_PATH environment variable, when set, wins over the default.
STORAGE_PATH = os.getenv("STORAGE_PATH", DEFAULT_STORAGE_PATH)


def make_random_string(bits=128):
    """
    Return random hex digits that can be used as a filename.

    The value is drawn with random.getrandbits(bits) and rendered as
    lowercase hex, padded with zeros to at least two digits.
    """
    value = random.getrandbits(bits)
    return format(value, "02x")


def make_hash(content):
    """
    Calculate md5 sum of the content.

    content may be bytes or text. Text is encoded as UTF-8 before hashing,
    because hashlib only digests bytes-like input.

    Returns the digest as a lowercase hex string.
    """
    # type(u"") is the text type on both python2 (unicode) and python3 (str)
    if isinstance(content, type(u"")):
        content = content.encode("utf-8")

    md5sum = hashlib.md5()
    md5sum.update(content)
    return md5sum.hexdigest()


def make_pdf_filename(paper, pdfcontent=None):
    """
    Build the name of the pdf file for this paper (no directory part).

    A paper without a title is given one first: the md5 sum of pdfcontent
    when there is content, a random string otherwise. The new title is
    stored on the paper.
    """
    if paper.title in ("", None):
        paper.title = make_hash(pdfcontent) if pdfcontent else make_random_string()

    # a slash would be read as a directory separator, so flatten it
    return "{}.pdf".format(paper.title).replace("/", "_")


def make_full_path(filename, storage_path=STORAGE_PATH):
    """
    Join the storage directory and the filename into one path.
    """
    full_path = os.path.join(storage_path, filename)
    return full_path


def store_json(paper, storage_path=STORAGE_PATH):
    """
    Write the paper's metadata as json and return the path that was used.

    A path already present in paper.file_path_json is reused. Otherwise a
    randomly named .json file inside storage_path is chosen. Either way the
    path is recorded on the paper.
    """
    target = paper.file_path_json
    if not target:
        target = make_full_path(make_random_string() + ".json", storage_path=storage_path)

    # remember the location, it may have just been generated
    paper.file_path_json = target

    # serialize the paper's dict form
    serialized = json.dumps(paper.to_dict())

    log.debug("Storing paper metadata to {}".format(target))
    with open(target, "w") as handle:
        handle.write(serialized)

    return target


def store_logs(paper, templogpath):
    """
    Store logs near other paper files. Return the path.

    The log ends up next to paper.file_path_json (which must already be
    set), under the same name with ".json" swapped for ".log". The file at
    templogpath is moved, not copied.
    """
    jsonpath = paper.file_path_json

    # Only strip the extension when it really is there: blindly dropping the
    # last five characters would eat part of any name not ending in ".json".
    if jsonpath.lower().endswith(".json"):
        filename = jsonpath[:-len(".json")]
    else:
        filename = jsonpath

    # compute the desired log file path
    desiredpath = os.path.abspath(filename + ".log")

    # move the log file into position
    log.debug("Moving log from {} to {}".format(templogpath, desiredpath))
    shutil.move(templogpath, desiredpath)

    return desiredpath


def store(paper, pdfcontent=None, storage_path=None):
    """
    Save a paper to the file system.

    paper: the paper to store. paper.pdf is used when pdfcontent is not
        given. file_path_pdf, file_path_json and stored are set on it, and
        make_pdf_filename may fill in an empty title.
    pdfcontent: content of the pdf. Text is encoded as UTF-8 so the file can
        always be written in binary mode.
    storage_path: directory to write into. Defaults to STORAGE_PATH.

    Returns a tuple of (json_path, pdf_path).

    Raises TypeError when there is no pdf content to store.
    """
    log.debug("Storing the paper.")

    if storage_path is None:
        storage_path = STORAGE_PATH

    if pdfcontent is None:
        pdfcontent = paper.pdf

    # fail before touching the file system instead of leaving an empty pdf
    if pdfcontent is None:
        raise TypeError("no pdf content to store")

    # type(u"") is the text type on both python2 (unicode) and python3 (str);
    # text has to become bytes before it can go into a binary file
    if isinstance(pdfcontent, type(u"")):
        pdfcontent = pdfcontent.encode("utf-8")

    pdf_filename = make_pdf_filename(paper, pdfcontent)
    pdf_path = make_full_path(pdf_filename, storage_path=storage_path)
    paper.file_path_pdf = pdf_path

    # pdfs are binary: text mode rejects bytes on python3 and can translate
    # newlines on some platforms
    with open(pdf_path, "wb") as pdfdoc:
        log.debug("Storing pdf to {}".format(pdf_filename))
        pdfdoc.write(pdfcontent)

    json_filename = pdf_filename + ".json"
    json_path = make_full_path(json_filename, storage_path=storage_path)
    paper.file_path_json = json_path
    jsondata = json.dumps(paper.to_dict())

    # NOTE(review): the metadata above was serialized before this flag is
    # set - confirm that is intended.
    paper.stored = True

    with open(json_path, "w") as jsondoc:
        log.debug("Storing paper metadata at {}".format(json_filename))
        jsondoc.write(jsondata)

    return (json_path, pdf_path)