# path: root/paperbot/paper.py
# blob: 9d98a9e4ebb46717a1f7dbfe6a373c62153820d1
"""
Generic model of a paper.
"""

from modelo import Model
import modelo.trait.trait_types as field

# Lookup table: name of an HTML <meta> "citation_*" attribute -> name of the
# variable on the Paper model that holds the corresponding value.
meta_attribute_mapping = dict(
    # journal / publisher details
    citation_journal_title="journal_title",
    citation_journal_abbrev="journal_abbrev",
    citation_issn="journal_issn",
    citation_publisher="publisher",
    # the article itself
    citation_title="title",
    citation_online_date="online_publication_date",
    citation_publication_date="publication_date",
    # position within the journal
    citation_volume="journal_volume",
    citation_issue="journal_issue",
    citation_firstpage="pagenumber",
    # identifiers and locations
    citation_doi="doi",
    citation_abstract_html_url="url",
    citation_pdf_url="pdf_url",
    citation_language="language",
)


class Paper(Model):
    """
    Represents collected information about a paper.

    Every attribute below is declared as a ``modelo`` trait. All attribute
    names that appear as values in ``meta_attribute_mapping`` (including
    ``language``) are declared here, so each mapped HTML meta attribute has
    a matching variable on the model.
    """

    # --- identity -------------------------------------------------------
    title = field.String()
    doi = field.String()

    # --- content description ---------------------------------------------
    abstract = field.String()
    # list of keyword strings
    keywords = field.List(field.String())

    # --- locations ---------------------------------------------------------
    # url directly to the pdf from the publisher
    pdf_url = field.String()

    # publisher url
    url = field.String()

    # where the pdf is stored on this server
    file_path_pdf = field.String()

    # where metadata is stored about the paper
    file_path_json = field.String()

    # --- dates (kept as strings, not date objects) -------------------------
    publication_date = field.String()
    online_publication_date = field.String()

    # list of author name strings
    authors = field.List(field.String())

    # --- journal information -------------------------------------------------
    journal_title = field.String()
    journal_abbrev = field.String()
    journal_volume = field.String()
    journal_issue = field.String()
    journal_issn = field.String()
    # target of the "citation_firstpage" entry in meta_attribute_mapping
    pagenumber = field.String()

    publisher = field.String()

    # target of the "citation_language" entry in meta_attribute_mapping;
    # previously the mapping named this attribute but the model lacked it
    language = field.String()

    # html from page before downloading pdf
    html = field.String()

    # whether the paper has been stored; False means not stored yet
    stored = field.Bool(default=False)