summaryrefslogtreecommitdiff
path: root/docs/src/checklinks.py
blob: fa287ad80d541a07f457b92668d74d170045ad0d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os, sys, sgmllib, cookielib, urllib, htmlentitydefs

# Usage: checklinks.py [REF [TARGET ...]]
#
# REF is the HTML file whose links are checked; it falls back to the
# path below when no argument is given.
ref = "../html/gcode.html"
if len(sys.argv) >= 2:
    ref = sys.argv[1]

# Any further arguments form the list of link targets; with none given,
# targets is None (an empty slice is falsy, so `or` picks None).
targets = sys.argv[2:] or None

def get(attr, attrs, default=""):
    """Look up `attr` in `attrs`, a sequence of (name, value) pairs.

    Names are compared case-insensitively.  The value of the first
    matching pair is returned; `default` is returned if nothing matches.
    """
    wanted = attr.lower()
    for name, value in attrs:
        if name.lower() != wanted:
            continue
        return value
    return default

class MetaHandler:
    """Mixin that records the charset declared by a <meta> tag.

    It handles tags of the form
        <meta http-equiv="Content-Type" content="text/html; charset=...">
    and stores the declared charset in self.encoding.  self.encoding is
    left untouched when the tag declares no charset.
    """

    def do_meta(self, attrs):
        """Handle one <meta> tag; `attrs` is a list of (name, value) pairs."""
        # The http-equiv value is case-insensitive in HTML and is commonly
        # written "Content-Type", so normalise it before comparing.
        equiv = get("http-equiv", attrs).lower()
        if equiv != "content-type":
            return

        # split_header_words() returns an empty list when the header value
        # is empty, so guard the [0] lookup against a <meta> that has no
        # usable content attribute.
        words = cookielib.split_header_words([get("content", attrs)])
        if not words:
            return

        encoding = get("charset", words[0])
        if not encoding:
            return
        # A declared charset of ASCII (any letter case) is replaced by
        # ISO-8859-1 -- presumably so that stray 8-bit bytes still decode.
        if encoding.upper() == "ASCII":
            encoding = "ISO-8859-1"
        self.encoding = encoding

class get_refs(sgmllib.SGMLParser, MetaHandler):
    """SGML parser that collects the href of every <a> tag fed to it.

    self.refs is the set of link targets seen so far, URL-unquoted and,
    when a <meta> tag has declared a charset, decoded with it.
    """
    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.refs = set()
        # Set by MetaHandler.do_meta once a charset declaration is seen.
        self.encoding = None

    def do_a(self, attrs):
        """Record the href of one <a> tag, if it has one."""
        href = get('href', attrs)
        if not href:
            # <a name=...> / <a id=...> without href defines an anchor,
            # it is not a link; recording "" would add a bogus reference.
            return
        if self.encoding:
            href = href.decode(self.encoding)
        self.refs.add(urllib.unquote(href))

class get_anchors(sgmllib.SGMLParser, MetaHandler):
    """SGML parser that collects the anchor names defined in a document.

    An anchor is the `name` or `id` of an <a> tag, or the `id` of any tag
    that has no dedicated handler.  self.anchors is the set of names seen
    so far, URL-unquoted and, when a <meta> tag has declared a charset,
    decoded with it.
    """
    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.anchors = set()
        # Set by MetaHandler.do_meta once a charset declaration is seen.
        self.encoding = None

    def _add_anchor(self, name):
        """Normalise one anchor name and add it to self.anchors."""
        if not name:
            return
        if self.encoding:
            name = name.decode(self.encoding)
        self.anchors.add(urllib.unquote(name))

    def unknown_starttag(self, tag, attrs):
        """Record the id of any tag that has no handler of its own."""
        anchor_id = get('id', attrs)
        if anchor_id:
            self.do_a([('name', anchor_id)])

    def unknown_endtag(self, tag): pass

    def do_a(self, attrs):
        """Record the name and the id of one <a> tag."""
        # Both attributes are valid link targets, so register each of them
        # rather than letting `name` hide a different `id`.
        self._add_anchor(get('name', attrs))
        self._add_anchor(get('id', attrs))

# Cache: file name -> set of anchor names found in that file.
_anchors = {}
def get_anchors_cached(filename):
    """Return the set of anchors defined in `filename`.

    The file is parsed with get_anchors on first use only; later calls
    with the same file name return the cached set.  Raises IOError when
    the file cannot be opened or read.
    """
    if filename not in _anchors:
        parser = get_anchors()
        f = open(filename)
        try:
            parser.feed(f.read())
        finally:
            # Close the handle explicitly instead of relying on garbage
            # collection.
            f.close()
        _anchors[filename] = parser.anchors
    return _anchors[filename]

def resolve_file(src, target):
    """Split a link into the file it points at and its anchor.

    src    -- path of the document that contains the link
    target -- the link itself, optionally ending in "#anchor"

    Returns (path, anchor).  `path` is `src` itself for a same-document
    link (empty file part); otherwise it is the file part taken relative
    to the directory of `src`.  `anchor` is the text after the first "#",
    or None when the link has no "#".
    """
    if "#" in target:
        filename, anchor = target.split("#", 1)
    else:
        filename, anchor = target, None

    if not filename:
        # Same-document link: joining dirname(src) with src again would
        # double the directory part of a relative src path.
        return src, anchor

    return os.path.join(os.path.dirname(src), filename), anchor

def resolve(target, anchor):
    """Tell whether `anchor` is defined in the file `target`.

    A link without an anchor (None or "") always resolves; in that case
    the target file is not read at all.
    """
    if anchor:
        return anchor in get_anchors_cached(target)
    return True

# Collect every link target referenced by the document being checked.
refs = get_refs()
f = open(ref)
try:
    refs.feed(f.read())
finally:
    # Close the handle explicitly instead of relying on garbage collection.
    f.close()
refs = refs.refs

# Sort every link into exactly one of the buckets below.
# NOTE(review): hrefs are always treated as file names relative to `ref`,
# so a link with a URL scheme (http:, mailto:) would land in missing_file
# -- confirm the checked documents only contain local links.
missing_anchor = set()
missing_file = set()
unlisted_targets = set()
good = set()
for r in refs:
    target, anchor = resolve_file(ref, r)
    if targets and target not in targets:
        unlisted_targets.add(target)
    elif not os.path.exists(target):
        missing_file.add(r)
    elif not resolve(target, anchor):
        missing_anchor.add(r)
    else:
        good.add(r)

# Report.  print is called with one parenthesised argument so the output
# is the same whether print is a statement or a function.
if missing_file:
    print("Files linked to in %s but could not be found:" % (
        os.path.basename(ref),))
    for i in sorted(missing_file):
        print("\t%r" % i)
if missing_anchor:
    print("Anchors used in %s but not defined in linked file:" % (
        os.path.basename(ref),))
    for i in sorted(missing_anchor):
        print("\t%r" % i)
if unlisted_targets:
    print("Links to files not listed as targets:")
    for i in sorted(unlisted_targets):
        print("\t%r" % i)
    print("If all link targets are not listed in the Submakefile, then the results of this program is unreliable.")
print("Good links: %d/%d" % (len(good), len(refs)))

# Exit with status 1 when anything was reported.
if missing_anchor or missing_file or unlisted_targets:
    raise SystemExit(1)