import os
import sys
import sgmllib
import cookielib
import urllib
import htmlentitydefs

# Command line: [REF [TARGET ...]]
#   REF     HTML file whose links are checked (defaults to the path below).
#   TARGET  files that links are allowed to point at; when none are given,
#           the "listed as target" check is skipped.
if len(sys.argv) > 1:
    ref = sys.argv[1]
else:
    ref = "../html/gcode.html"

if len(sys.argv) > 2:
    targets = sys.argv[2:]
else:
    targets = None


def get(attr, attrs, default=""):
    """Return the value paired with `attr` in `attrs`, or `default`.

    `attrs` is a sequence of (name, value) pairs; names are compared
    case-insensitively and the first match wins.
    """
    attr = attr.lower()
    for k, v in attrs:
        if k.lower() == attr:
            return v
    return default


class MetaHandler:
    """Mixin that records the charset declared by a <meta http-equiv> tag.

    The class using this mixin must initialise `self.encoding` itself; it is
    only overwritten here when a charset is actually found.
    """

    def do_meta(self, attrs):
        """Store the charset of a Content-Type <meta> tag in self.encoding."""
        equiv = get("http-equiv", attrs)
        # The value is commonly written "Content-Type", so the comparison
        # must not depend on capitalisation.
        if equiv.lower() != "content-type":
            return
        content = get("content", attrs)
        words = cookielib.split_header_words([content])
        if not words:
            # Empty content attribute: nothing to parse.
            return
        encoding = get("charset", words[0])
        # A bare "charset" with no value yields None, hence the truth test
        # before calling a string method on it.
        # NOTE(review): ASCII is widened to ISO-8859-1, presumably so that
        # stray 8-bit bytes still decode -- confirm.
        if encoding and encoding.upper() == "ASCII":
            encoding = "ISO-8859-1"
        if encoding:
            self.encoding = encoding


class get_refs(sgmllib.SGMLParser, MetaHandler):
    """Parser that collects the href of every <a> tag into self.refs."""

    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.refs = set()
        self.encoding = None

    def do_a(self, attrs):
        """Record the decoded, unquoted href of an <a> tag."""
        href = get('href', attrs)
        if not href:
            # <a name="..."> without an href defines an anchor; it is not a
            # link and must not be counted as one.
            return
        if self.encoding:
            href = href.decode(self.encoding)
        href = urllib.unquote(href)
        self.refs.add(href)


class get_anchors(sgmllib.SGMLParser, MetaHandler):
    """Parser that collects every anchor name defined in a document.

    Anchors come from the name/id attributes of <a> tags and from the id
    attribute of any tag that has no dedicated handler; they end up in
    self.anchors.
    """

    entitydefs = htmlentitydefs.entitydefs

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.anchors = set()
        self.encoding = None

    def unknown_starttag(self, tag, attrs):
        """Treat the id of any otherwise unhandled tag as an anchor."""
        anchor_id = get('id', attrs)
        if anchor_id:
            self.do_a([('name', anchor_id)])

    def unknown_endtag(self, tag):
        pass

    def _add_anchor(self, name):
        """Decode, unquote and store one anchor name; empty names are skipped."""
        if not name:
            return
        if self.encoding:
            name = name.decode(self.encoding)
        name = urllib.unquote(name)
        if name:
            self.anchors.add(name)

    def do_a(self, attrs):
        """Record the name and the id of an <a> tag as anchors."""
        # Both attributes are valid link destinations, so register each one
        # instead of letting name hide id.
        for key in ('name', 'id'):
            self._add_anchor(get(key, attrs))


# Anchor sets already extracted, keyed by file name.
_anchors = {}


def get_anchors_cached(filename):
    """Return the set of anchors defined in `filename`, parsing it only once."""
    if filename not in _anchors:
        parser = get_anchors()
        with open(filename) as handle:
            parser.feed(handle.read())
        # Flush whatever the parser is still buffering at end of input.
        parser.close()
        _anchors[filename] = parser.anchors
    return _anchors[filename]


def resolve_file(src, target):
    """Split a link into (file path, anchor).

    `src` is the path of the document containing the link and `target` the
    link text.  The file part is taken relative to the directory of `src`;
    a link consisting only of "#anchor" (or an empty link) refers to `src`
    itself.  The anchor is None when the link has no "#" part.

    Absolute URLs get no special treatment: they are handled like relative
    file names.
    """
    if "#" in target:
        path, anchor = target.split("#", 1)
    else:
        path, anchor = target, None
    if not path:
        # Same-document link: `src` is already a usable path, joining it
        # with its own directory would duplicate the directory part.
        return src, anchor
    return os.path.join(os.path.dirname(src), path), anchor


def resolve(target, anchor):
    """Return True when `anchor` is empty/None or is defined in file `target`."""
    if not anchor:
        return True
    return anchor in get_anchors_cached(target)


refs = get_refs()
# Parse the reference document and keep only the set of collected links.
with open(ref) as ref_file:
    refs.feed(ref_file.read())
# Flush whatever the parser is still buffering at end of input.
refs.close()
refs = refs.refs

# Every link ends up in exactly one of these sets.
missing_anchor = set()
missing_file = set()
unlisted_targets = set()
good = set()

for r in refs:
    target, anchor = resolve_file(ref, r)
    if targets and target not in targets:
        # Note: this set holds the resolved file path, the others hold the
        # link text as found in the document.
        unlisted_targets.add(target)
    elif not os.path.exists(target):
        missing_file.add(r)
    elif not resolve(target, anchor):
        missing_anchor.add(r)
    else:
        good.add(r)

# Each print call below takes a single pre-formatted string, so the output is
# the same whether print is a statement or a function.
if missing_file:
    print("Files linked to in %s but could not be found:" % (
        os.path.basename(ref),))
    for i in sorted(missing_file):
        print("\t%r" % i)

if missing_anchor:
    print("Anchors used in %s but not defined in linked file:" % (
        os.path.basename(ref),))
    for i in sorted(missing_anchor):
        print("\t%r" % i)

if unlisted_targets:
    print("Links to files not listed as targets:")
    for i in sorted(unlisted_targets):
        print("\t%r" % i)
    print("If all link targets are not listed in the Submakefile, then the results of this program is unreliable.")

print("Good links: %d/%d" % (len(good), len(refs)))

# Any problem category makes the run fail with exit status 1.
if missing_anchor or missing_file or unlisted_targets:
    raise SystemExit(1)