X-Git-Url: http://g0dil.de/git?a=blobdiff_plain;f=doclib%2Ffix-links.py;h=d19dc5255a8f0ff5b69550f104e77e483555bb41;hb=4e3d3eabb9def43ad4496bff5f7e3bf77ce1719d;hp=fb7450eb4d564cb90ae0d62fe5935e365e513047;hpb=b2e4357d68ec6788b4b336171a9c613d3b2416c7;p=senf.git

diff --git a/doclib/fix-links.py b/doclib/fix-links.py
index fb7450e..d19dc52 100644
--- a/doclib/fix-links.py
+++ b/doclib/fix-links.py
@@ -147,7 +147,7 @@ def stripHTML(s):
     s = s.replace("&nbsp;"," ").replace("\n"," ")
     s = REF_RE.sub("?",s)
     return s.strip()
-    
+
 class LinkFixer:
 
     def __init__(self, skipdirs=('.svn',)):
@@ -155,89 +155,69 @@ class LinkFixer:
 
     def init(self):
         self._index.build()
-        self._files = 0
-        self._found = 0
-        self._fixed = 0
-        self._removed = {}
 
     class LinkFilter(HTMLFilter):
 
-        def __init__(self, index, key, topdir, out):
+        def __init__(self, index, topdir, out):
             HTMLFilter.__init__(self, out)
             self._index = index
-            self._key = key
             self._topdir = topdir
-            self._found = 0
-            self._fixed = 0
-            self._removed = {}
-
-        def _s_A(self, attrs):
-            self._skip_a = False
-            if self._key in dict(attrs).get('href',''):
-                self._found += 1
-                ix = [ i for i, attr in enumerate(attrs) if attr[0] == 'href' ][0]
-                anchor = attrs[ix][1]
-                if '#' in anchor:
-                    anchor = anchor.split('#')[1]
-                    a = anchor
-                    target = None
-                    while not target:
-                        target = self._index[a]
-                        if target:
-                            target = '%s#%s' % (target, a)
-                        elif a.startswith('g'):
-                            a = a[1:]
-                        else:
-                            break
-                else:
-                    anchor = os.path.split(anchor)[1]
-                    target = self._index[anchor]
+
+        def _check(self, tag, linkAttr, attrs):
+            ix = target = None
+            for i,(k,v) in enumerate(attrs):
+                if k == linkAttr:
+                    ix, target = i, v
+                    break
+            if target:
+                fix = False
+                tdir = anchor = None
+                if '#' in target : target, anchor = target.rsplit('#',1)
+                if '/' in target : tdir, target = target.rsplit('/', 1)
+                newTarget = None
+                if anchor:
+                    newTarget = self.anchorLookup(anchor)
+                if newTarget is None:
+                    newTarget = self.fileLookup(target)
+                if newTarget:
+                    attrs[ix] = (attrs[ix][0], '/'.join((self._topdir, newTarget)))
+            self.emit_starttag(tag, attrs)
+
+        def anchorLookup(self,anchor):
+            target = None
+            while not target:
+                target = self._index[anchor]
                 if target:
-                    self._fixed += 1
-                    attrs[ix] = ('href', os.path.join(self._topdir,target))
+                    target = '%s#%s' % (target, anchor)
+                elif anchor.startswith('g'):
+                    anchor = anchor[1:]
                 else:
-                    self._removed[anchor] = {}
-                    self._collectFor = anchor
-                    self.startCollect()
-                    return
-            self.emit_starttag('a',attrs)
-
-        def _e_A(self):
-            if self.collecting():
-                self._removed[self._collectFor][stripHTML(self.endCollect())] = None
-            else:
-                self.emit_endtag('a')
-
-        def stats(self):
-            return (self._found, self._fixed, self._removed)
-
-    def fix(self, path, target):
-        self._files += 1
+                    break
+            return target
+
+        def fileLookup(self,target):
+            return self._index[target]
+
+        def _s_A(self, attrs):
+            self._check('a', 'href', attrs)
+
+        def _s_AREA(self, attrs):
+            self._check('area', 'href', attrs)
+
+    def fix(self, path):
         data = codecs.open(path, "r", "utf-8").read()
         filt = LinkFixer.LinkFilter(self._index,
-                                    target,
-                                    "../" * (len(os.path.split(path)[0].split("/"))),
+                                    ("../" * (len(os.path.split(path)[0].split("/"))))[:-1],
                                     codecs.open(path, "w", "utf-8") )
         filt.feed(data)
         filt.close()
-        found, fixed, removed = filt.stats()
-        self._found += found
-        self._fixed += fixed
-        for anchor, labels in removed.items():
-            for label in labels.keys():
-                self._removed.setdefault((anchor,label),{})[path] = None
-
-    def stats(self):
-        return (self._files, self._found, self._fixed, self._removed)
-
 
 (opts, args) = getopt.getopt(sys.argv[1:], "vs:")
-if len(args) != 2:
-    sys.stderr.write("""Usage: fix-links.py [-s skip-dir]...
+if len(args) != 0:
+    sys.stderr.write("""Usage: fix-links.py [-s skip-dir]...
 
-Process the 'errorX.txt' and 'errorAX.txt' files as generated by
-'linklint': Check all invalid links and try to find the correct
-target. If a target is found, the link is changed accordingly,
-otherwise the link is removed.
+Check all links and try to find the correct target. If a target is
+found, the link is changed accordingly, otherwise the link is removed.
 
 To find anchors, fix-links.py generates a complete index of all
 anchors defined in any HTML file in the current directory or some
@@ -249,46 +229,14 @@ not be scanned for '*.html' files.
 skipdirs = [ val for opt, val in opts if opt == '-s' ]
 verbose = ( '-v', '' ) in opts
 
-if not os.path.exists(args[0]) and not os.path.exists(args[1]):
-    # No bad links to nothing to do
-    sys.exit(0)
-
 fixer = LinkFixer(skipdirs)
 fixer.init()
 
-target = None
-
-if os.path.exists(args[0]):
-    for l in file(args[0]):
-        l = l.rstrip()
-        if l.startswith('/'):
-            target = '#' + os.path.split(l)[1]
-        elif l.startswith(' /') and not l.endswith('/'):
-            sys.stderr.write("%s\n" % l)
-            fixer.fix(l[5:], target)
-
-if os.path.exists(args[1]):
-    for l in file(args[1]):
-        l = l.rstrip()
-        if l.startswith('/'):
-            target = l.split('#')[1]
-        elif l.startswith(' /') and not l.endswith('/'):
-            sys.stderr.write("%s\n" % l)
-            fixer.fix(l[5:], target)
-
-total, found, fixed, removed = fixer.stats()
-
-if verbose:
-    sys.stderr.write("\nRemoved links:\n")
-    for (anchor, label), files in removed.items():
-        sys.stderr.write("%-36.36s %-48.48s %s\n"
-                         % ( anchor,
-                             "(%s)" % label[:46],
-                             " ".join(files.keys())) )
-
-sys.stderr.write("""
-Files processed : %5d
-Links processed : %5d
-Links fixed     : %5d
-Links removed   : %5d
-""" % (total, found, fixed, found-fixed))
+for dirname, subdirs, files in os.walk('.'):
+    for d in skipdirs:
+        if d in subdirs:
+            subdirs.remove(d)
+    for f in fnmatch.filter(files,'*.html'):
+        path = os.path.normpath(os.path.join(dirname, f))
+        print path
+        fixer.fix(path)
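
A note on the rewritten lookup (an editor's sketch, not part of the diff): LinkFilter.anchorLookup retries a failed anchor with leading 'g' characters stripped, presumably because doxygen emits group-member anchors with a 'g' prefix; and fix() now hands each filter a '../' prefix matching the file's directory depth, the [:-1] trimming the trailing slash since _check joins it to the new target with '/'.join. The minimal Python 2 sketch below mirrors that fallback loop; the dict contents and anchor names are hypothetical stand-ins for the real anchor index built by LinkFixer.init().

    # Stand-in for the anchor index the script builds by scanning
    # every *.html file: anchor id -> file defining it.
    index = { 'a42': 'classsenf_1_1Example.html' }    # hypothetical entry

    def anchorLookup(anchor):
        # Mirror of LinkFilter.anchorLookup: retry with one leading 'g'
        # stripped per pass until the index knows the anchor or nothing
        # is left to strip.
        while anchor:
            target = index.get(anchor)
            if target:
                return '%s#%s' % (target, anchor)
            if not anchor.startswith('g'):
                return None
            anchor = anchor[1:]
        return None

    print anchorLookup('ga42')     # -> classsenf_1_1Example.html#a42
    print anchorLookup('gnope')    # -> None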