X-Git-Url: http://g0dil.de/git?a=blobdiff_plain;f=doclib%2Ffix-links.py;h=fb7450eb4d564cb90ae0d62fe5935e365e513047;hb=61b2e2ea5cb50df90931acf3fcd840493ba762a9;hp=440a33bc77d2ed5a0ca75e0e7a40731526954728;hpb=81447258e6ecc9b5d9434fa5a7d382684179c7ab;p=senf.git diff --git a/doclib/fix-links.py b/doclib/fix-links.py index 440a33b..fb7450e 100644 --- a/doclib/fix-links.py +++ b/doclib/fix-links.py @@ -1,6 +1,32 @@ #!/usr/bin/python - -import sys,os.path,fnmatch, HTMLParser, getopt, re +# +# This tool will hack the doxygen-generated documentation to fix link +# errors produced by doxygen. +# +# This works because most anchors doxygen generates are unique 32 char +# hash values. To speed up the operation, the tool will not check all +# the files itself but will let 'linklint' do the grunt +# work. fix-links.py reads the 'errorX.txt' and 'errorAX.txt' files +# generated by linklint. These files list links to missing files +# (errorX.html) and links to missing anchors +# (errorAX.html). fix-links.py works in the following way: +# +# - Build a complete index of all unique anchors found in any html +# file. The index will only include *unique* anchors. Anchors found +# multiple times are removed from the index +# +# - The index is extended to include all unique names of html files +# +# - Scan the linklint result and check the bad links against the +# index. If the file or anchor is found in the index, an accordingly +# corrected link is generated otherwise the link is removed. +# +# One additional tweak is that fix-links.py will successively remove +# initial 'g' characters from anchors until the link is found in the +# index. Doxygen seems to create links with the wrong number of 'g' +# characters in front sometimes. 
+ +import sys,os.path,fnmatch, HTMLParser, getopt, re, codecs class HTMLFilter(HTMLParser.HTMLParser): @@ -187,11 +213,11 @@ class LinkFixer: def fix(self, path, target): self._files += 1 - data = file(path).read() + data = codecs.open(path, "r", "utf-8").read() filt = LinkFixer.LinkFilter(self._index, target, "../" * (len(os.path.split(path)[0].split("/"))), - file(path,"w")) + codecs.open(path, "w", "utf-8") ) filt.feed(data) filt.close() found, fixed, removed = filt.stats() @@ -206,8 +232,7 @@ class LinkFixer: (opts, args) = getopt.getopt(sys.argv[1:], "vs:") if len(args) != 2: - sys.stderr.write("""Usage: - fix-links.py [-s skip-dir]... + sys.stderr.write("""Usage: fix-links.py [-s skip-dir]... Process the 'errorX.txt' and 'errorAX.txt' files as generated by 'linklint': Check all invalid links and try to find the correct @@ -256,7 +281,7 @@ total, found, fixed, removed = fixer.stats() if verbose: sys.stderr.write("\nRemoved links:\n") for (anchor, label), files in removed.items(): - sys.stdout.write("%-36.36s %-48.48s %s\n" + sys.stderr.write("%-36.36s %-48.48s %s\n" % ( anchor, "(%s)" % label[:46], " ".join(files.keys())) )