# index. Doxygen seems to create links with the wrong number of 'g'
# characters in front sometimes.
-import sys,os.path,fnmatch, HTMLParser, getopt, re
+import sys,os.path,fnmatch, HTMLParser, getopt, re, codecs
class HTMLFilter(HTMLParser.HTMLParser):
s = s.replace(" "," ").replace("\n"," ")
s = REF_RE.sub("?",s)
return s.strip()
-
+
class LinkFixer:
def __init__(self, skipdirs=('.svn',)):
def init(self):
self._index.build()
- self._files = 0
- self._found = 0
- self._fixed = 0
- self._removed = {}
class LinkFilter(HTMLFilter):
- def __init__(self, index, key, topdir, out):
+ def __init__(self, index, topdir, out):
HTMLFilter.__init__(self, out)
self._index = index
- self._key = key
self._topdir = topdir
- self._found = 0
- self._fixed = 0
- self._removed = {}
- def _s_A(self, attrs):
- self._skip_a = False
- if self._key in dict(attrs).get('href',''):
- self._found += 1
- ix = [ i for i, attr in enumerate(attrs) if attr[0] == 'href' ][0]
- anchor = attrs[ix][1]
- if '#' in anchor:
- anchor = anchor.split('#')[1]
- a = anchor
- target = None
- while not target:
- target = self._index[a]
- if target:
- target = '%s#%s' % (target, a)
- elif a.startswith('g'):
- a = a[1:]
- else:
- break
- else:
- anchor = os.path.split(anchor)[1]
- target = self._index[anchor]
+ def _check(self, tag, linkAttr, attrs):
+ ix = target = None
+ for i,(k,v) in enumerate(attrs):
+ if k == linkAttr:
+ ix, target = i, v
+ break
+ if target:
+ fix = False
+ tdir = anchor = None
+ if '#' in target : target, anchor = target.rsplit('#',1)
+ if '/' in target : tdir, target = target.rsplit('/', 1)
+ newTarget = None
+ if anchor:
+ newTarget = self.anchorLookup(anchor)
+ if newTarget is None:
+ newTarget = self.fileLookup(target)
+ if newTarget and anchor:
+ newTarget = '%s#%s' % (newTarget, anchor)
+ if newTarget:
+ attrs[ix] = (attrs[ix][0], '/'.join((self._topdir, newTarget)))
+ self.emit_starttag(tag, attrs)
+
+ def anchorLookup(self,anchor):
+ target = None
+ while not target:
+ target = self._index[anchor]
if target:
- self._fixed += 1
- attrs[ix] = ('href', os.path.join(self._topdir,target))
+ target = '%s#%s' % (target, anchor)
+ elif anchor.startswith('g'):
+ anchor = anchor[1:]
else:
- self._removed[anchor] = {}
- self._collectFor = anchor
- self.startCollect()
- return
- self.emit_starttag('a',attrs)
-
- def _e_A(self):
- if self.collecting():
- self._removed[self._collectFor][stripHTML(self.endCollect())] = None
- else:
- self.emit_endtag('a')
-
- def stats(self):
- return (self._found, self._fixed, self._removed)
-
- def fix(self, path, target):
- self._files += 1
- data = file(path).read()
+ break
+ return target
+
+ def fileLookup(self,target):
+ return self._index[target]
+
+ def _s_A(self, attrs):
+ self._check('a', 'href', attrs)
+
+ def _s_AREA(self, attrs):
+ self._check('area', 'href', attrs)
+
+ def fix(self, path):
+ data = codecs.open(path, "r", "utf-8").read()
filt = LinkFixer.LinkFilter(self._index,
- target,
- "../" * (len(os.path.split(path)[0].split("/"))),
- file(path,"w"))
+ ("../" * (len(os.path.split(path)[0].split("/"))))[:-1],
+ codecs.open(path, "w", "utf-8") )
filt.feed(data)
filt.close()
- found, fixed, removed = filt.stats()
- self._found += found
- self._fixed += fixed
- for anchor, labels in removed.items():
- for label in labels.keys():
- self._removed.setdefault((anchor,label),{})[path] = None
-
- def stats(self):
- return (self._files, self._found, self._fixed, self._removed)
-
+
(opts, args) = getopt.getopt(sys.argv[1:], "vs:")
-if len(args) != 2:
- sys.stderr.write("""Usage: fix-links.py [-s skip-dir]... <errrorX.txt> <errorAX.txt>
+if len(args) != 0:
+ sys.stderr.write("""Usage: fix-links.py [-s skip-dir]...
-Process the 'errorX.txt' and 'errorAX.txt' files as generated by
-'linklint': Check all invalid links and try to find the correct
-target. If a target is found, the link is changed accordingly,
-otherwise the link is removed.
+Check all links and try to find the correct target. If a target is
+found, the link is changed accordingly, otherwise the link is removed.
To find anchors, fix-links.py generates a complete index of all
anchors defined in any HTML file in the current directory or some
skipdirs = [ val for opt, val in opts if opt == '-s' ]
verbose = ( '-v', '' ) in opts
-if not os.path.exists(args[0]) and not os.path.exists(args[1]):
- # No bad links to nothing to do
- sys.exit(0)
-
fixer = LinkFixer(skipdirs)
fixer.init()
-target = None
-
-if os.path.exists(args[0]):
- for l in file(args[0]):
- l = l.rstrip()
- if l.startswith('/'):
- target = '#' + os.path.split(l)[1]
- elif l.startswith(' /') and not l.endswith('/'):
- sys.stderr.write("%s\n" % l)
- fixer.fix(l[5:], target)
-
-if os.path.exists(args[1]):
- for l in file(args[1]):
- l = l.rstrip()
- if l.startswith('/'):
- target = l.split('#')[1]
- elif l.startswith(' /') and not l.endswith('/'):
- sys.stderr.write("%s\n" % l)
- fixer.fix(l[5:], target)
-
-total, found, fixed, removed = fixer.stats()
-
-if verbose:
- sys.stderr.write("\nRemoved links:\n")
- for (anchor, label), files in removed.items():
- sys.stderr.write("%-36.36s %-48.48s %s\n"
- % ( anchor,
- "(%s)" % label[:46],
- " ".join(files.keys())) )
-
-sys.stderr.write("""
-Files processed : %5d
-Links processed : %5d
-Links fixed : %5d
-Links removed : %5d
-""" % (total, found, fixed, found-fixed))
+for dirname, subdirs, files in os.walk('.'):
+ for d in skipdirs:
+ if d in subdirs:
+ subdirs.remove(d)
+ for f in fnmatch.filter(files,'*.html'):
+ path = os.path.normpath(os.path.join(dirname, f))
+ print path
+ fixer.fix(path)