X-Git-Url: http://g0dil.de/git?a=blobdiff_plain;f=doclib%2Ffix-links.py;h=d19dc5255a8f0ff5b69550f104e77e483555bb41;hb=4e3d3eabb9def43ad4496bff5f7e3bf77ce1719d;hp=fb7450eb4d564cb90ae0d62fe5935e365e513047;hpb=b2e4357d68ec6788b4b336171a9c613d3b2416c7;p=senf.git

diff --git a/doclib/fix-links.py b/doclib/fix-links.py
index fb7450e..d19dc52 100644
--- a/doclib/fix-links.py
+++ b/doclib/fix-links.py
@@ -147,7 +147,7 @@ def stripHTML(s):
     s = s.replace("&nbsp;"," ").replace("\n"," ")
     s = REF_RE.sub("?",s)
     return s.strip()
-    
+
 class LinkFixer:
 
     def __init__(self, skipdirs=('.svn',)):
@@ -155,89 +155,69 @@ class LinkFixer:
 
     def init(self):
         self._index.build()
-        self._files = 0
-        self._found = 0
-        self._fixed = 0
-        self._removed = {}
 
     class LinkFilter(HTMLFilter):
 
-        def __init__(self, index, key, topdir, out):
+        def __init__(self, index, topdir, out):
             HTMLFilter.__init__(self, out)
             self._index = index
-            self._key = key
             self._topdir = topdir
-            self._found = 0
-            self._fixed = 0
-            self._removed = {}
-
-        def _s_A(self, attrs):
-            self._skip_a = False
-            if self._key in dict(attrs).get('href',''):
-                self._found += 1
-                ix = [ i for i, attr in enumerate(attrs) if attr[0] == 'href' ][0]
-                anchor = attrs[ix][1]
-                if '#' in anchor:
-                    anchor = anchor.split('#')[1]
-                    a = anchor
-                    target = None
-                    while not target:
-                        target = self._index[a]
-                        if target:
-                            target = '%s#%s' % (target, a)
-                        elif a.startswith('g'):
-                            a = a[1:]
-                        else:
-                            break
-                else:
-                    anchor = os.path.split(anchor)[1]
-                    target = self._index[anchor]
+
+        def _check(self, tag, linkAttr, attrs):
+            ix = target = None
+            for i,(k,v) in enumerate(attrs):
+                if k == linkAttr:
+                    ix, target = i, v
+                    break
+            if target:
+                fix = False
+                tdir = anchor = None
+                if '#' in target : target, anchor = target.rsplit('#',1)
+                if '/' in target : tdir, target = target.rsplit('/', 1)
+                newTarget = None
+                if anchor:
+                    newTarget = self.anchorLookup(anchor)
+                if newTarget is None:
+                    newTarget = self.fileLookup(target)
+                if newTarget:
+                    attrs[ix] = (attrs[ix][0], '/'.join((self._topdir, newTarget)))
+            self.emit_starttag(tag, attrs)
+
+        def anchorLookup(self,anchor):
+            target = None
+            while not target:
+                target = self._index[anchor]
                 if target:
-                    self._fixed += 1
-                    attrs[ix] = ('href', os.path.join(self._topdir,target))
+                    target = '%s#%s' % (target, anchor)
+                elif anchor.startswith('g'):
+                    anchor = anchor[1:]
                 else:
-                    self._removed[anchor] = {}
-                    self._collectFor = anchor
-                    self.startCollect()
-                    return
-            self.emit_starttag('a',attrs)
-
-        def _e_A(self):
-            if self.collecting():
-                self._removed[self._collectFor][stripHTML(self.endCollect())] = None
-            else:
-                self.emit_endtag('a')
-
-        def stats(self):
-            return (self._found, self._fixed, self._removed)
-
-    def fix(self, path, target):
-        self._files += 1
+                    break
+            return target
+
+        def fileLookup(self,target):
+            return self._index[target]
+
+        def _s_A(self, attrs):
+            self._check('a', 'href', attrs)
+
+        def _s_AREA(self, attrs):
+            self._check('area', 'href', attrs)
+
+    def fix(self, path):
         data = codecs.open(path, "r", "utf-8").read()
         filt = LinkFixer.LinkFilter(self._index,
-                                    target,
-                                    "../" * (len(os.path.split(path)[0].split("/"))),
+                                    ("../" * (len(os.path.split(path)[0].split("/"))))[:-1],
                                     codecs.open(path, "w", "utf-8") )
         filt.feed(data)
         filt.close()
-        found, fixed, removed = filt.stats()
-        self._found += found
-        self._fixed += fixed
-        for anchor, labels in removed.items():
-            for label in labels.keys():
-                self._removed.setdefault((anchor,label),{})[path] = None
-
-    def stats(self):
-        return (self._files, self._found, self._fixed, self._removed)
-
 
 (opts, args) = getopt.getopt(sys.argv[1:], "vs:")
-if len(args) != 2:
-    sys.stderr.write("""Usage: fix-links.py [-s skip-dir]...
+if len(args) != 0:
+    sys.stderr.write("""Usage: fix-links.py [-s skip-dir]...
 
-Process the 'errorX.txt' and 'errorAX.txt' files as generated by
-'linklint': Check all invalid links and try to find the correct
-target. If a target is found, the link is changed accordingly,
-otherwise the link is removed.
+Check all links and try to find the correct target. If a target is
+found, the link is changed accordingly, otherwise the link is removed.
 
 To find anchors, fix-links.py generates a complete index of all
 anchors defined in any HTML file in the current directory or some
@@ -249,46 +229,14 @@ not be scanned for '*.html' files.
 skipdirs = [ val for opt, val in opts if opt == '-s' ]
 verbose = ( '-v', '' ) in opts
 
-if not os.path.exists(args[0]) and not os.path.exists(args[1]):
-    # No bad links to nothing to do
-    sys.exit(0)
-
 fixer = LinkFixer(skipdirs)
 fixer.init()
 
-target = None
-
-if os.path.exists(args[0]):
-    for l in file(args[0]):
-        l = l.rstrip()
-        if l.startswith('/'):
-            target = '#' + os.path.split(l)[1]
-        elif l.startswith(' /') and not l.endswith('/'):
-            sys.stderr.write("%s\n" % l)
-            fixer.fix(l[5:], target)
-
-if os.path.exists(args[1]):
-    for l in file(args[1]):
-        l = l.rstrip()
-        if l.startswith('/'):
-            target = l.split('#')[1]
-        elif l.startswith(' /') and not l.endswith('/'):
-            sys.stderr.write("%s\n" % l)
-            fixer.fix(l[5:], target)
-
-total, found, fixed, removed = fixer.stats()
-
-if verbose:
-    sys.stderr.write("\nRemoved links:\n")
-    for (anchor, label), files in removed.items():
-        sys.stderr.write("%-36.36s %-48.48s %s\n"
-                         % ( anchor,
-                             "(%s)" % label[:46],
-                             " ".join(files.keys())) )
-
-sys.stderr.write("""
-Files processed : %5d
-Links processed : %5d
-Links fixed     : %5d
-Links removed   : %5d
-""" % (total, found, fixed, found-fixed))
+for dirname, subdirs, files in os.walk('.'):
+    for d in skipdirs:
+        if d in subdirs:
+            subdirs.remove(d)
+    for f in fnmatch.filter(files,'*.html'):
+        path = os.path.normpath(os.path.join(dirname, f))
+        print path
+        fixer.fix(path)
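
A note on the rewritten lookup (an editor's sketch, not part of the diff): LinkFilter.anchorLookup retries a failed anchor with leading 'g' characters stripped, presumably because doxygen emits group-member anchors with a 'g' prefix; and fix() now hands each filter a '../' prefix matching the file's directory depth, the [:-1] trimming the trailing slash since _check joins it to the new target with '/'.join. The minimal Python 2 sketch below mirrors that fallback loop; the dict contents and anchor names are hypothetical stand-ins for the real anchor index built by LinkFixer.init().

    # Stand-in for the anchor index the script builds by scanning
    # every *.html file: anchor id -> file defining it.
    index = { 'a42': 'classsenf_1_1Example.html' }    # hypothetical entry

    def anchorLookup(anchor):
        # Mirror of LinkFilter.anchorLookup: retry with one leading 'g'
        # stripped per pass until the index knows the anchor or nothing
        # is left to strip.
        while anchor:
            target = index.get(anchor)
            if target:
                return '%s#%s' % (target, anchor)
            if not anchor.startswith('g'):
                return None
            anchor = anchor[1:]
        return None

    print anchorLookup('ga42')     # -> classsenf_1_1Example.html#a42
    print anchorLookup('gnope')    # -> None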