From: g0dil
Date: Sat, 6 Nov 2010 13:38:09 +0000 (+0000)
Subject: Extend fixlinks to fix the completely broken cross-linking in maverick
X-Git-Url: http://g0dil.de/git?p=senf.git;a=commitdiff_plain;h=4e3d3eabb9def43ad4496bff5f7e3bf77ce1719d

Extend fixlinks to fix the completely broken cross-linking in maverick

git-svn-id: https://svn.berlios.de/svnroot/repos/senf/trunk@1744 270642c3-0616-0410-b53a-bc976706d245
---

diff --git a/doclib/SConscript b/doclib/SConscript
index b83c09c..7a396bf 100644
--- a/doclib/SConscript
+++ b/doclib/SConscript
@@ -2,39 +2,39 @@
 #
 # The documentation generation process is tightly integrated with the
 # scons build framework:
-# 
+#
 # * SCons analyzes the Doxyfiles to find all the documentation
 #   dependencies. This happens in the doxygen builder in
 #   senfscons/Doxygen.py.
 #
 # * the doclib/doxy-header.html and/or doclib/doxy-footer.html files
 #   are regenerated
-# 
+#
 # * If any documentation is out-of-date with respect to its source
 #   files, the documentation is regenerated.
-# 
+#
 # * To fix some link errors, the additional 'linklint' and 'fixlinks'
 #   targets are used
 #
-# 
+#
 # 1. Scanning the Doxyfiles
-# 
+#
 # The doxygen builder scans all documentation source files which have
 # the text 'doxyfile', in any capitalization, in their name. It
 # understands @INCLUDE directives and will find all the dependencies of
 # the documentation:
-# 
+#
 # * All the source files as selected by INPUT, INPUT_PATTERN,
 #   RECURSIVE and so on.
-# 
+#
 # * Any referenced tag-files
-# 
+#
 # * Documentation header and/or footer
-# 
+#
 # * The INPUT_FILTER program
-# 
+#
 # * Any included doxygen configuration files
-# 
+#
 #
 # 2. Regenerating header and/or footer
 #
@@ -43,9 +43,9 @@
 # using a simple python-based templating system called yaptu which is
 # included in site_scons/lib/.
 #
-# 
+#
 # 3. Calling doxygen
-# 
+#
 # The doxygen call itself is quite complex since there is some pre-
 # and post-processing going on. We can separate this step into two
 # steps
@@ -65,7 +65,7 @@
 # 3.2. The main doxygen build (Doxygen builder)
 #
 # The Doxygen builder will call the doxygen command to build the
-# documentation. 
+# documentation.
 #
 # The doxygen command is configured as 'site_scons/lib/doxygen.sh' which
 # does some additional processing in addition to calling doxygen
@@ -74,7 +74,7 @@
 # * it sets environment variables depending on command line arguments.
 #   These variables are then used in the Doxyfiles
 #
-# * after doxygen is finished, 'installdox' is called to resolve 
+# * after doxygen is finished, 'installdox' is called to resolve
 #   tag file references.
 #
 # * the HTML documentation is post-processed using some sed, tidy, and
@@ -136,13 +136,13 @@ def modules():
     pathbase = env.Dir('#/senf').abspath
     pathbasel = len(pathbase)+1
     for module in env.Alias('all_docs')[0].sources:
-        if module.name != 'html.stamp' : continue 
+        if module.name != 'html.stamp' : continue
         if not module.dir.dir.dir.abspath.startswith(pathbase): continue
         mods[module.dir.dir.dir.abspath] = [ module.dir.dir.dir.abspath[pathbasel:].replace('/','_'),
                                              module.dir.dir.dir.name,
                                              module.dir.abspath[pathbasel:],
                                              0 ]
-    
+
     rv = []
     keys = mods.keys()
     keys.sort()
@@ -197,7 +197,7 @@ div.tabs li.$projectname a { background-color: #EDE497; }

SENF Extensible Network Framework

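The Doxyfile scan described under '1. Scanning the Doxyfiles' above can be pictured with a short stand-alone sketch. This is not the code from senfscons/Doxygen.py; the function name and the set of recognized settings are illustrative only, and real Doxyfiles additionally allow '+=' and line continuations:

    import os

    DEPENDENCY_TAGS = ('INPUT', 'TAGFILES', 'HTML_HEADER', 'HTML_FOOTER',
                       'INPUT_FILTER')   # assumed subset of interesting settings

    def doxyfileDependencies(path, deps=None):
        # Collect dependency-relevant settings, following @INCLUDE recursively
        if deps is None:
            deps = {}
        basedir = os.path.dirname(os.path.abspath(path))
        for line in open(path):
            line = line.split('#', 1)[0].strip()    # drop Doxyfile comments
            if '=' not in line:
                continue
            key, value = [ part.strip() for part in line.split('=', 1) ]
            if key == '@INCLUDE':
                # included configuration files are dependencies themselves
                doxyfileDependencies(os.path.join(basedir, value), deps)
            elif key in DEPENDENCY_TAGS:
                deps.setdefault(key, []).append(value)
        return deps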
@@ -269,14 +269,10 @@ env.Append( ENV = {
 env.PhonyTarget('linklint', [], [
     'rm -rf doc/linklint',
     'linklint -doc doc/linklint -limit 99999999 `find -type d -name html -printf "/%P/@ "`',
-    '[ ! -r doc/linklint/errorX.html ] || python doclib/linklint_addnames.py <doc/linklint/errorX.html >doc/linklint/errorX.html.new',
-    '[ ! -r doc/linklint/errorX.html.new ] || mv doc/linklint/errorX.html.new doc/linklint/errorX.html',
-    '[ ! -r doc/linklint/errorAX.html ] || python doclib/linklint_addnames.py <doc/linklint/errorAX.html >doc/linklint/errorAX.html.new',
-    '[ ! -r doc/linklint/errorAX.html.new ] || mv doc/linklint/errorAX.html.new doc/linklint/errorAX.html',
 ])
 
 env.PhonyTarget('fixlinks', [], [
-    'python doclib/fix-links.py -v -s .svn -s linklint -s debian doc/linklint/errorX.txt doc/linklint/errorAX.txt',
+    'python doclib/fix-links.py -v -s .svn -s linklint -s debian -s doclib -s search',
 ])
 
 header = env.Command('doxy-header.html', 'SConscript', writeTemplate,
diff --git a/doclib/fix-links.py b/doclib/fix-links.py
index fb7450e..d19dc52 100644
--- a/doclib/fix-links.py
+++ b/doclib/fix-links.py
@@ -147,7 +147,7 @@ def stripHTML(s):
     s = s.replace("&nbsp;"," ").replace("\n"," ")
     s = REF_RE.sub("?",s)
     return s.strip()
-    
+
 class LinkFixer:
 
     def __init__(self, skipdirs=('.svn',)):
@@ -155,89 +155,69 @@ class LinkFixer:
 
     def init(self):
         self._index.build()
-        self._files = 0
-        self._found = 0
-        self._fixed = 0
-        self._removed = {}
 
     class LinkFilter(HTMLFilter):
 
-        def __init__(self, index, key, topdir, out):
+        def __init__(self, index, topdir, out):
             HTMLFilter.__init__(self, out)
             self._index = index
-            self._key = key
             self._topdir = topdir
-            self._found = 0
-            self._fixed = 0
-            self._removed = {}
 
-        def _s_A(self, attrs):
-            self._skip_a = False
-            if self._key in dict(attrs).get('href',''):
-                self._found += 1
-                ix = [ i for i, attr in enumerate(attrs) if attr[0] == 'href' ][0]
-                anchor = attrs[ix][1]
-                if '#' in anchor:
-                    anchor = anchor.split('#')[1]
-                    a = anchor
-                    target = None
-                    while not target:
-                        target = self._index[a]
-                        if target:
-                            target = '%s#%s' % (target, a)
-                        elif a.startswith('g'):
-                            a = a[1:]
-                        else:
-                            break
-                else:
-                    anchor = os.path.split(anchor)[1]
-                    target = self._index[anchor]
+        def _check(self, tag, linkAttr, attrs):
+            ix = target = None
+            for i,(k,v) in enumerate(attrs):
+                if k == linkAttr:
+                    ix, target = i, v
+                    break
+            if target:
+                fix = False
+                tdir = anchor = None
+                if '#' in target : target, anchor = target.rsplit('#',1)
+                if '/' in target : tdir, target = target.rsplit('/', 1)
+                newTarget = None
+                if anchor:
+                    newTarget = self.anchorLookup(anchor)
+                if newTarget is None:
+                    newTarget = self.fileLookup(target)
+                if newTarget:
+                    attrs[ix] = (attrs[ix][0], '/'.join((self._topdir, newTarget)))
+            self.emit_starttag(tag, attrs)
+
+        def anchorLookup(self,anchor):
+            target = None
+            while not target:
+                target = self._index[anchor]
                 if target:
-                    self._fixed += 1
-                    attrs[ix] = ('href', os.path.join(self._topdir,target))
+                    target = '%s#%s' % (target, anchor)
+                elif anchor.startswith('g'):
+                    anchor = anchor[1:]
                 else:
-                    self._removed[anchor] = {}
-                    self._collectFor = anchor
-                    self.startCollect()
-                    return
-            self.emit_starttag('a',attrs)
-
-        def _e_A(self):
-            if self.collecting():
-                self._removed[self._collectFor][stripHTML(self.endCollect())] = None
-            else:
-                self.emit_endtag('a')
-
-        def stats(self):
-            return (self._found, self._fixed, self._removed)
-
-    def fix(self, path, target):
-        self._files += 1
+                    break
+            return target
+
+        def fileLookup(self,target):
+            return self._index[target]
+
+        def _s_A(self, attrs):
+            self._check('a', 'href', attrs)
+
+        def _s_AREA(self, attrs):
+            self._check('area', 'href', attrs)
+
+    def fix(self, path):
         data = codecs.open(path, "r", "utf-8").read()
         filt = LinkFixer.LinkFilter(self._index,
-                                    target,
-                                    "../" * (len(os.path.split(path)[0].split("/"))),
+                                    ("../" * (len(os.path.split(path)[0].split("/"))))[:-1],
                                     codecs.open(path, "w", "utf-8") )
         filt.feed(data)
         filt.close()
-        found, fixed, removed = filt.stats()
-        self._found += found
-        self._fixed += fixed
-        for anchor, labels in removed.items():
-            for label in labels.keys():
-                self._removed.setdefault((anchor,label),{})[path] = None
-
-    def stats(self):
-        return (self._files, self._found, self._fixed, self._removed)
-    
+
 (opts, args) = getopt.getopt(sys.argv[1:], "vs:")
 
-if len(args) != 2:
-    sys.stderr.write("""Usage: fix-links.py [-s skip-dir]...
+if len(args) != 0:
+    sys.stderr.write("""Usage: fix-links.py [-s skip-dir]...
 
-Process the 'errorX.txt' and 'errorAX.txt' files as generated by
-'linklint': Check all invalid links and try to find the correct
-target. If a target is found, the link is changed accordingly,
-otherwise the link is removed.
+Check all links and try to find the correct target. If a target is
+found, the link is changed accordingly, otherwise the link is removed.
 
 To find anchors, fix-links.py generates a complete index of all
 anchors defined in any HTML file in the current directory or some
@@ -249,46 +229,14 @@ not be scanned for '*.html' files.
 skipdirs = [ val for opt, val in opts if opt == '-s' ]
 verbose = ( '-v', '' ) in opts
 
-if not os.path.exists(args[0]) and not os.path.exists(args[1]):
-    # No bad links to nothing to do
-    sys.exit(0)
-
 fixer = LinkFixer(skipdirs)
 fixer.init()
 
-target = None
-
-if os.path.exists(args[0]):
-    for l in file(args[0]):
-        l = l.rstrip()
-        if l.startswith('/'):
-            target = '#' + os.path.split(l)[1]
-        elif l.startswith(' /') and not l.endswith('/'):
-            sys.stderr.write("%s\n" % l)
-            fixer.fix(l[5:], target)
-
-if os.path.exists(args[1]):
-    for l in file(args[1]):
-        l = l.rstrip()
-        if l.startswith('/'):
-            target = l.split('#')[1]
-        elif l.startswith(' /') and not l.endswith('/'):
-            sys.stderr.write("%s\n" % l)
-            fixer.fix(l[5:], target)
-
-total, found, fixed, removed = fixer.stats()
-
-if verbose:
-    sys.stderr.write("\nRemoved links:\n")
-    for (anchor, label), files in removed.items():
-        sys.stderr.write("%-36.36s %-48.48s %s\n"
-                         % ( anchor,
-                             "(%s)" % label[:46],
-                             " ".join(files.keys())) )
-
-sys.stderr.write("""
-Files processed : %5d
-Links processed : %5d
-Links fixed     : %5d
-Links removed   : %5d
-""" % (total, found, fixed, found-fixed))
+for dirname, subdirs, files in os.walk('.'):
+    for d in skipdirs:
+        if d in subdirs:
+            subdirs.remove(d)
+    for f in fnmatch.filter(files,'*.html'):
+        path = os.path.normpath(os.path.join(dirname, f))
+        print path
+        fixer.fix(path)
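The heart of the rewritten filter is the anchor lookup in the fix-links.py hunk above: on a miss, the lookup strips leading 'g' characters and retries, presumably because the doxygen version shipped with maverick decorates group-member anchors with a 'g' prefix that the recorded index entries lack. A miniature of the same loop, with a plain dict standing in for the real index object (the anchor id and file name here are made up):

    index = { '7d62e963': 'group__ppi.html' }       # anchor id -> defining file

    def anchorLookup(anchor):
        target = None
        while not target:
            target = index.get(anchor)              # None on a miss
            if target:
                target = '%s#%s' % (target, anchor) # resolved: file plus anchor
            elif anchor.startswith('g'):
                anchor = anchor[1:]                 # retry without 'g' prefix
            else:
                break                               # exhausted: leave link alone
        return target

    print anchorLookup('g7d62e963')                 # -> group__ppi.html#7d62e963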
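The [:-1] added to the topdir argument of fix() is easiest to see with a concrete path (the example path is made up):

    import os

    path = 'senf/PPI/doc/html/index.html'            # four directories deep
    depth = len(os.path.split(path)[0].split('/'))   # -> 4
    print ("../" * depth)[:-1]                       # -> ../../../..

The old expression kept the trailing slash; since _check() now builds the new reference with '/'.join((self._topdir, newTarget)), which inserts its own slash, the prefix has to come without one.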
diff --git a/doclib/linklint_addnames.py b/doclib/linklint_addnames.py
deleted file mode 100644
index d8bb8c5..0000000
--- a/doclib/linklint_addnames.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/python
-
-import HTMLParser, re, sys, os.path
-
-class ScanLinks(HTMLParser.HTMLParser):
-
-    def __init__(self, target, base):
-        HTMLParser.HTMLParser.__init__(self)
-        self._target = target
-        self._base = base
-        self._collect = False
-        self._data = ""
-        self._value = ""
-
-    def startCollect(self):
-        self._collect = True
-        self._data = ""
-
-    def endCollect(self):
-        self._collect = False
-        return self._data
-
-    def collecting(self):
-        return self._collect
-
-    def handle_starttag(self,tag,attrs):
-        m = getattr(self,'_s_'+tag.upper(),None)
-        if m:
-            m(attrs)
-
-    def handle_endtag(self,tag):
-        m = getattr(self,'_e_'+tag.upper(),None)
-        if m:
-            m()
-
-    def handle_data(self,data):
-        if self._collect:
-            self._data += data
-
-    def handle_charref(self,name):
-        self.handle_data(name)
-
-    def handle_entityref(self,name):
-        self.handle_data(name)
-
-    def value(self):
-        return self._value
-
-    ###########################################################################
-
-    SCHEME_RE=re.compile("[a-z]+:")
-
-    def _s_A(self,attrs):
-        attrs = dict(attrs)
-        url = attrs.get('href')
-        if url and not self.SCHEME_RE.match(url):
-            if '#' in self._target:
-                p = os.path.abspath(os.path.join(self._base,url))
-            else:
-                p = os.path.abspath(os.path.join(self._base,url.split('#')[0]))
-            if p == self._target:
-                self.startCollect()
-
-    def _e_A(self):
-        if self.collecting():
-            self._value = self.endCollect()
-
-WS_RE=re.compile("\\s+")
-
-def linkName(target,f):
-    scanner = ScanLinks(target,os.path.split(os.path.abspath(f))[0])
-    scanner.feed(file(f).read())
-    return WS_RE.sub(' ',scanner.value().strip())
-
-process = 0
-target = 0
-for line in sys.stdin:
-    if line.startswith('
[...]
]*\) id="\1"/name="\1"\2/g' \
         -e 's/id="\([^"]*\)"\([^>]*\) name="\1"/name="\1"\2/g' \
+        -e 's/\(<\/a>//g' \
         | xsltproc --novalid --nonet --html --stringparam topdir "$reltopdir" \
               "$base/html-munge.xsl" -
 }
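The one sed expression that survived the capture intact can be read as follows: doxygen emits anchors carrying both an id and a name attribute with the same value, and the redundant id is dropped so that only one old-style anchor remains for linklint and fix-links.py to resolve. This reading is inferred from the fragment; the Python rendering below is for illustration only and is not part of the build:

    import re

    # mirrors: s/id="\([^"]*\)"\([^>]*\) name="\1"/name="\1"\2/g
    DUP_RE = re.compile(r'id="([^"]*)"([^>]*) name="\1"')

    def dropDuplicateId(html):
        return DUP_RE.sub(r'name="\1"\2', html)

    print dropDuplicateId('<a class="anchor" id="g7d62e963" name="g7d62e963">')
    # -> <a class="anchor" name="g7d62e963">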