Update Examples/Sniffer documentation
[senf.git] / doclib / linklint_addnames.py
1 #!/usr/bin/python
2
3 import HTMLParser, re, sys, os.path
4
class ScanLinks(HTMLParser.HTMLParser):
    """Collect the text of the <a> elements in a document that link to a target.

    Feed the HTML of one file to the parser; afterwards value() returns the
    text found inside the last <a href=...> whose href, resolved against
    'base', equals 'target'.  If no link matched, value() returns "".
    """

    def __init__(self, target, base):
        """Set up the scanner.

        target -- absolute path the links are compared against; if it
                  contains a '#', link fragments take part in the comparison
        base   -- directory against which relative hrefs are resolved
        """
        HTMLParser.HTMLParser.__init__(self)
        self._target = target
        self._base = base
        self._collect = False   # True while inside a matching <a> element
        self._data = ""         # text gathered since the last startCollect()
        self._value = ""        # text of the last completed matching link

    def startCollect(self):
        """Start gathering character data, discarding anything gathered before."""
        self._collect = True
        self._data = ""

    def endCollect(self):
        """Stop gathering character data and return what has been gathered."""
        self._collect = False
        return self._data

    def collecting(self):
        """Return True while character data is being gathered."""
        return self._collect

    def handle_starttag(self, tag, attrs):
        # Dispatch to a method named _s_<TAG> if this class defines one;
        # all other start tags are ignored.
        handler = getattr(self, '_s_' + tag.upper(), None)
        if handler:
            handler(attrs)

    def handle_endtag(self, tag):
        # Dispatch to a method named _e_<TAG> if this class defines one;
        # all other end tags are ignored.
        handler = getattr(self, '_e_' + tag.upper(), None)
        if handler:
            handler()

    def handle_data(self, data):
        if self._collect:
            self._data += data

    def handle_charref(self, name):
        # 'name' is only the part between '&#' and ';' (e.g. '228' or 'xe4').
        # Put the reference back in its source form instead of appending the
        # bare digits, so the collected text is still correct HTML.
        self.handle_data('&#%s;' % name)

    def handle_entityref(self, name):
        # 'name' is only the part between '&' and ';' (e.g. 'amp').
        # Put the reference back in its source form instead of appending the
        # bare name, so the collected text is still correct HTML.
        self.handle_data('&%s;' % name)

    def value(self):
        """Return the text of the last matching link seen so far ("" if none)."""
        return self._value

    ###########################################################################

    # hrefs starting with a scheme ("http:", "mailto:", ...) are not local
    # files and are never compared against the target.
    SCHEME_RE=re.compile("[a-z]+:")

    def _s_A(self, attrs):
        """Handle <a ...>: start collecting if the href resolves to the target."""
        url = dict(attrs).get('href')
        if not url or self.SCHEME_RE.match(url):
            return
        if '#' not in self._target:
            # The target names a whole file, so the link's fragment is
            # irrelevant for the comparison.
            url = url.split('#')[0]
        if os.path.abspath(os.path.join(self._base, url)) == self._target:
            self.startCollect()

    def _e_A(self):
        """Handle </a>: if a matching link was open, store its text."""
        if self.collecting():
            self._value = self.endCollect()
67
# A run of whitespace (including line breaks) inside link text.
WS_RE = re.compile(r"\s+")

def linkName(target, f):
    """Return the text of the link in HTML file 'f' that points to 'target'.

    target -- absolute path the links in 'f' are compared against
    f      -- path of the HTML file to scan; relative hrefs are resolved
              against the directory containing it

    The text is stripped and every run of whitespace is collapsed to a single
    blank.  If several links match, the text of the last one is returned; if
    none matches, the result is "".  Errors from opening or reading 'f'
    propagate to the caller.
    """
    scanner = ScanLinks(target, os.path.dirname(os.path.abspath(f)))
    # try/finally (rather than 'with') closes the file deterministically while
    # staying valid syntax on old Python 2 interpreters.
    source = open(f)
    try:
        scanner.feed(source.read())
    finally:
        source.close()
    return WS_RE.sub(' ', scanner.value().strip())
74
def _hrefPath(line):
    """Return the path from the first href on a line of the input.

    Takes everything after the first ':' of the line, drops the next two
    characters and cuts at the closing '"' (presumably this strips the
    'file://' prefix of the href -- confirm against the linklint output).
    Splitting only once keeps paths that themselves contain a ':' intact.
    """
    return line.split(':', 1)[1][2:].split('"')[0]

# Filter stdin to stdout.  A link starting in column 0 names the current
# target; every following link indented by four blanks names a file referring
# to that target and gets the referring link's text appended in parentheses.
# All other lines are copied unchanged.
target = None
for line in sys.stdin:
    if line.startswith('<a href='):
        target = _hrefPath(line)
    elif line.startswith('    <a href=') and target is not None:
        f = _hrefPath(line)
        # Strip the line ending and the closing tag explicitly instead of
        # assuming the line ends in exactly '</a>\n'.
        text = line.rstrip('\r\n')
        if text.endswith('</a>'):
            text = text[:-len('</a>')]
        line = '%s (%s)</a>\n' % (text, linkName(target, f))
    sys.stdout.write(line)