Add some documentation to the SCons-version-switching hack
[senf.git] / scons / scons-1.2.0 / engine / SCons / compat / _scons_shlex.py
1 # -*- coding: iso-8859-1 -*-
2 """A lexical analyzer class for simple shell-like syntaxes."""
3
4 # Module and documentation by Eric S. Raymond, 21 Dec 1998
5 # Input stacking and error message cleanup added by ESR, March 2000
6 # push_source() and pop_source() made explicit by ESR, January 2001.
7 # Posix compliance, split(), string arguments, and
8 # iterator interface by Gustavo Niemeyer, April 2003.
9
10 import os.path
11 import sys
12 #from collections import deque
13
class deque:
    """Minimal list-backed stand-in for a double-ended queue.

    Only the operations the lexer in this module relies on are provided:
    len(), appendleft() and popleft().  Items live in the plain list
    ``self.data`` with the left end at index 0.
    """
    def __init__(self):
        # Left-most element is data[0].
        self.data = []

    def __len__(self):
        # Also drives truth-testing: an empty queue is false.
        return len(self.data)

    def appendleft(self, item):
        "Put item at the left end of the queue."
        self.data[:0] = [item]

    def popleft(self):
        "Remove and return the left-most item."
        items = self.data
        return items.pop(0)
23
# is_basestring(s) -> true when s should be treated as a string.
#
# Interpreters that provide ``basestring`` use an isinstance() check
# against it.  Otherwise the exact-type check against the plain string
# type is used; that type is looked up once, preferring
# ``types.StringType`` and falling back to ``str`` on interpreters that
# have neither ``basestring`` nor ``types.StringType``.
try:
    basestring
except NameError:
    import types
    _string_type = getattr(types, 'StringType', str)
    def is_basestring(s):
        "Return true if s is exactly of the plain string type."
        return type(s) is _string_type
else:
    def is_basestring(s):
        "Return true if s is an instance of basestring."
        return isinstance(s, basestring)
33
34 try:
35     from cStringIO import StringIO
36 except ImportError:
37     from StringIO import StringIO
38
39 __all__ = ["shlex", "split"]
40
class shlex:
    """A lexical analyzer class for simple shell-like syntaxes.

    Tokens are read one character at a time from ``instream``.  The
    behaviour is controlled by these instance attributes, all of which
    may be changed after construction:

    commenters       -- characters that start a comment; the rest of the
                        line is discarded
    wordchars        -- characters that accumulate into a word token
    whitespace       -- characters that separate tokens
    whitespace_split -- if true, characters that are not otherwise
                        special are accumulated into words too
    quotes           -- characters that open and close a quoted string
    escape           -- escape characters (honoured in posix mode only)
    escapedquotes    -- quote characters inside which ``escape`` is
                        honoured (posix mode only)
    source           -- if not None, the token that triggers inclusion
                        of another input source via sourcehook()
    eof              -- value returned at end of input: None in posix
                        mode, '' otherwise
    debug            -- 0 for silence; higher values print more tracing
    lineno           -- current line number in the current source

    Instances are iterators: iteration yields tokens until ``eof``.
    """

    # NOTE: every print below is written as print(<single expression>),
    # which produces the same output whether print is a statement or a
    # function.

    def __init__(self, instream=None, infile=None, posix=False):
        """Set up a lexer.

        instream -- a string (wrapped in a StringIO) or an object with
                    read() and readline(); sys.stdin if None
        infile   -- name associated with instream, used by
                    error_leader() and sourcehook(); ignored (None)
                    when reading from sys.stdin
        posix    -- select posix-mode parsing rules
        """
        if is_basestring(instream):
            instream = StringIO(instream)
        if instream is not None:
            self.instream = instream
            self.infile = infile
        else:
            self.instream = sys.stdin
            self.infile = None
        self.posix = posix
        if posix:
            self.eof = None
        else:
            self.eof = ''
        self.commenters = '#'
        self.wordchars = ('abcdfeghijklmnopqrstuvwxyz'
                          'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
        if self.posix:
            # Accented Latin-1 letters also count as word characters in
            # posix mode (this module declares an iso-8859-1 coding).
            self.wordchars = self.wordchars + ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
                               'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
        self.whitespace = ' \t\r\n'
        self.whitespace_split = False
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        # Parser state: ' ' between tokens, 'a' inside a word, a quote
        # or escape character while inside that construct, None once
        # end of file has been seen.
        self.state = ' '
        self.pushback = deque()
        self.lineno = 1
        # debug starts at 0, so nothing is traced during construction.
        self.debug = 0
        self.token = ''
        # Stack of (infile, instream, lineno) for suspended sources.
        self.filestack = deque()
        self.source = None

    def push_token(self, tok):
        "Push a token onto the stack popped by the get_token method"
        if self.debug >= 1:
            print("shlex: pushing token " + repr(tok))
        self.pushback.appendleft(tok)

    def push_source(self, newstream, newfile=None):
        """Push an input source onto the lexer's input source stack.

        newstream may be a string (wrapped in a StringIO) or a
        file-like object.  The current source and its line number are
        saved and line counting restarts at 1 for the new source.
        """
        if is_basestring(newstream):
            newstream = StringIO(newstream)
        self.filestack.appendleft((self.infile, self.instream, self.lineno))
        self.infile = newfile
        self.instream = newstream
        self.lineno = 1
        if self.debug:
            if newfile is not None:
                print('shlex: pushing to file %s' % (self.infile,))
            else:
                print('shlex: pushing to stream %s' % (self.instream,))

    def pop_source(self):
        """Pop the input source stack.

        Closes the current stream, restores the most recently saved
        (infile, instream, lineno) and resets the parser state to
        "between tokens".
        """
        self.instream.close()
        (self.infile, self.instream, self.lineno) = self.filestack.popleft()
        if self.debug:
            print('shlex: popping to %s, line %d'
                  % (self.instream, self.lineno))
        self.state = ' '

    def get_token(self):
        """Get a token from the input stream (or from stack if it's nonempty)

        Pushed-back tokens are returned first.  Otherwise a raw token is
        read; the ``source`` keyword (if set) and end-of-file on a
        pushed source are handled here.  Returns ``self.eof`` when the
        outermost source is exhausted.
        """
        if self.pushback:
            tok = self.pushback.popleft()
            if self.debug >= 1:
                print("shlex: popping token " + repr(tok))
            return tok
        # No pushback.  Get a token.
        raw = self.read_token()
        # Handle inclusions
        if self.source is not None:
            while raw == self.source:
                # NOTE(review): if the input ends right after the source
                # keyword, read_token() yields ''/None and sourcehook()
                # fails when it indexes that value.
                spec = self.sourcehook(self.read_token())
                if spec:
                    (newfile, newstream) = spec
                    self.push_source(newstream, newfile)
                raw = self.get_token()
        # Maybe we got EOF instead?
        while raw == self.eof:
            if not self.filestack:
                return self.eof
            else:
                # End of an included source: resume the enclosing one.
                self.pop_source()
                raw = self.get_token()
        # Neither inclusion nor EOF
        if self.debug >= 1:
            if raw != self.eof:
                print("shlex: token=" + repr(raw))
            else:
                print("shlex: token=EOF")
        return raw

    def read_token(self):
        """Read one raw token from the current input stream.

        Runs the character-level state machine until a token is
        complete.  Returns the token text; at end of input returns ''
        or, in posix mode when nothing was quoted, None.  Raises
        ValueError if the input ends inside a quoted string or directly
        after an escape character.
        """
        # quoted: a quoted section was seen, so an empty token is real.
        quoted = False
        # escapedstate: state to return to after an escaped character.
        escapedstate = ' '
        while True:
            nextchar = self.instream.read(1)
            if nextchar == '\n':
                self.lineno = self.lineno + 1
            if self.debug >= 3:
                print("shlex: in state %s I see character: %s"
                      % (repr(self.state), repr(nextchar)))
            if self.state is None:
                self.token = ''        # past end of file
                break
            elif self.state == ' ':
                if not nextchar:
                    self.state = None  # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in whitespace state")
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    # Discard the remainder of the comment line.
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars:
                    self.token = nextchar
                    self.state = 'a'
                elif nextchar in self.quotes:
                    # Non-posix mode keeps the quote characters in the token.
                    if not self.posix:
                        self.token = nextchar
                    self.state = nextchar
                elif self.whitespace_split:
                    self.token = nextchar
                    self.state = 'a'
                else:
                    # Any other character is a one-character token.
                    self.token = nextchar
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
            elif self.state in self.quotes:
                quoted = True
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in quotes state")
                    # XXX what error should be raised here?
                    raise ValueError("No closing quotation")
                if nextchar == self.state:
                    if not self.posix:
                        self.token = self.token + nextchar
                        self.state = ' '
                        break
                    else:
                        # Posix: the word may continue after the quote.
                        self.state = 'a'
                elif self.posix and nextchar in self.escape and \
                     self.state in self.escapedquotes:
                    escapedstate = self.state
                    self.state = nextchar
                else:
                    self.token = self.token + nextchar
            elif self.state in self.escape:
                if not nextchar:      # end of file
                    if self.debug >= 2:
                        print("shlex: I see EOF in escape state")
                    # XXX what error should be raised here?
                    raise ValueError("No escaped character")
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if escapedstate in self.quotes and \
                   nextchar != self.state and nextchar != escapedstate:
                    self.token = self.token + self.state
                self.token = self.token + nextchar
                self.state = escapedstate
            elif self.state == 'a':
                if not nextchar:
                    self.state = None   # end of file
                    break
                elif nextchar in self.whitespace:
                    if self.debug >= 2:
                        print("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.token or (self.posix and quoted):
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.commenters:
                    self.instream.readline()
                    self.lineno = self.lineno + 1
                    # Only posix mode ends the word at a comment.
                    if self.posix:
                        self.state = ' '
                        if self.token or (self.posix and quoted):
                            break   # emit current token
                        else:
                            continue
                elif self.posix and nextchar in self.quotes:
                    self.state = nextchar
                elif self.posix and nextchar in self.escape:
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.wordchars or nextchar in self.quotes \
                    or self.whitespace_split:
                    self.token = self.token + nextchar
                else:
                    # Punctuation ends the word; it is queued so that
                    # get_token() returns it as the next token.
                    self.pushback.appendleft(nextchar)
                    if self.debug >= 2:
                        print("shlex: I see punctuation in word state")
                    self.state = ' '
                    if self.token:
                        break   # emit current token
                    else:
                        continue
        result = self.token
        self.token = ''
        if self.posix and not quoted and result == '':
            result = None
        if self.debug > 1:
            if result:
                print("shlex: raw token=" + repr(result))
            else:
                print("shlex: raw token=EOF")
        return result

    def sourcehook(self, newfile):
        """Hook called on a filename to be sourced.

        Strips one pair of surrounding double quotes, resolves a
        relative name against the directory of the current ``infile``
        (when that is a string) and returns (filename, open file).
        """
        if newfile[0] == '"':
            newfile = newfile[1:-1]
        # This implements cpp-like semantics for relative-path inclusion.
        if is_basestring(self.infile) and not os.path.isabs(newfile):
            newfile = os.path.join(os.path.dirname(self.infile), newfile)
        return (newfile, open(newfile, "r"))

    def error_leader(self, infile=None, lineno=None):
        """Emit a C-compiler-like, Emacs-friendly error-message leader.

        infile and lineno default to the lexer's current values.
        """
        if infile is None:
            infile = self.infile
        if lineno is None:
            lineno = self.lineno
        return "\"%s\", line %d: " % (infile, lineno)

    def __iter__(self):
        return self

    def next(self):
        "Return the next token; raise StopIteration at end of input."
        token = self.get_token()
        if token == self.eof:
            raise StopIteration
        return token

    # Same method under the name used by the newer iterator protocol.
    __next__ = next
293
def split(s, comments=False):
    """Split the string s into a list of tokens using posix-mode rules.

    Tokens are delimited by whitespace only (whitespace_split is
    enabled).  Unless comments is true, comment recognition is turned
    off by clearing the lexer's commenters.
    """
    lexer = shlex(s, posix=True)
    lexer.whitespace_split = True
    if not comments:
        lexer.commenters = ''
    # Drain the lexer by hand instead of relying on the iterator protocol.
    tokens = []
    tok = lexer.get_token()
    while tok != lexer.eof:
        tokens.append(tok)
        tok = lexer.get_token()
    return tokens
307
# Script entry point: tokenize the file named by the first command-line
# argument (or standard input when no argument is given) and print each
# token until the lexer returns an empty/EOF token.
if __name__ == '__main__':
    if len(sys.argv) == 1:
        source = None
        lexer = shlex()
    else:
        path = sys.argv[1]
        source = open(path)
        lexer = shlex(source, path)
    try:
        while 1:
            tt = lexer.get_token()
            if not tt:
                break
            print("Token: " + repr(tt))
    finally:
        # Release the input file even if tokenizing raised.
        if source is not None:
            source.close()