f676f1026df34ee069f72649564b0e3a0d3c270f
[senf.git] / Console / Parse.ih
1 // $Id$
2 //
3 // Copyright (C) 2008 
4 // Fraunhofer Institute for Open Communication Systems (FOKUS)
5 // Competence Center NETwork research (NET), St. Augustin, GERMANY
6 //     Stefan Bund <g0dil@berlios.de>
7 //
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 2 of the License, or
11 // (at your option) any later version.
12 //
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 // GNU General Public License for more details.
17 //
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the
20 // Free Software Foundation, Inc.,
21 // 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22
23 /** \file
24     \brief Parse internal header */
25
26 #ifndef IH_Parse_
27 #define IH_Parse_ 1
28
29 // Custom includes
30 #include <vector>
31 #include <boost/regex.hpp>
32 #include <boost/spirit.hpp>
33 #include <boost/spirit/utility/grammar_def.hpp>
34 #include <boost/spirit/actor.hpp>
35 #include <boost/bind.hpp>
36 #include <boost/function.hpp>
37 #include <boost/ref.hpp>
38
39 ///////////////////////////////ih.p////////////////////////////////////////
40
41 namespace senf {
42 namespace console {
43 namespace detail {
44
45 #ifndef DOXYGEN
46
47     struct append_action
48     {
49         template <class T, class Value>
50         void act(T & ref, Value const & value) const
51             { ref += T(1, value); }
52
53         template <class T, class Iterator>
54         void act(T & ref, Iterator const & f, Iterator const & l) const
55             { ref += T(f,l); }
56     };
57
58     template <class T>
59     inline boost::spirit::ref_value_actor<T, append_action> 
60     append_a(T & ref)
61     {
62         return boost::spirit::ref_value_actor<T, append_action>(ref);
63     }
64     
65     template <class T, class Value>
66     inline boost::spirit::ref_const_ref_actor<T, Value, append_action> 
67     append_a(T & ref, Value const & value)
68     {
69         return boost::spirit::ref_const_ref_actor<T, Value, append_action>(ref, value);
70     }
71
72     template <class ParseDispatcher>
73     struct CommandGrammar : boost::spirit::grammar<CommandGrammar<ParseDispatcher> >
74     {
75         ///////////////////////////////////////////////////////////////////////////
76         // Start rules
77
78         enum { CommandParser, SkipParser };
79
80         ///////////////////////////////////////////////////////////////////////////
81         // The parse context (variables needed while parsing)
82
83         struct Context {
84             std::string str;
85             std::vector<std::string> path;
86             char ch;
87         };
88
89         Context & context;
90
91         ///////////////////////////////////////////////////////////////////////////
92         // Dispatching semantic actions
93
94         ParseDispatcher & dispatcher;
95
96         struct Dispatch_actor
97         {
98             Dispatch_actor(boost::function<void ()> fn_) : fn (fn_) {}
99
100             template <class Value>
101             void operator()(Value const & value) const
102                 { fn(); }
103
104             template <class Iterator>
105             void operator()(Iterator const & f, Iterator const & l) const
106                 { fn(); }
107
108             boost::function<void ()> fn;
109         };
110         
111         template <class Callback>
112         Dispatch_actor dispatch(Callback cb) const
113             { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher))); }
114
115         template <class Callback, class Arg>
116         Dispatch_actor dispatch(Callback cb, Arg const & arg) const
117             { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher), arg)); }
118
119         ///////////////////////////////////////////////////////////////////////////
120
121         CommandGrammar(ParseDispatcher & d, Context & c) 
122             : context(c), dispatcher(d) {}
123
124         template <class Scanner>
125         struct definition 
126             : public boost::spirit::grammar_def< boost::spirit::rule<Scanner>, 
127                                                  boost::spirit::rule<Scanner> >
128         {
129             boost::spirit::rule<Scanner> command, path, argument, word, string, hexstring, token,
130                 punctuation, hexbyte, balanced_tokens, simple_argument, complex_argument, builtin, 
131                 skip, commands, block, statement, relpath, abspath;
132             boost::spirit::chset<> special_p, punctuation_p, space_p, invalid_p, word_p;
133             boost::spirit::distinct_parser<> keyword_p;
134
135             definition(CommandGrammar const & self) : 
136
137                 // Characters with a special meaning within the parser
138                 special_p ("/(){};"),
139
140                 // Characters which are returned as punctuation tokens
141                 punctuation_p (",="),
142
143                 // Whitespace characters
144                 space_p (" \t\n\r"),
145
146                 // Invalid characters: All chars below \x20 (space) which are not space_p
147                 // (don't put a \0 in the chset<> argument *string* ...)
148                 invalid_p (
149                     boost::spirit::chset<>('\0') | boost::spirit::chset<>("\x01-\x20") - space_p ),
150
151                 // Valid word characters
152                 word_p (
153                     boost::spirit::anychar_p - special_p - punctuation_p - space_p - invalid_p),
154
155                 // Keywords must not be followed by a word char or '/'
156                 keyword_p ( word_p | boost::spirit::ch_p('/') )
157
158             {
159                 using namespace boost::spirit;
160                 typedef ParseDispatcher PD;
161
162                 ///////////////////////////////////////////////////////////////////
163                 // Spirit grammar
164                 //
165                 // Syntax summary:
166                 // This is EBNF with some minor tweaks to accommodate C++ syntax
167                 //
168                 //   * and +    precede their argument
169                 //   >>         is followed by
170                 //   !          optional
171                 //   a % b      match any number of a's separated by b
172                 //   a - b      match a but not b
173                 //
174                 // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
175                 // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
176                 // quite readable.
177                 //   
178                 //   ch_p             match character
179                 //   eps_p            always matches nothing (to attach unconditional actions)
180                 //   confix_p(a,b,c)  match b, preceded by a and terminated by c. Used to parse
181                 //                    string literals and comments
182                 //   lex_escape_ch_p  match a lex style escape char. This is like a C++ style
183                 //                    literal string escape char, however \x will be replaced by 'x'
184                 //                    for any char 'x' if it has no special meaning.
185                 //   keyword_p        match a delimited keyword
186                 //   comment_p(a,b)   match comment starting with a and terminated with b. b
187                 //                    defaults to end-of-line
188                 //
189                 //   lexeme_d         don't skip whitespace (as defined by the skip parser)
190                 //
191                 // Aligned to the right at column 50 are semantic actions.
192                 //
193                 // For clarity, I have used 'ch_p' explicitly throughout even though it is auxiliary
194                 // in most cases.
195                 //
196                 // More info is in the Boost.Spirit documentation
197
198                 commands
199                     =  * command
200                     ;
201
202                 command 
203                     =    builtin >> (ch_p(';') | end_p)
204                     |    path  >> ( block | statement )
205                     |    ch_p(';') // Ignore empty commands
206                     ;
207
208                 builtin
209                     =    keyword_p("cd") 
210                       >> path
211                       >> eps_p                    [ self.dispatch(&PD::builtin_cd,
212                                                                   boost::ref(self.context.path)) ]
213                     |    keyword_p("ls")
214                       >> ! path
215                       >> eps_p                    [ self.dispatch(&PD::builtin_ls,
216                                                                   boost::ref(self.context.path)) ]
217                     |    keyword_p("exit")        [ self.dispatch(&PD::builtin_exit) ]
218                     
219                     |    keyword_p("help")
220                       >> ! path
221                       >> eps_p                    [ self.dispatch(&PD::builtin_help,
222                                                                   boost::ref(self.context.path)) ]
223                     ;
224
225                 block
226                     =    ch_p('{')                [ self.dispatch(&PD::pushDirectory,
227                                                                   boost::ref(self.context.path)) ]
228                       >> * command 
229                       >> ch_p('}')                [ self.dispatch(&PD::popDirectory) ]
230                     ;
231
232                 statement
233                     = eps_p                       [ self.dispatch(&PD::beginCommand, 
234                                                                   boost::ref(self.context.path)) ]
235                       >> * argument
236                       >> (ch_p(';') | end_p)
237                       >> eps_p                    [ self.dispatch(&PD::endCommand) ]
238                     ;
239
240                 argument
241                     =    simple_argument          [ self.dispatch(&PD::pushArgument, 
242                                                                   boost::ref(self.context.str)) ]
243                     |    complex_argument
244                     ;
245                 
246                 simple_argument         // All these return their value in context.str
247                     =    string
248                     |    hexstring
249                     |    word
250                     ;
251                 
252                 complex_argument        // Argument consists of multiple tokens
253                     =    ch_p('(')                [ self.dispatch(&PD::openGroup) ]
254                       >> * token
255                       >> ch_p(')')                [ self.dispatch(&PD::closeGroup) ]
256                     ;
257
258                 string                  // Returns value in context.str
259                     =    eps_p                    [ clear_a(self.context.str) ]
260                       >> lexeme_d
261                          [
262                              ch_p('"')
263                           >> * ( ( lex_escape_ch_p[ assign_a(self.context.ch) ] 
264                                    - '"' 
265                                  )                [ append_a(self.context.str,
266                                                              self.context.ch) ] 
267                                )
268                           >> ch_p('"')
269                          ]
270                     ;
271
272                 hexstring               // Returns value in context.str
273                     =    eps_p                    [ clear_a(self.context.str) ]
274                       >> confix_p( "x\"", * hexbyte, '"' )
275                     ;
276
277                 path                    // Returns value in context.path
278                     =    eps_p                    [ clear_a(self.context.path) ]
279                       >> relpath | abspath
280                     ;
281
282                 relpath
283                     =    (   word                 [ push_back_a(self.context.path) ] 
284                            % ch_p('/') )
285                       >> ( ! ch_p('/')            [ push_back_a(self.context.path,"") ] )
286                     ;
287
288                 abspath
289                     =    ch_p('/')                [ push_back_a(self.context.path, "") ]
290                       >> ( relpath
291                          | eps_p                  [ push_back_a(self.context.path, "") ] )
292                     ;
293
294                 balanced_tokens 
295                     =    ch_p('(')                [ self.dispatch(&PD::pushPunctuation, "(") ]
296                       >> * token
297                       >> ch_p(')')                [ self.dispatch(&PD::pushPunctuation, ")") ]
298                     ;
299
300                 token
301                     =    simple_argument          [ self.dispatch(&PD::pushWord, 
302                                                                   boost::ref(self.context.str)) ]
303                     |    punctuation              [ self.dispatch(&PD::pushPunctuation,
304                                                                   boost::ref(self.context.str)) ]
305                     |    balanced_tokens
306                     ;
307
308                 punctuation             // Returns value in context.str
309                     =    punctuation_p            [ assign_a(self.context.str) ]
310                     ;
311
312                 word                    // Returns value in context.str
313                     =    lexeme_d[ + word_p ]     [ assign_a(self.context.str) ]
314                     ;
315
316                 hexbyte
317                     =    uint_parser<char, 16, 2, 2>()
318                                                   [ append_a(self.context.str) ]
319                     ;
320
321                 skip
322                     =    space_p | comment_p('#')
323                     ;
324
325                 ///////////////////////////////////////////////////////////////////
326
327                 start_parsers(
328                     commands,           // CommandParser
329                     skip                // SkipParser
330                 );
331
332                 BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
333                 BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
334                 BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
335                 BOOST_SPIRIT_DEBUG_TRACE_RULE(word,1);
336                 BOOST_SPIRIT_DEBUG_TRACE_RULE(string,1);
337                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexstring,1);
338                 BOOST_SPIRIT_DEBUG_TRACE_RULE(token,1);
339                 BOOST_SPIRIT_DEBUG_TRACE_RULE(punctuation,1);
340                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexbyte,1);
341                 BOOST_SPIRIT_DEBUG_TRACE_RULE(balanced_tokens,1);
342                 BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
343                 BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
344                 BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
345                 BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
346                 BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
347                 BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
348                 BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
349                 BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
350             }
351         };
352     };
353
354 #endif
355
356 }}}
357
358 ///////////////////////////////ih.e////////////////////////////////////////
359 #endif
360
361 \f
362 // Local Variables:
363 // mode: c++
364 // fill-column: 100
365 // comment-column: 40
366 // c-file-style: "senf"
367 // indent-tabs-mode: nil
368 // ispell-local-dictionary: "american"
369 // compile-command: "scons -u test"
370 // End: