Console: Refactor config file parser into several classes
[senf.git] / Console / Parse.ih
1 // $Id$
2 //
3 // Copyright (C) 2008 
4 // Fraunhofer Institute for Open Communication Systems (FOKUS)
5 // Competence Center NETwork research (NET), St. Augustin, GERMANY
6 //     Stefan Bund <g0dil@berlios.de>
7 //
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 2 of the License, or
11 // (at your option) any later version.
12 //
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 // GNU General Public License for more details.
17 //
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the
20 // Free Software Foundation, Inc.,
21 // 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22
23 /** \file
24     \brief Parse internal header */
25
26 #ifndef IH_Parse_
27 #define IH_Parse_ 1
28
29 // Custom includes
30 #include <vector>
31 #include <boost/regex.hpp>
32 #include <boost/spirit.hpp>
33 #include <boost/spirit/utility/grammar_def.hpp>
34 #include <boost/spirit/actor.hpp>
35 #include <boost/bind.hpp>
36 #include <boost/function.hpp>
37 #include <boost/ref.hpp>
38
39 ///////////////////////////////ih.p////////////////////////////////////////
40
41 namespace senf {
42 namespace console {
43 namespace detail {
44
45 #ifndef DOXYGEN
46
47     ///////////////////////////////////////////////////////////////////////////
48     // append_a
49
// Action policy for the Boost.Spirit reference actors: instead of assigning
// the parsed attribute to the referenced object, it is appended to it.
// T has to be constructible from (count, value) and from an iterator range
// and must provide operator+= (std::string is what this file uses).
struct append_action
{
    // Single-value form: append exactly one element holding 'value'.
    template <class T, class Value>
    void act(T & target, Value const & value) const
    {
        T const element (1, value);
        target += element;
    }

    // Range form: append a copy of the elements in [first, last).
    template <class T, class Iterator>
    void act(T & target, Iterator const & first, Iterator const & last) const
    {
        T const range (first, last);
        target += range;
    }
};
60
61     template <class T>
62     inline boost::spirit::ref_value_actor<T, append_action> 
63     append_a(T & ref)
64     { return boost::spirit::ref_value_actor<T, append_action>(ref); }
65     
66     template <class T, class Value>
67     inline boost::spirit::ref_const_ref_actor<T, Value, append_action> 
68     append_a(T & ref, Value const & value)
69     { return boost::spirit::ref_const_ref_actor<T, Value, append_action>(ref, value); }
70
71     ///////////////////////////////////////////////////////////////////////////
72     // Grammar
73
74     template <class ParseDispatcher>
75     struct CommandGrammar : boost::spirit::grammar<CommandGrammar<ParseDispatcher> >
76     {
77         ///////////////////////////////////////////////////////////////////////////
78         // Start rules
79
80         enum { CommandParser, SkipParser, ArgumentsParser };
81
82         ///////////////////////////////////////////////////////////////////////////
83         // The parse context (variables needed while parsing)
84
85         typedef Token::TokenType TokenType;
86
87         struct Context {
88             std::string str;
89             std::vector<Token> path;
90             char ch;
91             Token token;
92             
93             // OUCH ... This is sooooo stupid .. push_back_a and assign_a take their 
94             // arguments by const-reference and STORE the REFERENCE ... they do NOT accept
95             // literal values !!!!!! 
96             static const Token EmptyToken;
97         };
98
99         Context & context;
100
101         ///////////////////////////////////////////////////////////////////////////
102         // Dispatching semantic actions
103
104         ParseDispatcher & dispatcher;
105
106         struct Dispatch_actor
107         {
108             Dispatch_actor(boost::function<void ()> fn_) : fn (fn_) {}
109
110             template <class Value>
111             void operator()(Value const & value) const
112                 { fn(); }
113
114             template <class Iterator>
115             void operator()(Iterator const & f, Iterator const & l) const
116                 { fn(); }
117
118             boost::function<void ()> fn;
119         };
120         
121         template <class Callback>
122         Dispatch_actor dispatch(Callback cb) const
123             { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher))); }
124
125         template <class Callback, class Arg>
126         Dispatch_actor dispatch(Callback cb, Arg const & arg) const
127             { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher), arg)); }
128
129         template <class Callback, class Arg1, class Arg2>
130         Dispatch_actor dispatch(Callback cb, Arg1 const & arg1, Arg2 const & arg2) const
131             { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher), arg1, arg2)); }
132
133         struct TokenSetter_actor
134         {
135             TokenSetter_actor(Context & c, TokenType t) : c_ (c), t_ (t) {}
136             
137             void operator()(std::string const & value) const
138                 { c_.token = Token(t_, value); }
139             void operator()(char value) const
140                 { c_.token = Token(t_, std::string(1,value)); }
141             template <class Iterator> void operator()(Iterator const & f, Iterator const & l) const
142                 { c_.token = Token(t_, std::string(f,l)); }
143
144             Context & c_;
145             TokenType t_;
146         };
147
148         struct TokenSetter_value_actor
149             : public TokenSetter_actor
150         {
151             TokenSetter_value_actor(Context & c, TokenType t, std::string & v) 
152                 : TokenSetter_actor(c,t), v_ (v) {}
153
154             template <class Value> void operator()(Value const &) const
155                 { TokenSetter_actor::operator()(v_); }
156
157             template <class Iterator> void operator()(Iterator const &, Iterator const &) const
158                 { TokenSetter_actor::operator()(v_); }
159
160             std::string & v_;
161         };
162
163         TokenSetter_actor set_token_a(TokenType t) const
164             { return TokenSetter_actor(context, t); }
165
166         TokenSetter_value_actor set_token_a(TokenType t, std::string & arg) const
167             { return TokenSetter_value_actor(context, t, arg); }
168
169         ///////////////////////////////////////////////////////////////////////////
170
171         CommandGrammar(ParseDispatcher & d, Context & c) 
172             : context(c), dispatcher(d) {}
173
174         template <class Scanner>
175         struct definition 
176             : public boost::spirit::grammar_def< boost::spirit::rule<Scanner>, 
177                                                  boost::spirit::rule<Scanner>,
178                                                  boost::spirit::rule<Scanner> >
179         {
180             boost::spirit::rule<Scanner> command, path, argument, word, string, hexstring, token,
181                 punctuation, hexbyte, balanced_tokens, simple_argument, complex_argument, builtin, 
182                 skip, commands, block, statement, relpath, abspath, arguments;
183             boost::spirit::chset<> special_p, punctuation_p, space_p, invalid_p, word_p;
184             boost::spirit::distinct_parser<> keyword_p;
185
186             definition(CommandGrammar const & self) : 
187
188                 // Characters with a special meaning within the parser
189                 special_p ("/(){};"),
190
191                 // Additional characters which are returned as punctuation tokens
192                 // (only allowed within '()').
193                 punctuation_p (",="),
194
195                 // Whitespace characters
196                 space_p (" \t\n\r"),
197
198                 // Invalid characters: All chars below \x20 (space) which are not space_p
199                 // (don't put a \0 in the chset<> argument *string* ...)
200                 invalid_p (
201                     boost::spirit::chset<>('\0') | boost::spirit::chset<>("\x01-\x20") - space_p ),
202
203                 // Valid word characters
204                 word_p (
205                     boost::spirit::anychar_p - special_p - punctuation_p - space_p - invalid_p),
206
207                 // Keywords must not be followed by a word char or '/'
208                 keyword_p ( word_p | boost::spirit::ch_p('/') )
209
210             {
211                 using namespace boost::spirit;
212                 typedef ParseDispatcher PD;
213                 typedef Token AT;
214
215                 ///////////////////////////////////////////////////////////////////
216                 // Spirit grammar
217                 //
218                 // Syntax summary:
219                 // This is EBNF with some minor tweaks to accommodate C++ syntax
220                 //
221                 //   * and +    like EBNF but they precede their argument
222                 //   >>         is followed by
223                 //   !          optional
224                 //   a % b      match any number of a's separated by b
225                 //   a - b      match a but not b
226                 //
227                 // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
228                 // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
229                 // quite readable.
230                 //   
231                 //   ch_p             match character
232                 //   eps_p            always matches nothing (to attach unconditional actions)
233                 //   confix_p(a,b,c)  match b, preceded by a and terminated by c. Used to parse
234                 //                    string literals and comments
235                 //   lex_escape_ch_p  match a lex style escape char. This is like a C++ style
236                 //                    literal string escape char, however \x will be replaced by 'x'
237                 //                    for any char 'x' if it has no special meaning.
238                 //   keyword_p        match a delimited keyword
239                 //   comment_p(a,b)   match comment starting with a and terminated with b. b
240                 //                    defaults to end-of-line
241                 //
242                 //   lexeme_d         don't skip whitespace (as defined by the skip parser)
243                 //
244                 // Aligned to the right at column 50 are semantic actions.
245                 //
246                 // For clarity, I have used 'ch_p' explicitly throughout even though it is optional
247                 // in most cases.
248                 //
249                 // More info is in the Boost.Spirit documentation
250
251                 commands
252                     =  * command
253                     ;
254
255                 command 
256                     =    builtin >> (ch_p(';') | end_p)
257                     |    path  >> ( block | statement )
258                     |    ch_p(';') // Ignore empty commands
259                     ;
260
261                 builtin
262                     =    keyword_p("cd") 
263                       >> path
264                       >> eps_p                    [ self.dispatch(&PD::builtin_cd,
265                                                                   boost::ref(self.context.path)) ]
266                     |    keyword_p("ls")
267                       >> ! path
268                       >> eps_p                    [ self.dispatch(&PD::builtin_ls,
269                                                                   boost::ref(self.context.path)) ]
270                     |    keyword_p("exit")        [ self.dispatch(&PD::builtin_exit) ]
271                     
272                     |    keyword_p("help")
273                       >> ! path
274                       >> eps_p                    [ self.dispatch(&PD::builtin_help,
275                                                                   boost::ref(self.context.path)) ]
276                     ;
277
278                 block
279                     =    ch_p('{')                [ self.dispatch(&PD::pushDirectory,
280                                                                   boost::ref(self.context.path)) ]
281                       >> * command 
282                       >> ch_p('}')                [ self.dispatch(&PD::popDirectory) ]
283                     ;
284
285                 statement
286                     = eps_p                       [ self.dispatch(&PD::beginCommand, 
287                                                                   boost::ref(self.context.path)) ]
288                       >> arguments
289                       >> (ch_p(';') | end_p)
290                       >> eps_p                    [ self.dispatch(&PD::endCommand) ]
291                     ;
292
293                 arguments
294                     =    * argument
295                     ;
296
297                 argument
298                     =    simple_argument          [ self.dispatch(&PD::pushToken, 
299                                                                   boost::ref(self.context.token)) ]
300                     |    balanced_tokens
301                     ;
302                 
303                 simple_argument         // All these return their value in context.token
304                     =    string
305                     |    hexstring
306                     |    word
307                     ;
308                 
309                 string                  // Returns value in context.token
310                     =    eps_p                    [ clear_a(self.context.str) ]
311                       >> lexeme_d
312                          [
313                              ch_p('"')
314                           >> * ( ( lex_escape_ch_p[ assign_a(self.context.ch) ] 
315                                    - '"' 
316                                  )                [ append_a(self.context.str,
317                                                              self.context.ch) ] 
318                                )
319                           >> ch_p('"')            [ self.set_token_a(AT::BasicString, 
320                                                                      self.context.str) ]
321                          ]
322                     ;
323
324                 hexstring               // Returns value in context.token
325                     =    eps_p                    [ clear_a(self.context.str) ]
326                       >> confix_p( "x\"", * hexbyte, '"' )
327                                                   [ self.set_token_a(AT::HexString,
328                                                                      self.context.str) ]
329                     ;
330
331                 path                    // Returns value in context.path
332                     =    eps_p                    [ clear_a(self.context.path) ]
333                       >> relpath | abspath
334                     ;
335
336                 relpath
337                     =    (   word                 [ push_back_a(self.context.path,
338                                                                 self.context.token) ]
339                            % ch_p('/') )
340                       >> ( ! ch_p('/')            [ push_back_a(self.context.path,
341                                                                 self.context.EmptyToken) ] )
342                     ;
343
344                 abspath
345                     =    ch_p('/')                [ push_back_a(self.context.path,
346                                                                 self.context.EmptyToken) ]
347                       >> ( relpath
348                          | eps_p                  [ push_back_a(self.context.path,
349                                                                 self.context.EmptyToken) ] )
350                     ;
351
352                 balanced_tokens 
353                     =    ch_p('(')                [ self.set_token_a(AT::ArgumentGroupOpen) ]
354                                                   [ self.dispatch(&PD::pushToken, 
355                                                                   boost::ref(self.context.token)) ]
356                       >> * token
357                       >> ch_p(')')                [ self.set_token_a(AT::ArgumentGroupClose) ]
358                                                   [ self.dispatch(&PD::pushToken, 
359                                                                   boost::ref(self.context.token)) ]
360                     ;
361
362                 token
363                     =    simple_argument          [ self.dispatch(&PD::pushToken,
364                                                                   boost::ref(self.context.token)) ]
365                     |    punctuation              [ self.dispatch(&PD::pushToken,
366                                                                   boost::ref(self.context.token)) ]
367                     |    balanced_tokens
368                     ;
369
370                 punctuation             // Returns value in context.str
371                     =    ch_p('/')                [ self.set_token_a(AT::PathSeparator) ]
372                     |    ch_p('{')                [ self.set_token_a(AT::DirectoryGroupOpen) ]
373                     |    ch_p('}')                [ self.set_token_a(AT::DirectoryGroupClose) ]
374                     |    ch_p(';')                [ self.set_token_a(AT::CommandTerminator) ]
375                     |    punctuation_p            [ self.set_token_a(AT::OtherPunctuation) ]
376                     ;
377
378                 word                    // Returns value in context.token
379                     =    lexeme_d
380                          [
381                              eps_p
382                           >> (+ word_p)           [ assign_a(self.context.str) ]
383                          ]
384                       >> eps_p                    [ self.set_token_a(AT::Word, self.context.str) ]
385                     ;
386
387                 hexbyte
388                     =    uint_parser<char, 16, 2, 2>()
389                                                   [ append_a(self.context.str) ]
390                     ;
391
392                 skip
393                     =    space_p | comment_p('#')
394                     ;
395
396                 ///////////////////////////////////////////////////////////////////
397
398                 start_parsers(
399                     commands,           // CommandParser
400                     skip,               // SkipParser
401                     arguments           // ArgumentsParser
402                 );
403
404                 BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
405                 BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
406                 BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
407                 BOOST_SPIRIT_DEBUG_TRACE_RULE(word,1);
408                 BOOST_SPIRIT_DEBUG_TRACE_RULE(string,1);
409                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexstring,1);
410                 BOOST_SPIRIT_DEBUG_TRACE_RULE(token,1);
411                 BOOST_SPIRIT_DEBUG_TRACE_RULE(punctuation,1);
412                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexbyte,1);
413                 BOOST_SPIRIT_DEBUG_TRACE_RULE(balanced_tokens,1);
414                 BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
415                 BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
416                 BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
417                 BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
418                 BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
419                 BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
420                 BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
421                 BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
422             }
423         };
424     };
425
426     template <class ParseDispatcher>
427     Token const CommandGrammar<ParseDispatcher>::Context::EmptyToken;
428
429 #endif
430
431 }}}
432
433 ///////////////////////////////ih.e////////////////////////////////////////
434 #endif
435
436 \f
437 // Local Variables:
438 // mode: c++
439 // fill-column: 100
440 // comment-column: 40
441 // c-file-style: "senf"
442 // indent-tabs-mode: nil
443 // ispell-local-dictionary: "american"
444 // compile-command: "scons -u test"
445 // End: