e2859ff2db84be28c49d778deb53583fff64a9bf
[senf.git] / Console / Parse.ih
1 // $Id$
2 //
3 // Copyright (C) 2008 
4 // Fraunhofer Institute for Open Communication Systems (FOKUS)
5 // Competence Center NETwork research (NET), St. Augustin, GERMANY
6 //     Stefan Bund <g0dil@berlios.de>
7 //
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 2 of the License, or
11 // (at your option) any later version.
12 //
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 // GNU General Public License for more details.
17 //
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the
20 // Free Software Foundation, Inc.,
21 // 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22
23 /** \file
24     \brief Parse internal header */
25
26 #ifndef IH_Parse_
27 #define IH_Parse_ 1
28
29 // Custom includes
30 #include <vector>
31 #include <boost/spirit.hpp>
32 #include <boost/spirit/utility/grammar_def.hpp>
33 #include <boost/spirit/dynamic.hpp>
34 #include <boost/spirit/phoenix.hpp>
35 #include "../Utils/Phoenix.hh"
36
37 ///////////////////////////////ih.p////////////////////////////////////////
38
39 namespace senf {
40 namespace console {
41 namespace detail {
42
43 #ifndef DOXYGEN
44
45     ///////////////////////////////////////////////////////////////////////////
46     // Grammar
47
48     template <class ParseDispatcher>
49     struct CommandGrammar : boost::spirit::grammar<CommandGrammar<ParseDispatcher> >
50     {
51         ///////////////////////////////////////////////////////////////////////////
52         // Start rules
53
54         enum { CommandParser, SkipParser, ArgumentsParser };
55
56         ///////////////////////////////////////////////////////////////////////////
57         // The parse context (variables needed while parsing)
58
59         typedef Token::TokenType TokenType;
60
61         struct Context {
62             std::string str;
63             std::vector<Token> path;
64             char ch;
65             Token token;
66         };
67
68         Context & context;
69
70         ///////////////////////////////////////////////////////////////////////////
71         // Configuration
72
73         bool incremental;
74
75         ///////////////////////////////////////////////////////////////////////////
76         // Dispatching semantic actions
77
78         ParseDispatcher & dispatcher;
79
80         ///////////////////////////////////////////////////////////////////////////
81
82         CommandGrammar(ParseDispatcher & d, Context & c) 
83             : context(c), incremental(false), dispatcher(d) {}
84
85         template <class Scanner>
86         struct definition 
87             : public boost::spirit::grammar_def< boost::spirit::rule<Scanner>, 
88                                                  boost::spirit::rule<Scanner>,
89                                                  boost::spirit::rule<Scanner> >
90         {
91             boost::spirit::rule<Scanner> command, path, argument, word, string, hexstring, token,
92                 punctuation, hexbyte, balanced_tokens, simple_argument, complex_argument, builtin, 
93                 skip, statement, relpath, abspath, arguments, group_start, group_close, 
94                 statement_end;
95             boost::spirit::chset<> special_p, punctuation_p, space_p, invalid_p, word_p;
96             boost::spirit::distinct_parser<> keyword_p;
97
98             definition(CommandGrammar const & self) : 
99
100                 // Characters with a special meaning within the parser
101                 special_p ("/(){};"),
102
103                 // Additional characters which are returned as punctuation tokens
104                 // (only allowed within '()').
105                 punctuation_p (",="),
106
107                 // Whitespace characters
108                 space_p (" \t\n\r"),
109
110                 // Invalid characters: All chars below \x20 (space) which are not space_p
111                 // (don't put a \0 in the chset<> argument *string* ...)
112                 invalid_p (
113                     boost::spirit::chset<>('\0') | boost::spirit::chset<>("\x01-\x20") - space_p ),
114
115                 // Valid word characters
116                 word_p (
117                     boost::spirit::anychar_p - special_p - punctuation_p - space_p - invalid_p),
118
119                 // Keywords must not be followed by a word char or '/'
120                 keyword_p ( word_p | boost::spirit::ch_p('/') )
121
122             {
123                 using namespace boost::spirit;
124                 using namespace ::phoenix;
125                 using namespace senf::phoenix;
126                 typedef ParseDispatcher PD;
127
128                 actor< variable< char > >               ch_    (self.context.ch);
129                 actor< variable< std::string > >        str_   (self.context.str);
130                 actor< variable< std::vector<Token> > > path_  (self.context.path);
131                 actor< variable< Token > >              token_ (self.context.token);
132                 actor< variable< ParseDispatcher > >    d_     (self.dispatcher);
133
134                 ///////////////////////////////////////////////////////////////////
135                 // Spirit grammar
136                 //
137                 // Syntax summary:
138                 // This is EBNF with some minor tweaks to accommodate C++ syntax
139                 //
140                 //   * a        any number of a's
141                 //   + a        at least one a
142                 //   ! a        an optional a
143                 //   a >> b     a followed by b
144                 //   a | b      a or b
145                 //   a % b      any number of a's separated by b's
146                 //   a - b      a but not b
147                 //
148                 // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
149                 // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
150                 // quite readable.
151                 //   
152                 //   ch_p             match character
153                 //   eps_p            always matches nothing (to attach unconditional actions)
154                 //   confix_p(a,b,c)  match b, preceded by a and terminated by c. Used to parse
155                 //                    string literals and comments
156                 //   lex_escape_ch_p  match a lex style escape char. This is like a C++ style
157                 //                    literal string escape char, however \x will be replaced by 'x'
158                 //                    for any char 'x' if it has no special meaning.
159                 //   keyword_p        match a delimited keyword
160                 //   comment_p(a,b)   match comment starting with a and terminated with b. b
161                 //                    defaults to end-of-line
162                 //
163                 //   lexeme_d         don't skip whitespace (as defined by the skip parser)
164                 //
165                 // Aligned to the right at column 50 are semantic actions.
166                 //
167                 // For clarity, I have used 'ch_p' explicitly throughout even though it is optional
168                 // in most cases.
169                 //
170                 // More info is in the Boost.Spirit documentation
171
172                 command 
173                     =    builtin >> statement_end
174                     |    path >> ( group_start | statement )
175                     |    group_close
176                     |    ch_p(';') // Ignore empty commands
177                     ;
178
179                 builtin
180                     =    keyword_p("cd") 
181                       >> path
182                       >> eps_p                    [ bind(&PD::builtin_cd)(d_, path_) ]
183                     |    keyword_p("ls")
184                       >> ! path
185                       >> eps_p                    [ bind(&PD::builtin_ls)(d_, path_) ]
186                     |    keyword_p("exit")        [ bind(&PD::builtin_exit)(d_) ]
187                     |    keyword_p("help")
188                       >> ! path
189                       >> eps_p                    [ bind(&PD::builtin_help)(d_, path_) ]
190                     ;
191
192                 group_start
193                     =    ch_p('{')                [ bind(&PD::pushDirectory)(d_, path_) ]
194                     ;
195
196                 group_close
197                     =    ch_p('}')                [ bind(&PD::popDirectory)(d_) ]
198                     ;
199
200                 statement
201                     =    eps_p                    [ bind(&PD::beginCommand)(d_, path_) ]
202                       >> arguments
203                       >> statement_end            [ bind(&PD::endCommand)(d_) ]
204                     ;
205
206                 arguments
207                     =    * argument
208                     ;
209
210                 argument
211                     =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
212                     |    balanced_tokens
213                     ;
214                 
215                 simple_argument         // All these return their value in context.token
216                     =    string
217                     |    hexstring
218                     |    word
219                     ;
220                 
221                 string                  // Returns value in context.token
222                     =    eps_p                    [ clear(str_) ]
223                       >> lexeme_d
224                          [
225                              ch_p('"')
226                           >> * ( ( lex_escape_ch_p[ ch_ = arg1 ] 
227                                    - '"' 
228                                  )                [ str_ += ch_ ]
229                                )
230                           >> ch_p('"')            [ token_ = construct_<Token>(Token::BasicString, 
231                                                                                str_) ]
232                          ]
233                     ;
234
235                 hexstring               // Returns value in context.token
236                     =    eps_p                    [ clear(str_) ]
237                       >> confix_p( "x\"", * hexbyte, '"' )
238                                                   [ token_ = construct_<Token>(Token::HexString,
239                                                                                str_) ]
240                     ;
241
242                 path                    // Returns value in context.path
243                     =    eps_p                    [ clear(path_) ]
244                       >> relpath | abspath
245                     ;
246
247                 relpath
248                     =    (   word                 [ push_back(path_, token_) ]
249                            % ch_p('/') )
250                       >> ( ! ch_p('/')            [ push_back(path_, construct_<Token>()) ] )
251                     ;
252
253                 abspath
254                     =    ch_p('/')                [ push_back(path_, construct_<Token>()) ]
255                       >> ( relpath
256                          | eps_p                  [ push_back(path_, construct_<Token>()) ] )
257                     ;
258
259                 balanced_tokens 
260                     =    ch_p('(')                [ token_ = construct_<Token>(
261                                                         Token::ArgumentGroupOpen,
262                                                         "(") ]
263                                                   [ bind(&PD::pushToken)(d_, token_) ]
264                       >> * token
265                       >> ch_p(')')                [ token_ = construct_<Token>(
266                                                         Token::ArgumentGroupClose,
267                                                         ")") ]
268                                                   [ bind(&PD::pushToken)(d_, token_) ]
269                     ;
270
271                 token
272                     =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
273                     |    punctuation              [ bind(&PD::pushToken)(d_, token_) ]
274                     |    balanced_tokens
275                     ;
276
277                 punctuation             // Returns value in context.str
278                     =    ch_p('/')                [ token_ = construct_<Token>(
279                                                         Token::PathSeparator,
280                                                         "/") ]
281                     |    ch_p('{')                [ token_ = construct_<Token>(
282                                                         Token::DirectoryGroupOpen,
283                                                         "{") ]
284                     |    ch_p('}')                [ token_ = construct_<Token>(
285                                                         Token::DirectoryGroupClose,
286                                                         "}") ]
287                     |    ch_p(';')                [ token_ = construct_<Token>(
288                                                         Token::CommandTerminator,
289                                                         ";") ]
290                     |    punctuation_p            [ token_ = construct_<Token>(
291                                                         Token::OtherPunctuation,
292                                                         construct_<std::string>(1u, arg1)) ]
293                     ;
294
295                 word                    // Returns value in context.token
296                     =    lexeme_d
297                          [
298                              (+ word_p)           [ str_ = construct_<std::string>(arg1, arg2) ]
299                          ]
300                       >> eps_p                    [ token_ = construct_<Token>(
301                                                         Token::Word, 
302                                                         str_) ]
303                     ;
304
305                 hexbyte
306                     =    uint_parser<char, 16, 2, 2>()
307                                                   [ push_back(str_, arg1) ]
308                     ;
309
310                 statement_end
311                     =    if_p(var(self.incremental)) [
312                                ch_p(';')
313                          ]
314                          .else_p [
315                                ch_p(';') 
316                              | end_p
317                          ]
318                     ;
319
320                 skip
321                     =    space_p | comment_p('#')
322                     ;
323
324                 ///////////////////////////////////////////////////////////////////
325
326                 start_parsers(
327                     command,            // CommandParser
328                     skip,               // SkipParser
329                     arguments           // ArgumentsParser
330                 );
331
332                 BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
333                 BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
334                 BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
335                 BOOST_SPIRIT_DEBUG_TRACE_RULE(word,1);
336                 BOOST_SPIRIT_DEBUG_TRACE_RULE(string,1);
337                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexstring,1);
338                 BOOST_SPIRIT_DEBUG_TRACE_RULE(token,1);
339                 BOOST_SPIRIT_DEBUG_TRACE_RULE(punctuation,1);
340                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexbyte,1);
341                 BOOST_SPIRIT_DEBUG_TRACE_RULE(balanced_tokens,1);
342                 BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
343                 BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
344                 BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
345                 BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
346                 BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
347                 BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
348                 BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
349                 BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
350             }
351         };
352     };
353
354 #endif
355
356 }}}
357
358 ///////////////////////////////ih.e////////////////////////////////////////
359 #endif
360
361 \f
362 // Local Variables:
363 // mode: c++
364 // fill-column: 100
365 // comment-column: 40
366 // c-file-style: "senf"
367 // indent-tabs-mode: nil
368 // ispell-local-dictionary: "american"
369 // compile-command: "scons -u test"
370 // End: