Move sourcecode into 'senf/' directory
[senf.git] / senf / Utils / Console / Parse.ih
1 // $Id$
2 //
3 // Copyright (C) 2008 
4 // Fraunhofer Institute for Open Communication Systems (FOKUS)
5 // Competence Center NETwork research (NET), St. Augustin, GERMANY
6 //     Stefan Bund <g0dil@berlios.de>
7 //
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 2 of the License, or
11 // (at your option) any later version.
12 //
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 // GNU General Public License for more details.
17 //
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the
20 // Free Software Foundation, Inc.,
21 // 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22
23 /** \file
24     \brief Parse internal header */
25
26 #ifndef IH_SENF_Scheduler_Console_Parse_
27 #define IH_SENF_Scheduler_Console_Parse_ 1
28
29 // Custom includes
30 #include <vector>
31 #include "../../config.hh"
32 #include <boost/spirit.hpp>
33 #include <boost/spirit/utility/grammar_def.hpp>
34 #include <boost/spirit/dynamic.hpp>
35 #include <boost/spirit/phoenix.hpp>
36 #include "../../Utils/Phoenix.hh"
37
38 ///////////////////////////////ih.p////////////////////////////////////////
39
40 namespace senf {
41 namespace console {
42 namespace detail {
43
44 #ifndef DOXYGEN
45
46     struct FilePositionWithIndex 
47         : public boost::spirit::file_position
48     {
49         int index;
50         
51         FilePositionWithIndex(std::string const & file_ = std::string(),
52                                  int line_ = 1, int column_ = 1, int index_ = 0)
53             : boost::spirit::file_position (file_, line_, column_), index (index_)
54             {}
55
56         bool operator==(const FilePositionWithIndex & fp) const
57             {
58                 return boost::spirit::file_position::operator==(fp) && index == fp.index; 
59             }
60     };
61
62     struct PositionOf {
63         template <class A1> struct result { typedef FilePositionWithIndex type; };
64         template <class A1> FilePositionWithIndex operator()(A1 & a1) { return a1.get_position(); }
65         FilePositionWithIndex operator()(char const * a1) { return FilePositionWithIndex(); }
66     };
67
68     ::phoenix::function<PositionOf> const positionOf;
69
70     ///////////////////////////////////////////////////////////////////////////
71     // Grammar
72
73     template <class ParseDispatcher>
74     struct CommandGrammar : boost::spirit::grammar<CommandGrammar<ParseDispatcher> >
75     {
76         ///////////////////////////////////////////////////////////////////////////
77         // Start rules
78
79         enum { CommandParser, SkipParser, ArgumentsParser, PathParser };
80
81         ///////////////////////////////////////////////////////////////////////////
82         // The parse context (variables needed while parsing)
83
84         typedef Token::TokenType TokenType;
85
86         struct Context {
87             std::string str;
88             std::vector<Token> path;
89             char ch;
90             Token token;
91             FilePositionWithIndex pos;
92         };
93
94         Context & context;
95
96         ///////////////////////////////////////////////////////////////////////////
97         // Configuration
98
99         bool incremental;
100
101         ///////////////////////////////////////////////////////////////////////////
102         // Dispatching semantic actions
103
104         ParseDispatcher & dispatcher;
105
106         //////////////////////////////////////////////////////////////////////////
107         // charachter sets
108         
109         static boost::spirit::chset<> special_p;
110         static boost::spirit::chset<> punctuation_p;
111         static boost::spirit::chset<> space_p;
112         static boost::spirit::chset<> invalid_p;
113         static boost::spirit::chset<> word_p;
114         static boost::spirit::distinct_parser<> keyword_p;
115
116         ///////////////////////////////////////////////////////////////////////////
117         // Errors
118
119         enum Errors {
120             EndOfStatementExpected,
121             PathExpected,
122             ClosingParenExpected,
123             QuoteExpected
124         };
125
126         ///////////////////////////////////////////////////////////////////////////
127
128         CommandGrammar(ParseDispatcher & d, Context & c) 
129             : context(c), incremental(false), dispatcher(d) {}
130
131         template <class Scanner>
132         struct definition 
133             : public boost::spirit::grammar_def< boost::spirit::rule<Scanner>, 
134                                                  boost::spirit::rule<Scanner>,
135                                                  boost::spirit::rule<Scanner>,
136                                                  boost::spirit::rule<Scanner> >
137         {
138             boost::spirit::rule<Scanner> command, path, argument, word, string, hexstring, token,
139                 punctuation, hexbyte, balanced_tokens, simple_argument, complex_argument, builtin, 
140                 skip, statement, relpath, abspath, arguments, group_start, group_close, 
141                 statement_end, opt_path;
142
143             definition(CommandGrammar const & self)
144             {
145                 using namespace boost::spirit;
146                 using namespace ::phoenix;
147                 using namespace senf::phoenix;
148                 typedef ParseDispatcher PD;
149
150                 actor< variable< char > >                  ch_    (self.context.ch);
151                 actor< variable< std::string > >           str_   (self.context.str);
152                 actor< variable< std::vector<Token> > >    path_  (self.context.path);
153                 actor< variable< Token > >                 token_ (self.context.token);
154                 actor< variable< FilePositionWithIndex > > pos_   (self.context.pos);
155                 actor< variable< ParseDispatcher > >       d_     (self.dispatcher);
156
157                 assertion<Errors> end_of_statement_expected   (EndOfStatementExpected);
158                 assertion<Errors> path_expected               (PathExpected);
159                 assertion<Errors> closing_paren_expected      (ClosingParenExpected);
160                 assertion<Errors> quote_expected              (QuoteExpected);
161
162                 ///////////////////////////////////////////////////////////////////
163                 // Spirit grammar
164                 //
165                 // Syntax summary:
166                 // This is EBNF with some minor tweaks to accommodate C++ syntax
167                 //
168                 //   * a        any number of a's
169                 //   + a        at least one a
170                 //   ! a        an optional a
171                 //   a >> b     a followed by b
172                 //   a | b      a or b
173                 //   a % b      any number of a's separated by b's
174                 //   a - b      a but not b
175                 //
176                 // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
177                 // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
178                 // quite readable.
179                 //   
180                 //   ch_p             match character
181                 //   eps_p            always matches nothing (to attach unconditional actions)
182                 //   confix_p(a,b,c)  match b, preceded by a and terminated by c. Used to parse
183                 //                    string literals and comments
184                 //   lex_escape_ch_p  match a lex style escape char. This is like a C++ style
185                 //                    literal string escape char, however \x will be replaced by 'x'
186                 //                    for any char 'x' if it has no special meaning.
187                 //   keyword_p        match a delimited keyword
188                 //   comment_p(a,b)   match comment starting with a and terminated with b. b
189                 //                    defaults to end-of-line
190                 //
191                 //   lexeme_d         don't skip whitespace (as defined by the skip parser)
192                 //
193                 // Aligned to the right at column 50 are semantic actions.
194                 //
195                 // For clarity, I have used 'ch_p' explicitly throughout even though it is optional
196                 // in most cases.
197                 //
198                 // More info is in the Boost.Spirit documentation
199
200                 command 
201                     =    builtin >> end_of_statement_expected(statement_end)
202                     |    group_close
203                     |    ch_p(';') // Ignore empty commands
204                     |    statement
205                     ;
206
207                 statement
208                     =    path_expected(path)      [ bind(&PD::beginCommand)(d_, path_) ]
209                       >> arguments
210                       >> end_of_statement_expected( 
211                            ( group_start | statement_end )
212                                                   [ bind(&PD::endCommand)(d_) ]
213                          )
214                     ;
215
216                 builtin
217                     =    self.keyword_p("cd") 
218                       >> path_expected(path)
219                       >> eps_p                    [ bind(&PD::builtin_cd)(d_, path_) ]
220                     |    self.keyword_p("ls")
221                       >> ! path
222                       >> eps_p                    [ bind(&PD::builtin_ls)(d_, path_) ]
223                     |    self.keyword_p("ll")
224                       >> ! path
225                       >> eps_p                    [ bind(&PD::builtin_ll)(d_, path_) ]
226                     |    self.keyword_p("lr")
227                       >> ! path
228                       >> eps_p                    [ bind(&PD::builtin_lr)(d_, path_) ]
229                     |    self.keyword_p("exit")   [ bind(&PD::builtin_exit)(d_) ]
230                     |    self.keyword_p("help")
231                       >> ! path
232                       >> eps_p                    [ bind(&PD::builtin_help)(d_, path_) ]
233                     ;
234
235                 group_start
236                     =    ch_p('{')                [ bind(&PD::pushDirectory)(d_) ]
237                     ;
238
239                 group_close
240                     =    ch_p('}')                [ bind(&PD::popDirectory)(d_) ]
241                     ;
242
243                 arguments
244                     =    * argument
245                     ;
246
247                 argument
248                     =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
249                     |    balanced_tokens
250                     ;
251                 
252                 simple_argument         // All these return their value in context.token
253                     =    string
254                     |    hexstring
255                     |    word
256                     ;
257                 
258                 string                  // Returns value in context.token
259                     =    eps_p                    [ pos_ = positionOf(arg1) ][ clear(str_) ]
260                       >> lexeme_d
261                          [
262                              ch_p('"')
263                           >> * ( ( lex_escape_ch_p[ ch_ = arg1 ] 
264                                    - '"' 
265                                  )                [ str_ += ch_ ]
266                                )
267                           >> quote_expected(ch_p('"'))
268                                                   [ token_ = construct_<Token>(Token::BasicString, 
269                                                                                str_,
270                                                                                pos_) ]
271                          ]
272                     ;
273
274                 hexstring               // Returns value in context.token
275                     =    eps_p                    [ pos_ = positionOf(arg1) ][ clear(str_) ]
276                       >>  "x\""
277                       >> * ( hexbyte - ch_p('"') )
278                       >> quote_expected(ch_p('"'))
279                                                   [ token_ = construct_<Token>(Token::HexString,
280                                                                                str_,
281                                                                                pos_) ]
282                     ;
283                 
284                 opt_path
285                     = ! path                      [ bind(&PD::beginCommand)(d_, path_) ]
286                                                   [ bind(&PD::endCommand)(d_) ]
287                     ;
288
289                 path                    // Returns value in context.path
290                     =    eps_p                    [ clear(path_) ]
291                       >> relpath | abspath
292                     ;
293
294                 relpath
295                     =    (   word                 [ push_back(path_, token_) ]
296                            % ch_p('/') )
297                       >> ( ! ch_p('/')            [ push_back(path_, construct_<Token>()) ] )
298                     ;
299
300                 abspath
301                     =    ch_p('/')                [ push_back(path_, construct_<Token>()) ]
302                       >> ( relpath
303                          | eps_p                  [ push_back(path_, construct_<Token>()) ] )
304                     ;
305
306                 balanced_tokens 
307                     =    eps_p                    [ pos_ = positionOf(arg1) ]
308                       >> ch_p('(')                [ token_ = construct_<Token>(
309                                                         Token::ArgumentGroupOpen,
310                                                         "(",
311                                                         pos_) ]
312                                                   [ bind(&PD::pushToken)(d_, token_) ]
313                       >> * token
314                       >> eps_p                    [ pos_ = positionOf(arg1) ]
315                       >> closing_paren_expected(ch_p(')'))
316                                                   [ token_ = construct_<Token>(
317                                                         Token::ArgumentGroupClose,
318                                                         ")",
319                                                         pos_) ]
320                                                   [ bind(&PD::pushToken)(d_, token_) ]
321                     ;
322
323                 token
324                     =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
325                     |    punctuation              [ bind(&PD::pushToken)(d_, token_) ]
326                     |    balanced_tokens
327                     ;
328
329                 punctuation             // Returns value in context.str
330                     =    eps_p                      [ pos_ = positionOf(arg1) ]
331                       >> (
332                            ch_p('/')                [ token_ = construct_<Token>(
333                                                           Token::PathSeparator,
334                                                           "/") ]
335                          | ch_p('{')                [ token_ = construct_<Token>(
336                                                           Token::DirectoryGroupOpen,
337                                                           "{") ]
338                          | ch_p('}')                [ token_ = construct_<Token>(
339                                                           Token::DirectoryGroupClose,
340                                                           "}") ]
341                          | ch_p(';')                [ token_ = construct_<Token>(
342                                                           Token::CommandTerminator,
343                                                           ";") ]
344                          | self.punctuation_p       [ token_ = construct_<Token>(
345                                                           Token::OtherPunctuation,
346                                                           construct_<std::string>(1u, arg1),
347                                                           pos_) ]
348                         )
349                     ;
350
351                 word                    // Returns value in context.token
352                     =    eps_p                    [ pos_ = positionOf(arg1) ]
353                       >> lexeme_d
354                          [
355                              (+ self.word_p)      [ str_ = construct_<std::string>(arg1, arg2) ]
356                          ]
357                       >> eps_p                    [ token_ = construct_<Token>(
358                                                         Token::Word, 
359                                                         str_,
360                                                         pos_) ]
361                     ;
362
363                 hexbyte
364                     =    uint_parser<char, 16, 2, 2>()
365                                                   [ push_back(str_, arg1) ]
366                     ;
367
368                 statement_end
369                     =    if_p(var(self.incremental)) [
370                                ch_p(';')
371                          ]
372                          .else_p [
373                                ch_p(';') 
374                              | end_p
375                          ]
376                     ;
377
378                 skip
379                     =    self.space_p | comment_p('#')
380                     ;
381
382                 ///////////////////////////////////////////////////////////////////
383
384                 start_parsers(
385                     command,            // CommandParser
386                     skip,               // SkipParser
387                     arguments,          // ArgumentsParser
388                     opt_path            // PathParser
389                 );
390
391                 BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
392                 BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
393                 BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
394                 BOOST_SPIRIT_DEBUG_TRACE_RULE(word,1);
395                 BOOST_SPIRIT_DEBUG_TRACE_RULE(string,1);
396                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexstring,1);
397                 BOOST_SPIRIT_DEBUG_TRACE_RULE(token,1);
398                 BOOST_SPIRIT_DEBUG_TRACE_RULE(punctuation,1);
399                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexbyte,1);
400                 BOOST_SPIRIT_DEBUG_TRACE_RULE(balanced_tokens,1);
401                 BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
402                 BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
403                 BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
404                 BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
405                 BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
406                 BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
407                 BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
408                 BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
409             }
410         };
411     };
412
413     template <class PD> boost::spirit::chset<> CommandGrammar<PD>::special_p (
414         "/(){};\"");
415     template <class PD> boost::spirit::chset<> CommandGrammar<PD>::punctuation_p (
416         ",=");
417     template <class PD> boost::spirit::chset<> CommandGrammar<PD>::space_p (
418         " \t\n\r");
419     template <class PD> boost::spirit::chset<> CommandGrammar<PD>::invalid_p ( 
420         (boost::spirit::chset<>('\0') | boost::spirit::chset<>("\x01-\x20")) - space_p );
421     template <class PD> boost::spirit::chset<> CommandGrammar<PD>::word_p (
422         boost::spirit::anychar_p - special_p - punctuation_p - space_p - invalid_p);
423     template <class PD> boost::spirit::distinct_parser<> CommandGrammar<PD>::keyword_p (
424         word_p | boost::spirit::ch_p('/'));
425
426 #endif
427
428 }}}
429
430 ///////////////////////////////ih.e////////////////////////////////////////
431 #endif
432
433 \f
434 // Local Variables:
435 // mode: c++
436 // fill-column: 100
437 // comment-column: 40
438 // c-file-style: "senf"
439 // indent-tabs-mode: nil
440 // ispell-local-dictionary: "american"
441 // compile-command: "scons -u test"
442 // End: