5fc5c5c849fc805578ae5d11dc75788219ed8d33
[senf.git] / senf / Utils / Console / Parse.ih
1 // $Id$
2 //
3 // Copyright (C) 2008
4 // Fraunhofer Institute for Open Communication Systems (FOKUS)
5 //
6 // The contents of this file are subject to the Fraunhofer FOKUS Public License
7 // Version 1.0 (the "License"); you may not use this file except in compliance
8 // with the License. You may obtain a copy of the License at 
9 // http://senf.berlios.de/license.html
10 //
11 // The Fraunhofer FOKUS Public License Version 1.0 is based on, 
12 // but modifies the Mozilla Public License Version 1.1.
13 // See the full license text for the amendments.
14 //
15 // Software distributed under the License is distributed on an "AS IS" basis, 
16 // WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 
17 // for the specific language governing rights and limitations under the License.
18 //
19 // The Original Code is Fraunhofer FOKUS code.
20 //
21 // The Initial Developer of the Original Code is Fraunhofer-Gesellschaft e.V. 
22 // (registered association), Hansastraße 27 c, 80686 Munich, Germany.
23 // All Rights Reserved.
24 //
25 // Contributor(s):
26 //   Stefan Bund <g0dil@berlios.de>
27
28 /** \file
29     \brief Parse internal header */
30
31 #ifndef IH_SENF_Scheduler_Console_Parse_
32 #define IH_SENF_Scheduler_Console_Parse_ 1
33
34 // Custom includes
35 #include <vector>
36 #include <senf/config.hh>
37
38 #if HAVE_BOOST_SPIRIT_INCLUDE_CLASSIC_HPP
39 #  include <boost/spirit/include/classic.hpp>
40 #  include <boost/spirit/include/classic_grammar_def.hpp>
41 #  include <boost/spirit/include/classic_dynamic.hpp>
42 #  include <boost/spirit/include/phoenix1.hpp>
43 #else
44 #  include <boost/spirit.hpp>
45 #  include <boost/spirit/utility/grammar_def.hpp>
46 #  include <boost/spirit/dynamic.hpp>
47 #  include <boost/spirit/phoenix.hpp>
48 #endif
49
50 #include <senf/Utils/Phoenix.hh>
51
52 //-/////////////////////////////////////////////////////////////////////////////////////////////////
53
54 namespace senf {
55 namespace console {
56 namespace detail {
57
58 #if HAVE_BOOST_SPIRIT_INCLUDE_CLASSIC_HPP
59     namespace boost_spirit = ::boost::spirit::classic;
60 #else
61     namespace boost_spirit = ::boost::spirit;
62 #endif
63
64 #ifndef DOXYGEN
65
66     struct FilePositionWithIndex
67         : public boost_spirit::file_position
68     {
           // A Spirit file_position (constructed from file name, line and column) extended by
           // one additional integer member, 'index'.
           // NOTE(review): 'index' is presumably the absolute character offset into the parsed
           // input -- confirm against the code that produces these positions.
69         int index;
70
           // All arguments are optional. The defaults are an empty file name, line 1, column 1
           // and index 0.
71         FilePositionWithIndex(std::string const & file_ = std::string(),
72                                  int line_ = 1, int column_ = 1, int index_ = 0)
73             : boost_spirit::file_position (file_, line_, column_), index (index_)
74             {}
75
           // Two positions are equal only if the file_position base parts compare equal AND
           // both indices are the same.
76         bool operator==(const FilePositionWithIndex & fp) const
77             {
78                 return boost_spirit::file_position::operator==(fp) && index == fp.index;
79             }
80     };
81
82     struct PositionOf {
           // Function object returning the FilePositionWithIndex belonging to its argument.
           // The nested 'result' template announces the return type for every argument type.
83         template <class A1> struct result { typedef FilePositionWithIndex type; };
           // Generic overload: returns whatever a1.get_position() yields, converted to
           // FilePositionWithIndex.
           // NOTE(review): presumably A1 is a Spirit position_iterator instantiated with
           // FilePositionWithIndex as position type -- confirm against the callers.
84         template <class A1> FilePositionWithIndex operator()(A1 & a1) { return a1.get_position(); }
           // Plain character pointers carry no position information: a default constructed
           // position is returned and the argument is ignored.
85         FilePositionWithIndex operator()(char const * a1) { return FilePositionWithIndex(); }
86     };
87
       // PositionOf wrapped in a ::phoenix::function so that it can be used inside semantic
       // actions of the grammar below, e.g. 'pos_ = positionOf(arg1)'.
88     ::phoenix::function<PositionOf> const positionOf;
89
90     //-/////////////////////////////////////////////////////////////////////////////////////////////
91     // Grammar
92
93     template <class ParseDispatcher>
94     struct CommandGrammar : boost_spirit::grammar<CommandGrammar<ParseDispatcher> >
95     {
96         //-/////////////////////////////////////////////////////////////////////////////////////////
97         // Start rules
98
99         enum { CommandParser, SkipParser, ArgumentsParser, PathParser };
100
101         //-/////////////////////////////////////////////////////////////////////////////////////////
102         // The parse context (variables needed while parsing)
103
104         typedef Token::TokenType TokenType;
105
106         struct Context {
107             std::string str;
108             std::vector<Token> path;
109             char ch;
110             Token token;
111             FilePositionWithIndex pos;
112         };
113
114         Context & context;
115
116         //-/////////////////////////////////////////////////////////////////////////////////////////
117         // Configuration
118
119         bool incremental;
120
121         //-/////////////////////////////////////////////////////////////////////////////////////////
122         // Dispatching semantic actions
123
124         ParseDispatcher & dispatcher;
125
126         //-/////////////////////////////////////////////////////////////////////////////////////////
127         // charachter sets
128
129         static boost_spirit::chset<> special_p;
130         static boost_spirit::chset<> punctuation_p;
131         static boost_spirit::chset<> space_p;
132         static boost_spirit::chset<> invalid_p;
133         static boost_spirit::chset<> word_p;
134         static boost_spirit::distinct_parser<> keyword_p;
135
136         //-/////////////////////////////////////////////////////////////////////////////////////////
137         // Errors
138
139         enum Errors {
140             EndOfStatementExpected,
141             PathExpected,
142             ClosingParenExpected,
143             QuoteExpected
144         };
145
146         //-/////////////////////////////////////////////////////////////////////////////////////////
147
148         CommandGrammar(ParseDispatcher & d, Context & c)
149             : context(c), incremental(false), dispatcher(d) {}
150
151         template <class Scanner>
152         struct definition
153             : public boost_spirit::grammar_def< boost_spirit::rule<Scanner>,
154                                                  boost_spirit::rule<Scanner>,
155                                                  boost_spirit::rule<Scanner>,
156                                                  boost_spirit::rule<Scanner> >
157         {
158             boost_spirit::rule<Scanner> command, path, argument, word, string, hexstring,
159                 word_or_string, token, punctuation, hexbyte, balanced_tokens, simple_argument,
160                 complex_argument, builtin, skip, statement, relpath, abspath, arguments,
161                 group_start, group_close, statement_end, opt_path;
162
163             definition(CommandGrammar const & self)
164             {
165                 using namespace boost_spirit;
166                 using namespace ::phoenix;
167                 using namespace senf::phoenix;
168                 typedef ParseDispatcher PD;
169
170                 actor< variable< char > >                  ch_    (self.context.ch);
171                 actor< variable< std::string > >           str_   (self.context.str);
172                 actor< variable< std::vector<Token> > >    path_  (self.context.path);
173                 actor< variable< Token > >                 token_ (self.context.token);
174                 actor< variable< FilePositionWithIndex > > pos_   (self.context.pos);
175                 actor< variable< ParseDispatcher > >       d_     (self.dispatcher);
176
177                 assertion<Errors> end_of_statement_expected   (EndOfStatementExpected);
178                 assertion<Errors> path_expected               (PathExpected);
179                 assertion<Errors> closing_paren_expected      (ClosingParenExpected);
180                 assertion<Errors> quote_expected              (QuoteExpected);
181
182                 //-/////////////////////////////////////////////////////////////////////////////////
183                 // Spirit grammar
184                 //
185                 // Syntax summary:
186                 // This is EBNF with some minor tweaks to accommodate C++ syntax
187                 //
188                 //   * a        any number of a's
189                 //   + a        at least one a
190                 //   ! a        an optional a
191                 //   a >> b     a followed by b
192                 //   a | b      a or b
193                 //   a % b      any number of a's separated by b's
194                 //   a - b      a but not b
195                 //
196                 // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
197                 // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
198                 // quite readable.
199                 //
200                 //   ch_p             match character
201                 //   eps_p            always matches nothing (to attach unconditional actions)
202                 //   confix_p(a,b,c)  match b, preceded by a and terminated by c. Used to parse
203                 //                    string literals and comments
204                 //   lex_escape_ch_p  match a lex style escape char. This is like a C++ style
205                 //                    literal string escape char, however \x will be replaced by 'x'
206                 //                    for any char 'x' if it has no special meaning.
207                 //   keyword_p        match a delimited keyword
208                 //   comment_p(a,b)   match comment starting with a and terminated with b. b
209                 //                    defaults to end-of-line
210                 //
211                 //   lexeme_d         don't skip whitespace (as defined by the skip parser)
212                 //
213                 // Aligned to the right at column 50 are semantic actions.
214                 //
215                 // For clarity, I have used 'ch_p' explicitly throughout even though it is optional
216                 // in most cases.
217                 //
218                 // More info is in the Boost.Spirit documentation
219
220                 command
221                     =    builtin >> end_of_statement_expected(statement_end)
222                     |    group_close
223                     |    ch_p(';') // Ignore empty commands
224                     |    statement
225                     ;
226
227                 statement
228                     =    path_expected(path)      [ bind(&PD::beginCommand)(d_, path_) ]
229                       >> arguments
230                       >> end_of_statement_expected(
231                            ( group_start | statement_end )
232                                                   [ bind(&PD::endCommand)(d_) ]
233                          )
234                     ;
235
236                 builtin
237                     =    self.keyword_p("cd")
238                       >> path_expected(path)
239                       >> eps_p                    [ bind(&PD::builtin_cd)(d_, path_) ]
240                     |    self.keyword_p("ls")
241                       >> ! path
242                       >> eps_p                    [ bind(&PD::builtin_ls)(d_, path_) ]
243                     |    self.keyword_p("ll")
244                       >> ! path
245                       >> eps_p                    [ bind(&PD::builtin_ll)(d_, path_) ]
246                     |    self.keyword_p("lr")
247                       >> ! path
248                       >> eps_p                    [ bind(&PD::builtin_lr)(d_, path_) ]
249                     |    self.keyword_p("exit")   [ bind(&PD::builtin_exit)(d_) ]
250                     |    self.keyword_p("help")
251                       >> ! path
252                       >> eps_p                    [ bind(&PD::builtin_help)(d_, path_) ]
253                     ;
254
255                 group_start
256                     =    ch_p('{')                [ bind(&PD::pushDirectory)(d_) ]
257                     ;
258
259                 group_close
260                     =    ch_p('}')                [ bind(&PD::popDirectory)(d_) ]
261                     ;
262
263                 arguments
264                     =    * argument
265                     ;
266
267                 argument
268                     =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
269                     |    balanced_tokens
270                     ;
271
272                 simple_argument         // All these return their value in context.token
273                     =    string
274                     |    hexstring
275                     |    word
276                     ;
277
278                 string                  // Returns value in context.token
279                     =    eps_p                    [ pos_ = positionOf(arg1) ][ clear(str_) ]
280                       >> lexeme_d
281                          [
282                              ch_p('"')
283                           >> * ( ( lex_escape_ch_p[ ch_ = arg1 ]
284                                    - '"'
285                                  )                [ str_ += ch_ ]
286                                )
287                           >> quote_expected(ch_p('"'))
288                                                   [ token_ = construct_<Token>(Token::BasicString,
289                                                                                str_,
290                                                                                pos_) ]
291                          ]
292                     ;
293
294                 hexstring               // Returns value in context.token
295                     =    eps_p                    [ pos_ = positionOf(arg1) ][ clear(str_) ]
296                       >>  "x\""
297                       >> * ( hexbyte - ch_p('"') )
298                       >> quote_expected(ch_p('"'))
299                                                   [ token_ = construct_<Token>(Token::HexString,
300                                                                                str_,
301                                                                                pos_) ]
302                     ;
303
304                 opt_path
305                     = ! path                      [ bind(&PD::beginCommand)(d_, path_) ]
306                                                   [ bind(&PD::endCommand)(d_) ]
307                     ;
308
309                 path                    // Returns value in context.path
310                     =    eps_p                    [ clear(path_) ]
311                       >> relpath | abspath
312                     ;
313
314                 relpath
315                     =    (   word_or_string       [ push_back(path_, token_) ]
316                            % +ch_p('/') )
317                       >> ( ! (+ch_p('/') )        [ push_back(path_, construct_<Token>()) ] )
318                     ;
319
320                 abspath
321                     =    (+ch_p('/'))             [ push_back(path_, construct_<Token>()) ]
322                       >> ( relpath
323                          | eps_p                  [ push_back(path_, construct_<Token>()) ] )
324                     ;
325
326                 balanced_tokens
327                     =    eps_p                    [ pos_ = positionOf(arg1) ]
328                       >> ch_p('(')                [ token_ = construct_<Token>(
329                                                         Token::ArgumentGroupOpen,
330                                                         "(",
331                                                         pos_) ]
332                                                   [ bind(&PD::pushToken)(d_, token_) ]
333                       >> * token
334                       >> eps_p                    [ pos_ = positionOf(arg1) ]
335                       >> closing_paren_expected(ch_p(')'))
336                                                   [ token_ = construct_<Token>(
337                                                         Token::ArgumentGroupClose,
338                                                         ")",
339                                                         pos_) ]
340                                                   [ bind(&PD::pushToken)(d_, token_) ]
341                     ;
342
343                 token
344                     =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
345                     |    punctuation              [ bind(&PD::pushToken)(d_, token_) ]
346                     |    balanced_tokens
347                     ;
348
349                 punctuation             // Returns value in context.str
350                     =    eps_p                      [ pos_ = positionOf(arg1) ]
351                       >> (
352                            ch_p('/')                [ token_ = construct_<Token>(
353                                                           Token::PathSeparator,
354                                                           "/") ]
355                          | ch_p('{')                [ token_ = construct_<Token>(
356                                                           Token::DirectoryGroupOpen,
357                                                           "{") ]
358                          | ch_p('}')                [ token_ = construct_<Token>(
359                                                           Token::DirectoryGroupClose,
360                                                           "}") ]
361                          | ch_p(';')                [ token_ = construct_<Token>(
362                                                           Token::CommandTerminator,
363                                                           ";") ]
364                          | self.punctuation_p       [ token_ = construct_<Token>(
365                                                           Token::OtherPunctuation,
366                                                           construct_<std::string>(1u, arg1),
367                                                           pos_) ]
368                         )
369                     ;
370
371                 word                    // Returns value in context.token
372                     =    eps_p                    [ pos_ = positionOf(arg1) ]
373                       >> lexeme_d
374                          [
375                              (+ self.word_p)      [ str_ = construct_<std::string>(arg1, arg2) ]
376                          ]
377                       >> eps_p                    [ token_ = construct_<Token>(
378                                                         Token::Word,
379                                                         str_,
380                                                         pos_) ]
381                     ;
382
383                 word_or_string
384                     =    word
385                     |    string
386                     ;
387
388                 hexbyte
389                     =    uint_parser<char, 16, 2, 2>()
390                                                   [ push_back(str_, arg1) ]
391                     ;
392
393                 statement_end
394                     =    if_p(var(self.incremental)) [
395                                ch_p(';')
396                          ]
397                          .else_p [
398                                ch_p(';')
399                              | end_p
400                          ]
401                     ;
402
403                 skip
404                     =    self.space_p | comment_p('#')
405                     ;
406
407                 //-/////////////////////////////////////////////////////////////////////////////////
408
409                 start_parsers(
410                     command,            // CommandParser
411                     skip,               // SkipParser
412                     arguments,          // ArgumentsParser
413                     opt_path            // PathParser
414                 );
415
416                 BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
417                 BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
418                 BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
419                 BOOST_SPIRIT_DEBUG_TRACE_RULE(word,1);
420                 BOOST_SPIRIT_DEBUG_TRACE_RULE(string,1);
421                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexstring,1);
422                 BOOST_SPIRIT_DEBUG_TRACE_RULE(token,1);
423                 BOOST_SPIRIT_DEBUG_TRACE_RULE(punctuation,1);
424                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexbyte,1);
425                 BOOST_SPIRIT_DEBUG_TRACE_RULE(balanced_tokens,1);
426                 BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
427                 BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
428                 BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
429                 BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
430                 BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
431                 BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
432                 BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
433                 BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
434             }
435         };
436     };
437
438     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::special_p (
            // Characters which are never part of a word (they are removed from word_p below)
439         "/(){};\"");
        // Characters accepted by the 'punctuation' rule as Token::OtherPunctuation
440     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::punctuation_p (
441         ",=");
        // Whitespace; used by the 'skip' rule of the grammar
442     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::space_p (
443         " \t\n\r");
        // NUL and the characters \x01 .. \x20, except those contained in space_p
444     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::invalid_p (
445         (boost_spirit::chset<>('\0') | boost_spirit::chset<>("\x01-\x20")) - space_p );
        // Every character which is not in one of the sets above may be part of a word
446     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::word_p (
447         boost_spirit::anychar_p - special_p - punctuation_p - space_p - invalid_p);
        // Keyword parser generator; constructed from word characters and '/'.
        // NOTE(review): presumably this is the set of characters which must not directly follow
        // a keyword (so that e.g. 'ls' does not match the beginning of a longer word or of a
        // path) -- confirm with the distinct_parser documentation.
        // NOTE(review): invalid_p, word_p and keyword_p are initialized from other static members
        // of this class template; the initialization order of such members looks like it is not
        // guaranteed by the language -- confirm that this is safe on all supported compilers.
448     template <class PD> boost_spirit::distinct_parser<> CommandGrammar<PD>::keyword_p (
449         word_p | boost_spirit::ch_p('/'));
450
451 #endif
452
453 }}}
454
455 //-/////////////////////////////////////////////////////////////////////////////////////////////////
456 #endif
457
458 \f
459 // Local Variables:
460 // mode: c++
461 // fill-column: 100
462 // comment-column: 40
463 // c-file-style: "senf"
464 // indent-tabs-mode: nil
465 // ispell-local-dictionary: "american"
466 // compile-command: "scons -u test"
467 // End: