senf/Utils/Console/Parse.ih

   1 // $Id$
   2 //
   3 // Copyright (C) 2008
   4 // Fraunhofer Institute for Open Communication Systems (FOKUS)
   5 //
   6 // The contents of this file are subject to the Fraunhofer FOKUS Public License
   7 // Version 1.0 (the "License"); you may not use this file except in compliance
   8 // with the License. You may obtain a copy of the License at
   9 // http://senf.berlios.de/license.html
  10 //
  11 // The Fraunhofer FOKUS Public License Version 1.0 is based on,
  12 // but modifies the Mozilla Public License Version 1.1.
  13 // See the full license text for the amendments.
  14 //
  15 // Software distributed under the License is distributed on an "AS IS" basis,
  16 // WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  17 // for the specific language governing rights and limitations under the License.
  18 //
  19 // The Original Code is Fraunhofer FOKUS code.
  20 //
  21 // The Initial Developer of the Original Code is Fraunhofer-Gesellschaft e.V.
  22 // (registered association), Hansastraße 27 c, 80686 Munich, Germany.
  23 // All Rights Reserved.
  24 //
  25 // Contributor(s):
  26 //   Stefan Bund <g0dil@berlios.de>
  27
  28 /** \file
  29     \brief Parse internal header */
  30
  31 #ifndef IH_SENF_Scheduler_Console_Parse_
  32 #define IH_SENF_Scheduler_Console_Parse_ 1
  33
  34 // Custom includes
  35 #include <vector>
  36 #include <senf/config.hh>
  37
  38 #if HAVE_BOOST_SPIRIT_INCLUDE_CLASSIC_HPP
  39 #  include <boost/spirit/include/classic.hpp>
  40 #  include <boost/spirit/include/classic_grammar_def.hpp>
  41 #  include <boost/spirit/include/classic_dynamic.hpp>
  42 #  include <boost/spirit/include/phoenix1.hpp>
  43 #else
  44 #  include <boost/spirit.hpp>
  45 #  include <boost/spirit/utility/grammar_def.hpp>
  46 #  include <boost/spirit/dynamic.hpp>
  47 #  include <boost/spirit/phoenix.hpp>
  48 #endif
  49
  50 #include <senf/Utils/Phoenix.hh>
  51
  52 //-/////////////////////////////////////////////////////////////////////////////////////////////////
  53
  54 namespace senf {
  55 namespace console {
  56 namespace detail {
  57
  58 #if HAVE_BOOST_SPIRIT_INCLUDE_CLASSIC_HPP
         // The 'classic' Spirit headers are in use: the library lives in boost::spirit::classic.
  59     namespace boost_spirit = ::boost::spirit::classic;
  60 #else
         // The legacy <boost/spirit.hpp> headers are in use: the library lives in boost::spirit.
  61     namespace boost_spirit = ::boost::spirit;
  62 #endif
  63
  64 #ifndef DOXYGEN
  65
  66     struct FilePositionWithIndex
  67         : public boost_spirit::file_position
  68     {
  69         int index;
  70
  71         FilePositionWithIndex(std::string const & file_ = std::string(),
  72                                  int line_ = 1, int column_ = 1, int index_ = 0)
  73             : boost_spirit::file_position (file_, line_, column_), index (index_)
  74             {}
  75
  76         bool operator==(const FilePositionWithIndex & fp) const
  77             {
  78                 return boost_spirit::file_position::operator==(fp) && index == fp.index;
  79             }
  80     };
  81
  82     struct PositionOf {
  83         template <class A1> struct result { typedef FilePositionWithIndex type; };
  84         template <class A1> FilePositionWithIndex operator()(A1 & a1) { return a1.get_position(); }
  85         FilePositionWithIndex operator()(char const * a1) { return FilePositionWithIndex(); }
  86     };
  87
  88     ::phoenix::function<PositionOf> const positionOf;
  89
  90     //-/////////////////////////////////////////////////////////////////////////////////////////////
  91     // Grammar
  92
  93     template <class ParseDispatcher>
  94     struct CommandGrammar : boost_spirit::grammar<CommandGrammar<ParseDispatcher> >
  95     {
  96         //-/////////////////////////////////////////////////////////////////////////////////////////
  97         // Start rules
  98
  99         enum { CommandParser, SkipParser, ArgumentsParser, PathParser };
 100
 101         //-/////////////////////////////////////////////////////////////////////////////////////////
 102         // The parse context (variables needed while parsing)
 103
 104         typedef Token::TokenType TokenType;
 105
 106         struct Context {
 107             std::string str;
 108             std::vector<Token> path;
 109             char ch;
 110             Token token;
 111             FilePositionWithIndex pos;
 112         };
 113
 114         Context & context;
 115
 116         //-/////////////////////////////////////////////////////////////////////////////////////////
 117         // Configuration
 118
 119         bool incremental;
 120
 121         //-/////////////////////////////////////////////////////////////////////////////////////////
 122         // Dispatching semantic actions
 123
 124         ParseDispatcher & dispatcher;
 125
 126         //-/////////////////////////////////////////////////////////////////////////////////////////
 127         // charachter sets
 128
 129         static boost_spirit::chset<> special_p;
 130         static boost_spirit::chset<> punctuation_p;
 131         static boost_spirit::chset<> space_p;
 132         static boost_spirit::chset<> invalid_p;
 133         static boost_spirit::chset<> word_p;
 134         static boost_spirit::distinct_parser<> keyword_p;
 135
 136         //-/////////////////////////////////////////////////////////////////////////////////////////
 137         // Errors
 138
 139         enum Errors {
 140             EndOfStatementExpected,
 141             PathExpected,
 142             ClosingParenExpected,
 143             QuoteExpected
 144         };
 145
 146         //-/////////////////////////////////////////////////////////////////////////////////////////
 147
 148         CommandGrammar(ParseDispatcher & d, Context & c)
 149             : context(c), incremental(false), dispatcher(d) {}
 150
 151         template <class Scanner>
 152         struct definition
 153             : public boost_spirit::grammar_def< boost_spirit::rule<Scanner>,
 154                                                  boost_spirit::rule<Scanner>,
 155                                                  boost_spirit::rule<Scanner>,
 156                                                  boost_spirit::rule<Scanner> >
 157         {
 158             boost_spirit::rule<Scanner> command, path, argument, word, string, hexstring,
 159                 word_or_string, token, punctuation, hexbyte, balanced_tokens, simple_argument,
 160                 complex_argument, builtin, skip, statement, relpath, abspath, arguments,
 161                 group_start, group_close, statement_end, opt_path;
 162
 163             definition(CommandGrammar const & self)
 164             {
 165                 using namespace boost_spirit;
 166                 using namespace ::phoenix;
 167                 using namespace senf::phoenix;
 168                 typedef ParseDispatcher PD;
 169
 170                 actor< variable< char > >                  ch_    (self.context.ch);
 171                 actor< variable< std::string > >           str_   (self.context.str);
 172                 actor< variable< std::vector<Token> > >    path_  (self.context.path);
 173                 actor< variable< Token > >                 token_ (self.context.token);
 174                 actor< variable< FilePositionWithIndex > > pos_   (self.context.pos);
 175                 actor< variable< ParseDispatcher > >       d_     (self.dispatcher);
 176
 177                 assertion<Errors> end_of_statement_expected   (EndOfStatementExpected);
 178                 assertion<Errors> path_expected               (PathExpected);
 179                 assertion<Errors> closing_paren_expected      (ClosingParenExpected);
 180                 assertion<Errors> quote_expected              (QuoteExpected);
 181
 182                 //-/////////////////////////////////////////////////////////////////////////////////
 183                 // Spirit grammar
 184                 //
 185                 // Syntax summary:
 186                 // This is EBNF with some minor tweaks to accommodate C++ syntax
 187                 //
 188                 //   * a        any number of a's
 189                 //   + a        at least one a
 190                 //   ! a        an optional a
 191                 //   a >> b     a followed by b
 192                 //   a | b      a or b
 193                 //   a % b      any number of a's separated by b's
 194                 //   a - b      a but not b
 195                 //
 196                 // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
 197                 // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
 198                 // quite readable.
 199                 //
 200                 //   ch_p             match character
 201                 //   eps_p            always matches nothing (to attach unconditional actions)
 202                 //   confix_p(a,b,c)  match b, preceded by a and terminated by c. Used to parse
 203                 //                    string literals and comments
 204                 //   lex_escape_ch_p  match a lex style escape char. This is like a C++ style
 205                 //                    literal string escape char, however \x will be replaced by 'x'
 206                 //                    for any char 'x' if it has no special meaning.
 207                 //   keyword_p        match a delimited keyword
 208                 //   comment_p(a,b)   match comment starting with a and terminated with b. b
 209                 //                    defaults to end-of-line
 210                 //
 211                 //   lexeme_d         don't skip whitespace (as defined by the skip parser)
 212                 //
 213                 // Aligned to the right at column 50 are semantic actions.
 214                 //
 215                 // For clarity, I have used 'ch_p' explicitly throughout even though it is optional
 216                 // in most cases.
 217                 //
 218                 // More info is in the Boost.Spirit documentation
 219
 220                 command
 221                     =    builtin >> end_of_statement_expected(statement_end)
 222                     |    group_close
 223                     |    ch_p(';') // Ignore empty commands
 224                     |    statement
 225                     ;
 226
 227                 statement
 228                     =    path_expected(path)      [ bind(&PD::beginCommand)(d_, path_) ]
 229                       >> arguments
 230                       >> end_of_statement_expected(
 231                            ( group_start | statement_end )
 232                                                   [ bind(&PD::endCommand)(d_) ]
 233                          )
 234                     ;
 235
 236                 builtin
 237                     =    self.keyword_p("cd")
 238                       >> path_expected(path)
 239                       >> eps_p                    [ bind(&PD::builtin_cd)(d_, path_) ]
 240                     |    self.keyword_p("ls")
 241                       >> ! path
 242                       >> eps_p                    [ bind(&PD::builtin_ls)(d_, path_) ]
 243                     |    self.keyword_p("ll")
 244                       >> ! path
 245                       >> eps_p                    [ bind(&PD::builtin_ll)(d_, path_) ]
 246                     |    self.keyword_p("lr")
 247                       >> ! path
 248                       >> eps_p                    [ bind(&PD::builtin_lr)(d_, path_) ]
 249                     |    self.keyword_p("exit")   [ bind(&PD::builtin_exit)(d_) ]
 250                     |    self.keyword_p("help")
 251                       >> ! path
 252                       >> eps_p                    [ bind(&PD::builtin_help)(d_, path_) ]
 253                     ;
 254
 255                 group_start
 256                     =    ch_p('{')                [ bind(&PD::pushDirectory)(d_) ]
 257                     ;
 258
 259                 group_close
 260                     =    ch_p('}')                [ bind(&PD::popDirectory)(d_) ]
 261                     ;
 262
 263                 arguments
 264                     =    * argument
 265                     ;
 266
 267                 argument
 268                     =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
 269                     |    balanced_tokens
 270                     ;
 271
 272                 simple_argument         // All these return their value in context.token
 273                     =    string
 274                     |    hexstring
 275                     |    word
 276                     ;
 277
 278                 string                  // Returns value in context.token
 279                     =    eps_p                    [ pos_ = positionOf(arg1) ][ clear(str_) ]
 280                       >> lexeme_d
 281                          [
 282                              ch_p('"')
 283                           >> * ( ( lex_escape_ch_p[ ch_ = arg1 ]
 284                                    - '"'
 285                                  )                [ str_ += ch_ ]
 286                                )
 287                           >> quote_expected(ch_p('"'))
 288                                                   [ token_ = construct_<Token>(Token::BasicString,
 289                                                                                str_,
 290                                                                                pos_) ]
 291                          ]
 292                     ;
 293
 294                 hexstring               // Returns value in context.token
 295                     =    eps_p                    [ pos_ = positionOf(arg1) ][ clear(str_) ]
 296                       >>  "x\""
 297                       >> * ( hexbyte - ch_p('"') )
 298                       >> quote_expected(ch_p('"'))
 299                                                   [ token_ = construct_<Token>(Token::HexString,
 300                                                                                str_,
 301                                                                                pos_) ]
 302                     ;
 303
 304                 opt_path
 305                     = ! path                      [ bind(&PD::beginCommand)(d_, path_) ]
 306                                                   [ bind(&PD::endCommand)(d_) ]
 307                     ;
 308
 309                 path                    // Returns value in context.path
 310                     =    eps_p                    [ clear(path_) ]
 311                       >> relpath | abspath
 312                     ;
 313
 314                 relpath
 315                     =    (   word_or_string       [ push_back(path_, token_) ]
 316                            % +ch_p('/') )
 317                       >> ( ! (+ch_p('/') )        [ push_back(path_, construct_<Token>()) ] )
 318                     ;
 319
 320                 abspath
 321                     =    (+ch_p('/'))             [ push_back(path_, construct_<Token>()) ]
 322                       >> ( relpath
 323                          | eps_p                  [ push_back(path_, construct_<Token>()) ] )
 324                     ;
 325
 326                 balanced_tokens
 327                     =    eps_p                    [ pos_ = positionOf(arg1) ]
 328                       >> ch_p('(')                [ token_ = construct_<Token>(
 329                                                         Token::ArgumentGroupOpen,
 330                                                         "(",
 331                                                         pos_) ]
 332                                                   [ bind(&PD::pushToken)(d_, token_) ]
 333                       >> * token
 334                       >> eps_p                    [ pos_ = positionOf(arg1) ]
 335                       >> closing_paren_expected(ch_p(')'))
 336                                                   [ token_ = construct_<Token>(
 337                                                         Token::ArgumentGroupClose,
 338                                                         ")",
 339                                                         pos_) ]
 340                                                   [ bind(&PD::pushToken)(d_, token_) ]
 341                     ;
 342
 343                 token
 344                     =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
 345                     |    punctuation              [ bind(&PD::pushToken)(d_, token_) ]
 346                     |    balanced_tokens
 347                     ;
 348
 349                 punctuation             // Returns value in context.str
 350                     =    eps_p                      [ pos_ = positionOf(arg1) ]
 351                       >> (
 352                            ch_p('/')                [ token_ = construct_<Token>(
 353                                                           Token::PathSeparator,
 354                                                           "/") ]
 355                          | ch_p('{')                [ token_ = construct_<Token>(
 356                                                           Token::DirectoryGroupOpen,
 357                                                           "{") ]
 358                          | ch_p('}')                [ token_ = construct_<Token>(
 359                                                           Token::DirectoryGroupClose,
 360                                                           "}") ]
 361                          | ch_p(';')                [ token_ = construct_<Token>(
 362                                                           Token::CommandTerminator,
 363                                                           ";") ]
 364                          | self.punctuation_p       [ token_ = construct_<Token>(
 365                                                           Token::OtherPunctuation,
 366                                                           construct_<std::string>(1u, arg1),
 367                                                           pos_) ]
 368                         )
 369                     ;
 370
 371                 word                    // Returns value in context.token
 372                     =    eps_p                    [ pos_ = positionOf(arg1) ]
 373                       >> lexeme_d
 374                          [
 375                              (+ self.word_p)      [ str_ = construct_<std::string>(arg1, arg2) ]
 376                          ]
 377                       >> eps_p                    [ token_ = construct_<Token>(
 378                                                         Token::Word,
 379                                                         str_,
 380                                                         pos_) ]
 381                     ;
 382
 383                 word_or_string
 384                     =    word
 385                     |    string
 386                     ;
 387
 388                 hexbyte
 389                     =    uint_parser<char, 16, 2, 2>()
 390                                                   [ push_back(str_, arg1) ]
 391                     ;
 392
 393                 statement_end
 394                     =    if_p(var(self.incremental)) [
 395                                ch_p(';')
 396                          ]
 397                          .else_p [
 398                                ch_p(';')
 399                              | end_p
 400                          ]
 401                     ;
 402
 403                 skip
 404                     =    self.space_p | comment_p('#')
 405                     ;
 406
 407                 //-/////////////////////////////////////////////////////////////////////////////////
 408
 409                 start_parsers(
 410                     command,            // CommandParser
 411                     skip,               // SkipParser
 412                     arguments,          // ArgumentsParser
 413                     opt_path            // PathParser
 414                 );
 415
 416                 BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
 417                 BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
 418                 BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
 419                 BOOST_SPIRIT_DEBUG_TRACE_RULE(word,1);
 420                 BOOST_SPIRIT_DEBUG_TRACE_RULE(string,1);
 421                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexstring,1);
 422                 BOOST_SPIRIT_DEBUG_TRACE_RULE(token,1);
 423                 BOOST_SPIRIT_DEBUG_TRACE_RULE(punctuation,1);
 424                 BOOST_SPIRIT_DEBUG_TRACE_RULE(hexbyte,1);
 425                 BOOST_SPIRIT_DEBUG_TRACE_RULE(balanced_tokens,1);
 426                 BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
 427                 BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
 428                 BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
 429                 BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
 430                 BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
 431                 BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
 432                 BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
 433                 BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
 434             }
 435         };
 436     };
 437
 438     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::special_p (
         // Characters the grammar matches explicitly with ch_p; never part of a word.
 439         "/(){};\"");
 440     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::punctuation_p (
         // Characters turned into OtherPunctuation tokens by the punctuation rule.
 441         ",=");
 442     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::space_p (
         // Whitespace consumed by the skip rule.
 443         " \t\n\r");
 444     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::invalid_p (
         // NUL and the characters 0x01-0x20, except those listed in space_p.
 445         (boost_spirit::chset<>('\0') | boost_spirit::chset<>("\x01-\x20")) - space_p );
 446     template <class PD> boost_spirit::chset<> CommandGrammar<PD>::word_p (
         // Every character which is in none of the four sets above.
         // NOTE(review): this and the neighbouring initializers read other static members of the
         // same class template. The C++ standard leaves the dynamic initialization of template
         // static data members unordered -- confirm the dependency is safe on all supported compilers.
 447         boost_spirit::anychar_p - special_p - punctuation_p - space_p - invalid_p);
 448     template <class PD> boost_spirit::distinct_parser<> CommandGrammar<PD>::keyword_p (
         // Keyword parser: the given set holds the characters which must not directly follow a
         // keyword (word characters and '/').
 449         word_p | boost_spirit::ch_p('/'));
 450
 451 #endif
 452
 453 }}}
 454
 455 //-/////////////////////////////////////////////////////////////////////////////////////////////////
 456 #endif
 457
 458 \f
 459 // Local Variables:
 460 // mode: c++
 461 // fill-column: 100
 462 // comment-column: 40
 463 // c-file-style: "senf"
 464 // indent-tabs-mode: nil
 465 // ispell-local-dictionary: "american"
 466 // compile-command: "scons -u test"
 467 // End: