Utils: Add some spirit/phoenix helpers
[senf.git] / Console / Parse.ih
index 54c2c45..dbdf89e 100644 (file)
 
 // Custom includes
 #include <vector>
-#include <boost/regex.hpp>
 #include <boost/spirit.hpp>
 #include <boost/spirit/utility/grammar_def.hpp>
-#include <boost/spirit/actor.hpp>
-#include <boost/bind.hpp>
-#include <boost/function.hpp>
-#include <boost/ref.hpp>
+#include <boost/spirit/dynamic.hpp>
+#include <boost/spirit/phoenix.hpp>
+#include "../Utils/Phoenix.hh"
 
 ///////////////////////////////ih.p////////////////////////////////////////
 
@@ -42,30 +40,10 @@ namespace senf {
 namespace console {
 namespace detail {
 
-    struct append_action
-    {
-        template <class T, class Value>
-        void act(T & ref, Value const & value) const
-            { ref += T(1, value); }
+#ifndef DOXYGEN
 
-        template <class T, class Iterator>
-        void act(T & ref, Iterator const & f, Iterator const & l) const
-            { ref += T(f,l); }
-    };
-
-    template <class T>
-    inline boost::spirit::ref_value_actor<T, append_action> 
-    append_a(T & ref)
-    {
-        return boost::spirit::ref_value_actor<T, append_action>(ref);
-    }
-    
-    template <class T, class Value>
-    inline boost::spirit::ref_const_ref_actor<T, Value, append_action> 
-    append_a(T & ref, Value const & value)
-    {
-        return boost::spirit::ref_const_ref_actor<T, Value, append_action>(ref, value);
-    }
+    ///////////////////////////////////////////////////////////////////////////
+    // Grammar
 
     template <class ParseDispatcher>
     struct CommandGrammar : boost::spirit::grammar<CommandGrammar<ParseDispatcher> >
@@ -73,76 +51,61 @@ namespace detail {
         ///////////////////////////////////////////////////////////////////////////
         // Start rules
 
-        enum { CommandParser, SkipParser };
+        enum { CommandParser, SkipParser, ArgumentsParser };
 
         ///////////////////////////////////////////////////////////////////////////
         // The parse context (variables needed while parsing)
 
+        typedef Token::TokenType TokenType;
+
         struct Context {
             std::string str;
-            std::vector<std::string> path;
+            std::vector<Token> path;
             char ch;
+            Token token;
         };
 
         Context & context;
 
         ///////////////////////////////////////////////////////////////////////////
-        // Dispatching semantic actions
+        // Configuration
 
-        ParseDispatcher & dispatcher;
-
-        struct Dispatch_actor
-        {
-            Dispatch_actor(boost::function<void ()> fn_) : fn (fn_) {}
-
-            template <class Value>
-            void operator()(Value const & value) const
-                { fn(); }
+        bool incremental;
 
-            template <class Iterator>
-            void operator()(Iterator const & f, Iterator const & l) const
-                { fn(); }
-
-            boost::function<void ()> fn;
-        };
-        
-        template <class Callback>
-        Dispatch_actor dispatch(Callback cb) const
-            { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher))); }
+        ///////////////////////////////////////////////////////////////////////////
+        // Dispatching semantic actions
 
-        template <class Callback, class Arg>
-        Dispatch_actor dispatch(Callback cb, Arg const & arg) const
-            { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher), arg)); }
+        ParseDispatcher & dispatcher;
 
         ///////////////////////////////////////////////////////////////////////////
 
         CommandGrammar(ParseDispatcher & d, Context & c) 
-            : context(c), dispatcher(d) {}
+            : context(c), incremental(false), dispatcher(d) {}
 
         template <class Scanner>
         struct definition 
             : public boost::spirit::grammar_def< boost::spirit::rule<Scanner>, 
+                                                 boost::spirit::rule<Scanner>,
                                                  boost::spirit::rule<Scanner> >
         {
-            boost::spirit::rule<Scanner> command, path, argument, word, string, hexstring, token;
-            boost::spirit::rule<Scanner> punctuation, hexbyte, balanced_tokens, simple_argument;
-            boost::spirit::rule<Scanner> complex_argument, builtin, skip;
-
+            boost::spirit::rule<Scanner> command, path, argument, word, string, hexstring, token,
+                punctuation, hexbyte, balanced_tokens, simple_argument, complex_argument, builtin, 
+                skip, statement, relpath, abspath, arguments, group_start, group_close, 
+                statement_end;
             boost::spirit::chset<> special_p, punctuation_p, space_p, invalid_p, word_p;
-
             boost::spirit::distinct_parser<> keyword_p;
 
             definition(CommandGrammar const & self) : 
 
                 // Characters with a special meaning within the parser
-                // and characters reserved for future extensions
                 special_p ("/(){};"),
 
-                // Characters which are returned as punctuation tokens
+                // Additional characters which are returned as punctuation tokens
+                // (only allowed within '()').
                 punctuation_p (",="),
 
-                // Whitespace characters (we don't want newlines in there)
-                space_p (" \t"),
+                // Whitespace characters
+                space_p (" \t\n\r"),
 
                 // Invalid characters: All chars below \x20 (space) which are not space_p
                 // (don't put a \0 in the chset<> argument *string* ...)
@@ -158,105 +121,215 @@ namespace detail {
 
             {
                 using namespace boost::spirit;
+                using namespace ::phoenix;
+                using namespace senf::phoenix;
                 typedef ParseDispatcher PD;
 
+                actor< variable< char > >               ch_    (self.context.ch);
+                actor< variable< std::string > >        str_   (self.context.str);
+                actor< variable< std::vector<Token> > > path_  (self.context.path);
+                actor< variable< Token > >              token_ (self.context.token);
+                actor< variable< ParseDispatcher > >    d_     (self.dispatcher);
+
+                ///////////////////////////////////////////////////////////////////
+                // Spirit grammar
+                //
+                // Syntax summary:
+                // This is EBNF with some minor tweaks to accommodate C++ syntax
+                //
+                //   * a        any number of a's
+                //   + a        at least one a
+                //   ! a        an optional a
+                //   a >> b     a followed by b
+                //   a | b      a or b
+                //   a % b      any number of a's separated by b's
+                //   a - b      a but not b
+                //
+                // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
+                // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
+                // quite readable.
+                //   
+                //   ch_p             match character
+                //   eps_p            always matches nothing (to attach unconditional actions)
+                //   confix_p(a,b,c)  match b, preceded by a and terminated by c. Used to parse
+                //                    string literals and comments
+                //   lex_escape_ch_p  match a lex style escape char. This is like a C++ style
+                //                    literal string escape char, however \x will be replaced by 'x'
+                //                    for any char 'x' if it has no special meaning.
+                //   keyword_p        match a delimited keyword
+                //   comment_p(a,b)   match comment starting with a and terminated with b. b
+                //                    defaults to end-of-line
+                //
+                //   lexeme_d         don't skip whitespace (as defined by the skip parser)
+                //
+                // Aligned to the right at column 50 are semantic actions.
+                //
+                // For clarity, I have used 'ch_p' explicitly throughout even though it is optional
+                // in most cases.
+                //
+                // More info is in the Boost.Spirit documentation
+
+                command 
+                    =    builtin >> statement_end
+                    |    path >> ( group_start | statement )
+                    |    group_close
+                    |    ch_p(';') // Ignore empty commands
+                    ;
+
                 builtin
                     =    keyword_p("cd") 
                       >> path
-                      >> eps_p                    [ self.dispatch(&PD::builtin_cd,
-                                                                  boost::ref(self.context.path)) ]
-                    |    keyword_p("ls") 
+                      >> eps_p                    [ bind(&PD::builtin_cd)(d_, path_) ]
+                    |    keyword_p("ls")
+                      >> ! path
+                      >> eps_p                    [ bind(&PD::builtin_ls)(d_, path_) ]
+                    |    keyword_p("exit")        [ bind(&PD::builtin_exit)(d_) ]
+                    
+                    |    keyword_p("help")
                       >> ! path
-                      >> eps_p                    [ self.dispatch(&PD::builtin_cd,
-                                                                  boost::ref(self.context.path)) ]
+                      >> eps_p                    [ bind(&PD::builtin_help)(d_, path_) ]
                     ;
 
-                command 
-                    =    builtin
-                    |    path                     [ self.dispatch(&PD::beginCommand, 
-                                                                  boost::ref(self.context.path)) ]
-                      >> * argument
-                      >> ! ch_p(';')
-                      >> eps_p                    [ self.dispatch(&PD::endCommand) ]
+                group_start
+                    =    ch_p('{')                [ bind(&PD::pushDirectory)(d_, path_) ]
+                    ;
+
+                group_close
+                    =    ch_p('}')                [ bind(&PD::popDirectory)(d_) ]
+                    ;
+
+                statement
+                    =    eps_p                    [ bind(&PD::beginCommand)(d_, path_) ]
+                      >> arguments
+                      >> statement_end            [ bind(&PD::endCommand)(d_) ]
+                    ;
+
+                arguments
+                    =    * argument
                     ;
 
                 argument
-                    =    simple_argument          [ self.dispatch(&PD::pushArgument, 
-                                                                  boost::ref(self.context.str)) ]
-                    |    complex_argument
+                    =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
+                    |    balanced_tokens
                     ;
                 
-                simple_argument         // All these return their value in context.str
+                simple_argument         // All these return their value in context.token
                     =    string
                     |    hexstring
                     |    word
                     ;
                 
-                complex_argument        // Argument consists of multiple tokens
-                    =    ch_p('(')                [ self.dispatch(&PD::openGroup) ]
-                       >> * token
-                       >> ch_p(')')               [ self.dispatch(&PD::closeGroup) ]
-                    ;
-
-                string                  // Returns value in context.str
-                    =    eps_p                    [ clear_a(self.context.str) ]
+                string                  // Returns value in context.token
+                    =    eps_p                    [ clear(str_) ]
                       >> lexeme_d
                          [
                              ch_p('"')
-                          >> * ( ( lex_escape_ch_p[ assign_a(self.context.ch) ] 
+                          >> * ( ( lex_escape_ch_p[ ch_ = arg1 ] 
                                    - '"' 
-                                 )                [ append_a(self.context.str,
-                                                             self.context.ch) ] 
+                                 )                [ str_ += ch_ ]
                                )
-                          >> ch_p('"')
+                          >> ch_p('"')            [ token_ = construct_<Token>(Token::BasicString, 
+                                                                               str_) ]
                          ]
                     ;
 
-                hexstring               // Returns value in context.str
-                    =    eps_p                    [ clear_a(self.context.str) ]
+                hexstring               // Returns value in context.token
+                    =    eps_p                    [ clear(str_) ]
                       >> confix_p( "x\"", * hexbyte, '"' )
+                                                  [ token_ = construct_<Token>(Token::HexString,
+                                                                               str_) ]
                     ;
 
                 path                    // Returns value in context.path
-                    =    eps_p                    [ clear_a(self.context.path) ]
-                      >> ( ! ch_p('/')            [ push_back_a(self.context.path, "") ] ) 
-                      >> (   word                 [ push_back_a(self.context.path) ] 
+                    =    eps_p                    [ clear(path_) ]
+                      >> relpath | abspath
+                    ;
+
+                relpath
+                    =    (   word                 [ push_back(path_, token_) ]
                            % ch_p('/') )
-                      >> ( ! ch_p('/')            [ push_back_a(self.context.path,"") ] )
+                      >> ( ! ch_p('/')            [ push_back(path_, construct_<Token>()) ] )
+                    ;
+
+                abspath
+                    =    ch_p('/')                [ push_back(path_, construct_<Token>()) ]
+                      >> ( relpath
+                         | eps_p                  [ push_back(path_, construct_<Token>()) ] )
                     ;
 
                 balanced_tokens 
-                    =    ch_p('(')                [ self.dispatch(&PD::pushPunctuation, "(") ]
+                    =    ch_p('(')                [ token_ = construct_<Token>(
+                                                        Token::ArgumentGroupOpen,
+                                                        "(") ]
+                                                  [ bind(&PD::pushToken)(d_, token_) ]
                       >> * token
-                      >> ch_p(')')                [ self.dispatch(&PD::pushPunctuation, ")") ]
+                      >> ch_p(')')                [ token_ = construct_<Token>(
+                                                        Token::ArgumentGroupClose,
+                                                        ")") ]
+                                                  [ bind(&PD::pushToken)(d_, token_) ]
                     ;
 
                 token
-                    =    simple_argument          [ self.dispatch(&PD::pushWord, 
-                                                                  boost::ref(self.context.str)) ]
-                    |    punctuation              [ self.dispatch(&PD::pushPunctuation,
-                                                                  boost::ref(self.context.str)) ]
+                    =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
+                    |    punctuation              [ bind(&PD::pushToken)(d_, token_) ]
                     |    balanced_tokens
                     ;
 
                 punctuation             // Returns value in context.str
-                    =    punctuation_p            [ assign_a(self.context.str) ]
+                    =    ch_p('/')                [ token_ = construct_<Token>(
+                                                        Token::PathSeparator,
+                                                        "/") ]
+                    |    ch_p('{')                [ token_ = construct_<Token>(
+                                                        Token::DirectoryGroupOpen,
+                                                        "{") ]
+                    |    ch_p('}')                [ token_ = construct_<Token>(
+                                                        Token::DirectoryGroupClose,
+                                                        "}") ]
+                    |    ch_p(';')                [ token_ = construct_<Token>(
+                                                        Token::CommandTerminator,
+                                                        ";") ]
+                    |    punctuation_p            [ token_ = construct_<Token>(
+                                                        Token::OtherPunctuation,
+                                                        construct_<std::string>(1u, arg1)) ]
                     ;
 
-                word                    // Returns value in context.str
-                    =    lexeme_d[ + word_p ]     [ assign_a(self.context.str) ]
+                word                    // Returns value in context.token
+                    =    lexeme_d
+                         [
+                             (+ word_p)           [ str_ = construct_<std::string>(arg1, arg2) ]
+                         ]
+                      >> eps_p                    [ token_ = construct_<Token>(
+                                                        Token::Word, 
+                                                        str_) ]
                     ;
 
                 hexbyte
                     =    uint_parser<char, 16, 2, 2>()
-                                                  [ append_a(self.context.str) ]
+                                                  [ push_back(str_, arg1) ]
+                    ;
+
+                statement_end
+                    =    if_p(var(self.incremental)) [
+                               ch_p(';')
+                         ]
+                         .else_p [
+                               ch_p(';') 
+                             | end_p
+                         ]
                     ;
 
                 skip
-                    =    space_p
-                    |    comment_p('#')
+                    =    space_p | comment_p('#')
                     ;
 
+                ///////////////////////////////////////////////////////////////////
+
+                start_parsers(
+                    command,            // CommandParser
+                    skip,               // SkipParser
+                    arguments           // ArgumentsParser
+                );
+
                 BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
                 BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
                 BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
@@ -270,16 +343,17 @@ namespace detail {
                 BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
                 BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
                 BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
-
-                start_parsers(
-                    command,            // CommandParser
-                    skip                // SkipParser
-                );
-
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
             }
         };
     };
 
+#endif
+
 }}}
 
 ///////////////////////////////ih.e////////////////////////////////////////