Utils: Add some spirit/phoenix helpers
[senf.git] / Console / Parse.ih
index d5a9e2d..dbdf89e 100644 (file)
 #define IH_Parse_ 1
 
 // Custom includes
-#include <boost/regex.hpp>
+#include <vector>
 #include <boost/spirit.hpp>
-#include <boost/spirit/utility/regex.hpp>
-#include <boost/spirit/actor.hpp>
-#include <boost/bind.hpp>
-#include <boost/function.hpp>
-#include <boost/ref.hpp>
+#include <boost/spirit/utility/grammar_def.hpp>
+#include <boost/spirit/dynamic.hpp>
+#include <boost/spirit/phoenix.hpp>
+#include "../Utils/Phoenix.hh"
 
 ///////////////////////////////ih.p////////////////////////////////////////
 
@@ -41,206 +40,320 @@ namespace senf {
 namespace console {
 namespace detail {
 
-    struct append_action
-    {
-        template <class T, class Value>
-        void act(T & ref, Value const & value) const
-            { ref += T(1, value); }
-
-        template <class T, class Iterator>
-        void act(T & ref, Iterator const & f, Iterator const & l) const
-            { ref += T(f,l); }
-    };
+#ifndef DOXYGEN
 
-    template <class T>
-    inline boost::spirit::ref_value_actor<T, append_action> 
-    append_a(T & ref)
-    {
-        return boost::spirit::ref_value_actor<T, append_action>(ref);
-    }
-    
-    template <class T, class Value>
-    inline boost::spirit::ref_const_ref_actor<T, Value, append_action> 
-    append_a(T & ref, Value const & value)
-    {
-        return boost::spirit::ref_const_ref_actor<T, Value, append_action>(ref, value);
-    }
+    ///////////////////////////////////////////////////////////////////////////
+    // Grammar
 
     template <class ParseDispatcher>
     struct CommandGrammar : boost::spirit::grammar<CommandGrammar<ParseDispatcher> >
     {
         ///////////////////////////////////////////////////////////////////////////
+        // Start rules
+
+        enum { CommandParser, SkipParser, ArgumentsParser };
+
+        ///////////////////////////////////////////////////////////////////////////
         // The parse context (variables needed while parsing)
 
+        typedef Token::TokenType TokenType;
+
         struct Context {
             std::string str;
+            std::vector<Token> path;
             char ch;
+            Token token;
         };
 
         Context & context;
 
         ///////////////////////////////////////////////////////////////////////////
+        // Configuration
+
+        bool incremental;
+
+        ///////////////////////////////////////////////////////////////////////////
         // Dispatching semantic actions
 
         ParseDispatcher & dispatcher;
 
-        struct Dispatch_actor
+        ///////////////////////////////////////////////////////////////////////////
+
+        CommandGrammar(ParseDispatcher & d, Context & c) 
+            : context(c), incremental(false), dispatcher(d) {}
+
+        template <class Scanner>
+        struct definition 
+            : public boost::spirit::grammar_def< boost::spirit::rule<Scanner>, 
+                                                 boost::spirit::rule<Scanner>,
+                                                 boost::spirit::rule<Scanner> >
         {
-            Dispatch_actor(boost::function<void ()> fn_) : fn (fn_) {}
+            boost::spirit::rule<Scanner> command, path, argument, word, string, hexstring, token,
+                punctuation, hexbyte, balanced_tokens, simple_argument, complex_argument, builtin, 
+                skip, statement, relpath, abspath, arguments, group_start, group_close, 
+                statement_end;
+            boost::spirit::chset<> special_p, punctuation_p, space_p, invalid_p, word_p;
+            boost::spirit::distinct_parser<> keyword_p;
 
-            template <class Value>
-            void operator()(Value const & value) const
-                { fn(); }
+            definition(CommandGrammar const & self) : 
 
-            template <class Iterator>
-            void operator()(Iterator const & f, Iterator const & l) const
-                { fn(); }
+                // Characters with a special meaning within the parser
+                special_p ("/(){};"),
 
-            boost::function<void ()> fn;
-        };
-        
-        template <class Callback>
-        Dispatch_actor dispatch(Callback cb) const
-            { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher))); }
+                // Additional characters which are returned as punctuation tokens
+                // (only allowed within '()').
+                punctuation_p (",="),
 
-        template <class Callback, class Arg>
-        Dispatch_actor dispatch(Callback cb, Arg const & arg) const
-            { return Dispatch_actor(boost::bind(cb, boost::ref(dispatcher), arg)); }
+                // Whitespace characters
+                space_p (" \t\n\r"),
 
-        ///////////////////////////////////////////////////////////////////////////
+                // Invalid characters: All chars below \x20 (space) which are not space_p
+                // (don't put a \0 in the chset<> argument *string* ...)
+                invalid_p (
+                    boost::spirit::chset<>('\0') | boost::spirit::chset<>("\x01-\x20") - space_p ),
 
-        CommandGrammar(ParseDispatcher & d, Context & c) 
-            : context(c), dispatcher(d) {}
+                // Valid word characters
+                word_p (
+                    boost::spirit::anychar_p - special_p - punctuation_p - space_p - invalid_p),
 
-        template <class Scanner>
-        struct definition
-        {
-            boost::spirit::rule<Scanner> command, path, argument, word, string, hexstring, token;
-            boost::spirit::rule<Scanner> punctuation, hexbyte, balanced_tokens, simple_argument;
-            boost::spirit::rule<Scanner> complex_argument;
+                // Keywords must not be followed by a word char or '/'
+                keyword_p ( word_p | boost::spirit::ch_p('/') )
 
-            definition(CommandGrammar const & self) {
+            {
                 using namespace boost::spirit;
+                using namespace ::phoenix;
+                using namespace senf::phoenix;
                 typedef ParseDispatcher PD;
-                
+
+                actor< variable< char > >               ch_    (self.context.ch);
+                actor< variable< std::string > >        str_   (self.context.str);
+                actor< variable< std::vector<Token> > > path_  (self.context.path);
+                actor< variable< Token > >              token_ (self.context.token);
+                actor< variable< ParseDispatcher > >    d_     (self.dispatcher);
+
+                ///////////////////////////////////////////////////////////////////
+                // Spirit grammar
+                //
+                // Syntax summary:
+                // This is EBNF with some minor tweaks to accommodate C++ syntax
+                //
+                //   * a        any number of a's
+                //   + a        at least one a
+                //   ! a        an optional a
+                //   a >> b     a followed by b
+                //   a | b      a or b
+                //   a % b      any number of a's separated by b's
+                //   a - b      a but not b
+                //
+                // Beside this, we use some special parsers (ch_p, eps_p, confix_p, lex_escape_ch_p,
+                // keyword_p, comment_p) and directives (lexeme_d), however, the parser should be
+                // quite readable.
+                //   
+                //   ch_p             match character
+                //   eps_p            always matches nothing (to attach unconditional actions)
+                //   confix_p(a,b,c)  match b, preceded by a and terminated by c. Used to parse
+                //                    string literals and comments
+                //   lex_escape_ch_p  match a lex style escape char. This is like a C++ style
+                //                    literal string escape char, however \x will be replaced by 'x'
+                //                    for any char 'x' if it has no special meaning.
+                //   keyword_p        match a delimited keyword
+                //   comment_p(a,b)   match comment starting with a and terminated with b. b
+                //                    defaults to end-of-line
+                //
+                //   lexeme_d         don't skip whitespace (as defined by the skip parser)
+                //
+                // Aligned to the right at column 50 are semantic actions.
+                //
+                // For clarity, I have used 'ch_p' explicitly throughout even though it is optional
+                // in most cases.
+                //
+                // More info is in the Boost.Spirit documentation
+
                 command 
-                    =    path                     [ self.dispatch(&PD::beginCommand, 
-                                                                  boost::cref(self.context.str)) ]
-                      >> * argument
-                      >> ! ch_p(';')
-                      >> eps_p                    [ self.dispatch(&PD::endCommand) ]
+                    =    builtin >> statement_end
+                    |    path >> ( group_start | statement )
+                    |    group_close
+                    |    ch_p(';') // Ignore empty commands
+                    ;
+
+                builtin
+                    =    keyword_p("cd") 
+                      >> path
+                      >> eps_p                    [ bind(&PD::builtin_cd)(d_, path_) ]
+                    |    keyword_p("ls")
+                      >> ! path
+                      >> eps_p                    [ bind(&PD::builtin_ls)(d_, path_) ]
+                    |    keyword_p("exit")        [ bind(&PD::builtin_exit)(d_) ]
+                    
+                    |    keyword_p("help")
+                      >> ! path
+                      >> eps_p                    [ bind(&PD::builtin_help)(d_, path_) ]
+                    ;
+
+                group_start
+                    =    ch_p('{')                [ bind(&PD::pushDirectory)(d_, path_) ]
+                    ;
+
+                group_close
+                    =    ch_p('}')                [ bind(&PD::popDirectory)(d_) ]
+                    ;
+
+                statement
+                    =    eps_p                    [ bind(&PD::beginCommand)(d_, path_) ]
+                      >> arguments
+                      >> statement_end            [ bind(&PD::endCommand)(d_) ]
+                    ;
+
+                arguments
+                    =    * argument
                     ;
 
                 argument
-                    =    simple_argument          [ self.dispatch(&PD::pushArgument, 
-                                                                  boost::cref(self.context.str)) ]
-                    |    complex_argument
+                    =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
+                    |    balanced_tokens
                     ;
                 
-                simple_argument         // All these return their value in context.str
+                simple_argument         // All these return their value in context.token
                     =    string
                     |    hexstring
-                    |    path
+                    |    word
                     ;
                 
-                complex_argument        // Argument consists of multiple tokens
-                    =    ch_p('(')                [ self.dispatch(&PD::openGroup) ]
-                       >> * token
-                       >> ch_p(')')               [ self.dispatch(&PD::closeGroup) ]
-                    ;
-
-                string                  // Returns value in context.str
-                    =    eps_p                    [ clear_a(self.context.str) ]
+                string                  // Returns value in context.token
+                    =    eps_p                    [ clear(str_) ]
                       >> lexeme_d
                          [
                              ch_p('"')
-                          >> * ( ( lex_escape_ch_p[ assign_a(self.context.ch) ] 
+                          >> * ( ( lex_escape_ch_p[ ch_ = arg1 ] 
                                    - '"' 
-                                 )                [ append_a(self.context.str,
-                                                             self.context.ch) ] 
+                                 )                [ str_ += ch_ ]
                                )
-                          >> ch_p('"')
+                          >> ch_p('"')            [ token_ = construct_<Token>(Token::BasicString, 
+                                                                               str_) ]
                          ]
                     ;
 
-                hexstring               // Returns value in context.str
-                    =    eps_p                    [ clear_a(self.context.str) ]
+                hexstring               // Returns value in context.token
+                    =    eps_p                    [ clear(str_) ]
                       >> confix_p( "x\"", * hexbyte, '"' )
+                                                  [ token_ = construct_<Token>(Token::HexString,
+                                                                               str_) ]
                     ;
 
-                path                    // Returns value in context.str
-                    =    eps_p                    [ clear_a(self.context.str) ]
-                      >> ( ! ch_p('/')            [ append_a(self.context.str) ] 
-                         ) 
-                      >> (   word                 [ append_a(self.context.str) ] 
-                           % ch_p('/')            [ append_a(self.context.str) ] 
-                         )
+                path                    // Returns value in context.path
+                    =    eps_p                    [ clear(path_) ]
+                      >> relpath | abspath
+                    ;
+
+                relpath
+                    =    (   word                 [ push_back(path_, token_) ]
+                           % ch_p('/') )
+                      >> ( ! ch_p('/')            [ push_back(path_, construct_<Token>()) ] )
+                    ;
+
+                abspath
+                    =    ch_p('/')                [ push_back(path_, construct_<Token>()) ]
+                      >> ( relpath
+                         | eps_p                  [ push_back(path_, construct_<Token>()) ] )
                     ;
 
                 balanced_tokens 
-                    =    ch_p('(')                [ self.dispatch(&PD::pushPunctuation, "(") ]
+                    =    ch_p('(')                [ token_ = construct_<Token>(
+                                                        Token::ArgumentGroupOpen,
+                                                        "(") ]
+                                                  [ bind(&PD::pushToken)(d_, token_) ]
                       >> * token
-                      >> ch_p(')')                [ self.dispatch(&PD::pushPunctuation, ")") ]
+                      >> ch_p(')')                [ token_ = construct_<Token>(
+                                                        Token::ArgumentGroupClose,
+                                                        ")") ]
+                                                  [ bind(&PD::pushToken)(d_, token_) ]
                     ;
 
                 token
-                    =    simple_argument          [ self.dispatch(&PD::pushWord, 
-                                                                  boost::cref(self.context.str)) ]
-                    |    punctuation              [ self.dispatch(&PD::pushPunctuation,
-                                                                  boost::cref(self.context.str)) ]
+                    =    simple_argument          [ bind(&PD::pushToken)(d_, token_) ]
+                    |    punctuation              [ bind(&PD::pushToken)(d_, token_) ]
                     |    balanced_tokens
                     ;
 
                 punctuation             // Returns value in context.str
-                    =    regex_p("[,=]")          [ assign_a(self.context.str) ]
+                    =    ch_p('/')                [ token_ = construct_<Token>(
+                                                        Token::PathSeparator,
+                                                        "/") ]
+                    |    ch_p('{')                [ token_ = construct_<Token>(
+                                                        Token::DirectoryGroupOpen,
+                                                        "{") ]
+                    |    ch_p('}')                [ token_ = construct_<Token>(
+                                                        Token::DirectoryGroupClose,
+                                                        "}") ]
+                    |    ch_p(';')                [ token_ = construct_<Token>(
+                                                        Token::CommandTerminator,
+                                                        ";") ]
+                    |    punctuation_p            [ token_ = construct_<Token>(
+                                                        Token::OtherPunctuation,
+                                                        construct_<std::string>(1u, arg1)) ]
                     ;
 
-                word
-                    =    regex_p("[^ \t\n\r;,=(){}/\"]+")
+                word                    // Returns value in context.token
+                    =    lexeme_d
+                         [
+                             (+ word_p)           [ str_ = construct_<std::string>(arg1, arg2) ]
+                         ]
+                      >> eps_p                    [ token_ = construct_<Token>(
+                                                        Token::Word, 
+                                                        str_) ]
                     ;
 
                 hexbyte
                     =    uint_parser<char, 16, 2, 2>()
-                                                  [ append_a(self.context.str) ]
+                                                  [ push_back(str_, arg1) ]
                     ;
 
-                BOOST_SPIRIT_DEBUG_RULE(command);
-                BOOST_SPIRIT_DEBUG_RULE(path);
-                BOOST_SPIRIT_DEBUG_RULE(argument);
-                BOOST_SPIRIT_DEBUG_RULE(word);
-                BOOST_SPIRIT_DEBUG_RULE(string);
-                BOOST_SPIRIT_DEBUG_RULE(hexstring);
-                BOOST_SPIRIT_DEBUG_RULE(token);
-                BOOST_SPIRIT_DEBUG_RULE(punctuation);
-                BOOST_SPIRIT_DEBUG_RULE(hexbyte);
-                BOOST_SPIRIT_DEBUG_RULE(balanced_tokens);
-                BOOST_SPIRIT_DEBUG_RULE(simple_argument);
-                BOOST_SPIRIT_DEBUG_RULE(complex_argument);
-            }
-
-            boost::spirit::rule<Scanner> const & start() const { return command; }
-        };
-    };
-
-    struct SkipGrammar
-        : public boost::spirit::grammar<SkipGrammar>
-    {
-        template <class Scanner>
-        struct definition
-        {
-            boost::spirit::rule<Scanner> rule;
+                statement_end
+                    =    if_p(var(self.incremental)) [
+                               ch_p(';')
+                         ]
+                         .else_p [
+                               ch_p(';') 
+                             | end_p
+                         ]
+                    ;
 
-            definition(SkipGrammar const & self) {
-                rule 
-                    =    boost::spirit::regex_p("[ \t]+") 
-                    |    boost::spirit::comment_p('#')
+                skip
+                    =    space_p | comment_p('#')
                     ;
-            }
 
-            boost::spirit::rule<Scanner> const & start() const { return rule; }
+                ///////////////////////////////////////////////////////////////////
+
+                start_parsers(
+                    command,            // CommandParser
+                    skip,               // SkipParser
+                    arguments           // ArgumentsParser
+                );
+
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(command,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(path,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(argument,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(word,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(string,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(hexstring,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(token,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(punctuation,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(hexbyte,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(balanced_tokens,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(simple_argument,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(complex_argument,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(builtin,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(commands,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(block,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(statement,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(relpath,1);
+                BOOST_SPIRIT_DEBUG_TRACE_RULE(abspath,1);
+            }
         };
     };
 
+#endif
+
 }}}
 
 ///////////////////////////////ih.e////////////////////////////////////////