X-Git-Url: http://g0dil.de/git?a=blobdiff_plain;f=Console%2FParse.hh;h=a37ed014b6f231f579e97e2e6a171c30cb25de3e;hb=9cda1b12a3e68538ea8157ca96810f0423123a70;hp=a4e452c57d02a9bda01a7188d7a0a276be0fa49f;hpb=31fb20bd535cd2d65595a7d37bd100ff16f13df8;p=senf.git diff --git a/Console/Parse.hh b/Console/Parse.hh index a4e452c..a37ed01 100644 --- a/Console/Parse.hh +++ b/Console/Parse.hh @@ -26,6 +26,168 @@ #ifndef HH_Parse_ #define HH_Parse_ 1 +/** \defgroup console_parser The parser + + The console/config library defines a simple language used to interact with the console or to + configure the application. The parser is not concerned about interpreting commands or + arguments, checking that a command exists or managing directories. The parser just takes the + input and parses it. + + \autotoc + + \section console_language The Language + + The config/console language is used in configuration files and interactively at the + console. Some features of the language are more useful in config files, others at the + interactive console but the language is the same in both cases. + + Let's start with a sample of the config/console language. The following is written as a + configuration file + \code + # My someserver configuration file + + /server/port 1234; + + /logger/targets { + console { + accept senf::log::Debug IMPORTANT; + accept server::ServerLog CRITICAL; + } + + provide serverlog senf::log::FileTarget "/var/log/server.log"; + serverlog { + reject senf::log::Debug senf::Console::Server NOTICE; + accept senf::log::Debug NOTICE; + accept server::ServerLog; + } + } + + /server/stuffing (UDPPacket x"01 02 03 04"); + /server/allow_hosts 10.1.2.3 # our internal server + 10.2.3.4 10.4.3.5 # client workstations + ; + + /help/infoUrl "http://senf.j32.de/src/doc"; + \endcode + + The interactive syntax is the same with some notes: + \li All commands must be complete on a single line. This includes grouping constructs which must + be closed on the same line they are opened. 
+ \li The last ';' is optional. However, multiple commands may be entered on a single line when + they are separated by ';'. + \li An empty line on the interactive console will repeat the last command. + + The language consists of a small number of syntactic entities: + + \subsection console_special_chars Special characters + + These are characters which have a special meaning. Some are used internally, others are just + returned as punctuation tokens: + + + + + + + +
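<table>
<tr><td>/</td><td>path component separator</td></tr>
<tr><td>( )</td><td>argument grouping</td></tr>
<tr><td>{ }</td><td>directory grouping</td></tr>
<tr><td>;</td><td>command terminator</td></tr>
<tr><td>, =</td><td>punctuation tokens</td></tr>
</table>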
+ + \subsection console_basic Basic elements + + A word is \e any sequence of consecutive characters which does not include any special + character. Examples for words are thus +
+    12.34
+    jens@fokus.fraunhofer.de
+    eth0
+    1>2
+    
+ + The following are \e not valid words: +
+    a/b/c
+    a,b
+    
+ + A string literal is just that: A double-quoted string (C/C++ style) possibly with + embedded escape chars: +
+    "\"foo\nbar\""
+    "\x04test"
+    
+ + A hex-string literal is used to represent binary data. It looks like a string which has + only hexadecimal bytes or whitespace as contents (comments and newlines are Ok when not read + from the interactive console) +
+    x"01 02 03 0405"
+    x"01 02   # ID header
+      0405    # payload
+      "
+    
+ + A token is a \e word, \e string or \e hex-string, or a single special character (that's + true, any special character is allowed as a token). '(' and ')' must be properly nested. + + A path is a sequence of \e words separated by '/' (and optional whitespace). A path may + have an optional initial and/or a terminating '/'. +
+    a/b/c
+    foo / bar /
+    /server
+    
+ + \subsection console_statements Statements + + There are several types of statements: + \li The bulk of all statements are \e path statements + \li There are some \e built-in statements which are mostly useful at the interactive console + \li A special form of statement is the directory group + + A path statement consists of a (possibly relative) path followed by any number of + arguments and terminated with a ';' (or end-of-input) +
+    /path/to/command arg1 "arg2" (complex=(1 2) another) ;
+    
+ Every argument is either + \li A single word, string or hex-string + \li or a parenthesized list of tokens. + + So the above command has three arguments: 'arg1', 'arg2' (a single token each) and one argument with + the 7 tokens 'complex', '=', '(', '1', '2', ')', 'another'. The interpretation of the arguments + is completely up to the command. + + A built-in statement is one of + + + + + + +
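<table>
<tr><td>\c cd \e path</td><td>Change current directory</td></tr>
<tr><td>\c ls [ \e path ]</td><td>List contents of \e path or current directory</td></tr>
<tr><td>\c exit</td><td>Exit interactive console</td></tr>
<tr><td>\c help [ \e path ]</td><td>Show help for \e path or current directory</td></tr>
</table>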
+ + A directory group statement is a block of statements all executed relative to a fixed + directory. +
+    /some/path {
+        statement ;
+        . . .
+    }
+    
+ At the beginning of the block, the current directory is saved and the directory is changed to + the given directory. All commands are executed and at the end of the block, the saved directory + is restored. + + \section console_parse_api The parser API + + The senf::console::CommandParser is responsible for taking text input and turning it into a + sequence of senf::console::ParseCommandInfo structures. The structures are returned by passing + them successively to a callback function. + + Every statement is returned as a senf::console::ParseCommandInfo instance. Directory groups are + handled specially: They are divided into two special built-in commands called PUSHD and POPD. + */ + // Custom includes #include #include @@ -42,12 +204,18 @@ namespace console { namespace detail { struct ParserAccess; } - /** \brief + /** \brief Single argument token + + All command arguments are split into tokens by the parser. Each token is returned as an + ArgumentToken instance. + + \ingroup console_parser */ class ArgumentToken { public: - std::string const & value() const; + std::string const & value() const; ///< String value of token + /**< This value is properly unquoted */ protected: @@ -59,8 +227,26 @@ namespace console { friend class detail::ParserAccess; }; + /** \brief Single parsed console command - /** \brief + Every command parsed is returned in a ParseCommandInfo instance. This information is purely + taken from the parser; no semantic information is attached at this point and the config/console + node tree is not involved in any way. ParseCommandInfo consists of + + \li the type of command: built-in or normal command represented by a possibly relative path + into the command tree. + \li the command + \li the arguments. Every argument consists of a range of ArgumentToken instances. 
+ + \ingroup console_parser + + \todo Completely change the 'arguments()' member implementation: let the parser just + generate a flat list of tokens and implement an 'argument iterator' with the following + features: 1. return argument ranges, automatically detecting parentheses 2. trying to + increment the iterator beyond its range just throws an argument syntax error. For this + to work, the parser needs to not drop the outermost '()' pair 3. detect bad parentheses + (should not be necessary since the parser already does this). This allows this + same iterator to be used to parse nested complex arguments. */ class ParseCommandInfo { @@ -89,13 +275,25 @@ namespace console { BuiltinLS, BuiltinPUSHD, BuiltinPOPD, - BuiltinEXIT }; - - BuiltinCommand builtin() const; - CommandPathRange commandPath() const; - ArgumentsRange arguments() const; - TokensRange tokens() const; - + BuiltinEXIT, + BuiltinHELP }; + + BuiltinCommand builtin() const; ///< Command type + /**< \returns \c NoBuiltin, if the command is an ordinary + command, otherwise the id of the built-in command */ + CommandPathRange commandPath() const; ///< Command path + /**< This is the path to the command if it is not a built-in + command. Every element of the returned range + constitutes one path element. If the first element is + empty, the path is an absolute path, otherwise it is + relative. If the last element is an empty string, the + path ends with a '/' char. */ + ArgumentsRange arguments() const; ///< Command arguments + /**< The returned range contains one TokensRange for each + argument. */ + TokensRange tokens() const; ///< All argument tokens + /**< The returned range contains \e all argument tokens in a + single range not divided into separate arguments. 
+ */ protected: private: @@ -122,9 +320,27 @@ namespace console { friend class detail::ParserAccess; }; + /** \brief Output ParseCommandInfo instance + \related ParseCommandInfo + */ std::ostream & operator<<(std::ostream & stream, ParseCommandInfo const & info); - /** \brief + /** \brief Parse commands + + This class implements a parser for the console/config language. It supports parsing strings + as well as files. For every parsed command, a callback function is called. + + \implementation The implementation is based on Boost.Spirit. See the file \ref Parse.ih for + the formal language grammar. + + \implementation Parsing an arbitrary iostream is not supported since arbitrary streams are + not seekable. If this is needed, it can however be provided using stream iterators and + some special iterator adaptors from Boost.Spirit. However, the amount of backtracking + needs to be analyzed before this is viable. + + \todo Implement more detailed error reporting and error recovery. + + \ingroup console_parser */ class CommandParser : boost::noncopyable @@ -145,8 +361,10 @@ namespace console { ///@} /////////////////////////////////////////////////////////////////////////// - bool parse(std::string command, Callback cb); - bool parseFile(std::string filename, Callback cb); + bool parse(std::string command, Callback cb); ///< Parse string + bool parseFile(std::string filename, Callback cb); ///< Parse file + /**< \throws SystemException if the file cannot be + read. */ private: struct Impl;