X-Git-Url: http://g0dil.de/git?a=blobdiff_plain;f=Console%2FParse.hh;h=892c5222e9bf002a23345a5fff23939929563d34;hb=456ee576285b76aa46240f8001f426757810dcc1;hp=a4e452c57d02a9bda01a7188d7a0a276be0fa49f;hpb=31fb20bd535cd2d65595a7d37bd100ff16f13df8;p=senf.git
diff --git a/Console/Parse.hh b/Console/Parse.hh
index a4e452c..892c522 100644
--- a/Console/Parse.hh
+++ b/Console/Parse.hh
@@ -26,13 +26,177 @@
#ifndef HH_Parse_
#define HH_Parse_ 1
+/** \defgroup console_parser The parser
+
+ The console/config library defines a simple language used to interact with the console or to
+ configure the application. The parser is not concerned about interpreting commands or
+ arguments, checking that a command exists or managing directories. The parser just takes the
+ input and parses it.
+
+ \autotoc
+
+ \section console_language The Language
+
+ The config/console language is used in configuration files and interactively at the
+ console. Some features of the language are more useful in config files, others at the
+ interactive console but the language is the same in both cases.
+
+ Let's start with a sample of the config/console language. The following is written as a
+ configuration file
+ \code
+ # My someserver configuration file
+
+ /server/port 1234;
+
+ /logger/targets {
+ console {
+ accept senf::log::Debug IMPORTANT;
+ accept server::ServerLog CRITICAL;
+ }
+
+ provide serverlog senf::log::FileTarget "/var/log/server.log";
+ serverlog {
+ reject senf::log::Debug senf::Console::Server NOTICE;
+ accept senf::log::Debug NOTICE;
+ accept server::ServerLog;
+ }
+ }
+
+ /server/stuffing (UDPPacket x"01 02 03 04");
+ /server/allow_hosts 10.1.2.3 # our internal server
+ 10.2.3.4 10.4.3.5 # client workstations
+ ;
+
+ /help/infoUrl "http://senf.j32.de/src/doc";
+ \endcode
+
+ The interactive syntax is the same with some notes:
+ \li All commands must be complete on a single line. This includes grouping constructs which must
+ be closed on the same line they are opened.
+ \li The last ';' is optional. However, multiple commands may be entered on a single line when
+ they are separated by ';'.
+ \li An empty line on the interactive console will repeat the last command.
+
+ The language consists of a small number of syntactic entities:
+
+ \subsection console_special_chars Special characters
+
+ These are characters, which have a special meaning. Some are used internally, others are just
+ returned as punctuation tokens
+
+
+ / | path component separator |
+ ( ) | argument grouping |
+ { } | directory grouping |
+ ; | command terminator |
+ , = | punctuation tokens |
+
+
+ \subsection console_basic Basic elements
+
+ A word is \e any sequence of consecutive characters which does not include any special
+ character. Examples for words are thus
+
+ 12.34
+ jens@fokus.fraunhofer.de
+ eth0
+ 1>2
+
+
+ The following are \e not valid words:
+
+ a/b/c
+ a,b
+
+
+ A string literal is just that: A double-quoted string (C/C++ style) possibly with
+ embedded escape chars:
+
+ "\"foo\nbar\""
+ "\x04test"
+
+
+ A hex-string literal is used to represent binary data. It looks like a string which has
+ only hexadecimal bytes or whitespace as contents (comments and newlines are Ok when not read
+ from the interactive console)
+
+ x"01 02 03 0405"
+ x"01 02 # ID header
+ 0405 # payload
+ "
+
+
+ A token is a \e word, \e string or \e hex-string, or a single special character (that's
+ true, any special character is allowed as a token). '(' and ')' must be properly nested.
+
+ A path is a sequence of \e words separated by '/' (and optional whitespace). A path may
+ have an optional initial and/or a terminating '/'.
+
+ a/b/c
+ foo / bar /
+ /server
+
+
+ \subsection console_statements Statements
+
+ There are several types of statements:
+ \li The bulk of all statements are \e path statements
+ \li There are some \e built-in statements which are mostly useful at the interactive console
+ \li A special form of statement is the directory group
+
+ A path statement consists of a (possibly relative) path followed by any number of
+ arguments and terminated with a ';' (or end-of-input)
+
+ /path/to/command arg1 "arg2" (complex=(1 2) another) ;
+
+ Every argument is either
+ \li A single word, string or hex-string
+ \li or a parenthesized list of tokens.
+
+ So above command has three arguments: 'arg1', 'arg2' (a single token each) and one argument with
+ the 7 tokens 'complex', '=', '(', '1', '2', ')', 'another'. The interpretation of the arguments
+ is completely up to the command.
+
+ A built-in statement is one of
+
+
+ \c cd \e path | Change current directory |
+ \c ls [ \e path ] | List contents of \e path or current directory |
+ \c exit | Exit interactive console |
+ \c help [ \e path ] | Show help for \e path or current directory |
+
+
+ A directory group statement is a block of statements all executed relatively to a fixed
+ directory.
+
+ /some/path {
+ statement ;
+ . . .
+ }
+
+ At the beginning of the block, the current directory is saved and the directory is changed to
+ the given directory. All commands are executed and at the end of the block, the saved directory
+ is restored.
+
+ \section console_parse_api The parser API
+
+ The senf::console::CommandParser is responsible for taking text input and turning it into a
+ sequence of senf::console::ParseCommandInfo structures. The structures are returned by passing
+ them successively to a callback function.
+
+ Every statement is returned as a senf::console::ParseCommandInfo instance. Directory groups are
+ handled specially: They are divided into two special built-in commands called PUSHD and POPD.
+ */
+
// Custom includes
#include
#include
#include
#include
#include
+#include
#include
+#include "../Utils/safe_bool.hh"
//#include "Parse.mpp"
///////////////////////////////hh.p////////////////////////////////////////
@@ -42,43 +206,94 @@ namespace console {
namespace detail { struct ParserAccess; }
- /** \brief
+ /** \brief Single argument token
+
+ All command arguments are split into tokens by the parser. Each token is returned as an
+ ArgumentToken instance.
+
+ \ingroup console_parser
*/
class ArgumentToken
{
public:
- std::string const & value() const;
+ enum TokenType {
+ PathSeparator = 0x0001,
+ ArgumentGroupOpen = 0x0002,
+ ArgumentGroupClose = 0x0004,
+ DirectoryGroupOpen = 0x0008,
+ DirectoryGroupClose = 0x0010,
+ CommandTerminator = 0x0020,
+ OtherPunctuation = 0x0040,
+ BasicString = 0x0080,
+ HexString = 0x0100,
+ Word = 0x0200
+ };
+
+ enum TokenGroup {
+ ArgumentGrouper = ArgumentGroupOpen
+ | ArgumentGroupClose,
+
+ DirectoryGrouper = DirectoryGroupOpen
+ | DirectoryGroupClose,
+
+ Punctuation = DirectoryGroupOpen
+ | DirectoryGroupClose
+ | PathSeparator
+ | CommandTerminator
+ | OtherPunctuation,
+
+ String = BasicString
+ | HexString,
+
+ SimpleArgument = Word
+ | BasicString
+ | HexString
+ };
+
+ std::string const & value() const; ///< String value of token
+ /**< This value is properly unquoted */
+
+ TokenType type() const; ///< Token type
+
+ bool is(unsigned tokens) const; ///< Check, whether tokens type matches \a tokens
+ /**< \a tokens is a bit-mask of token types to check. */
protected:
private:
- explicit ArgumentToken(std::string token);
+ ArgumentToken(TokenType type, std::string token);
+ TokenType type_;
std::string token_;
friend class detail::ParserAccess;
};
+ /** \brief Single parsed console command
+
+ Every command parsed is returned in a ParseCommandInfo instance. This information is purely
+ taken from the parser, no semantic information is attached at this point, the config/console
+ node tree is not involved in any why. ParseCommandInfo consist of
+
+ \li the type of command: built-in or normal command represented by a possibly relative path
+ into the command tree.
+ \li the command
+ \li the arguments. Every argument consists of a range of ArgumentToken instances.
- /** \brief
+ \ingroup console_parser
*/
class ParseCommandInfo
{
typedef std::vector Tokens;
typedef std::vector CommandPath;
-
+
public:
+ class ArgumentIterator;
+
typedef CommandPath::const_iterator path_iterator;
typedef Tokens::const_iterator token_iterator;
- typedef boost::iterator_range argument_value_type;
-
-
- private:
- typedef std::vector Arguments;
-
- public:
- typedef Arguments::const_iterator argument_iterator;
- typedef Arguments::size_type size_type;
+ typedef ArgumentIterator argument_iterator;
+ typedef Tokens::size_type size_type;
typedef boost::iterator_range CommandPathRange;
typedef boost::iterator_range ArgumentsRange;
@@ -89,42 +304,220 @@ namespace console {
BuiltinLS,
BuiltinPUSHD,
BuiltinPOPD,
- BuiltinEXIT };
-
- BuiltinCommand builtin() const;
- CommandPathRange commandPath() const;
- ArgumentsRange arguments() const;
- TokensRange tokens() const;
-
+ BuiltinEXIT,
+ BuiltinHELP };
+
+ BuiltinCommand builtin() const; ///< Command type
+ /**< \returns \c NoBuiltin, if the command is an ordinary
+ command, otherwise the id of the built-in command */
+ CommandPathRange commandPath() const; ///< Command path
+ /**< This is the path to the command if it is not a built-in
+ command. Every element of the returned range
+ constitutes one path element. If the first element is
+ empty, the path is an absolute path, otherwise it is
+ relative. If the last element is an empty string, the
+ path ends with a '/' char. */
+ ArgumentsRange arguments() const; ///< Command arguments
+ /**< The returned range contains one TokensRange for each
+ argument. */
+ TokensRange tokens() const; ///< All argument tokens
+ /**< The returned range contains \e all argument tokens in a
+ single range not divided into separate arguments. */
protected:
private:
void init();
void setBuiltin(BuiltinCommand builtin);
void setCommand(std::vector & commandPath);
- void startArgument();
- void endArgument();
void addToken(ArgumentToken const & token);
- void finalize();
struct MakeRange;
std::vector commandPath_;
-
- typedef std::pair TempArgumentRange;
- typedef std::vector TempArguments;
-
BuiltinCommand builtin_;
Tokens tokens_;
- Arguments arguments_;
- TempArguments tempArguments_;
friend class detail::ParserAccess;
};
+ /** \brief Iterator parsing argument groups
+
+ This special iterator parses a token range returned by the parser into argument ranges. An
+ argument range is either a single token or it is a range of tokens enclosed in matching
+ parenthesis. The ParseCommandInfo::arguments() uses this iterator type. To recursively parse
+ complex arguments, you can however use this iterator to divide a multi-token argument into
+ further argument groups (e.g. to parse a list or vector of items).
+
+ This iterator is a bidirectional iterator \e not a random access iterator.
+ */
+ class ParseCommandInfo::ArgumentIterator
+ : public boost::iterator_facade< ParseCommandInfo::ArgumentIterator,
+ ParseCommandInfo::TokensRange,
+ boost::bidirectional_traversal_tag,
+ ParseCommandInfo::TokensRange >
+ {
+ public:
+ ArgumentIterator();
+ explicit ArgumentIterator(ParseCommandInfo::TokensRange::iterator i);
+
+ private:
+ reference dereference() const;
+ bool equal(ArgumentIterator const & other) const;
+ void increment();
+ void decrement();
+
+ mutable ParseCommandInfo::TokensRange::iterator b_;
+ mutable ParseCommandInfo::TokensRange::iterator e_;
+
+ void setRange() const;
+
+ friend class boost::iterator_core_access;
+ friend class ParseCommandInfo;
+ };
+
+ /** \brief Syntax error parsing command arguments exception
+
+ All errors while parsing the arguments of a command must be signaled by throwing an instance
+ of SyntaxErrorException. This is important, so command overloading works.
+ */
+ struct SyntaxErrorException : public std::exception
+ {
+ explicit SyntaxErrorException(std::string const & msg = "");
+ virtual ~SyntaxErrorException() throw();
+
+ virtual char const * what() const throw();
+ std::string const & message() const;
+
+ private:
+ std::string message_;
+ };
+
+ /** \brief Wrapper checking argument iterator access for validity
+
+ CheckedArgumentIteratorWrapper is a wrapper around a range of arguments parsed using the
+ ParseCommandInfo::ArgumentIterator. It is used to parse arguments either in a command
+ (registered with manual argument parsing) or when defining a custom parser.
+ \code
+ void fn(std::ostream & out, senf::console::ParseCommandInfo command)
+ {
+ std:;string arg1;
+ unsigned arg2 (0);
+
+ {
+ senf::console::CheckedArgumentIteratorWrapper arg (command.arguments());
+ senf::console::parse( *(arg++), arg1 );
+ senf::console::parse( *(arg++), arg2 );
+ }
+
+ // ...
+ }
+ \endcode
+
+ To use the wrapper, you must ensure that:
+ \li You increment the iterator \e past all arguments you parse. The iterator must point to
+ the end of the range when parsing is complete.
+ \li The iterator wrapper is destroyed after parsing but before executing the command itself
+ begins.
+
+ Accessing a non-existent argument or failing to parse all arguments will raise a
+ senf::console::SyntaxErrorException.
+
+ \see \link console_arg_custom Example customer parser \endlink
+ */
+ class CheckedArgumentIteratorWrapper
+ : boost::noncopyable,
+ public boost::iterator_facade< CheckedArgumentIteratorWrapper,
+ ParseCommandInfo::TokensRange,
+ boost::forward_traversal_tag,
+ ParseCommandInfo::TokensRange >,
+ public senf::safe_bool
+
+ {
+ typedef boost::iterator_facade< CheckedArgumentIteratorWrapper,
+ ParseCommandInfo::TokensRange,
+ boost::forward_traversal_tag,
+ ParseCommandInfo::TokensRange > IteratorFacade;
+
+ public:
+ explicit CheckedArgumentIteratorWrapper(
+ ParseCommandInfo::ArgumentsRange const & range,
+ std::string const & msg = "invalid number of arguments");
+ ///< Make wrapper from ArgumentsRange
+ /**< This constructs a wrapper from a
+ ParseCommandInfo::ArgumentsRange.
+ \param[in] range Range of arguments to parse
+ \param[in] msg Error message */
+ explicit CheckedArgumentIteratorWrapper(
+ ParseCommandInfo::TokensRange const & range,
+ std::string const & msg = "invalid number of arguments");
+ ///< Make wrapper from TokensRange
+ /**< This constructs a wrapper from a
+ ParseCommandInfo::TokensRange. The TokensRange is first
+ converted into an ParseCommandInfo::ArgumentsRange
+ which is then wrapped.
+ \param[in] range Range of tokens to parse
+ \param[in] msg Error message */
+
+ ~CheckedArgumentIteratorWrapper(); ///< Check, if all arguments are parsed
+ /**< The destructor validates, that all arguments are parsed
+ correctly when leaving the scope, in which the wrapper
+ is instantiated normally (not by an exception).
+
+ \warning This destructor will throw a
+ SyntaxErrorException, if not all arguments are parsed
+ and when no other exception is in progress. */
+
+ operator ParseCommandInfo::ArgumentIterator();
+ ///< Use wrapper as ParseCommandInfo::ArgumentIterator
+
+ bool boolean_test() const; ///< \c true, if more arguments are available
+ bool done() const; ///< \c true, if all arguments are parsed
+
+ void clear(); ///< Set range empty
+ /**< This call will point the current iterator to the end of
+ the tokens range.
+ \post done() == \c true; */
+
+ bool operator==(ParseCommandInfo::ArgumentIterator const & other) const;
+ ///< Compare wrapper against ArgumentIterator
+ bool operator!=(ParseCommandInfo::ArgumentIterator const & other) const;
+ ///< Compare wrapper against ArgumentIterator
+
+ using IteratorFacade::operator++;
+ ParseCommandInfo::ArgumentIterator operator++(int);
+
+ private:
+ reference dereference() const;
+ void increment();
+
+ ParseCommandInfo::ArgumentIterator i_;
+ ParseCommandInfo::ArgumentIterator e_;
+ std::string msg_;
+
+ friend class boost::iterator_core_access;
+ };
+
+ /**< \brief Output ParseCommandInfo instance
+ \related ParseCommandInfo
+ */
std::ostream & operator<<(std::ostream & stream, ParseCommandInfo const & info);
- /** \brief
+ /** \brief Parse commands
+
+ This class implements a parser for the console/config language. It supports parsing strings
+ as well as files. For every parsed command, a callback function is called.
+
+ \implementation The implementation is based on Boost.Spirit. See the file \ref Parse.ih for
+ the formal language grammar.
+
+ \implementation Parsing an arbitrary iostream is not supported since arbitrary streams are
+ not seekable. If this is needed, it can however be provided using stream iterators and
+ some special iterator adaptors from Boost.Spirit. However, the amount of backtracking
+ needs to be analyzed before this is viable.
+
+ \todo Implement more detailed error reporting and error recovery.
+
+ \ingroup console_parser
*/
class CommandParser
: boost::noncopyable
@@ -145,8 +538,10 @@ namespace console {
///@}
///////////////////////////////////////////////////////////////////////////
- bool parse(std::string command, Callback cb);
- bool parseFile(std::string filename, Callback cb);
+ bool parse(std::string command, Callback cb); ///< Parse string
+ bool parseFile(std::string filename, Callback cb); ///< Parse file
+ /**< \throws SystemException if the file cannot be
+ read. */
private:
struct Impl;