From: g0dil Date: Tue, 3 Mar 2009 22:10:24 +0000 (+0000) Subject: Scheduler: Add watchdog/signal documentation X-Git-Url: http://g0dil.de/git?a=commitdiff_plain;h=ddfdc16c9d4500968376c6ae6c75f9c0ba49c977;p=senf.git Scheduler: Add watchdog/signal documentation Scheduler: Rename watchdog scheduler functions git-svn-id: https://svn.berlios.de/svnroot/repos/senf/trunk@1142 270642c3-0616-0410-b53a-bc976706d245 --- diff --git a/Scheduler/FIFORunner.cc b/Scheduler/FIFORunner.cc index b39b5c2..a6fb824 100644 --- a/Scheduler/FIFORunner.cc +++ b/Scheduler/FIFORunner.cc @@ -40,7 +40,7 @@ prefix_ senf::scheduler::detail::FIFORunner::FIFORunner() : tasks_ (), next_ (tasks_.end()), watchdogRunning_ (false), watchdogMs_ (1000), - watchdogCount_(0), hangCount_ (0) + watchdogAbort_ (false), watchdogCount_(0), hangCount_ (0) { struct sigevent ev; ::memset(&ev, 0, sizeof(ev)); @@ -75,18 +75,20 @@ prefix_ senf::scheduler::detail::FIFORunner::~FIFORunner() prefix_ void senf::scheduler::detail::FIFORunner::startWatchdog() { - struct itimerspec timer; - ::memset(&timer, 0, sizeof(timer)); - - timer.it_interval.tv_sec = watchdogMs_ / 1000; - timer.it_interval.tv_nsec = (watchdogMs_ % 1000) * 1000000ul; - timer.it_value.tv_sec = timer.it_interval.tv_sec; - timer.it_value.tv_nsec = timer.it_interval.tv_nsec; - - if (timer_settime(watchdogId_, 0, &timer, 0) < 0) - SENF_THROW_SYSTEM_EXCEPTION("timer_settime()"); + if (watchdogMs_ > 0) { + struct itimerspec timer; + ::memset(&timer, 0, sizeof(timer)); + + timer.it_interval.tv_sec = watchdogMs_ / 1000; + timer.it_interval.tv_nsec = (watchdogMs_ % 1000) * 1000000ul; + timer.it_value.tv_sec = timer.it_interval.tv_sec; + timer.it_value.tv_nsec = timer.it_interval.tv_nsec; + + if (timer_settime(watchdogId_, 0, &timer, 0) < 0) + SENF_THROW_SYSTEM_EXCEPTION("timer_settime()"); - watchdogRunning_ = true; + watchdogRunning_ = true; + } } prefix_ void senf::scheduler::detail::FIFORunner::stopWatchdog() @@ -220,6 +222,8 @@ prefix_ void 
senf::scheduler::detail::FIFORunner::watchdog(int, siginfo_t * si, write(1, runner.runningBacktrace_.c_str(), runner.runningBacktrace_.size()); #endif write(1, "\n", 1); + if (runner.watchdogAbort_) + assert(false); } } } diff --git a/Scheduler/FIFORunner.cci b/Scheduler/FIFORunner.cci index 04afdcd..5eea900 100644 --- a/Scheduler/FIFORunner.cci +++ b/Scheduler/FIFORunner.cci @@ -123,6 +123,17 @@ prefix_ unsigned senf::scheduler::detail::FIFORunner::taskTimeout() return watchdogMs_; } +prefix_ void senf::scheduler::detail::FIFORunner::abortOnTimeout(bool flag) +{ + watchdogAbort_ = flag; +} + +prefix_ bool senf::scheduler::detail::FIFORunner::abortOnTimeout() + const +{ + return watchdogAbort_; +} + prefix_ unsigned senf::scheduler::detail::FIFORunner::hangCount() { unsigned hc (hangCount_); diff --git a/Scheduler/FIFORunner.hh b/Scheduler/FIFORunner.hh index 0573d9a..39e2e3a 100644 --- a/Scheduler/FIFORunner.hh +++ b/Scheduler/FIFORunner.hh @@ -99,6 +99,8 @@ namespace detail { void taskTimeout(unsigned ms); unsigned taskTimeout() const; + void abortOnTimeout(bool flag); + bool abortOnTimeout() const; void startWatchdog(); void stopWatchdog(); @@ -137,6 +139,7 @@ namespace detail { timer_t watchdogId_; bool watchdogRunning_; unsigned watchdogMs_; + bool watchdogAbort_; std::string runningName_; # ifdef SENF_DEBUG std::string runningBacktrace_; diff --git a/Scheduler/Scheduler.cci b/Scheduler/Scheduler.cci index 75dcb06..873b64f 100644 --- a/Scheduler/Scheduler.cci +++ b/Scheduler/Scheduler.cci @@ -39,21 +39,31 @@ prefix_ senf::ClockService::clock_type senf::scheduler::eventTime() return scheduler::detail::FdManager::instance().eventTime(); } -prefix_ void senf::scheduler::taskTimeout(unsigned ms) +prefix_ void senf::scheduler::watchdogTimeout(unsigned ms) { scheduler::detail::FIFORunner::instance().taskTimeout(ms); } -prefix_ unsigned senf::scheduler::taskTimeout() +prefix_ unsigned senf::scheduler::watchdogTimeout() { return 
scheduler::detail::FIFORunner::instance().taskTimeout(); } -prefix_ unsigned senf::scheduler::hangCount() +prefix_ unsigned senf::scheduler::watchdogEvents() { return scheduler::detail::FIFORunner::instance().hangCount(); } +prefix_ void senf::scheduler::watchdogAbort(bool flag) +{ + scheduler::detail::FIFORunner::instance().abortOnTimeout(flag); +} + +prefix_ bool senf::scheduler::watchdogAbort() +{ + return scheduler::detail::FIFORunner::instance().abortOnTimeout(); +} + prefix_ void senf::scheduler::loresTimers() { detail::TimerDispatcher::instance().timerSource( diff --git a/Scheduler/Scheduler.hh b/Scheduler/Scheduler.hh index 033c016..29ba555 100644 --- a/Scheduler/Scheduler.hh +++ b/Scheduler/Scheduler.hh @@ -196,6 +196,30 @@ namespace senf { href="http://www.boost.org/doc/libs/1_36_0/libs/ptr_container/doc/ptr_container.html">Boost.PointerContainer for the pointer container library reference. + + \section sched_signals Signals and the Watchdog + + To secure against blocking callbacks, the %scheduler implementation includes a watchdog + timer. This timer will produce a warning message on the standard error stream when a single + callback is executing for more than the watchdog timeout value. Since the scheduler + implementation is completely single threaded, we cannot terminate the callback but at least we + can produce an informative message and optionally the program can be aborted. + + The watchdog is controlled using the watchdogTimeout(), watchdogEvents() and watchdogAbort() + functions. + + The watchdog is implemented using a free running interval timer. The watchdog signal must \e not + be blocked. If signals need to be blocked for some reason, those regions will not be checked by + the watchdog. If a callback blocks, the watchdog has no chance to interrupt the process. + + \warning Since the watchdog is free running for performance reasons, every callback must expect + signals to happen. 
Signals \e will certainly happen since the watchdog signal is generated + periodically (which does not necessarily generate a watchdog event ...) + + Additional signals may occur when using hires timers on kernel/glibc combinations which do + not support timerfd(). On such systems, hires timers are implemented using POSIX timers which + generate a considerable number of additional signals. + \todo Fix the file support to use threads (?) fork (?) and a pipe so it works reliably even over e.g. NFS. */ @@ -224,17 +248,30 @@ namespace scheduler { */ ClockService::clock_type eventTime(); - /** \brief Set task watchdog timeout */ - void taskTimeout(unsigned ms); + /** \brief Set watchdog timeout to \a ms milliseconds. + + Setting the watchdog timeout to 0 will disable the watchdog. + */ + void watchdogTimeout(unsigned ms); - /** \brief Current task watchdog timeout */ - unsigned taskTimeout(); + /** \brief Current watchdog timeout in milliseconds */ + unsigned watchdogTimeout(); /** \brief Number of watchdog events - calling hangCount() will reset the counter to 0 + calling watchdogEvents() will reset the counter to 0 + */ + unsigned watchdogEvents(); + + /** \brief Enable/disable abort on watchdog event. + + Calling watchdogAbort(\c true) will enable aborting the program execution on a watchdog + event. 
*/ - unsigned hangCount(); + void watchdogAbort(bool flag); + + /** \brief Get current watchdog abort on event status */ + bool watchdogAbort(); /** \brief Switch to using hi resolution timers diff --git a/Scheduler/Scheduler.test.cc b/Scheduler/Scheduler.test.cc index 08e9dc8..f328d32 100644 --- a/Scheduler/Scheduler.test.cc +++ b/Scheduler/Scheduler.test.cc @@ -298,7 +298,7 @@ void schedulerTest() SENF_CHECK_NO_THROW( timer1.action(&blockingHandler) ); SENF_CHECK_NO_THROW( timer1.timeout(senf::ClockService::now()) ); SENF_CHECK_NO_THROW( senf::scheduler::process() ); - BOOST_CHECK_EQUAL( senf::scheduler::hangCount(), 1u ); + BOOST_CHECK_EQUAL( senf::scheduler::watchdogEvents(), 1u ); } {