Scheduler: Add watchdog/signal documentation
g0dil [Tue, 3 Mar 2009 22:10:24 +0000 (22:10 +0000)]
Scheduler: Rename watchdog scheduler functions

git-svn-id: https://svn.berlios.de/svnroot/repos/senf/trunk@1142 270642c3-0616-0410-b53a-bc976706d245

Scheduler/FIFORunner.cc
Scheduler/FIFORunner.cci
Scheduler/FIFORunner.hh
Scheduler/Scheduler.cci
Scheduler/Scheduler.hh
Scheduler/Scheduler.test.cc

index b39b5c2..a6fb824 100644 (file)
@@ -40,7 +40,7 @@
 
 prefix_ senf::scheduler::detail::FIFORunner::FIFORunner()
     : tasks_ (), next_ (tasks_.end()), watchdogRunning_ (false), watchdogMs_ (1000), 
-      watchdogCount_(0), hangCount_ (0)
+      watchdogAbort_ (false), watchdogCount_(0), hangCount_ (0)
 {
     struct sigevent ev;
     ::memset(&ev, 0, sizeof(ev));
@@ -75,18 +75,20 @@ prefix_ senf::scheduler::detail::FIFORunner::~FIFORunner()
 
 prefix_ void senf::scheduler::detail::FIFORunner::startWatchdog()
 {
-    struct itimerspec timer;
-    ::memset(&timer, 0, sizeof(timer));
-
-    timer.it_interval.tv_sec = watchdogMs_ / 1000;
-    timer.it_interval.tv_nsec = (watchdogMs_ % 1000) * 1000000ul;
-    timer.it_value.tv_sec = timer.it_interval.tv_sec;
-    timer.it_value.tv_nsec = timer.it_interval.tv_nsec;
-
-    if (timer_settime(watchdogId_, 0, &timer, 0) < 0)
-        SENF_THROW_SYSTEM_EXCEPTION("timer_settime()");
+    if (watchdogMs_ > 0) {
+        struct itimerspec timer;
+        ::memset(&timer, 0, sizeof(timer));
+
+        timer.it_interval.tv_sec = watchdogMs_ / 1000;
+        timer.it_interval.tv_nsec = (watchdogMs_ % 1000) * 1000000ul;
+        timer.it_value.tv_sec = timer.it_interval.tv_sec;
+        timer.it_value.tv_nsec = timer.it_interval.tv_nsec;
+        
+        if (timer_settime(watchdogId_, 0, &timer, 0) < 0)
+            SENF_THROW_SYSTEM_EXCEPTION("timer_settime()");
 
-    watchdogRunning_ = true;
+        watchdogRunning_ = true;
+    }
 }
 
 prefix_ void senf::scheduler::detail::FIFORunner::stopWatchdog()
@@ -220,6 +222,8 @@ prefix_ void senf::scheduler::detail::FIFORunner::watchdog(int, siginfo_t * si,
             write(1, runner.runningBacktrace_.c_str(), runner.runningBacktrace_.size());
 #endif
             write(1, "\n", 1);
+            if (runner.watchdogAbort_)
+                assert(false);
         }
     }
 }
index 04afdcd..5eea900 100644 (file)
@@ -123,6 +123,17 @@ prefix_ unsigned senf::scheduler::detail::FIFORunner::taskTimeout()
     return watchdogMs_;
 }
 
+prefix_ void senf::scheduler::detail::FIFORunner::abortOnTimeout(bool flag)
+{
+    watchdogAbort_ = flag;
+}
+
+prefix_ bool senf::scheduler::detail::FIFORunner::abortOnTimeout()
+    const
+{
+    return watchdogAbort_;
+}
+
 prefix_ unsigned senf::scheduler::detail::FIFORunner::hangCount()
 {
     unsigned hc (hangCount_);
index 0573d9a..39e2e3a 100644 (file)
@@ -99,6 +99,8 @@ namespace detail {
 
         void taskTimeout(unsigned ms);
         unsigned taskTimeout() const;
+        void abortOnTimeout(bool flag);
+        bool abortOnTimeout() const;
 
         void startWatchdog();
         void stopWatchdog();
@@ -137,6 +139,7 @@ namespace detail {
         timer_t watchdogId_;
         bool watchdogRunning_;
         unsigned watchdogMs_;
+        bool watchdogAbort_;
         std::string runningName_;
 #   ifdef SENF_DEBUG
         std::string runningBacktrace_;
index 75dcb06..873b64f 100644 (file)
@@ -39,21 +39,31 @@ prefix_ senf::ClockService::clock_type senf::scheduler::eventTime()
     return scheduler::detail::FdManager::instance().eventTime();
 }
 
-prefix_ void senf::scheduler::taskTimeout(unsigned ms)
+prefix_ void senf::scheduler::watchdogTimeout(unsigned ms)
 {
     scheduler::detail::FIFORunner::instance().taskTimeout(ms);
 }
 
-prefix_ unsigned senf::scheduler::taskTimeout()
+prefix_ unsigned senf::scheduler::watchdogTimeout()
 {
     return scheduler::detail::FIFORunner::instance().taskTimeout();
 }
 
-prefix_ unsigned senf::scheduler::hangCount()
+prefix_ unsigned senf::scheduler::watchdogEvents()
 {
     return scheduler::detail::FIFORunner::instance().hangCount();
 }
 
+prefix_ void senf::scheduler::watchdogAbort(bool flag)
+{
+    scheduler::detail::FIFORunner::instance().abortOnTimeout(flag);
+}
+
+prefix_ bool senf::scheduler::watchdogAbort()
+{
+    return scheduler::detail::FIFORunner::instance().abortOnTimeout();
+}
+
 prefix_ void senf::scheduler::loresTimers()
 {
     detail::TimerDispatcher::instance().timerSource(
index 033c016..29ba555 100644 (file)
@@ -196,6 +196,30 @@ namespace senf {
     href="http://www.boost.org/doc/libs/1_36_0/libs/ptr_container/doc/ptr_container.html">Boost.PointerContainer</a>
     for the pointer container library reference.
 
+
+    \section sched_signals Signals and the Watchdog
+
+    To secure against blocking callbacks, the %scheduler implementation includes a watchdog
+    timer. This timer will produce a warning message on the standard error stream when a single
+    callback is executing for more than the watchdog timeout value. Since the scheduler
+    implementation is completely single threaded, we cannot terminate the callback but at least we
+    can produce an informative message and optionally the program can be aborted.
+
+    The watchdog is controlled using the watchdogTimeout(), watchdogEvents() and watchdogAbort()
+    functions. 
+
+    The watchdog is implemented using a free running interval timer. The watchdog signal must \e not
+    be blocked. If signals need to be blocked for some reason, those regions will not be checked by
+    the watchdog. If a callback blocks, the watchdog has no chance to interrupt the process.
+
+    \warning Since the watchdog is free running for performance reasons, every callback must expect
+        signals to happen. Signals \e will certainly happen since the watchdog signal is generated
+        periodically (which does not necessarily generate a watchdog event ...)
+
+    Additional signals may occur when using hires timers on kernel/glibc combinations which do
+    not support timerfd(). On such systems, hires timers are implemented using POSIX timers which
+    generate a considerable number of additional signals.
+
     \todo Fix the file support to use threads (?) fork (?) and a pipe so it works reliably even
         over e.g. NFS.
   */
@@ -224,17 +248,30 @@ namespace scheduler {
      */
     ClockService::clock_type eventTime(); 
 
-    /** \brief Set task watchdog timeout */
-    void taskTimeout(unsigned ms); 
+    /** \brief Set watchdog timeout to \a ms milliseconds.
+        
+        Setting the watchdog timeout to 0 will disable the watchdog.
+     */
+    void watchdogTimeout(unsigned ms); 
 
-    /** \brief Current task watchdog timeout */
-    unsigned taskTimeout(); 
+    /** \brief Current watchdog timeout in milliseconds */
+    unsigned watchdogTimeout(); 
 
     /** \brief Number of watchdog events 
 
-        calling hangCount() will reset the counter to 0
+        calling watchdogEvents() will reset the counter to 0
+     */
+    unsigned watchdogEvents(); 
+
+    /** \brief Enable/disable abort on watchdog event.
+        
+        Calling watchdogAbort(\c true) will enable aborting the program execution on a watchdog
+        event.
      */
-    unsigned hangCount(); 
+    void watchdogAbort(bool flag);
+
+    /** \brief Get current watchdog abort on event status */
+    bool watchdogAbort();
 
     /** \brief Switch to using hi resolution timers
         
index 08e9dc8..f328d32 100644 (file)
@@ -298,7 +298,7 @@ void schedulerTest()
         SENF_CHECK_NO_THROW( timer1.action(&blockingHandler) );
         SENF_CHECK_NO_THROW( timer1.timeout(senf::ClockService::now()) );
         SENF_CHECK_NO_THROW( senf::scheduler::process() );
-        BOOST_CHECK_EQUAL( senf::scheduler::hangCount(), 1u );
+        BOOST_CHECK_EQUAL( senf::scheduler::watchdogEvents(), 1u );
     }
 
     {