# CalejoControl/src/monitoring/watchdog.py

"""
Database Watchdog for Calejo Control Adapter.

Monitors database updates and triggers failsafe mode when updates stop,
preventing stale optimization plans from controlling pumps indefinitely.
"""

import asyncio
import structlog
from datetime import datetime, timedelta
from typing import Dict, Optional, Any

from src.database.flexible_client import FlexibleDatabaseClient

# Module-level structlog logger; DatabaseWatchdog emits all of its events through it.
logger = structlog.get_logger()


class DatabaseWatchdog:
    """
    Monitors database updates and triggers failsafe mode when updates stop.

    Safety Feature: If the optimization system stops updating plans for more
    than ``timeout_seconds`` (20 minutes by default), the affected pump is
    flagged as being in failsafe mode so that it is not left running on a
    stale optimization plan.

    State is tracked per ``(station_id, pump_id)`` pair in two dictionaries:
    ``last_update_times`` (newest plan timestamp seen) and ``failsafe_active``
    (whether failsafe is currently flagged). Activating failsafe currently
    logs the event and records it in the ``failsafe_events`` table; it does
    not yet push setpoints anywhere (see the TODO in ``_activate_failsafe``).
    """

    # Setpoint (Hz) reported when a pump's configuration has no usable
    # ``default_setpoint_hz`` value.
    FALLBACK_SETPOINT_HZ = 30.0

    def __init__(self, db_client: FlexibleDatabaseClient, alert_manager: Any, timeout_seconds: int = 1200):  # 20 minutes default
        """
        Create a watchdog that is not yet running.

        Args:
            db_client: Database client used to read pump plans / pump
                configuration and to record failsafe events.
            alert_manager: Alert manager instance. It is stored for later use;
                this class does not call it yet.
            timeout_seconds: Maximum age of a plan update before the pump is
                considered stale.
        """
        self.db_client = db_client
        # Kept on the instance (it used to be silently dropped) so alerting can
        # be wired in without changing the constructor signature.
        self.alert_manager = alert_manager
        self.timeout_seconds = timeout_seconds
        self.last_update_times: Dict[tuple, datetime] = {}  # (station_id, pump_id) -> last_update
        self.failsafe_active: Dict[tuple, bool] = {}
        self.running = False
        self.check_interval_seconds = 60  # Check every minute
        # Strong reference to the background task: the event loop only keeps
        # weak references, so an unreferenced task may be garbage-collected.
        self._monitor_task: Optional[asyncio.Task] = None

    async def start(self):
        """
        Start the watchdog monitoring.

        Runs one check immediately and then schedules the periodic loop.
        Calling ``start`` while the loop is already running is a no-op so that
        only one monitoring task exists per instance.
        """
        if self._monitor_task is not None and not self._monitor_task.done():
            logger.warning("database_watchdog_already_running")
            return

        self.running = True
        logger.info("database_watchdog_started", timeout_seconds=self.timeout_seconds)

        # Initial check
        await self._check_updates()

        # Start periodic monitoring (keep the handle so stop() can cancel it)
        self._monitor_task = asyncio.create_task(self._monitor_loop())

    async def stop(self):
        """
        Stop the watchdog monitoring.

        Cancels the background task (instead of letting it sleep out the
        remaining interval and run one more check) and waits for it to finish.
        """
        self.running = False

        task, self._monitor_task = self._monitor_task, None
        if task is not None and not task.done():
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                # Expected outcome of cancelling the monitor loop.
                pass

        logger.info("database_watchdog_stopped")

    async def _monitor_loop(self):
        """Main monitoring loop: sleep one interval, then run a check."""
        while self.running:
            try:
                await asyncio.sleep(self.check_interval_seconds)
                if not self.running:
                    # stop() was requested while sleeping; skip the extra check.
                    break
                await self._check_updates()
            except Exception as e:
                # Keep the loop alive on unexpected errors.
                logger.error("watchdog_monitor_loop_error", error=str(e))

    @staticmethod
    def _coerce_timestamp(value: Any) -> Optional[datetime]:
        """
        Convert a raw plan timestamp into a ``datetime``.

        ``datetime`` instances are returned unchanged and ISO-8601 strings are
        parsed with ``datetime.fromisoformat``. Anything else (including empty
        values and unparseable strings) yields ``None`` so the caller treats
        the plan as having no usable update time.
        """
        if not value:
            return None
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            try:
                return datetime.fromisoformat(value)
            except ValueError:
                return None
        return None

    @staticmethod
    def _seconds_since(timestamp: datetime) -> float:
        """
        Return the age of ``timestamp`` in seconds.

        "Now" is taken in the timestamp's own timezone when it is
        timezone-aware and as naive local time otherwise, so the subtraction
        never mixes naive and aware values.
        """
        return (datetime.now(timestamp.tzinfo) - timestamp).total_seconds()

    async def _ingest_plan(self, plan: Any):
        """Record the update time of one plan row and clear failsafe if it is fresh."""
        station_id = plan['station_id']
        pump_id = plan['pump_id']
        key = (station_id, pump_id)

        raw_timestamp = plan.get('plan_updated_at') or plan.get('plan_created_at')
        plan_updated_at = self._coerce_timestamp(raw_timestamp)

        if plan_updated_at is None:
            if raw_timestamp:
                logger.warning(
                    "watchdog_unparseable_plan_timestamp",
                    station_id=station_id,
                    pump_id=pump_id,
                    value=repr(raw_timestamp)
                )
            # No update time available - treat as no recent update
            self.last_update_times[key] = datetime.now() - timedelta(seconds=self.timeout_seconds + 1)
            return

        # Update last known update time
        self.last_update_times[key] = plan_updated_at

        # Only a *recent* update may clear failsafe. Clearing on any timestamp
        # made stale pumps flip off and back on again during every check.
        if (
            self.failsafe_active.get(key, False)
            and self._seconds_since(plan_updated_at) <= self.timeout_seconds
        ):
            await self._deactivate_failsafe(station_id, pump_id)

    async def _evaluate_staleness(self, key: tuple, last_update: datetime):
        """Activate failsafe for one pump if its last update is too old, and log near-timeout pumps."""
        station_id, pump_id = key
        time_since_update = self._seconds_since(last_update)

        if time_since_update > self.timeout_seconds and not self.failsafe_active.get(key, False):
            # Trigger failsafe mode
            await self._activate_failsafe(station_id, pump_id, time_since_update)

        # Log status for monitoring
        if time_since_update > self.timeout_seconds * 0.8:  # 80% of timeout
            logger.warning(
                "watchdog_update_stale",
                station_id=station_id,
                pump_id=pump_id,
                seconds_since_update=time_since_update,
                timeout_seconds=self.timeout_seconds
            )

    async def _check_updates(self):
        """
        Check for recent updates and trigger failsafe if needed.

        The check has two phases. First the latest plans are read and their
        update times recorded. Then every known pump is evaluated for
        staleness. The second phase always runs, even when the database read
        fails, because an unreachable database is exactly the situation in
        which already-known pumps must be allowed to time out. Errors are
        logged and never propagated; one bad row does not abort the check.
        """
        try:
            # Get latest pump plans to check for recent updates
            latest_plans = self.db_client.get_latest_pump_plans() or []
        except Exception as e:
            logger.error("watchdog_check_updates_failed", error=str(e))
            latest_plans = []

        for plan in latest_plans:
            try:
                await self._ingest_plan(plan)
            except Exception as e:
                logger.error("watchdog_check_updates_failed", error=str(e))

        # Check for stale updates (snapshot the items so the dict can change
        # safely while we iterate)
        for key, last_update in list(self.last_update_times.items()):
            try:
                await self._evaluate_staleness(key, last_update)
            except Exception as e:
                logger.error("watchdog_check_updates_failed", error=str(e))

    async def _activate_failsafe(self, station_id: str, pump_id: str, time_since_update: float):
        """
        Activate failsafe mode for a pump.

        The in-memory flag is set first, so the pump stays flagged even if the
        configuration lookup or event recording below fails. Errors are logged
        and not raised.

        Args:
            station_id: Station identifier
            pump_id: Pump identifier
            time_since_update: Seconds since the last plan update (for logging)
        """
        try:
            key = (station_id, pump_id)
            self.failsafe_active[key] = True

            # Get default setpoint from pump configuration
            pump_config = self.db_client.get_pump(station_id, pump_id)
            if pump_config:
                default_setpoint = pump_config.get('default_setpoint_hz')
                if default_setpoint is None:
                    # Covers both a missing key and an explicit NULL/None value.
                    default_setpoint = self.FALLBACK_SETPOINT_HZ

                # Log failsafe activation
                logger.critical(
                    "failsafe_mode_activated",
                    station_id=station_id,
                    pump_id=pump_id,
                    time_since_update_seconds=time_since_update,
                    default_setpoint_hz=default_setpoint
                )

                # Record failsafe event in database
                self._record_failsafe_event(station_id, pump_id, default_setpoint)

                # TODO: In Phase 3, this will trigger the SetpointManager to use default setpoints
                # For now, we just log the event

            else:
                logger.error(
                    "failsafe_activation_failed_no_pump_config",
                    station_id=station_id,
                    pump_id=pump_id
                )

        except Exception as e:
            logger.error(
                "failsafe_activation_failed",
                station_id=station_id,
                pump_id=pump_id,
                error=str(e)
            )

    async def _deactivate_failsafe(self, station_id: str, pump_id: str):
        """
        Deactivate failsafe mode for a pump.

        Clears the in-memory flag, logs the change and records a deactivation
        event. Errors are logged and not raised.
        """
        try:
            key = (station_id, pump_id)
            self.failsafe_active[key] = False

            logger.info(
                "failsafe_mode_deactivated",
                station_id=station_id,
                pump_id=pump_id
            )

            # Record failsafe deactivation in database
            self._record_failsafe_deactivation(station_id, pump_id)

        except Exception as e:
            logger.error(
                "failsafe_deactivation_failed",
                station_id=station_id,
                pump_id=pump_id,
                error=str(e)
            )

    def _record_failsafe_event(self, station_id: str, pump_id: str, default_setpoint: float):
        """
        Record failsafe activation in database.

        Best effort: a failed insert is logged and otherwise ignored.
        """
        # NOTE(review): this insert does not set event_type, while the
        # deactivation insert writes 'DEACTIVATED' - confirm against the
        # failsafe_events schema whether activations need an explicit value.
        try:
            query = """
                INSERT INTO failsafe_events
                (station_id, pump_id, default_setpoint, timestamp)
                VALUES (:station_id, :pump_id, :default_setpoint, :timestamp)
            """
            self.db_client.execute(query, {
                'station_id': station_id,
                'pump_id': pump_id,
                'default_setpoint': default_setpoint,
                'timestamp': datetime.now()
            })
        except Exception as e:
            logger.error("failed_to_record_failsafe_event", error=str(e))

    def _record_failsafe_deactivation(self, station_id: str, pump_id: str):
        """
        Record failsafe deactivation in database.

        Best effort: a failed insert is logged and otherwise ignored.
        """
        try:
            query = """
                INSERT INTO failsafe_events
                (station_id, pump_id, event_type, timestamp)
                VALUES (:station_id, :pump_id, 'DEACTIVATED', :timestamp)
            """
            self.db_client.execute(query, {
                'station_id': station_id,
                'pump_id': pump_id,
                'timestamp': datetime.now()
            })
        except Exception as e:
            logger.error("failed_to_record_failsafe_deactivation", error=str(e))

    def is_failsafe_active(self, station_id: str, pump_id: str) -> bool:
        """Check if failsafe mode is active for a pump (False for unknown pumps)."""
        key = (station_id, pump_id)
        return self.failsafe_active.get(key, False)

    def get_last_update_time(self, station_id: str, pump_id: str) -> Optional[datetime]:
        """Get the last known update time for a pump, or None if it was never seen."""
        key = (station_id, pump_id)
        return self.last_update_times.get(key)

    async def activate_failsafe_mode(self, station_id: str, pump_id: str, reason: str):
        """
        Manually activate failsafe mode for testing purposes.

        This method is intended for testing scenarios where failsafe mode
        needs to be triggered manually, rather than waiting for automatic
        detection of stale data.

        Args:
            station_id: Station identifier
            pump_id: Pump identifier
            reason: Reason for manual activation (for logging)
        """
        logger.info(
            "manual_failsafe_activation",
            station_id=station_id,
            pump_id=pump_id,
            reason=reason
        )
        # Report an age just past the timeout, as an automatic trigger would
        await self._activate_failsafe(station_id, pump_id, self.timeout_seconds + 1)

    async def activate_failsafe_mode_station(self, station_id: str, reason: str):
        """
        Manually activate failsafe mode for all pumps in a station.

        This method is intended for testing scenarios where station-wide
        failsafe mode needs to be triggered manually.

        Args:
            station_id: Station identifier
            reason: Reason for manual activation (for logging)
        """
        logger.info(
            "manual_failsafe_activation_station",
            station_id=station_id,
            reason=reason
        )
        # Get all pumps in the station (tolerate a client that returns None)
        pumps = self.db_client.get_pumps(station_id) or []
        for pump in pumps:
            await self.activate_failsafe_mode(station_id, pump['pump_id'], reason)

    async def clear_failsafe_mode(self, station_id: str, pump_id: str):
        """
        Manually clear failsafe mode for a pump.

        This method is intended for testing scenarios where failsafe mode
        needs to be cleared manually.

        Args:
            station_id: Station identifier
            pump_id: Pump identifier
        """
        logger.info(
            "manual_failsafe_clear",
            station_id=station_id,
            pump_id=pump_id
        )
        await self._deactivate_failsafe(station_id, pump_id)

    async def clear_failsafe_mode_station(self, station_id: str):
        """
        Manually clear failsafe mode for all pumps in a station.

        This method is intended for testing scenarios where station-wide
        failsafe mode needs to be cleared manually.

        Args:
            station_id: Station identifier
        """
        logger.info(
            "manual_failsafe_clear_station",
            station_id=station_id
        )
        # Get all pumps in the station (tolerate a client that returns None)
        pumps = self.db_client.get_pumps(station_id) or []
        for pump in pumps:
            await self.clear_failsafe_mode(station_id, pump['pump_id'])

    def get_status(self) -> Dict[str, Any]:
        """
        Get watchdog status information.

        Returns:
            A dictionary with the overall settings and counters plus, under
            ``pump_status``, one entry per monitored pump keyed by
            ``"<station_id>_<pump_id>"``.
        """
        status_info = {
            'running': self.running,
            'timeout_seconds': self.timeout_seconds,
            'check_interval_seconds': self.check_interval_seconds,
            'monitored_pumps': len(self.last_update_times),
            'failsafe_active_pumps': sum(self.failsafe_active.values()),
            'pump_status': {}
        }

        for key, last_update in self.last_update_times.items():
            station_id, pump_id = key
            time_since_update = self._seconds_since(last_update)

            if self.timeout_seconds > 0:
                timeout_percentage = min(100, (time_since_update / self.timeout_seconds) * 100)
            else:
                # A non-positive timeout means every pump is already past it;
                # also avoids dividing by zero.
                timeout_percentage = 100

            status_info['pump_status'][f"{station_id}_{pump_id}"] = {
                'last_update': last_update.isoformat(),
                'seconds_since_update': time_since_update,
                'failsafe_active': self.failsafe_active.get(key, False),
                'timeout_percentage': timeout_percentage
            }

        return status_info
Complete Phase 2: Safety Framework Implementation - Implement DatabaseWatchdog with 20-minute timeout detection and failsafe mode - Add EmergencyStopManager with system-wide and targeted emergency stop functionality - Create AlertManager with multi-channel alert delivery (email, SMS, webhook, SCADA) - Integrate emergency stop checking into SafetyLimitEnforcer (highest priority) - Add comprehensive unit tests for all new safety components - All 95 unit tests passing (100% success rate) Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 07:32:01 +00:00			`"""`
			`Database Watchdog for Calejo Control Adapter.`

			`Monitors database updates and triggers failsafe mode when updates stop,`
			`preventing stale optimization plans from controlling pumps indefinitely.`
			`"""`

			`import asyncio`
			`import structlog`
			`from datetime import datetime, timedelta`
			`from typing import Dict, Optional, Any`

Repository structure improvements and cleanup - Migrated all components to FlexibleDatabaseClient - Consolidated main application files into unified main.py - Fixed import path inconsistencies - Updated README with current implementation status - Cleaned up coverage directories - All 133 tests passing Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 13:11:17 +00:00			`from src.database.flexible_client import FlexibleDatabaseClient`
Complete Phase 2: Safety Framework Implementation - Implement DatabaseWatchdog with 20-minute timeout detection and failsafe mode - Add EmergencyStopManager with system-wide and targeted emergency stop functionality - Create AlertManager with multi-channel alert delivery (email, SMS, webhook, SCADA) - Integrate emergency stop checking into SafetyLimitEnforcer (highest priority) - Add comprehensive unit tests for all new safety components - All 95 unit tests passing (100% success rate) Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 07:32:01 +00:00
			`logger = structlog.get_logger()`


			`class DatabaseWatchdog:`
			`"""`
			`Monitors database updates and triggers failsafe mode when updates stop.`

			`Safety Feature: If optimization system stops updating plans for more than`
			`20 minutes, automatically revert to default safe setpoints to prevent`
			`pumps from running on stale optimization plans.`
			`"""`

Repository structure improvements and cleanup - Migrated all components to FlexibleDatabaseClient - Consolidated main application files into unified main.py - Fixed import path inconsistencies - Updated README with current implementation status - Cleaned up coverage directories - All 133 tests passing Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 13:11:17 +00:00			`def __init__(self, db_client: FlexibleDatabaseClient, alert_manager: Any, timeout_seconds: int = 1200): # 20 minutes default`
Complete Phase 2: Safety Framework Implementation - Implement DatabaseWatchdog with 20-minute timeout detection and failsafe mode - Add EmergencyStopManager with system-wide and targeted emergency stop functionality - Create AlertManager with multi-channel alert delivery (email, SMS, webhook, SCADA) - Integrate emergency stop checking into SafetyLimitEnforcer (highest priority) - Add comprehensive unit tests for all new safety components - All 95 unit tests passing (100% success rate) Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 07:32:01 +00:00			`self.db_client = db_client`
			`self.timeout_seconds = timeout_seconds`
			`self.last_update_times: Dict[tuple, datetime] = {} # (station_id, pump_id) -> last_update`
			`self.failsafe_active: Dict[tuple, bool] = {}`
			`self.running = False`
			`self.check_interval_seconds = 60 # Check every minute`

			`async def start(self):`
			`"""Start the watchdog monitoring."""`
			`self.running = True`
			`logger.info("database_watchdog_started", timeout_seconds=self.timeout_seconds)`

			`# Initial check`
			`await self._check_updates()`

			`# Start periodic monitoring`
			`asyncio.create_task(self._monitor_loop())`

			`async def stop(self):`
			`"""Stop the watchdog monitoring."""`
			`self.running = False`
			`logger.info("database_watchdog_stopped")`

			`async def _monitor_loop(self):`
			`"""Main monitoring loop."""`
			`while self.running:`
			`try:`
			`await asyncio.sleep(self.check_interval_seconds)`
			`await self._check_updates()`
			`except Exception as e:`
			`logger.error("watchdog_monitor_loop_error", error=str(e))`

			`async def _check_updates(self):`
			`"""Check for recent updates and trigger failsafe if needed."""`
			`try:`
			`# Get latest pump plans to check for recent updates`
			`latest_plans = self.db_client.get_latest_pump_plans()`

			`current_time = datetime.now()`

			`for plan in latest_plans:`
			`key = (plan['station_id'], plan['pump_id'])`
			`plan_updated_at = plan.get('plan_updated_at') or plan.get('plan_created_at')`

			`if plan_updated_at:`
			`# Update last known update time`
			`self.last_update_times[key] = plan_updated_at`

			`# Check if failsafe should be deactivated`
			`if self.failsafe_active.get(key, False):`
			`# Recent update detected - deactivate failsafe`
			`await self._deactivate_failsafe(plan['station_id'], plan['pump_id'])`
			`else:`
			`# No update time available - treat as no recent update`
			`self.last_update_times[key] = current_time - timedelta(seconds=self.timeout_seconds + 1)`

			`# Check for stale updates`
			`for key, last_update in self.last_update_times.items():`
			`station_id, pump_id = key`
			`time_since_update = (current_time - last_update).total_seconds()`

			`if time_since_update > self.timeout_seconds and not self.failsafe_active.get(key, False):`
			`# Trigger failsafe mode`
			`await self._activate_failsafe(station_id, pump_id, time_since_update)`

			`# Log status for monitoring`
			`if time_since_update > self.timeout_seconds * 0.8: # 80% of timeout`
			`logger.warning(`
			`"watchdog_update_stale",`
			`station_id=station_id,`
			`pump_id=pump_id,`
			`seconds_since_update=time_since_update,`
			`timeout_seconds=self.timeout_seconds`
			`)`

			`except Exception as e:`
			`logger.error("watchdog_check_updates_failed", error=str(e))`

			`async def _activate_failsafe(self, station_id: str, pump_id: str, time_since_update: float):`
			`"""Activate failsafe mode for a pump."""`
			`try:`
			`key = (station_id, pump_id)`
			`self.failsafe_active[key] = True`

			`# Get default setpoint from pump configuration`
			`pump_config = self.db_client.get_pump(station_id, pump_id)`
			`if pump_config:`
			`default_setpoint = pump_config.get('default_setpoint_hz', 30.0)`

			`# Log failsafe activation`
			`logger.critical(`
			`"failsafe_mode_activated",`
			`station_id=station_id,`
			`pump_id=pump_id,`
			`time_since_update_seconds=time_since_update,`
			`default_setpoint_hz=default_setpoint`
			`)`

			`# Record failsafe event in database`
			`self._record_failsafe_event(station_id, pump_id, default_setpoint)`

			`# TODO: In Phase 3, this will trigger the SetpointManager to use default setpoints`
			`# For now, we just log the event`

			`else:`
			`logger.error(`
			`"failsafe_activation_failed_no_pump_config",`
			`station_id=station_id,`
			`pump_id=pump_id`
			`)`

			`except Exception as e:`
			`logger.error(`
			`"failsafe_activation_failed",`
			`station_id=station_id,`
			`pump_id=pump_id,`
			`error=str(e)`
			`)`

			`async def _deactivate_failsafe(self, station_id: str, pump_id: str):`
			`"""Deactivate failsafe mode for a pump."""`
			`try:`
			`key = (station_id, pump_id)`
			`self.failsafe_active[key] = False`

			`logger.info(`
			`"failsafe_mode_deactivated",`
			`station_id=station_id,`
			`pump_id=pump_id`
			`)`

			`# Record failsafe deactivation in database`
			`self._record_failsafe_deactivation(station_id, pump_id)`

			`except Exception as e:`
			`logger.error(`
			`"failsafe_deactivation_failed",`
			`station_id=station_id,`
			`pump_id=pump_id,`
			`error=str(e)`
			`)`

			`def _record_failsafe_event(self, station_id: str, pump_id: str, default_setpoint: float):`
			`"""Record failsafe activation in database."""`
			`try:`
			`query = """`
			`INSERT INTO failsafe_events`
Complete Phase 6 integration testing with 51/52 tests passing - Fix all failsafe operations tests (6/6 passing) - Fix safety limit dynamic updates test - Add performance load testing framework - Fix database parameter format issues - Add async clear methods to DatabaseWatchdog - Fix SQLite compatibility issues - Update test assertions for better clarity - Add missing safety limits table to Phase1 integration test 2025-10-29 08:54:12 +00:00			`(station_id, pump_id, default_setpoint, timestamp)`
			`VALUES (:station_id, :pump_id, :default_setpoint, :timestamp)`
Complete Phase 2: Safety Framework Implementation - Implement DatabaseWatchdog with 20-minute timeout detection and failsafe mode - Add EmergencyStopManager with system-wide and targeted emergency stop functionality - Create AlertManager with multi-channel alert delivery (email, SMS, webhook, SCADA) - Integrate emergency stop checking into SafetyLimitEnforcer (highest priority) - Add comprehensive unit tests for all new safety components - All 95 unit tests passing (100% success rate) Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 07:32:01 +00:00			`"""`
Complete Phase 6 integration testing with 51/52 tests passing - Fix all failsafe operations tests (6/6 passing) - Fix safety limit dynamic updates test - Add performance load testing framework - Fix database parameter format issues - Add async clear methods to DatabaseWatchdog - Fix SQLite compatibility issues - Update test assertions for better clarity - Add missing safety limits table to Phase1 integration test 2025-10-29 08:54:12 +00:00			`self.db_client.execute(query, {`
			`'station_id': station_id,`
			`'pump_id': pump_id,`
			`'default_setpoint': default_setpoint,`
			`'timestamp': datetime.now()`
			`})`
Complete Phase 2: Safety Framework Implementation - Implement DatabaseWatchdog with 20-minute timeout detection and failsafe mode - Add EmergencyStopManager with system-wide and targeted emergency stop functionality - Create AlertManager with multi-channel alert delivery (email, SMS, webhook, SCADA) - Integrate emergency stop checking into SafetyLimitEnforcer (highest priority) - Add comprehensive unit tests for all new safety components - All 95 unit tests passing (100% success rate) Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 07:32:01 +00:00			`except Exception as e:`
			`logger.error("failed_to_record_failsafe_event", error=str(e))`

			`def _record_failsafe_deactivation(self, station_id: str, pump_id: str):`
			`"""Record failsafe deactivation in database."""`
			`try:`
			`query = """`
			`INSERT INTO failsafe_events`
			`(station_id, pump_id, event_type, timestamp)`
Complete Phase 6 integration testing with 51/52 tests passing - Fix all failsafe operations tests (6/6 passing) - Fix safety limit dynamic updates test - Add performance load testing framework - Fix database parameter format issues - Add async clear methods to DatabaseWatchdog - Fix SQLite compatibility issues - Update test assertions for better clarity - Add missing safety limits table to Phase1 integration test 2025-10-29 08:54:12 +00:00			`VALUES (:station_id, :pump_id, 'DEACTIVATED', :timestamp)`
Complete Phase 2: Safety Framework Implementation - Implement DatabaseWatchdog with 20-minute timeout detection and failsafe mode - Add EmergencyStopManager with system-wide and targeted emergency stop functionality - Create AlertManager with multi-channel alert delivery (email, SMS, webhook, SCADA) - Integrate emergency stop checking into SafetyLimitEnforcer (highest priority) - Add comprehensive unit tests for all new safety components - All 95 unit tests passing (100% success rate) Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 07:32:01 +00:00			`"""`
Complete Phase 6 integration testing with 51/52 tests passing - Fix all failsafe operations tests (6/6 passing) - Fix safety limit dynamic updates test - Add performance load testing framework - Fix database parameter format issues - Add async clear methods to DatabaseWatchdog - Fix SQLite compatibility issues - Update test assertions for better clarity - Add missing safety limits table to Phase1 integration test 2025-10-29 08:54:12 +00:00			`self.db_client.execute(query, {`
			`'station_id': station_id,`
			`'pump_id': pump_id,`
			`'timestamp': datetime.now()`
			`})`
Complete Phase 2: Safety Framework Implementation - Implement DatabaseWatchdog with 20-minute timeout detection and failsafe mode - Add EmergencyStopManager with system-wide and targeted emergency stop functionality - Create AlertManager with multi-channel alert delivery (email, SMS, webhook, SCADA) - Integrate emergency stop checking into SafetyLimitEnforcer (highest priority) - Add comprehensive unit tests for all new safety components - All 95 unit tests passing (100% success rate) Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 07:32:01 +00:00			`except Exception as e:`
			`logger.error("failed_to_record_failsafe_deactivation", error=str(e))`

			`def is_failsafe_active(self, station_id: str, pump_id: str) -> bool:`
			`"""Check if failsafe mode is active for a pump."""`
			`key = (station_id, pump_id)`
			`return self.failsafe_active.get(key, False)`

			`def get_last_update_time(self, station_id: str, pump_id: str) -> Optional[datetime]:`
			`"""Get the last known update time for a pump."""`
			`key = (station_id, pump_id)`
			`return self.last_update_times.get(key)`

Complete Phase 6 integration testing with 51/52 tests passing - Fix all failsafe operations tests (6/6 passing) - Fix safety limit dynamic updates test - Add performance load testing framework - Fix database parameter format issues - Add async clear methods to DatabaseWatchdog - Fix SQLite compatibility issues - Update test assertions for better clarity - Add missing safety limits table to Phase1 integration test 2025-10-29 08:54:12 +00:00			`async def activate_failsafe_mode(self, station_id: str, pump_id: str, reason: str):`
			`"""`
			`Manually activate failsafe mode for testing purposes.`

			`This method is intended for testing scenarios where failsafe mode`
			`needs to be triggered manually, rather than waiting for automatic`
			`detection of stale data.`

			`Args:`
			`station_id: Station identifier`
			`pump_id: Pump identifier`
			`reason: Reason for manual activation (for logging)`
			`"""`
			`logger.info(`
			`"manual_failsafe_activation",`
			`station_id=station_id,`
			`pump_id=pump_id,`
			`reason=reason`
			`)`
			`# Use a large time_since_update to trigger failsafe`
			`await self._activate_failsafe(station_id, pump_id, self.timeout_seconds + 1)`

			`async def activate_failsafe_mode_station(self, station_id: str, reason: str):`
			`"""`
			`Manually activate failsafe mode for all pumps in a station.`

			`This method is intended for testing scenarios where station-wide`
			`failsafe mode needs to be triggered manually.`

			`Args:`
			`station_id: Station identifier`
			`reason: Reason for manual activation (for logging)`
			`"""`
			`logger.info(`
			`"manual_failsafe_activation_station",`
			`station_id=station_id,`
			`reason=reason`
			`)`
			`# Get all pumps in the station`
			`pumps = self.db_client.get_pumps(station_id)`
			`for pump in pumps:`
			`await self.activate_failsafe_mode(station_id, pump['pump_id'], reason)`

			`async def clear_failsafe_mode(self, station_id: str, pump_id: str):`
			`"""`
			`Manually clear failsafe mode for a pump.`

			`This method is intended for testing scenarios where failsafe mode`
			`needs to be cleared manually.`

			`Args:`
			`station_id: Station identifier`
			`pump_id: Pump identifier`
			`"""`
			`logger.info(`
			`"manual_failsafe_clear",`
			`station_id=station_id,`
			`pump_id=pump_id`
			`)`
			`await self._deactivate_failsafe(station_id, pump_id)`

			`async def clear_failsafe_mode_station(self, station_id: str):`
			`"""`
			`Manually clear failsafe mode for all pumps in a station.`

			`This method is intended for testing scenarios where station-wide`
			`failsafe mode needs to be cleared manually.`

			`Args:`
			`station_id: Station identifier`
			`"""`
			`logger.info(`
			`"manual_failsafe_clear_station",`
			`station_id=station_id`
			`)`
			`# Get all pumps in the station`
			`pumps = self.db_client.get_pumps(station_id)`
			`for pump in pumps:`
			`await self.clear_failsafe_mode(station_id, pump['pump_id'])`

Complete Phase 2: Safety Framework Implementation - Implement DatabaseWatchdog with 20-minute timeout detection and failsafe mode - Add EmergencyStopManager with system-wide and targeted emergency stop functionality - Create AlertManager with multi-channel alert delivery (email, SMS, webhook, SCADA) - Integrate emergency stop checking into SafetyLimitEnforcer (highest priority) - Add comprehensive unit tests for all new safety components - All 95 unit tests passing (100% success rate) Co-authored-by: openhands <openhands@all-hands.dev> 2025-10-27 07:32:01 +00:00			`def get_status(self) -> Dict[str, Any]:`
			`"""Get watchdog status information."""`
			`current_time = datetime.now()`

			`status_info = {`
			`'running': self.running,`
			`'timeout_seconds': self.timeout_seconds,`
			`'check_interval_seconds': self.check_interval_seconds,`
			`'monitored_pumps': len(self.last_update_times),`
			`'failsafe_active_pumps': sum(self.failsafe_active.values()),`
			`'pump_status': {}`
			`}`

			`for key, last_update in self.last_update_times.items():`
			`station_id, pump_id = key`
			`time_since_update = (current_time - last_update).total_seconds()`

			`status_info['pump_status'][f"{station_id}_{pump_id}"] = {`
			`'last_update': last_update.isoformat(),`
			`'seconds_since_update': time_since_update,`
			`'failsafe_active': self.failsafe_active.get(key, False),`
			`'timeout_percentage': min(100, (time_since_update / self.timeout_seconds) * 100)`
			`}`

			`return status_info`