Skip to main content

saluki_core/runtime/
restart.rs

1use std::{collections::VecDeque, time::Duration};
2
3use tokio::time::Instant;
4use tracing::debug;
5
/// Restart mode for child processes.
///
/// Selects which child processes are restarted when one of them fails.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RestartMode {
    /// Restarts the failed child process only.
    OneForOne,

    /// Restarts all child processes, including the failed one.
    OneForAll,
}
15
/// Restart strategy for a supervisor.
///
/// Defaults to one-for-one mode (only restart the failed process) and a restart intensity of 1 over a period of 5
/// seconds.
///
/// # Restarts and permanent failure
///
/// A supervisor will allow up to `intensity` process restarts, across all child processes, over a given `period`. When
/// this limit is exceeded, the supervisor will stop all child processes and return an error itself, indicating that the
/// supervisor has failed overall.
///
/// Permanent failure bubbles up to the parent supervisor, until reaching the root supervisor. Once permanent failure
/// reaches the root supervisor, and the root supervisor exceeds its own restart limits, the root supervisor will fail
/// and cease execution.
#[derive(Clone, Copy)]
pub struct RestartStrategy {
    /// Which child processes are restarted when one of them fails.
    mode: RestartMode,
    /// Maximum number of restarts allowed within `period`. A value of zero means no restarts are ever allowed.
    intensity: usize,
    /// Window of time over which restarts are counted against `intensity`.
    period: Duration,
}
36
37impl RestartStrategy {
38    /// Creates a new `RestartStrategy` with the given mode, intensity, and period.
39    pub const fn new(mode: RestartMode, intensity: usize, period: Duration) -> Self {
40        Self {
41            mode,
42            intensity,
43            period,
44        }
45    }
46
47    /// Creates a new `RestartStrategy` with the one-to-one restart mode, and the default intensity/period.
48    pub fn one_to_one() -> Self {
49        Self {
50            mode: RestartMode::OneForOne,
51            ..Default::default()
52        }
53    }
54
55    /// Creates a new `RestartStrategy` with the one-for-all restart mode, and the default intensity/period.
56    pub fn one_for_all() -> Self {
57        Self {
58            mode: RestartMode::OneForAll,
59            ..Default::default()
60        }
61    }
62
63    /// Sets the restart intensity and period for the strategy.
64    pub const fn with_intensity_and_period(mut self, intensity: usize, period: Duration) -> Self {
65        self.intensity = intensity;
66        self.period = period;
67        self
68    }
69}
70
71impl Default for RestartStrategy {
72    fn default() -> Self {
73        Self::new(RestartMode::OneForOne, 1, Duration::from_secs(5))
74    }
75}
76
/// Action a supervisor should take in response to an evaluated restart.
///
/// Returned by `RestartState::evaluate_restart`.
pub(super) enum RestartAction {
    /// Execute a restart with the given mode.
    Restart(RestartMode),

    /// Supervisor must shutdown as the maximum number of restarts has been reached.
    Shutdown,
}
84
/// Restart bookkeeping used to enforce the intensity/period limits of a `RestartStrategy`.
pub(super) struct RestartState {
    /// Strategy whose mode, intensity, and period drive restart decisions.
    strategy: RestartStrategy,
    /// Timestamps of the most recent allowed restarts, oldest at the front. Holds at most `intensity` entries.
    restart_history: VecDeque<Instant>,
}
89
90impl RestartState {
91    /// Creates a new `RestartState` with the given strategy.
92    pub fn new(strategy: RestartStrategy) -> Self {
93        Self {
94            strategy,
95            restart_history: VecDeque::with_capacity(strategy.intensity),
96        }
97    }
98
99    /// Evaluates a restart based on the current state and determine the action the supervisor should take in response.
100    pub fn evaluate_restart(&mut self) -> RestartAction {
101        // Short circuit if our intensity is zero.
102        if self.strategy.intensity == 0 {
103            debug!("Restart strategy configured with restart intensity of zero, shutting down.");
104            return RestartAction::Shutdown;
105        }
106
107        // Since we only keep track of the last `intensity` restarts, we simply need to check if the oldest restart
108        // we're tracking is within `period` of the current time, and if the number of tracked restarts is equal to
109        // `intensity`.
110        //
111        // When both of these are true, we have exceeded the restart intensity limit and must shutdown.
112        let now = Instant::now();
113        if self.restart_history.len() == self.strategy.intensity {
114            let oldest = self.restart_history.front().expect("restart history cannot be empty");
115            if now.saturating_duration_since(*oldest) < self.strategy.period {
116                debug!(
117                    "Restart limit exceeded ({} in {:?}), shutting down.",
118                    self.strategy.intensity, self.strategy.period
119                );
120                return RestartAction::Shutdown;
121            }
122
123            // Remove the oldest restart from the history since it is outside the period.
124            self.restart_history.pop_front();
125        }
126
127        // Track this latest restart.
128        self.restart_history.push_back(now);
129
130        debug!("Restart limit not exceeded, restarting worker.");
131        RestartAction::Restart(self.strategy.mode)
132    }
133}