saluki_core/runtime/restart.rs
1use std::{collections::VecDeque, time::Duration};
2
3use tokio::time::Instant;
4use tracing::debug;
5
/// Restart mode for child processes.
///
/// Selects which child processes a supervisor restarts when one of them fails.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RestartMode {
    /// Restarts the failed child process only.
    OneForOne,

    /// Restarts all child processes, including the failed one.
    OneForAll,
}
15
16/// Restart strategy for a supervisor.
17///
18/// Defaults to one-to-one mode (only restart the failed process) and a restart intensity of 1 over a period of 5
19/// seconds.
20///
21/// # Restarts and permanent failure
22///
23/// A supervisor will allow up to `intensity` process restarts, across all child processes, over a given `period`. When
24/// this limit is exceeded, the supervisor will stop all child processes and return an error itself, indicating that the
25/// supervisor has failed overall.
26///
27/// Permanent failure bubbles up to the parent supervisor, until reaching the root supervisor. Once permanent failure
28/// reaches the root supervisor, and the root supervisor exceeds its own restart limits, the root supervisor will fail
29/// and cease execution.
30#[derive(Clone, Copy)]
31pub struct RestartStrategy {
32 mode: RestartMode,
33 intensity: usize,
34 period: Duration,
35}
36
37impl RestartStrategy {
38 /// Creates a new `RestartStrategy` with the given mode, intensity, and period.
39 pub const fn new(mode: RestartMode, intensity: usize, period: Duration) -> Self {
40 Self {
41 mode,
42 intensity,
43 period,
44 }
45 }
46
47 /// Creates a new `RestartStrategy` with the one-to-one restart mode, and the default intensity/period.
48 pub fn one_to_one() -> Self {
49 Self {
50 mode: RestartMode::OneForOne,
51 ..Default::default()
52 }
53 }
54
55 /// Creates a new `RestartStrategy` with the one-for-all restart mode, and the default intensity/period.
56 pub fn one_for_all() -> Self {
57 Self {
58 mode: RestartMode::OneForAll,
59 ..Default::default()
60 }
61 }
62
63 /// Sets the restart intensity and period for the strategy.
64 pub const fn with_intensity_and_period(mut self, intensity: usize, period: Duration) -> Self {
65 self.intensity = intensity;
66 self.period = period;
67 self
68 }
69}
70
71impl Default for RestartStrategy {
72 fn default() -> Self {
73 Self::new(RestartMode::OneForOne, 1, Duration::from_secs(5))
74 }
75}
76
77pub(super) enum RestartAction {
78 /// Execute a restart with the given mode.
79 Restart(RestartMode),
80
81 /// Supervisor must shutdown as the maximum number of restarts has been reached.
82 Shutdown,
83}
84
85pub(super) struct RestartState {
86 strategy: RestartStrategy,
87 restart_history: VecDeque<Instant>,
88}
89
90impl RestartState {
91 /// Creates a new `RestartState` with the given strategy.
92 pub fn new(strategy: RestartStrategy) -> Self {
93 Self {
94 strategy,
95 restart_history: VecDeque::with_capacity(strategy.intensity),
96 }
97 }
98
99 /// Evaluates a restart based on the current state and determine the action the supervisor should take in response.
100 pub fn evaluate_restart(&mut self) -> RestartAction {
101 // Short circuit if our intensity is zero.
102 if self.strategy.intensity == 0 {
103 debug!("Restart strategy configured with restart intensity of zero, shutting down.");
104 return RestartAction::Shutdown;
105 }
106
107 // Since we only keep track of the last `intensity` restarts, we simply need to check if the oldest restart
108 // we're tracking is within `period` of the current time, and if the number of tracked restarts is equal to
109 // `intensity`.
110 //
111 // When both of these are true, we have exceeded the restart intensity limit and must shutdown.
112 let now = Instant::now();
113 if self.restart_history.len() == self.strategy.intensity {
114 let oldest = self.restart_history.front().expect("restart history cannot be empty");
115 if now.saturating_duration_since(*oldest) < self.strategy.period {
116 debug!(
117 "Restart limit exceeded ({} in {:?}), shutting down.",
118 self.strategy.intensity, self.strategy.period
119 );
120 return RestartAction::Shutdown;
121 }
122
123 // Remove the oldest restart from the history since it is outside the period.
124 self.restart_history.pop_front();
125 }
126
127 // Track this latest restart.
128 self.restart_history.push_back(now);
129
130 debug!("Restart limit not exceeded, restarting worker.");
131 RestartAction::Restart(self.strategy.mode)
132 }
133}