dd_sds/
match_action.rs

1#![allow(deprecated)]
// The module-level `allow(deprecated)` suppresses the deprecation warnings triggered by
// `MatchAction::Utf16Hash`, which could not be silenced with a narrower, item-level attribute.
// It can be removed when the variant is removed.
4
5use std::borrow::Cow;
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10use crate::rule_match::ReplacementType;
11
12#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq)]
13#[serde(tag = "type")]
14pub enum MatchAction {
15    /// Do not modify the input.
16    #[default]
17    None,
18    /// Replace matches with a new string.
19    Redact { replacement: String },
20    /// Hash the result
21    Hash,
22    /// Hash the result based on UTF-16 bytes encoded match result
23    #[deprecated(
24        note = "Support hash from UTF-16 encoded bytes for backward compatibility. Users should use instead hash match action."
25    )]
26    #[cfg(any(test, feature = "utf16_hash_match_action"))]
27    Utf16Hash,
28    /// Replace the first or last n characters with asterisks.
29    PartialRedact {
30        direction: PartialRedactDirection,
31        character_count: usize,
32    },
33}
34
35impl MatchAction {
36    pub fn redact(replacement: &str) -> Self {
37        Self::Redact {
38            replacement: replacement.to_string(),
39        }
40    }
41}
42
43#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
44pub enum PartialRedactDirection {
45    FirstCharacters,
46    LastCharacters,
47}
48
/// Character repeated to build the replacement text of a partial redaction.
const PARTIAL_REDACT_CHARACTER: char = '*';
50
51#[derive(Debug, PartialEq, Eq, Error)]
52pub enum MatchActionValidationError {
53    #[error("Partial redaction chars must be non-zero")]
54    PartialRedactionNumCharsZero,
55}
56
57impl MatchAction {
58    pub fn validate(&self) -> Result<(), MatchActionValidationError> {
59        match self {
60            MatchAction::PartialRedact {
61                direction: _,
62                character_count,
63            } => {
64                if *character_count == 0 {
65                    Err(MatchActionValidationError::PartialRedactionNumCharsZero)
66                } else {
67                    Ok(())
68                }
69            }
70            MatchAction::None | MatchAction::Redact { replacement: _ } | MatchAction::Hash => {
71                Ok(())
72            }
73            #[cfg(any(test, feature = "utf16_hash_match_action"))]
74            #[allow(deprecated)]
75            MatchAction::Utf16Hash => Ok(()),
76        }
77    }
78
79    /// If the match action will modify the content
80    pub fn is_mutating(&self) -> bool {
81        match self {
82            MatchAction::None => false,
83            MatchAction::Redact { .. } => true,
84            MatchAction::Hash => true,
85            #[cfg(any(test, feature = "utf16_hash_match_action"))]
86            #[allow(deprecated)]
87            MatchAction::Utf16Hash => true,
88            MatchAction::PartialRedact { .. } => true,
89        }
90    }
91
92    pub fn replacement_type(&self) -> ReplacementType {
93        match self {
94            MatchAction::None => ReplacementType::None,
95            MatchAction::Redact { .. } => ReplacementType::Placeholder,
96            MatchAction::Hash => ReplacementType::Hash,
97            #[cfg(any(test, feature = "utf16_hash_match_action"))]
98            #[allow(deprecated)]
99            MatchAction::Utf16Hash => ReplacementType::Hash,
100            MatchAction::PartialRedact { direction, .. } => match direction {
101                PartialRedactDirection::FirstCharacters => ReplacementType::PartialStart,
102                PartialRedactDirection::LastCharacters => ReplacementType::PartialEnd,
103            },
104        }
105    }
106
107    pub fn get_replacement(&self, matched_content: &str) -> Option<Replacement<'_>> {
108        match self {
109            MatchAction::None => None,
110            MatchAction::Redact { replacement } => Some(Replacement {
111                start: 0,
112                end: matched_content.len(),
113                replacement: Cow::Borrowed(replacement),
114            }),
115            MatchAction::Hash => Some(Replacement {
116                start: 0,
117                end: matched_content.len(),
118                replacement: Cow::Owned(Self::hash(matched_content)),
119            }),
120            #[cfg(any(test, feature = "utf16_hash_match_action"))]
121            #[allow(deprecated)]
122            MatchAction::Utf16Hash => Some(Replacement {
123                start: 0,
124                end: matched_content.len(),
125                replacement: Cow::Owned(Self::utf16_hash(matched_content)),
126            }),
127            MatchAction::PartialRedact {
128                direction,
129                character_count: num_characters,
130            } => match direction {
131                PartialRedactDirection::FirstCharacters => Some(Self::partial_redaction_first(
132                    num_characters,
133                    matched_content,
134                )),
135                PartialRedactDirection::LastCharacters => Some(Self::partial_redaction_last(
136                    num_characters,
137                    matched_content,
138                )),
139            },
140        }
141    }
142
143    fn hash(match_result: &str) -> String {
144        let hash = farmhash::fingerprint64(match_result.as_bytes());
145        format!("{hash:x}")
146    }
147
148    #[cfg(any(test, feature = "utf16_hash_match_action"))]
149    fn utf16_hash(match_result: &str) -> String {
150        let utf16_bytes = match_result
151            .encode_utf16()
152            .flat_map(u16::to_le_bytes)
153            .collect::<Vec<_>>();
154        let hash = farmhash::fingerprint64(&utf16_bytes);
155        format!("{hash:x}")
156    }
157
158    fn partial_redaction_first(
159        num_characters: &usize,
160        matched_content: &str,
161    ) -> Replacement<'static> {
162        let match_len = matched_content.chars().count();
163
164        let last_replacement_byte = if match_len > *num_characters {
165            matched_content
166                .char_indices()
167                .nth(*num_characters)
168                .unwrap()
169                .0
170        } else {
171            matched_content.len()
172        };
173
174        Replacement {
175            start: 0,
176            end: last_replacement_byte,
177            replacement: String::from(PARTIAL_REDACT_CHARACTER)
178                .repeat(*num_characters)
179                .into(),
180        }
181    }
182
183    fn partial_redaction_last(num_characters: &usize, match_result: &str) -> Replacement<'static> {
184        let match_len = match_result.chars().count();
185
186        let start_replacement_byte = if match_len > *num_characters {
187            match_result
188                .char_indices()
189                .nth_back(*num_characters - 1)
190                .unwrap()
191                .0
192        } else {
193            0
194        };
195
196        Replacement {
197            start: start_replacement_byte,
198            end: match_result.len(),
199            replacement: String::from(PARTIAL_REDACT_CHARACTER)
200                .repeat(*num_characters)
201                .into(),
202        }
203    }
204}
205
/// A substitution to apply to a piece of matched content.
#[derive(Debug, PartialEq)]
pub struct Replacement<'a> {
    /// Byte offset, within the matched content, where the replaced range begins.
    pub start: usize,
    /// Byte offset, within the matched content, where the replaced range ends (exclusive).
    pub end: usize,
    /// Text that takes the place of the bytes in `start..end`.
    pub replacement: Cow<'a, str>,
}
212
#[cfg(test)]
mod test {
    use crate::match_action::PartialRedactDirection::{FirstCharacters, LastCharacters};
    use crate::match_action::{MatchAction, Replacement};

    /// Builds the expected result: bytes `start..end` of the match replaced by `text`.
    fn replaced(start: usize, end: usize, text: &'static str) -> Option<Replacement<'static>> {
        Some(Replacement {
            start,
            end,
            replacement: text.into(),
        })
    }

    #[test]
    fn match_with_no_action() {
        let match_action = MatchAction::None;

        assert_eq!(match_action.get_replacement("rene coty"), None);
        assert_eq!(match_action.get_replacement("rene"), None);
    }

    #[test]
    fn match_with_redaction() {
        let match_action = MatchAction::Redact {
            replacement: "[REPLACEMENT]".to_string(),
        };

        assert_eq!(
            match_action.get_replacement("rene coty"),
            replaced(0, 9, "[REPLACEMENT]")
        );
        assert_eq!(
            match_action.get_replacement("coty"),
            replaced(0, 4, "[REPLACEMENT]")
        );
    }

    #[test]
    fn match_with_hash() {
        let match_action = MatchAction::Hash;

        assert_eq!(
            match_action.get_replacement("coty"),
            replaced(0, 4, "fdf7528ad7f83901")
        );
        assert_eq!(
            match_action.get_replacement("rene"),
            replaced(0, 4, "51a2842f626aaaec")
        );
        // A single emoji is four bytes long in UTF-8.
        assert_eq!(
            match_action.get_replacement("😊"),
            replaced(0, 4, "6ce17744696c2107")
        );
    }

    #[test]
    #[cfg(feature = "utf16_hash_match_action")]
    fn match_with_utf16_hash() {
        #[allow(deprecated)]
        let match_action = MatchAction::Utf16Hash;

        assert_eq!(
            match_action.get_replacement("coty"),
            replaced(0, 4, "d6bf038129a9eb52")
        );
        assert_eq!(
            match_action.get_replacement("rene"),
            replaced(0, 4, "8627c79c79ff4b8b")
        );
        assert_eq!(
            match_action.get_replacement("😊"),
            replaced(0, 4, "268a21f211fdbc0a")
        );
    }

    #[test]
    fn match_with_partial_redaction_first_characters_should_always_redact_num_characters() {
        let match_action = MatchAction::PartialRedact {
            character_count: 5,
            direction: FirstCharacters,
        };

        // Longer than, shorter than, and exactly as long as the character count.
        assert_eq!(
            match_action.get_replacement("ene coty"),
            replaced(0, 5, "*****")
        );
        assert_eq!(match_action.get_replacement("rene"), replaced(0, 4, "*****"));
        assert_eq!(
            match_action.get_replacement("rene "),
            replaced(0, 5, "*****")
        );
    }

    #[test]
    fn match_with_partial_redaction_last_characters_should_always_redact_num_characters() {
        let match_action = MatchAction::PartialRedact {
            character_count: 5,
            direction: LastCharacters,
        };

        // Longer than, shorter than, and exactly as long as the character count.
        assert_eq!(
            match_action.get_replacement("rene cot"),
            replaced(3, 8, "*****")
        );
        assert_eq!(match_action.get_replacement("rene"), replaced(0, 4, "*****"));
        assert_eq!(
            match_action.get_replacement("rene "),
            replaced(0, 5, "*****")
        );
    }

    #[test]
    fn partially_redacts_first_emoji() {
        let match_action = MatchAction::PartialRedact {
            character_count: 1,
            direction: FirstCharacters,
        };

        assert_eq!(match_action.get_replacement("😊🤞"), replaced(0, 4, "*"));
    }

    #[test]
    fn partially_redacts_last_emoji() {
        let match_action = MatchAction::PartialRedact {
            character_count: 1,
            direction: LastCharacters,
        };

        assert_eq!(match_action.get_replacement("😊🤞"), replaced(4, 8, "*"));
    }

    #[test]
    fn test_farmhash_bugfix() {
        // Testing the bugfix from https://github.com/seiflotfy/rust-farmhash/pull/16
        assert_eq!(
            MatchAction::Hash.get_replacement(&"x".repeat(128)),
            replaced(0, 128, "5170af09fd870c17")
        );
    }
}
433}