dd_sds/
match_action.rs

1#![allow(deprecated)]
2// The module level deprecation allow is needed to suppress warnings from `MatchAction::Utf16Hash`
3// that I couldn't find a specific line to suppress. It can be removed when the variant is removed.
4
5use std::borrow::Cow;
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10use crate::rule_match::ReplacementType;
11
/// Describes how the content of a match is transformed.
///
/// Serialized through serde as an internally tagged enum: the variant name is stored under the
/// `type` key, next to the variant's own fields.
#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq)]
#[serde(tag = "type")]
pub enum MatchAction {
    /// Do not modify the input. This is the default action.
    #[default]
    None,
    /// Replace the whole match with the `replacement` string.
    Redact { replacement: String },
    /// Replace the whole match with the lowercase hexadecimal FarmHash `fingerprint64` of its
    /// UTF-8 bytes.
    Hash,
    /// Replace the whole match with the lowercase hexadecimal FarmHash `fingerprint64` of its
    /// UTF-16 (little-endian) encoded bytes.
    ///
    /// Only compiled in test builds or when the `utf16_hash_match_action` feature is enabled.
    #[deprecated(
        note = "Support hash from UTF-16 encoded bytes for backward compatibility. Users should use instead hash match action."
    )]
    #[cfg(any(test, feature = "utf16_hash_match_action"))]
    Utf16Hash,
    /// Replace the first or last n characters with asterisks.
    PartialRedact {
        /// Which end of the match is masked.
        direction: PartialRedactDirection,
        /// Number of characters to mask; `MatchAction::validate` rejects zero.
        character_count: usize,
    },
}
34
35#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
36pub enum PartialRedactDirection {
37    FirstCharacters,
38    LastCharacters,
39}
40
/// Character repeated to build the replacement text of a partial redaction.
const PARTIAL_REDACT_CHARACTER: char = '*';
42
/// Reasons for which `MatchAction::validate` rejects a match action.
#[derive(Debug, PartialEq, Eq, Error)]
pub enum MatchActionValidationError {
    /// A partial redaction was configured with a `character_count` of zero.
    #[error("Partial redaction chars must be non-zero")]
    PartialRedactionNumCharsZero,
}
48
49impl MatchAction {
50    pub fn validate(&self) -> Result<(), MatchActionValidationError> {
51        match self {
52            MatchAction::PartialRedact {
53                direction: _,
54                character_count,
55            } => {
56                if *character_count == 0 {
57                    Err(MatchActionValidationError::PartialRedactionNumCharsZero)
58                } else {
59                    Ok(())
60                }
61            }
62            MatchAction::None | MatchAction::Redact { replacement: _ } | MatchAction::Hash => {
63                Ok(())
64            }
65            #[cfg(any(test, feature = "utf16_hash_match_action"))]
66            #[allow(deprecated)]
67            MatchAction::Utf16Hash => Ok(()),
68        }
69    }
70
71    /// If the match action will modify the content
72    pub fn is_mutating(&self) -> bool {
73        match self {
74            MatchAction::None => false,
75            MatchAction::Redact { .. } => true,
76            MatchAction::Hash => true,
77            #[cfg(any(test, feature = "utf16_hash_match_action"))]
78            #[allow(deprecated)]
79            MatchAction::Utf16Hash => true,
80            MatchAction::PartialRedact { .. } => true,
81        }
82    }
83
84    pub fn replacement_type(&self) -> ReplacementType {
85        match self {
86            MatchAction::None => ReplacementType::None,
87            MatchAction::Redact { .. } => ReplacementType::Placeholder,
88            MatchAction::Hash => ReplacementType::Hash,
89            #[cfg(any(test, feature = "utf16_hash_match_action"))]
90            #[allow(deprecated)]
91            MatchAction::Utf16Hash => ReplacementType::Hash,
92            MatchAction::PartialRedact { direction, .. } => match direction {
93                PartialRedactDirection::FirstCharacters => ReplacementType::PartialStart,
94                PartialRedactDirection::LastCharacters => ReplacementType::PartialEnd,
95            },
96        }
97    }
98
99    pub fn get_replacement(&self, matched_content: &str) -> Option<Replacement> {
100        match self {
101            MatchAction::None => None,
102            MatchAction::Redact { replacement } => Some(Replacement {
103                start: 0,
104                end: matched_content.len(),
105                replacement: Cow::Borrowed(replacement),
106            }),
107            MatchAction::Hash => Some(Replacement {
108                start: 0,
109                end: matched_content.len(),
110                replacement: Cow::Owned(Self::hash(matched_content)),
111            }),
112            #[cfg(any(test, feature = "utf16_hash_match_action"))]
113            #[allow(deprecated)]
114            MatchAction::Utf16Hash => Some(Replacement {
115                start: 0,
116                end: matched_content.len(),
117                replacement: Cow::Owned(Self::utf16_hash(matched_content)),
118            }),
119            MatchAction::PartialRedact {
120                direction,
121                character_count: num_characters,
122            } => match direction {
123                PartialRedactDirection::FirstCharacters => Some(Self::partial_redaction_first(
124                    num_characters,
125                    matched_content,
126                )),
127                PartialRedactDirection::LastCharacters => Some(Self::partial_redaction_last(
128                    num_characters,
129                    matched_content,
130                )),
131            },
132        }
133    }
134
135    fn hash(match_result: &str) -> String {
136        let hash = farmhash::fingerprint64(match_result.as_bytes());
137        format!("{hash:x}")
138    }
139
140    #[cfg(any(test, feature = "utf16_hash_match_action"))]
141    fn utf16_hash(match_result: &str) -> String {
142        let utf16_bytes = match_result
143            .encode_utf16()
144            .flat_map(u16::to_le_bytes)
145            .collect::<Vec<_>>();
146        let hash = farmhash::fingerprint64(&utf16_bytes);
147        format!("{hash:x}")
148    }
149
150    fn partial_redaction_first(
151        num_characters: &usize,
152        matched_content: &str,
153    ) -> Replacement<'static> {
154        let match_len = matched_content.chars().count();
155
156        let last_replacement_byte = if match_len > *num_characters {
157            matched_content
158                .char_indices()
159                .nth(*num_characters)
160                .unwrap()
161                .0
162        } else {
163            matched_content.len()
164        };
165
166        Replacement {
167            start: 0,
168            end: last_replacement_byte,
169            replacement: String::from(PARTIAL_REDACT_CHARACTER)
170                .repeat(*num_characters)
171                .into(),
172        }
173    }
174
175    fn partial_redaction_last(num_characters: &usize, match_result: &str) -> Replacement<'static> {
176        let match_len = match_result.chars().count();
177
178        let start_replacement_byte = if match_len > *num_characters {
179            match_result
180                .char_indices()
181                .nth_back(*num_characters - 1)
182                .unwrap()
183                .0
184        } else {
185            0
186        };
187
188        Replacement {
189            start: start_replacement_byte,
190            end: match_result.len(),
191            replacement: String::from(PARTIAL_REDACT_CHARACTER)
192                .repeat(*num_characters)
193                .into(),
194        }
195    }
196}
197
/// A substitution to apply to a matched string, as produced by `MatchAction::get_replacement`.
#[derive(PartialEq, Debug)]
pub struct Replacement<'a> {
    /// Byte offset, relative to the matched content, where the replaced range starts.
    pub start: usize,
    /// Byte offset, relative to the matched content, where the replaced range ends
    /// (the length of the matched content when the range runs to its end).
    pub end: usize,
    /// Text that takes the place of the range; borrowed when it comes straight from the action's
    /// configuration, owned when it is computed.
    pub replacement: Cow<'a, str>,
}
204
#[cfg(test)]
mod test {
    use crate::match_action::PartialRedactDirection::{FirstCharacters, LastCharacters};
    use crate::match_action::{MatchAction, Replacement};

    /// Shorthand for the `Some(Replacement { .. })` a mutating action is expected to return.
    fn replaced(start: usize, end: usize, text: &'static str) -> Option<Replacement<'static>> {
        Some(Replacement {
            start,
            end,
            replacement: text.into(),
        })
    }

    #[test]
    fn match_with_no_action() {
        let action = MatchAction::None;

        for input in ["rene coty", "rene"] {
            assert_eq!(action.get_replacement(input), None);
        }
    }

    #[test]
    fn match_with_redaction() {
        let action = MatchAction::Redact {
            replacement: "[REPLACEMENT]".to_string(),
        };

        let cases = [
            ("rene coty", replaced(0, 9, "[REPLACEMENT]")),
            ("coty", replaced(0, 4, "[REPLACEMENT]")),
        ];
        for (input, expected) in cases {
            assert_eq!(action.get_replacement(input), expected);
        }
    }

    #[test]
    fn match_with_hash() {
        let action = MatchAction::Hash;

        let cases = [
            ("coty", replaced(0, 4, "fdf7528ad7f83901")),
            ("rene", replaced(0, 4, "51a2842f626aaaec")),
            // A single emoji is four bytes long in UTF-8.
            ("😊", replaced(0, 4, "6ce17744696c2107")),
        ];
        for (input, expected) in cases {
            assert_eq!(action.get_replacement(input), expected);
        }
    }

    #[test]
    #[cfg(feature = "utf16_hash_match_action")]
    fn match_with_utf16_hash() {
        #[allow(deprecated)]
        let action = MatchAction::Utf16Hash;

        // Offsets stay expressed in UTF-8 bytes; only the hashed bytes are UTF-16.
        let cases = [
            ("coty", replaced(0, 4, "d6bf038129a9eb52")),
            ("rene", replaced(0, 4, "8627c79c79ff4b8b")),
            ("😊", replaced(0, 4, "268a21f211fdbc0a")),
        ];
        for (input, expected) in cases {
            assert_eq!(action.get_replacement(input), expected);
        }
    }

    #[test]
    fn match_with_partial_redaction_first_characters_should_always_redact_num_characters() {
        let action = MatchAction::PartialRedact {
            character_count: 5,
            direction: FirstCharacters,
        };

        // Longer than, shorter than, and exactly as long as the redacted character count.
        let cases = [
            ("ene coty", replaced(0, 5, "*****")),
            ("rene", replaced(0, 4, "*****")),
            ("rene ", replaced(0, 5, "*****")),
        ];
        for (input, expected) in cases {
            assert_eq!(action.get_replacement(input), expected);
        }
    }

    #[test]
    fn match_with_partial_redaction_last_characters_should_always_redact_num_characters() {
        let action = MatchAction::PartialRedact {
            character_count: 5,
            direction: LastCharacters,
        };

        // Longer than, shorter than, and exactly as long as the redacted character count.
        let cases = [
            ("rene cot", replaced(3, 8, "*****")),
            ("rene", replaced(0, 4, "*****")),
            ("rene ", replaced(0, 5, "*****")),
        ];
        for (input, expected) in cases {
            assert_eq!(action.get_replacement(input), expected);
        }
    }

    #[test]
    fn partially_redacts_first_emoji() {
        let action = MatchAction::PartialRedact {
            character_count: 1,
            direction: FirstCharacters,
        };

        assert_eq!(action.get_replacement("😊🤞"), replaced(0, 4, "*"));
    }

    #[test]
    fn partially_redacts_last_emoji() {
        let action = MatchAction::PartialRedact {
            character_count: 1,
            direction: LastCharacters,
        };

        assert_eq!(action.get_replacement("😊🤞"), replaced(4, 8, "*"));
    }

    #[test]
    fn test_farmhash_bugfix() {
        // Testing the bugfix from https://github.com/seiflotfy/rust-farmhash/pull/16
        let input = "x".repeat(128);

        assert_eq!(
            MatchAction::Hash.get_replacement(&input),
            replaced(0, 128, "5170af09fd870c17")
        );
    }
}