dd_sds/
match_action.rs

1#![allow(deprecated)]
2// The module level deprecation allow is needed to suppress warnings from `MatchAction::Utf16Hash`
3// that I couldn't find a specific line to suppress. It can be removed when the variant is removed.
4
5use std::{borrow::Cow, cmp::min};
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10use crate::rule_match::ReplacementType;
11
12#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq)]
13#[serde(tag = "type")]
14pub enum MatchAction {
15    /// Do not modify the input.
16    #[default]
17    None,
18    /// Replace matches with a new string.
19    Redact { replacement: String },
20    /// Hash the result
21    Hash,
22    /// Hash the result based on UTF-16 bytes encoded match result
23    #[deprecated(
24        note = "Support hash from UTF-16 encoded bytes for backward compatibility. Users should use instead hash match action."
25    )]
26    #[cfg(any(test, feature = "utf16_hash_match_action"))]
27    Utf16Hash,
28    /// Replace the first or last n characters with asterisks.
29    PartialRedact {
30        direction: PartialRedactDirection,
31        character_count: usize,
32    },
33}
34
35impl MatchAction {
36    pub fn redact(replacement: &str) -> Self {
37        Self::Redact {
38            replacement: replacement.to_string(),
39        }
40    }
41}
42
43#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
44pub enum PartialRedactDirection {
45    FirstCharacters,
46    LastCharacters,
47}
48
/// Character written once for every character masked by a partial redaction.
const PARTIAL_REDACT_CHARACTER: char = '*';
50
51#[derive(Debug, PartialEq, Eq, Error)]
52pub enum MatchActionValidationError {
53    #[error("Partial redaction chars must be non-zero")]
54    PartialRedactionNumCharsZero,
55}
56
57impl MatchAction {
58    pub fn validate(&self) -> Result<(), MatchActionValidationError> {
59        match self {
60            MatchAction::PartialRedact {
61                direction: _,
62                character_count,
63            } => {
64                if *character_count == 0 {
65                    Err(MatchActionValidationError::PartialRedactionNumCharsZero)
66                } else {
67                    Ok(())
68                }
69            }
70            MatchAction::None | MatchAction::Redact { replacement: _ } | MatchAction::Hash => {
71                Ok(())
72            }
73            #[cfg(any(test, feature = "utf16_hash_match_action"))]
74            #[allow(deprecated)]
75            MatchAction::Utf16Hash => Ok(()),
76        }
77    }
78
79    /// If the match action will modify the content
80    pub fn is_mutating(&self) -> bool {
81        match self {
82            MatchAction::None => false,
83            MatchAction::Redact { .. } => true,
84            MatchAction::Hash => true,
85            #[cfg(any(test, feature = "utf16_hash_match_action"))]
86            #[allow(deprecated)]
87            MatchAction::Utf16Hash => true,
88            MatchAction::PartialRedact { .. } => true,
89        }
90    }
91
92    pub fn replacement_type(&self) -> ReplacementType {
93        match self {
94            MatchAction::None => ReplacementType::None,
95            MatchAction::Redact { .. } => ReplacementType::Placeholder,
96            MatchAction::Hash => ReplacementType::Hash,
97            #[cfg(any(test, feature = "utf16_hash_match_action"))]
98            #[allow(deprecated)]
99            MatchAction::Utf16Hash => ReplacementType::Hash,
100            MatchAction::PartialRedact { direction, .. } => match direction {
101                PartialRedactDirection::FirstCharacters => ReplacementType::PartialStart,
102                PartialRedactDirection::LastCharacters => ReplacementType::PartialEnd,
103            },
104        }
105    }
106
107    pub fn get_replacement(&self, matched_content: &str) -> Option<Replacement<'_>> {
108        match self {
109            MatchAction::None => None,
110            MatchAction::Redact { replacement } => Some(Replacement {
111                start: 0,
112                end: matched_content.len(),
113                replacement: Cow::Borrowed(replacement),
114            }),
115            MatchAction::Hash => Some(Replacement {
116                start: 0,
117                end: matched_content.len(),
118                replacement: Cow::Owned(Self::hash(matched_content)),
119            }),
120            #[cfg(any(test, feature = "utf16_hash_match_action"))]
121            #[allow(deprecated)]
122            MatchAction::Utf16Hash => Some(Replacement {
123                start: 0,
124                end: matched_content.len(),
125                replacement: Cow::Owned(Self::utf16_hash(matched_content)),
126            }),
127            MatchAction::PartialRedact {
128                direction,
129                character_count: num_characters,
130            } => match direction {
131                PartialRedactDirection::FirstCharacters => Some(Self::partial_redaction_first(
132                    num_characters,
133                    matched_content,
134                )),
135                PartialRedactDirection::LastCharacters => Some(Self::partial_redaction_last(
136                    num_characters,
137                    matched_content,
138                )),
139            },
140        }
141    }
142
143    fn hash(match_result: &str) -> String {
144        let hash = farmhash::fingerprint64(match_result.as_bytes());
145        format!("{hash:x}")
146    }
147
148    #[cfg(any(test, feature = "utf16_hash_match_action"))]
149    fn utf16_hash(match_result: &str) -> String {
150        let utf16_bytes = match_result
151            .encode_utf16()
152            .flat_map(u16::to_le_bytes)
153            .collect::<Vec<_>>();
154        let hash = farmhash::fingerprint64(&utf16_bytes);
155        format!("{hash:x}")
156    }
157
158    fn partial_redaction_first(
159        num_characters: &usize,
160        matched_content: &str,
161    ) -> Replacement<'static> {
162        let match_len = matched_content.chars().count();
163
164        let last_replacement_byte = if match_len > *num_characters {
165            matched_content
166                .char_indices()
167                .nth(*num_characters)
168                .unwrap()
169                .0
170        } else {
171            matched_content.len()
172        };
173
174        let replacement_length = min(*num_characters, match_len);
175
176        Replacement {
177            start: 0,
178            end: last_replacement_byte,
179            replacement: String::from(PARTIAL_REDACT_CHARACTER)
180                .repeat(replacement_length)
181                .into(),
182        }
183    }
184
185    fn partial_redaction_last(num_characters: &usize, match_result: &str) -> Replacement<'static> {
186        let match_len = match_result.chars().count();
187
188        let start_replacement_byte = if match_len > *num_characters {
189            match_result
190                .char_indices()
191                .nth_back(*num_characters - 1)
192                .unwrap()
193                .0
194        } else {
195            0
196        };
197
198        let replacement_length = min(*num_characters, match_len);
199
200        Replacement {
201            start: start_replacement_byte,
202            end: match_result.len(),
203            replacement: String::from(PARTIAL_REDACT_CHARACTER)
204                .repeat(replacement_length)
205                .into(),
206        }
207    }
208}
209
/// A substitution to apply to matched content.
///
/// `start` and `end` delimit the byte range to overwrite, relative to the matched content and
/// with `end` exclusive; `replacement` is the text to put in its place.
#[derive(Debug, PartialEq)]
pub struct Replacement<'a> {
    /// Byte offset where the overwritten range begins.
    pub start: usize,
    /// Byte offset just past the overwritten range.
    pub end: usize,
    /// Text written in place of the range; borrowed when no new string had to be built.
    pub replacement: Cow<'a, str>,
}
216
#[cfg(test)]
mod test {
    use super::PartialRedactDirection::{FirstCharacters, LastCharacters};
    use super::{MatchAction, Replacement};

    /// Builds the `Some(Replacement { .. })` value the assertions compare against.
    fn replaced(start: usize, end: usize, text: &'static str) -> Option<Replacement<'static>> {
        Some(Replacement {
            start,
            end,
            replacement: text.into(),
        })
    }

    #[test]
    fn match_with_no_action() {
        let match_action = MatchAction::None;

        for input in ["rene coty", "rene"] {
            assert_eq!(match_action.get_replacement(input), None);
        }
    }

    #[test]
    fn match_with_redaction() {
        let match_action = MatchAction::Redact {
            replacement: "[REPLACEMENT]".to_string(),
        };

        // The whole match is replaced, whatever its length.
        for (input, end) in [("rene coty", 9), ("coty", 4)] {
            assert_eq!(
                match_action.get_replacement(input),
                replaced(0, end, "[REPLACEMENT]")
            );
        }
    }

    #[test]
    fn match_with_hash() {
        let match_action = MatchAction::Hash;

        // Every input below is exactly four bytes long.
        let cases = [
            ("coty", "fdf7528ad7f83901"),
            ("rene", "51a2842f626aaaec"),
            ("😊", "6ce17744696c2107"),
        ];
        for (input, expected_hash) in cases {
            assert_eq!(
                match_action.get_replacement(input),
                replaced(0, 4, expected_hash)
            );
        }
    }

    #[test]
    #[cfg(feature = "utf16_hash_match_action")]
    fn match_with_utf16_hash() {
        #[allow(deprecated)]
        let match_action = MatchAction::Utf16Hash;

        // Every input below is exactly four bytes long in UTF-8.
        let cases = [
            ("coty", "d6bf038129a9eb52"),
            ("rene", "8627c79c79ff4b8b"),
            ("😊", "268a21f211fdbc0a"),
        ];
        for (input, expected_hash) in cases {
            assert_eq!(
                match_action.get_replacement(input),
                replaced(0, 4, expected_hash)
            );
        }
    }

    #[test]
    fn match_with_partial_redaction_first_characters_should_always_redact_num_characters_max() {
        let match_action = MatchAction::PartialRedact {
            character_count: 5,
            direction: FirstCharacters,
        };

        let cases = [
            ("ene coty", 5, "*****"),
            ("rene", 4, "****"),
            ("rene ", 5, "*****"),
        ];
        for (input, end, mask) in cases {
            assert_eq!(match_action.get_replacement(input), replaced(0, end, mask));
        }
    }

    #[test]
    fn match_with_partial_redaction_last_characters_should_always_redact_num_characters_max() {
        let match_action = MatchAction::PartialRedact {
            character_count: 5,
            direction: LastCharacters,
        };

        let cases = [
            ("rene cot", 3, 8, "*****"),
            ("rene", 0, 4, "****"),
            ("rene ", 0, 5, "*****"),
        ];
        for (input, start, end, mask) in cases {
            assert_eq!(
                match_action.get_replacement(input),
                replaced(start, end, mask)
            );
        }
    }

    #[test]
    fn match_with_partial_redaction_should_redact_match_length_maximum() {
        let match_action = MatchAction::PartialRedact {
            character_count: 350,
            direction: FirstCharacters,
        };

        assert_eq!(
            match_action.get_replacement("rene coty"),
            replaced(0, 9, "*********")
        );

        // The emoji takes four bytes but counts as a single masked character.
        assert_eq!(
            match_action.get_replacement("👍 rene coty"),
            replaced(0, 14, "***********")
        );
    }

    #[test]
    fn partially_redacts_first_emoji() {
        let match_action = MatchAction::PartialRedact {
            character_count: 1,
            direction: FirstCharacters,
        };

        assert_eq!(match_action.get_replacement("😊🤞"), replaced(0, 4, "*"));
    }

    #[test]
    fn partially_redacts_last_emoji() {
        let match_action = MatchAction::PartialRedact {
            character_count: 2,
            direction: LastCharacters,
        };

        assert_eq!(
            match_action.get_replacement("😊🤞👋"),
            replaced(4, 12, "**")
        );
    }

    #[test]
    fn test_farmhash_bugfix() {
        // Testing the bugfix from https://github.com/seiflotfy/rust-farmhash/pull/16
        let input = "x".repeat(128);

        assert_eq!(
            MatchAction::Hash.get_replacement(&input),
            replaced(0, 128, "5170af09fd870c17")
        );
    }
}