1#![allow(deprecated)]
2use std::borrow::Cow;
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10use crate::rule_match::ReplacementType;
11
12#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq)]
13#[serde(tag = "type")]
14pub enum MatchAction {
15 #[default]
17 None,
18 Redact { replacement: String },
20 Hash,
22 #[deprecated(
24 note = "Support hash from UTF-16 encoded bytes for backward compatibility. Users should use instead hash match action."
25 )]
26 #[cfg(any(test, feature = "utf16_hash_match_action"))]
27 Utf16Hash,
28 PartialRedact {
30 direction: PartialRedactDirection,
31 character_count: usize,
32 },
33}
34
35#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
36pub enum PartialRedactDirection {
37 FirstCharacters,
38 LastCharacters,
39}
40
/// Character repeated to build the mask produced by partial redaction.
const PARTIAL_REDACT_CHARACTER: char = '*';
42
43#[derive(Debug, PartialEq, Eq, Error)]
44pub enum MatchActionValidationError {
45 #[error("Partial redaction chars must be non-zero")]
46 PartialRedactionNumCharsZero,
47}
48
49impl MatchAction {
50 pub fn validate(&self) -> Result<(), MatchActionValidationError> {
51 match self {
52 MatchAction::PartialRedact {
53 direction: _,
54 character_count,
55 } => {
56 if *character_count == 0 {
57 Err(MatchActionValidationError::PartialRedactionNumCharsZero)
58 } else {
59 Ok(())
60 }
61 }
62 MatchAction::None | MatchAction::Redact { replacement: _ } | MatchAction::Hash => {
63 Ok(())
64 }
65 #[cfg(any(test, feature = "utf16_hash_match_action"))]
66 #[allow(deprecated)]
67 MatchAction::Utf16Hash => Ok(()),
68 }
69 }
70
71 pub fn is_mutating(&self) -> bool {
73 match self {
74 MatchAction::None => false,
75 MatchAction::Redact { .. } => true,
76 MatchAction::Hash => true,
77 #[cfg(any(test, feature = "utf16_hash_match_action"))]
78 #[allow(deprecated)]
79 MatchAction::Utf16Hash => true,
80 MatchAction::PartialRedact { .. } => true,
81 }
82 }
83
84 pub fn replacement_type(&self) -> ReplacementType {
85 match self {
86 MatchAction::None => ReplacementType::None,
87 MatchAction::Redact { .. } => ReplacementType::Placeholder,
88 MatchAction::Hash => ReplacementType::Hash,
89 #[cfg(any(test, feature = "utf16_hash_match_action"))]
90 #[allow(deprecated)]
91 MatchAction::Utf16Hash => ReplacementType::Hash,
92 MatchAction::PartialRedact { direction, .. } => match direction {
93 PartialRedactDirection::FirstCharacters => ReplacementType::PartialStart,
94 PartialRedactDirection::LastCharacters => ReplacementType::PartialEnd,
95 },
96 }
97 }
98
99 pub fn get_replacement(&self, matched_content: &str) -> Option<Replacement> {
100 match self {
101 MatchAction::None => None,
102 MatchAction::Redact { replacement } => Some(Replacement {
103 start: 0,
104 end: matched_content.len(),
105 replacement: Cow::Borrowed(replacement),
106 }),
107 MatchAction::Hash => Some(Replacement {
108 start: 0,
109 end: matched_content.len(),
110 replacement: Cow::Owned(Self::hash(matched_content)),
111 }),
112 #[cfg(any(test, feature = "utf16_hash_match_action"))]
113 #[allow(deprecated)]
114 MatchAction::Utf16Hash => Some(Replacement {
115 start: 0,
116 end: matched_content.len(),
117 replacement: Cow::Owned(Self::utf16_hash(matched_content)),
118 }),
119 MatchAction::PartialRedact {
120 direction,
121 character_count: num_characters,
122 } => match direction {
123 PartialRedactDirection::FirstCharacters => Some(Self::partial_redaction_first(
124 num_characters,
125 matched_content,
126 )),
127 PartialRedactDirection::LastCharacters => Some(Self::partial_redaction_last(
128 num_characters,
129 matched_content,
130 )),
131 },
132 }
133 }
134
135 fn hash(match_result: &str) -> String {
136 let hash = farmhash::fingerprint64(match_result.as_bytes());
137 format!("{hash:x}")
138 }
139
140 #[cfg(any(test, feature = "utf16_hash_match_action"))]
141 fn utf16_hash(match_result: &str) -> String {
142 let utf16_bytes = match_result
143 .encode_utf16()
144 .flat_map(u16::to_le_bytes)
145 .collect::<Vec<_>>();
146 let hash = farmhash::fingerprint64(&utf16_bytes);
147 format!("{hash:x}")
148 }
149
150 fn partial_redaction_first(
151 num_characters: &usize,
152 matched_content: &str,
153 ) -> Replacement<'static> {
154 let match_len = matched_content.chars().count();
155
156 let last_replacement_byte = if match_len > *num_characters {
157 matched_content
158 .char_indices()
159 .nth(*num_characters)
160 .unwrap()
161 .0
162 } else {
163 matched_content.len()
164 };
165
166 Replacement {
167 start: 0,
168 end: last_replacement_byte,
169 replacement: String::from(PARTIAL_REDACT_CHARACTER)
170 .repeat(*num_characters)
171 .into(),
172 }
173 }
174
175 fn partial_redaction_last(num_characters: &usize, match_result: &str) -> Replacement<'static> {
176 let match_len = match_result.chars().count();
177
178 let start_replacement_byte = if match_len > *num_characters {
179 match_result
180 .char_indices()
181 .nth_back(*num_characters - 1)
182 .unwrap()
183 .0
184 } else {
185 0
186 };
187
188 Replacement {
189 start: start_replacement_byte,
190 end: match_result.len(),
191 replacement: String::from(PARTIAL_REDACT_CHARACTER)
192 .repeat(*num_characters)
193 .into(),
194 }
195 }
196}
197
/// A text substitution to apply to a match.
///
/// `start` and `end` are offsets into the matched content and `replacement`
/// is the text to put in place of that range.
#[derive(Debug, PartialEq)]
pub struct Replacement<'a> {
    /// Offset where the replaced range begins.
    pub start: usize,
    /// Offset where the replaced range ends.
    pub end: usize,
    /// Text substituted for the range; borrowed when possible, owned otherwise.
    pub replacement: Cow<'a, str>,
}
204
#[cfg(test)]
mod test {
    use crate::match_action::PartialRedactDirection::{FirstCharacters, LastCharacters};
    use crate::match_action::{MatchAction, Replacement};

    /// Shorthand for the `Some(Replacement { .. })` value the assertions compare against.
    fn expected(start: usize, end: usize, text: &str) -> Option<Replacement<'_>> {
        Some(Replacement {
            start,
            end,
            replacement: text.into(),
        })
    }

    #[test]
    fn match_with_no_action() {
        let action = MatchAction::None;

        assert_eq!(action.get_replacement("rene coty"), None);
        assert_eq!(action.get_replacement("rene"), None);
    }

    #[test]
    fn match_with_redaction() {
        let action = MatchAction::Redact {
            replacement: "[REPLACEMENT]".to_string(),
        };

        assert_eq!(
            action.get_replacement("rene coty"),
            expected(0, 9, "[REPLACEMENT]")
        );
        assert_eq!(
            action.get_replacement("coty"),
            expected(0, 4, "[REPLACEMENT]")
        );
    }

    #[test]
    fn match_with_hash() {
        let action = MatchAction::Hash;

        assert_eq!(
            action.get_replacement("coty"),
            expected(0, 4, "fdf7528ad7f83901")
        );
        assert_eq!(
            action.get_replacement("rene"),
            expected(0, 4, "51a2842f626aaaec")
        );
        // A single emoji is four bytes long in UTF-8.
        assert_eq!(
            action.get_replacement("😊"),
            expected(0, 4, "6ce17744696c2107")
        );
    }

    // Only compiled when the `utf16_hash_match_action` feature is enabled.
    #[test]
    #[cfg(feature = "utf16_hash_match_action")]
    fn match_with_utf16_hash() {
        #[allow(deprecated)]
        let action = MatchAction::Utf16Hash;

        assert_eq!(
            action.get_replacement("coty"),
            expected(0, 4, "d6bf038129a9eb52")
        );
        assert_eq!(
            action.get_replacement("rene"),
            expected(0, 4, "8627c79c79ff4b8b")
        );
        assert_eq!(
            action.get_replacement("😊"),
            expected(0, 4, "268a21f211fdbc0a")
        );
    }

    #[test]
    fn match_with_partial_redaction_first_characters_should_always_redact_num_characters() {
        let action = MatchAction::PartialRedact {
            character_count: 5,
            direction: FirstCharacters,
        };

        // Longer than the count: only the first five characters are covered.
        assert_eq!(action.get_replacement("ene coty"), expected(0, 5, "*****"));
        // Shorter than the count: the whole match is covered, the mask keeps five characters.
        assert_eq!(action.get_replacement("rene"), expected(0, 4, "*****"));
        // Exactly as long as the count.
        assert_eq!(action.get_replacement("rene "), expected(0, 5, "*****"));
    }

    #[test]
    fn match_with_partial_redaction_last_characters_should_always_redact_num_characters() {
        let action = MatchAction::PartialRedact {
            character_count: 5,
            direction: LastCharacters,
        };

        // Longer than the count: only the last five characters are covered.
        assert_eq!(action.get_replacement("rene cot"), expected(3, 8, "*****"));
        // Shorter than the count: the whole match is covered, the mask keeps five characters.
        assert_eq!(action.get_replacement("rene"), expected(0, 4, "*****"));
        // Exactly as long as the count.
        assert_eq!(action.get_replacement("rene "), expected(0, 5, "*****"));
    }

    #[test]
    fn partially_redacts_first_emoji() {
        let action = MatchAction::PartialRedact {
            character_count: 1,
            direction: FirstCharacters,
        };

        // One character is redacted, which spans four bytes.
        assert_eq!(action.get_replacement("😊🤞"), expected(0, 4, "*"));
    }

    #[test]
    fn partially_redacts_last_emoji() {
        let action = MatchAction::PartialRedact {
            character_count: 1,
            direction: LastCharacters,
        };

        assert_eq!(action.get_replacement("😊🤞"), expected(4, 8, "*"));
    }

    #[test]
    fn test_farmhash_bugfix() {
        // Pins the hash of a 128-byte input.
        let input = "x".repeat(128);

        assert_eq!(
            MatchAction::Hash.get_replacement(&input),
            expected(0, 128, "5170af09fd870c17")
        );
    }
}