1#![allow(deprecated)]
2use std::{borrow::Cow, cmp::min};
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10use crate::rule_match::ReplacementType;
11
12#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq)]
13#[serde(tag = "type")]
14pub enum MatchAction {
15 #[default]
17 None,
18 Redact { replacement: String },
20 Hash,
22 #[deprecated(
24 note = "Support hash from UTF-16 encoded bytes for backward compatibility. Users should use instead hash match action."
25 )]
26 #[cfg(any(test, feature = "utf16_hash_match_action"))]
27 Utf16Hash,
28 PartialRedact {
30 direction: PartialRedactDirection,
31 character_count: usize,
32 },
33}
34
35impl MatchAction {
36 pub fn redact(replacement: &str) -> Self {
37 Self::Redact {
38 replacement: replacement.to_string(),
39 }
40 }
41}
42
43#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
44pub enum PartialRedactDirection {
45 FirstCharacters,
46 LastCharacters,
47}
48
/// Character written once for every character masked by a partial redaction.
const PARTIAL_REDACT_CHARACTER: char = '*';
50
51#[derive(Debug, PartialEq, Eq, Error)]
52pub enum MatchActionValidationError {
53 #[error("Partial redaction chars must be non-zero")]
54 PartialRedactionNumCharsZero,
55}
56
57impl MatchAction {
58 pub fn validate(&self) -> Result<(), MatchActionValidationError> {
59 match self {
60 MatchAction::PartialRedact {
61 direction: _,
62 character_count,
63 } => {
64 if *character_count == 0 {
65 Err(MatchActionValidationError::PartialRedactionNumCharsZero)
66 } else {
67 Ok(())
68 }
69 }
70 MatchAction::None | MatchAction::Redact { replacement: _ } | MatchAction::Hash => {
71 Ok(())
72 }
73 #[cfg(any(test, feature = "utf16_hash_match_action"))]
74 #[allow(deprecated)]
75 MatchAction::Utf16Hash => Ok(()),
76 }
77 }
78
79 pub fn is_mutating(&self) -> bool {
81 match self {
82 MatchAction::None => false,
83 MatchAction::Redact { .. } => true,
84 MatchAction::Hash => true,
85 #[cfg(any(test, feature = "utf16_hash_match_action"))]
86 #[allow(deprecated)]
87 MatchAction::Utf16Hash => true,
88 MatchAction::PartialRedact { .. } => true,
89 }
90 }
91
92 pub fn replacement_type(&self) -> ReplacementType {
93 match self {
94 MatchAction::None => ReplacementType::None,
95 MatchAction::Redact { .. } => ReplacementType::Placeholder,
96 MatchAction::Hash => ReplacementType::Hash,
97 #[cfg(any(test, feature = "utf16_hash_match_action"))]
98 #[allow(deprecated)]
99 MatchAction::Utf16Hash => ReplacementType::Hash,
100 MatchAction::PartialRedact { direction, .. } => match direction {
101 PartialRedactDirection::FirstCharacters => ReplacementType::PartialStart,
102 PartialRedactDirection::LastCharacters => ReplacementType::PartialEnd,
103 },
104 }
105 }
106
107 pub fn get_replacement(&self, matched_content: &str) -> Option<Replacement<'_>> {
108 match self {
109 MatchAction::None => None,
110 MatchAction::Redact { replacement } => Some(Replacement {
111 start: 0,
112 end: matched_content.len(),
113 replacement: Cow::Borrowed(replacement),
114 }),
115 MatchAction::Hash => Some(Replacement {
116 start: 0,
117 end: matched_content.len(),
118 replacement: Cow::Owned(Self::hash(matched_content)),
119 }),
120 #[cfg(any(test, feature = "utf16_hash_match_action"))]
121 #[allow(deprecated)]
122 MatchAction::Utf16Hash => Some(Replacement {
123 start: 0,
124 end: matched_content.len(),
125 replacement: Cow::Owned(Self::utf16_hash(matched_content)),
126 }),
127 MatchAction::PartialRedact {
128 direction,
129 character_count: num_characters,
130 } => match direction {
131 PartialRedactDirection::FirstCharacters => Some(Self::partial_redaction_first(
132 num_characters,
133 matched_content,
134 )),
135 PartialRedactDirection::LastCharacters => Some(Self::partial_redaction_last(
136 num_characters,
137 matched_content,
138 )),
139 },
140 }
141 }
142
143 fn hash(match_result: &str) -> String {
144 let hash = farmhash::fingerprint64(match_result.as_bytes());
145 format!("{hash:x}")
146 }
147
148 #[cfg(any(test, feature = "utf16_hash_match_action"))]
149 fn utf16_hash(match_result: &str) -> String {
150 let utf16_bytes = match_result
151 .encode_utf16()
152 .flat_map(u16::to_le_bytes)
153 .collect::<Vec<_>>();
154 let hash = farmhash::fingerprint64(&utf16_bytes);
155 format!("{hash:x}")
156 }
157
158 fn partial_redaction_first(
159 num_characters: &usize,
160 matched_content: &str,
161 ) -> Replacement<'static> {
162 let match_len = matched_content.chars().count();
163
164 let last_replacement_byte = if match_len > *num_characters {
165 matched_content
166 .char_indices()
167 .nth(*num_characters)
168 .unwrap()
169 .0
170 } else {
171 matched_content.len()
172 };
173
174 let replacement_length = min(*num_characters, match_len);
175
176 Replacement {
177 start: 0,
178 end: last_replacement_byte,
179 replacement: String::from(PARTIAL_REDACT_CHARACTER)
180 .repeat(replacement_length)
181 .into(),
182 }
183 }
184
185 fn partial_redaction_last(num_characters: &usize, match_result: &str) -> Replacement<'static> {
186 let match_len = match_result.chars().count();
187
188 let start_replacement_byte = if match_len > *num_characters {
189 match_result
190 .char_indices()
191 .nth_back(*num_characters - 1)
192 .unwrap()
193 .0
194 } else {
195 0
196 };
197
198 let replacement_length = min(*num_characters, match_len);
199
200 Replacement {
201 start: start_replacement_byte,
202 end: match_result.len(),
203 replacement: String::from(PARTIAL_REDACT_CHARACTER)
204 .repeat(replacement_length)
205 .into(),
206 }
207 }
208}
209
/// A single edit produced by a match action: the bytes `start..end` of the matched content
/// are to be replaced by `replacement`.
#[derive(Debug, PartialEq)]
pub struct Replacement<'a> {
    /// Byte offset, relative to the matched content, where the replaced range begins.
    pub start: usize,
    /// Byte offset, relative to the matched content, where the replaced range ends (exclusive).
    pub end: usize,
    /// Text to substitute for the replaced range; either borrowed or freshly allocated.
    pub replacement: Cow<'a, str>,
}
216
#[cfg(test)]
mod test {
    use crate::match_action::PartialRedactDirection::{FirstCharacters, LastCharacters};
    use crate::match_action::{MatchAction, Replacement};

    /// Asserts that `action` replaces bytes `start..end` of `input` with `replacement`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather than at this helper.
    #[track_caller]
    fn assert_replacement(
        action: &MatchAction,
        input: &str,
        start: usize,
        end: usize,
        replacement: &str,
    ) {
        assert_eq!(
            action.get_replacement(input),
            Some(Replacement {
                start,
                end,
                replacement: replacement.into()
            })
        );
    }

    #[test]
    fn match_with_no_action() {
        let match_action = MatchAction::None;

        for input in ["rene coty", "rene"] {
            assert_eq!(match_action.get_replacement(input), None);
        }
    }

    #[test]
    fn match_with_redaction() {
        let match_action = MatchAction::Redact {
            replacement: String::from("[REPLACEMENT]"),
        };

        assert_replacement(&match_action, "rene coty", 0, 9, "[REPLACEMENT]");
        assert_replacement(&match_action, "coty", 0, 4, "[REPLACEMENT]");
    }

    #[test]
    fn match_with_hash() {
        let match_action = MatchAction::Hash;

        assert_replacement(&match_action, "coty", 0, 4, "fdf7528ad7f83901");
        assert_replacement(&match_action, "rene", 0, 4, "51a2842f626aaaec");
        // A single emoji is four bytes long in UTF-8.
        assert_replacement(&match_action, "😊", 0, 4, "6ce17744696c2107");
    }

    #[test]
    #[cfg(feature = "utf16_hash_match_action")]
    fn match_with_utf16_hash() {
        #[allow(deprecated)]
        let match_action = MatchAction::Utf16Hash;

        assert_replacement(&match_action, "coty", 0, 4, "d6bf038129a9eb52");
        assert_replacement(&match_action, "rene", 0, 4, "8627c79c79ff4b8b");
        // Offsets still refer to the UTF-8 input, even though the hash uses UTF-16 bytes.
        assert_replacement(&match_action, "😊", 0, 4, "268a21f211fdbc0a");
    }

    #[test]
    fn match_with_partial_redaction_first_characters_should_always_redact_num_characters_max() {
        let match_action = MatchAction::PartialRedact {
            character_count: 5,
            direction: FirstCharacters,
        };

        // Longer than, shorter than, and exactly as long as the character count.
        assert_replacement(&match_action, "ene coty", 0, 5, "*****");
        assert_replacement(&match_action, "rene", 0, 4, "****");
        assert_replacement(&match_action, "rene ", 0, 5, "*****");
    }

    #[test]
    fn match_with_partial_redaction_last_characters_should_always_redact_num_characters_max() {
        let match_action = MatchAction::PartialRedact {
            character_count: 5,
            direction: LastCharacters,
        };

        // Longer than, shorter than, and exactly as long as the character count.
        assert_replacement(&match_action, "rene cot", 3, 8, "*****");
        assert_replacement(&match_action, "rene", 0, 4, "****");
        assert_replacement(&match_action, "rene ", 0, 5, "*****");
    }

    #[test]
    fn match_with_partial_redaction_should_redact_match_length_maximum() {
        let match_action = MatchAction::PartialRedact {
            character_count: 350,
            direction: FirstCharacters,
        };

        assert_replacement(&match_action, "rene coty", 0, 9, "*********");
        // 11 characters but 14 bytes: the mask counts characters, the range counts bytes.
        assert_replacement(&match_action, "👍 rene coty", 0, 14, "***********");
    }

    #[test]
    fn partially_redacts_first_emoji() {
        let match_action = MatchAction::PartialRedact {
            character_count: 1,
            direction: FirstCharacters,
        };

        assert_replacement(&match_action, "😊🤞", 0, 4, "*");
    }

    #[test]
    fn partially_redacts_last_emoji() {
        let match_action = MatchAction::PartialRedact {
            character_count: 2,
            direction: LastCharacters,
        };

        assert_replacement(&match_action, "😊🤞👋", 4, 12, "**");
    }

    #[test]
    fn test_farmhash_bugfix() {
        let input = "x".repeat(128);

        assert_replacement(&MatchAction::Hash, &input, 0, 128, "5170af09fd870c17");
    }
}