1#![allow(deprecated)]
2use std::borrow::Cow;
6
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
10use crate::rule_match::ReplacementType;
11
/// The action to apply to content that was matched.
///
/// Serialized with serde as an internally tagged enum: the variant name is stored under the
/// `type` key, next to the variant's own fields.
#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq)]
#[serde(tag = "type")]
pub enum MatchAction {
    /// Leave the matched content untouched. This is the default action.
    #[default]
    None,
    /// Replace the whole match with the fixed `replacement` string.
    Redact { replacement: String },
    /// Replace the whole match with the lowercase hexadecimal form of the 64-bit farmhash
    /// fingerprint of its UTF-8 bytes.
    Hash,
    /// Like `Hash`, but the fingerprint is computed over the little-endian UTF-16 encoding of
    /// the match. Only compiled in tests or with the `utf16_hash_match_action` feature.
    #[deprecated(
        note = "Support hash from UTF-16 encoded bytes for backward compatibility. Users should use instead hash match action."
    )]
    #[cfg(any(test, feature = "utf16_hash_match_action"))]
    Utf16Hash,
    /// Mask the start or the end of the match (chosen by `direction`) with `character_count`
    /// mask characters. `validate` rejects a `character_count` of zero.
    PartialRedact {
        direction: PartialRedactDirection,
        character_count: usize,
    },
}
34
35impl MatchAction {
36 pub fn redact(replacement: &str) -> Self {
37 Self::Redact {
38 replacement: replacement.to_string(),
39 }
40 }
41}
42
/// Which end of the matched content a partial redaction masks.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub enum PartialRedactDirection {
    /// Mask characters counted from the start of the match.
    FirstCharacters,
    /// Mask characters counted from the end of the match.
    LastCharacters,
}
48
/// Mask character repeated to build the replacement text of a partial redaction.
const PARTIAL_REDACT_CHARACTER: char = '*';
50
/// Reasons a [`MatchAction`] can be rejected by `MatchAction::validate`.
#[derive(Debug, PartialEq, Eq, Error)]
pub enum MatchActionValidationError {
    /// A `PartialRedact` action was configured with a `character_count` of zero.
    #[error("Partial redaction chars must be non-zero")]
    PartialRedactionNumCharsZero,
}
56
57impl MatchAction {
58 pub fn validate(&self) -> Result<(), MatchActionValidationError> {
59 match self {
60 MatchAction::PartialRedact {
61 direction: _,
62 character_count,
63 } => {
64 if *character_count == 0 {
65 Err(MatchActionValidationError::PartialRedactionNumCharsZero)
66 } else {
67 Ok(())
68 }
69 }
70 MatchAction::None | MatchAction::Redact { replacement: _ } | MatchAction::Hash => {
71 Ok(())
72 }
73 #[cfg(any(test, feature = "utf16_hash_match_action"))]
74 #[allow(deprecated)]
75 MatchAction::Utf16Hash => Ok(()),
76 }
77 }
78
79 pub fn is_mutating(&self) -> bool {
81 match self {
82 MatchAction::None => false,
83 MatchAction::Redact { .. } => true,
84 MatchAction::Hash => true,
85 #[cfg(any(test, feature = "utf16_hash_match_action"))]
86 #[allow(deprecated)]
87 MatchAction::Utf16Hash => true,
88 MatchAction::PartialRedact { .. } => true,
89 }
90 }
91
92 pub fn replacement_type(&self) -> ReplacementType {
93 match self {
94 MatchAction::None => ReplacementType::None,
95 MatchAction::Redact { .. } => ReplacementType::Placeholder,
96 MatchAction::Hash => ReplacementType::Hash,
97 #[cfg(any(test, feature = "utf16_hash_match_action"))]
98 #[allow(deprecated)]
99 MatchAction::Utf16Hash => ReplacementType::Hash,
100 MatchAction::PartialRedact { direction, .. } => match direction {
101 PartialRedactDirection::FirstCharacters => ReplacementType::PartialStart,
102 PartialRedactDirection::LastCharacters => ReplacementType::PartialEnd,
103 },
104 }
105 }
106
107 pub fn get_replacement(&self, matched_content: &str) -> Option<Replacement<'_>> {
108 match self {
109 MatchAction::None => None,
110 MatchAction::Redact { replacement } => Some(Replacement {
111 start: 0,
112 end: matched_content.len(),
113 replacement: Cow::Borrowed(replacement),
114 }),
115 MatchAction::Hash => Some(Replacement {
116 start: 0,
117 end: matched_content.len(),
118 replacement: Cow::Owned(Self::hash(matched_content)),
119 }),
120 #[cfg(any(test, feature = "utf16_hash_match_action"))]
121 #[allow(deprecated)]
122 MatchAction::Utf16Hash => Some(Replacement {
123 start: 0,
124 end: matched_content.len(),
125 replacement: Cow::Owned(Self::utf16_hash(matched_content)),
126 }),
127 MatchAction::PartialRedact {
128 direction,
129 character_count: num_characters,
130 } => match direction {
131 PartialRedactDirection::FirstCharacters => Some(Self::partial_redaction_first(
132 num_characters,
133 matched_content,
134 )),
135 PartialRedactDirection::LastCharacters => Some(Self::partial_redaction_last(
136 num_characters,
137 matched_content,
138 )),
139 },
140 }
141 }
142
143 fn hash(match_result: &str) -> String {
144 let hash = farmhash::fingerprint64(match_result.as_bytes());
145 format!("{hash:x}")
146 }
147
148 #[cfg(any(test, feature = "utf16_hash_match_action"))]
149 fn utf16_hash(match_result: &str) -> String {
150 let utf16_bytes = match_result
151 .encode_utf16()
152 .flat_map(u16::to_le_bytes)
153 .collect::<Vec<_>>();
154 let hash = farmhash::fingerprint64(&utf16_bytes);
155 format!("{hash:x}")
156 }
157
158 fn partial_redaction_first(
159 num_characters: &usize,
160 matched_content: &str,
161 ) -> Replacement<'static> {
162 let match_len = matched_content.chars().count();
163
164 let last_replacement_byte = if match_len > *num_characters {
165 matched_content
166 .char_indices()
167 .nth(*num_characters)
168 .unwrap()
169 .0
170 } else {
171 matched_content.len()
172 };
173
174 Replacement {
175 start: 0,
176 end: last_replacement_byte,
177 replacement: String::from(PARTIAL_REDACT_CHARACTER)
178 .repeat(*num_characters)
179 .into(),
180 }
181 }
182
183 fn partial_redaction_last(num_characters: &usize, match_result: &str) -> Replacement<'static> {
184 let match_len = match_result.chars().count();
185
186 let start_replacement_byte = if match_len > *num_characters {
187 match_result
188 .char_indices()
189 .nth_back(*num_characters - 1)
190 .unwrap()
191 .0
192 } else {
193 0
194 };
195
196 Replacement {
197 start: start_replacement_byte,
198 end: match_result.len(),
199 replacement: String::from(PARTIAL_REDACT_CHARACTER)
200 .repeat(*num_characters)
201 .into(),
202 }
203 }
204}
205
/// A substitution to perform on matched content, as produced by `MatchAction::get_replacement`.
#[derive(PartialEq, Debug)]
pub struct Replacement<'a> {
    /// Byte offset, within the matched content, where the replaced range begins.
    pub start: usize,
    /// Byte offset, within the matched content, where the replaced range stops. It equals the
    /// content's byte length when the range extends to the end of the match.
    pub end: usize,
    /// Text to put in place of the replaced range; borrowed when it comes straight from the
    /// action's configuration, owned when it is computed.
    pub replacement: Cow<'a, str>,
}
212
213#[cfg(test)]
214mod test {
215 use crate::match_action::PartialRedactDirection::{FirstCharacters, LastCharacters};
216 use crate::match_action::{MatchAction, Replacement};
217
218 #[test]
219 fn match_with_no_action() {
220 let match_action = MatchAction::None;
221
222 assert_eq!(match_action.get_replacement("rene coty"), None);
223 assert_eq!(match_action.get_replacement("rene"), None);
224 }
225
226 #[test]
227 fn match_with_redaction() {
228 let match_action = MatchAction::Redact {
229 replacement: "[REPLACEMENT]".to_string(),
230 };
231
232 assert_eq!(
233 match_action.get_replacement("rene coty"),
234 Some(Replacement {
235 start: 0,
236 end: 9,
237 replacement: "[REPLACEMENT]".into()
238 })
239 );
240
241 assert_eq!(
242 match_action.get_replacement("coty"),
243 Some(Replacement {
244 start: 0,
245 end: 4,
246 replacement: "[REPLACEMENT]".into()
247 })
248 );
249 }
250
251 #[test]
252 fn match_with_hash() {
253 let match_action = MatchAction::Hash;
254
255 assert_eq!(
256 match_action.get_replacement("coty"),
257 Some(Replacement {
258 start: 0,
259 end: 4,
260 replacement: "fdf7528ad7f83901".into()
261 })
262 );
263
264 assert_eq!(
265 match_action.get_replacement("rene"),
266 Some(Replacement {
267 start: 0,
268 end: 4,
269 replacement: "51a2842f626aaaec".into()
270 })
271 );
272
273 assert_eq!(
274 match_action.get_replacement("😊"),
275 Some(Replacement {
276 start: 0,
277 end: 4,
278 replacement: "6ce17744696c2107".into()
279 })
280 );
281 }
282
283 #[test]
284 #[cfg(feature = "utf16_hash_match_action")]
285 fn match_with_utf16_hash() {
286 #[allow(deprecated)]
287 let match_action = MatchAction::Utf16Hash;
288
289 assert_eq!(
290 match_action.get_replacement("coty"),
291 Some(Replacement {
292 start: 0,
293 end: 4,
294 replacement: "d6bf038129a9eb52".into()
295 })
296 );
297
298 assert_eq!(
299 match_action.get_replacement("rene"),
300 Some(Replacement {
301 start: 0,
302 end: 4,
303 replacement: "8627c79c79ff4b8b".into()
304 })
305 );
306
307 assert_eq!(
308 match_action.get_replacement("😊"),
309 Some(Replacement {
310 start: 0,
311 end: 4,
312 replacement: "268a21f211fdbc0a".into()
313 })
314 );
315 }
316
317 #[test]
318 fn match_with_partial_redaction_first_characters_should_always_redact_num_characters() {
319 let match_action = MatchAction::PartialRedact {
320 character_count: 5,
321 direction: FirstCharacters,
322 };
323
324 assert_eq!(
325 match_action.get_replacement("ene coty"),
326 Some(Replacement {
327 start: 0,
328 end: 5,
329 replacement: "*****".into()
330 })
331 );
332
333 assert_eq!(
334 match_action.get_replacement("rene"),
335 Some(Replacement {
336 start: 0,
337 end: 4,
338 replacement: "*****".into()
339 })
340 );
341
342 assert_eq!(
343 match_action.get_replacement("rene "),
344 Some(Replacement {
345 start: 0,
346 end: 5,
347 replacement: "*****".into()
348 })
349 );
350 }
351
352 #[test]
353 fn match_with_partial_redaction_last_characters_should_always_redact_num_characters() {
354 let match_action = MatchAction::PartialRedact {
355 character_count: 5,
356 direction: LastCharacters,
357 };
358
359 assert_eq!(
360 match_action.get_replacement("rene cot"),
361 Some(Replacement {
362 start: 3,
363 end: 8,
364 replacement: "*****".into()
365 })
366 );
367
368 assert_eq!(
369 match_action.get_replacement("rene"),
370 Some(Replacement {
371 start: 0,
372 end: 4,
373 replacement: "*****".into()
374 })
375 );
376
377 assert_eq!(
378 match_action.get_replacement("rene "),
379 Some(Replacement {
380 start: 0,
381 end: 5,
382 replacement: "*****".into()
383 })
384 );
385 }
386
387 #[test]
388 fn partially_redacts_first_emoji() {
389 let match_action = MatchAction::PartialRedact {
390 character_count: 1,
391 direction: FirstCharacters,
392 };
393
394 assert_eq!(
395 match_action.get_replacement("😊🤞"),
396 Some(Replacement {
397 start: 0,
398 end: 4,
399 replacement: "*".into()
400 })
401 );
402 }
403
404 #[test]
405 fn partially_redacts_last_emoji() {
406 let match_action = MatchAction::PartialRedact {
407 character_count: 1,
408 direction: LastCharacters,
409 };
410
411 assert_eq!(
412 match_action.get_replacement("😊🤞"),
413 Some(Replacement {
414 start: 4,
415 end: 8,
416 replacement: "*".into()
417 })
418 );
419 }
420
421 #[test]
422 fn test_farmhash_bugfix() {
423 assert_eq!(
425 MatchAction::Hash.get_replacement(&"x".repeat(128)),
426 Some(Replacement {
427 start: 0,
428 end: 128,
429 replacement: "5170af09fd870c17".into()
430 })
431 );
432 }
433}