Line data Source code
1 : #include "w3c_propagation.h"
2 :
3 : #include <algorithm>
4 : #include <cassert>
5 : #include <cstddef>
6 : #include <regex>
7 : #include <utility>
8 :
9 : #include "dict_reader.h"
10 : #include "hex.h"
11 : #include "parse_util.h"
12 : #include "propagation_style.h"
13 : #include "tags.h"
14 :
15 : namespace datadog {
16 : namespace tracing {
17 : namespace {
18 :
19 : // Return a predicate that returns whether its `char` argument is any of the
20 : // following:
21 : //
22 : // - outside of the ASCII inclusive range `[lowest_ascii, highest_ascii]`
23 : // - equal to one of the `disallowed_characters`.
24 : //
25 : // `verboten` is used with `std::replace_if` to sanitize field values within the
26 : // tracestate header.
27 20050 : auto verboten(int lowest_ascii, int highest_ascii,
28 : StringView disallowed_characters) {
29 84075 : return [=, chars = disallowed_characters](char ch) {
30 168129 : return int(ch) < lowest_ascii || int(ch) > highest_ascii ||
31 168129 : std::find(chars.begin(), chars.end(), ch) != chars.end();
32 20050 : };
33 : }
34 :
35 : // Populate the specified `result` with data extracted from the "traceparent"
36 : // entry of the specified `headers`. Return `nullopt` on success. Return a value
37 : // for the `tags::internal::w3c_extraction_error` tag if an error occurs.
38 80 : Optional<std::string> extract_traceparent(ExtractedData& result,
39 : const DictReader& headers) {
40 80 : const auto maybe_traceparent = headers.lookup("traceparent");
41 80 : if (!maybe_traceparent) {
42 31 : return nullopt;
43 : }
44 :
45 49 : const auto traceparent = strip(*maybe_traceparent);
46 :
47 : // Note that leading and trailing whitespace was already removed above.
48 : // Note that match group 0 is the entire match.
49 : static const auto& pattern =
50 : "([0-9a-f]{2})" // hex version number (match group 1)
51 : "-"
52 : "([0-9a-f]{32})" // hex trace ID (match group 2)
53 : "-"
54 : "([0-9a-f]{16})" // hex parent span ID (match group 3)
55 : "-"
56 : "([0-9a-f]{2})" // hex "trace-flags" (match group 4)
57 : "($|-.*)"; // either the end, or a hyphen preceding further fields (match
58 : // group 5)
59 49 : static const std::regex regex{pattern};
60 :
61 49 : std::match_results<StringView::iterator> match;
62 49 : if (!std::regex_match(traceparent.begin(), traceparent.end(), match, regex)) {
63 3 : return "malformed_traceparent";
64 : }
65 :
66 46 : assert(match.ready());
67 46 : assert(match.size() == 5 + 1);
68 :
69 218 : const auto to_string_view = [](const auto& submatch) {
70 218 : assert(submatch.first <= submatch.second);
71 218 : return StringView{submatch.first,
72 218 : std::size_t(submatch.second - submatch.first)};
73 : };
74 :
75 46 : const auto version = to_string_view(match[1]);
76 46 : if (version == "ff") {
77 1 : return "invalid_version";
78 : }
79 :
80 45 : if (version == "00" && !to_string_view(match[5]).empty()) {
81 1 : return "malformed_traceparent";
82 : }
83 :
84 44 : result.trace_id = *TraceID::parse_hex(to_string_view(match[2]));
85 44 : if (result.trace_id == 0) {
86 1 : return "trace_id_zero";
87 : }
88 :
89 43 : result.parent_id = *parse_uint64(to_string_view(match[3]), 16);
90 43 : if (*result.parent_id == 0) {
91 1 : return "parent_id_zero";
92 : }
93 :
94 42 : const auto flags = *parse_uint64(to_string_view(match[4]), 16);
95 42 : result.sampling_priority = int(flags & 1);
96 :
97 42 : return nullopt;
98 49 : }
99 :
100 : // `struct PartiallyParsedTracestat` contains the separated Datadog-specific and
101 : // non-Datadog-specific portions of tracestate.
102 : struct PartiallyParsedTracestate {
103 : StringView datadog_value;
104 : std::string other_entries;
105 : };
106 :
107 : // Return the separate Datadog-specific and non-Datadog-specific portions of the
108 : // specified `tracestate`. If `tracestate` does not have a Datadog-specific
109 : // portion, return `nullopt`.
110 35 : Optional<PartiallyParsedTracestate> parse_tracestate(StringView tracestate) {
111 35 : Optional<PartiallyParsedTracestate> result;
112 :
113 35 : const char* const begin = tracestate.begin();
114 35 : const char* const end = tracestate.end();
115 35 : const char* pair_begin = begin;
116 52 : while (pair_begin != end) {
117 47 : const char* const pair_end = std::find(pair_begin, end, ',');
118 : // Note that since this `pair` is `strip`ped, `pair_begin` is not
119 : // necessarily equal to `pair.begin()` (similarly for the ends).
120 47 : const auto pair = strip(range(pair_begin, pair_end));
121 47 : if (pair.empty()) {
122 1 : pair_begin = pair_end == end ? end : pair_end + 1;
123 17 : continue;
124 : }
125 :
126 46 : const auto kv_separator = std::find(pair.begin(), pair.end(), '=');
127 46 : if (kv_separator == pair.end()) {
128 : // This is an invalid entry because it contains a non-whitespace character
129 : // but not a "=".
130 : // Let's move on to the next entry.
131 1 : pair_begin = pair_end == end ? end : pair_end + 1;
132 1 : continue;
133 : }
134 :
135 45 : const auto key = range(pair.begin(), kv_separator);
136 45 : if (key != "dd") {
137 : // On to the next.
138 15 : pair_begin = pair_end == end ? end : pair_end + 1;
139 15 : continue;
140 : }
141 :
142 : // We found the "dd" entry.
143 30 : result.emplace();
144 30 : result->datadog_value = range(kv_separator + 1, pair.end());
145 : // `result->other_entries` is whatever was before the "dd" entry and
146 : // whatever is after the "dd" entry, but without an extra comma in the
147 : // middle.
148 30 : if (pair_begin != begin) {
149 : // There's a prefix
150 5 : append(result->other_entries, range(begin, pair_begin - 1));
151 5 : if (pair_end != end) {
152 : // and a suffix
153 1 : append(result->other_entries, range(pair_end, end));
154 : }
155 25 : } else if (pair_end != end) {
156 : // There's just a suffix
157 2 : append(result->other_entries, range(pair_end + 1, end));
158 : }
159 :
160 30 : break;
161 : }
162 :
163 35 : return result;
164 0 : }
165 :
166 : // Fill the specified `result` with information parsed from the specified
167 : // `datadog_value`. `datadog_value` is the value of the "dd" entry in the
168 : // "tracestate" header.
169 : //
170 : // `parse_datadog_tracestate` populates the following `ExtractedData` fields:
171 : //
172 : // - `origin`
173 : // - `trace_tags`
174 : // - `sampling_priority`
175 : // - `additional_datadog_w3c_tracestate`
176 30 : void parse_datadog_tracestate(ExtractedData& result, StringView datadog_value) {
177 30 : const char* const begin = datadog_value.begin();
178 30 : const char* const end = datadog_value.end();
179 30 : const char* pair_begin = begin;
180 85 : while (pair_begin != end) {
181 55 : const char* const pair_end = std::find(pair_begin, end, ';');
182 55 : const auto pair = range(pair_begin, pair_end);
183 55 : if (pair.empty()) {
184 : // chaff!
185 6 : pair_begin = pair_end == end ? end : pair_end + 1;
186 11 : continue;
187 : }
188 :
189 49 : const auto kv_separator = std::find(pair_begin, pair_end, ':');
190 49 : if (kv_separator == pair_end) {
191 : // chaff!
192 3 : pair_begin = pair_end == end ? end : pair_end + 1;
193 3 : continue;
194 : }
195 :
196 46 : const auto key = range(pair_begin, kv_separator);
197 46 : const auto value = range(kv_separator + 1, pair_end);
198 46 : if (key == "o") {
199 6 : result.origin = std::string{value};
200 : // Equal signs are allowed in the value of "origin," but equal signs are
201 : // also special characters in the `tracestate` encoding. So, equal signs
202 : // that would appear in the "origin" value are converted to tildes during
203 : // encoding. Here, in decoding, we undo the conversion.
204 6 : std::replace(result.origin->begin(), result.origin->end(), '~', '=');
205 40 : } else if (key == "s") {
206 13 : const auto maybe_priority = parse_int(value, 10);
207 13 : if (!maybe_priority) {
208 : // chaff!
209 2 : pair_begin = pair_end == end ? end : pair_end + 1;
210 2 : continue;
211 : }
212 11 : const int priority = *maybe_priority;
213 : // If we didn't parse a sampling priority from traceparent, or if the one
214 : // we just parsed from tracestate is consistent with the previous, then
215 : // set the sampling priority to the one we just parsed.
216 : // Alternatively, if we already parsed a sampling priority from
217 : // traceparent and got a result inconsistent with that parsed here, go
218 : // with the one previously parsed from traceparent.
219 22 : if (!result.sampling_priority ||
220 11 : (*result.sampling_priority > 0) == (priority > 0)) {
221 7 : result.sampling_priority = priority;
222 : }
223 40 : } else if (starts_with(key, "t.")) {
224 : // The part of the key that follows "t." is the name of a trace tag,
225 : // except without the "_dd.p." prefix.
226 10 : const auto tag_suffix = key.substr(2);
227 10 : std::string tag_name = "_dd.p.";
228 10 : append(tag_name, tag_suffix);
229 : // The tag value was encoded with all '=' replaced by '~'. Undo that
230 : // transformation.
231 10 : std::string decoded_value{value};
232 10 : std::replace(decoded_value.begin(), decoded_value.end(), '~', '=');
233 10 : result.trace_tags.emplace_back(std::move(tag_name),
234 10 : std::move(decoded_value));
235 10 : } else {
236 : // Unrecognized key: append the whole pair to
237 : // `additional_datadog_w3c_tracestate`, which will be used if/when we
238 : // inject trace context.
239 17 : auto& entries = result.additional_datadog_w3c_tracestate;
240 17 : if (!entries) {
241 10 : entries.emplace();
242 : } else {
243 7 : *entries += ';';
244 : }
245 17 : append(*entries, pair);
246 : }
247 :
248 44 : pair_begin = pair_end == end ? end : pair_end + 1;
249 : }
250 30 : }
251 :
252 : // Fill the specified `result` with information parsed from the "tracestate"
253 : // element of the specified `headers`, if present.
254 : //
255 : // `extract_tracestate` populates the `additional_w3c_tracestate` field of
256 : // `ExtractedData`, in addition to those populated by
257 : // `parse_datadog_tracestate`.
258 42 : void extract_tracestate(ExtractedData& result, const DictReader& headers) {
259 42 : const auto maybe_tracestate = headers.lookup("tracestate");
260 42 : if (!maybe_tracestate) {
261 12 : return;
262 : }
263 :
264 35 : const auto tracestate = strip(*maybe_tracestate);
265 35 : auto maybe_parsed = parse_tracestate(tracestate);
266 35 : if (!maybe_parsed) {
267 : // No "dd" entry in `tracestate`, so there's nothing to extract.
268 5 : if (!tracestate.empty()) {
269 4 : result.additional_w3c_tracestate = std::string{tracestate};
270 : }
271 5 : return;
272 : }
273 :
274 30 : auto& [datadog_value, other_entries] = *maybe_parsed;
275 30 : if (!other_entries.empty()) {
276 7 : result.additional_w3c_tracestate = std::move(other_entries);
277 : }
278 :
279 30 : parse_datadog_tracestate(result, datadog_value);
280 35 : }
281 :
282 : } // namespace
283 :
284 80 : Expected<ExtractedData> extract_w3c(
285 : const DictReader& headers,
286 : std::unordered_map<std::string, std::string>& span_tags, Logger&) {
287 80 : ExtractedData result;
288 80 : result.style = PropagationStyle::W3C;
289 :
290 80 : if (auto error_tag_value = extract_traceparent(result, headers)) {
291 7 : span_tags[tags::internal::w3c_extraction_error] =
292 14 : std::move(*error_tag_value);
293 7 : return ExtractedData{};
294 80 : }
295 :
296 : // If we didn't get a trace ID from traceparent, don't bother with
297 : // tracestate.
298 73 : if (!result.trace_id) {
299 31 : return result;
300 : }
301 :
302 42 : extract_tracestate(result, headers);
303 :
304 42 : return result;
305 80 : }
306 :
307 28 : std::string encode_traceparent(TraceID trace_id, std::uint64_t span_id,
308 : int sampling_priority) {
309 28 : std::string result;
310 : // version
311 28 : result += "00-";
312 :
313 : // trace ID
314 28 : result += trace_id.hex_padded();
315 28 : result += '-';
316 :
317 : // span ID
318 28 : result += hex_padded(span_id);
319 28 : result += '-';
320 :
321 : // flags
322 28 : result += sampling_priority > 0 ? "01" : "00";
323 :
324 28 : return result;
325 0 : }
326 :
327 28 : std::string encode_datadog_tracestate(
328 : int sampling_priority, const Optional<std::string>& origin,
329 : const std::vector<std::pair<std::string, std::string>>& trace_tags,
330 : const Optional<std::string>& additional_datadog_w3c_tracestate) {
331 28 : std::string result = "dd=s:";
332 28 : result += std::to_string(sampling_priority);
333 :
334 28 : if (origin) {
335 8 : result += ";o:";
336 8 : result += *origin;
337 8 : std::replace_if(result.end() - origin->size(), result.end(),
338 8 : verboten(0x20, 0x7e, ",;~"), '_');
339 8 : std::replace(result.end() - origin->size(), result.end(), '=', '~');
340 : }
341 :
342 10054 : for (const auto& [key, value] : trace_tags) {
343 10026 : const StringView prefix = "_dd.p.";
344 10026 : if (!starts_with(key, prefix) || key == tags::internal::trace_id_high) {
345 : // Either it's not a propagation tag, or it's one of the propagation tags
346 : // that need not be included in tracestate.
347 5 : continue;
348 : }
349 :
350 : // `key` is "_dd.p.<name>", but we want "t.<name>".
351 10021 : result += ";t.";
352 10021 : result.append(key, prefix.size());
353 10021 : std::replace_if(result.end() - (key.size() - prefix.size()), result.end(),
354 10021 : verboten(0x20, 0x7e, " ,;="), '_');
355 :
356 10021 : result += ':';
357 10021 : result += value;
358 10021 : std::replace_if(result.end() - value.size(), result.end(),
359 10021 : verboten(0x20, 0x7e, ",;~"), '_');
360 : // `value` might contain equal signs ("="), which is reserved in tracestate.
361 : // Replace them with tildes ("~").
362 10021 : std::replace(result.end() - value.size(), result.end(), '=', '~');
363 : }
364 :
365 28 : if (additional_datadog_w3c_tracestate) {
366 6 : result += ';';
367 6 : result += *additional_datadog_w3c_tracestate;
368 : }
369 :
370 28 : const std::size_t max_size = 256;
371 10003 : while (result.size() > max_size) {
372 9975 : const auto last_semicolon_index = result.rfind(';');
373 : // This assumption is safe, because `result` always begins with
374 : // "dd=s:<int>", and that's fewer than `max_size` characters for any
375 : // `<int>`.
376 9975 : assert(last_semicolon_index != std::string::npos);
377 9975 : result.resize(last_semicolon_index);
378 : }
379 :
380 28 : return result;
381 0 : }
382 :
383 28 : std::string encode_tracestate(
384 : int sampling_priority, const Optional<std::string>& origin,
385 : const std::vector<std::pair<std::string, std::string>>& trace_tags,
386 : const Optional<std::string>& additional_datadog_w3c_tracestate,
387 : const Optional<std::string>& additional_w3c_tracestate) {
388 : std::string result = encode_datadog_tracestate(
389 28 : sampling_priority, origin, trace_tags, additional_datadog_w3c_tracestate);
390 :
391 28 : if (additional_w3c_tracestate) {
392 4 : result += ',';
393 4 : result += *additional_w3c_tracestate;
394 : }
395 :
396 28 : return result;
397 0 : }
398 :
399 : } // namespace tracing
400 : } // namespace datadog
|