LCOV - code coverage report
Current view: top level - datadog - w3c_propagation.cpp (source / functions) Hit Total Coverage
Test: filtered.info Lines: 182 186 97.8 %
Date: 2024-01-03 20:30:12 Functions: 11 11 100.0 %

          Line data    Source code
       1             : #include "w3c_propagation.h"
       2             : 
       3             : #include <algorithm>
       4             : #include <cassert>
       5             : #include <cstddef>
       6             : #include <regex>
       7             : #include <utility>
       8             : 
       9             : #include "dict_reader.h"
      10             : #include "hex.h"
      11             : #include "parse_util.h"
      12             : #include "propagation_style.h"
      13             : #include "tags.h"
      14             : 
      15             : namespace datadog {
      16             : namespace tracing {
      17             : namespace {
      18             : 
      19             : // Return a predicate that returns whether its `char` argument is any of the
      20             : // following:
      21             : //
      22             : // - outside of the ASCII inclusive range `[lowest_ascii, highest_ascii]`
      23             : // - equal to one of the `disallowed_characters`.
      24             : //
      25             : // `verboten` is used with `std::replace_if` to sanitize field values within the
      26             : // tracestate header.
      27       20050 : auto verboten(int lowest_ascii, int highest_ascii,
      28             :               StringView disallowed_characters) {
      29       84075 :   return [=, chars = disallowed_characters](char ch) {
      30      168129 :     return int(ch) < lowest_ascii || int(ch) > highest_ascii ||
      31      168129 :            std::find(chars.begin(), chars.end(), ch) != chars.end();
      32       20050 :   };
      33             : }
      34             : 
      35             : // Populate the specified `result` with data extracted from the "traceparent"
      36             : // entry of the specified `headers`. Return `nullopt` on success. Return a value
      37             : // for the `tags::internal::w3c_extraction_error` tag if an error occurs.
      38          80 : Optional<std::string> extract_traceparent(ExtractedData& result,
      39             :                                           const DictReader& headers) {
      40          80 :   const auto maybe_traceparent = headers.lookup("traceparent");
      41          80 :   if (!maybe_traceparent) {
      42          31 :     return nullopt;
      43             :   }
      44             : 
      45          49 :   const auto traceparent = strip(*maybe_traceparent);
      46             : 
      47             :   // Note that leading and trailing whitespace was already removed above.
      48             :   // Note that match group 0 is the entire match.
      49             :   static const auto& pattern =
      50             :       "([0-9a-f]{2})"  // hex version number (match group 1)
      51             :       "-"
      52             :       "([0-9a-f]{32})"  // hex trace ID (match group 2)
      53             :       "-"
      54             :       "([0-9a-f]{16})"  // hex parent span ID (match group 3)
      55             :       "-"
      56             :       "([0-9a-f]{2})"  // hex "trace-flags" (match group 4)
      57             :       "($|-.*)";  // either the end, or a hyphen preceding further fields (match
      58             :                   // group 5)
      59          49 :   static const std::regex regex{pattern};
      60             : 
      61          49 :   std::match_results<StringView::iterator> match;
      62          49 :   if (!std::regex_match(traceparent.begin(), traceparent.end(), match, regex)) {
      63           3 :     return "malformed_traceparent";
      64             :   }
      65             : 
      66          46 :   assert(match.ready());
      67          46 :   assert(match.size() == 5 + 1);
      68             : 
      69         218 :   const auto to_string_view = [](const auto& submatch) {
      70         218 :     assert(submatch.first <= submatch.second);
      71         218 :     return StringView{submatch.first,
      72         218 :                       std::size_t(submatch.second - submatch.first)};
      73             :   };
      74             : 
      75          46 :   const auto version = to_string_view(match[1]);
      76          46 :   if (version == "ff") {
      77           1 :     return "invalid_version";
      78             :   }
      79             : 
      80          45 :   if (version == "00" && !to_string_view(match[5]).empty()) {
      81           1 :     return "malformed_traceparent";
      82             :   }
      83             : 
      84          44 :   result.trace_id = *TraceID::parse_hex(to_string_view(match[2]));
      85          44 :   if (result.trace_id == 0) {
      86           1 :     return "trace_id_zero";
      87             :   }
      88             : 
      89          43 :   result.parent_id = *parse_uint64(to_string_view(match[3]), 16);
      90          43 :   if (*result.parent_id == 0) {
      91           1 :     return "parent_id_zero";
      92             :   }
      93             : 
      94          42 :   const auto flags = *parse_uint64(to_string_view(match[4]), 16);
      95          42 :   result.sampling_priority = int(flags & 1);
      96             : 
      97          42 :   return nullopt;
      98          49 : }
      99             : 
     100             : // `struct PartiallyParsedTracestate` contains the separated Datadog-specific
     101             : // and non-Datadog-specific portions of tracestate.
     102             : struct PartiallyParsedTracestate {
     103             :   StringView datadog_value;
     104             :   std::string other_entries;
     105             : };
     106             : 
     107             : // Return the separate Datadog-specific and non-Datadog-specific portions of the
     108             : // specified `tracestate`. If `tracestate` does not have a Datadog-specific
     109             : // portion, return `nullopt`.
     110          35 : Optional<PartiallyParsedTracestate> parse_tracestate(StringView tracestate) {
     111          35 :   Optional<PartiallyParsedTracestate> result;
     112             : 
     113          35 :   const char* const begin = tracestate.begin();
     114          35 :   const char* const end = tracestate.end();
     115          35 :   const char* pair_begin = begin;
     116          52 :   while (pair_begin != end) {
     117          47 :     const char* const pair_end = std::find(pair_begin, end, ',');
     118             :     // Note that since this `pair` is `strip`ped, `pair_begin` is not
     119             :     // necessarily equal to `pair.begin()` (similarly for the ends).
     120          47 :     const auto pair = strip(range(pair_begin, pair_end));
     121          47 :     if (pair.empty()) {
     122           1 :       pair_begin = pair_end == end ? end : pair_end + 1;
     123          17 :       continue;
     124             :     }
     125             : 
     126          46 :     const auto kv_separator = std::find(pair.begin(), pair.end(), '=');
     127          46 :     if (kv_separator == pair.end()) {
     128             :       // This is an invalid entry because it contains a non-whitespace character
     129             :       // but not a "=".
     130             :       // Let's move on to the next entry.
     131           1 :       pair_begin = pair_end == end ? end : pair_end + 1;
     132           1 :       continue;
     133             :     }
     134             : 
     135          45 :     const auto key = range(pair.begin(), kv_separator);
     136          45 :     if (key != "dd") {
     137             :       // On to the next.
     138          15 :       pair_begin = pair_end == end ? end : pair_end + 1;
     139          15 :       continue;
     140             :     }
     141             : 
     142             :     // We found the "dd" entry.
     143          30 :     result.emplace();
     144          30 :     result->datadog_value = range(kv_separator + 1, pair.end());
     145             :     // `result->other_entries` is whatever was before the "dd" entry and
     146             :     // whatever is after the "dd" entry, but without an extra comma in the
     147             :     // middle.
     148          30 :     if (pair_begin != begin) {
     149             :       // There's a prefix
     150           5 :       append(result->other_entries, range(begin, pair_begin - 1));
     151           5 :       if (pair_end != end) {
     152             :         // and a suffix
     153           1 :         append(result->other_entries, range(pair_end, end));
     154             :       }
     155          25 :     } else if (pair_end != end) {
     156             :       // There's just a suffix
     157           2 :       append(result->other_entries, range(pair_end + 1, end));
     158             :     }
     159             : 
     160          30 :     break;
     161             :   }
     162             : 
     163          35 :   return result;
     164           0 : }
     165             : 
     166             : // Fill the specified `result` with information parsed from the specified
     167             : // `datadog_value`. `datadog_value` is the value of the "dd" entry in the
     168             : // "tracestate" header.
     169             : //
     170             : // `parse_datadog_tracestate` populates the following `ExtractedData` fields:
     171             : //
     172             : // - `origin`
     173             : // - `trace_tags`
     174             : // - `sampling_priority`
     175             : // - `additional_datadog_w3c_tracestate`
     176          30 : void parse_datadog_tracestate(ExtractedData& result, StringView datadog_value) {
     177          30 :   const char* const begin = datadog_value.begin();
     178          30 :   const char* const end = datadog_value.end();
     179          30 :   const char* pair_begin = begin;
     180          85 :   while (pair_begin != end) {
     181          55 :     const char* const pair_end = std::find(pair_begin, end, ';');
     182          55 :     const auto pair = range(pair_begin, pair_end);
     183          55 :     if (pair.empty()) {
     184             :       // chaff!
     185           6 :       pair_begin = pair_end == end ? end : pair_end + 1;
     186          11 :       continue;
     187             :     }
     188             : 
     189          49 :     const auto kv_separator = std::find(pair_begin, pair_end, ':');
     190          49 :     if (kv_separator == pair_end) {
     191             :       // chaff!
     192           3 :       pair_begin = pair_end == end ? end : pair_end + 1;
     193           3 :       continue;
     194             :     }
     195             : 
     196          46 :     const auto key = range(pair_begin, kv_separator);
     197          46 :     const auto value = range(kv_separator + 1, pair_end);
     198          46 :     if (key == "o") {
     199           6 :       result.origin = std::string{value};
     200             :       // Equal signs are allowed in the value of "origin," but equal signs are
     201             :       // also special characters in the `tracestate` encoding. So, equal signs
     202             :       // that would appear in the "origin" value are converted to tildes during
     203             :       // encoding. Here, in decoding, we undo the conversion.
     204           6 :       std::replace(result.origin->begin(), result.origin->end(), '~', '=');
     205          40 :     } else if (key == "s") {
     206          13 :       const auto maybe_priority = parse_int(value, 10);
     207          13 :       if (!maybe_priority) {
     208             :         // chaff!
     209           2 :         pair_begin = pair_end == end ? end : pair_end + 1;
     210           2 :         continue;
     211             :       }
     212          11 :       const int priority = *maybe_priority;
     213             :       // If we didn't parse a sampling priority from traceparent, or if the one
     214             :       // we just parsed from tracestate is consistent with the previous, then
     215             :       // set the sampling priority to the one we just parsed.
     216             :       // Alternatively, if we already parsed a sampling priority from
     217             :       // traceparent and got a result inconsistent with that parsed here, go
     218             :       // with the one previously parsed from traceparent.
     219          22 :       if (!result.sampling_priority ||
     220          11 :           (*result.sampling_priority > 0) == (priority > 0)) {
     221           7 :         result.sampling_priority = priority;
     222             :       }
     223          40 :     } else if (starts_with(key, "t.")) {
     224             :       // The part of the key that follows "t." is the name of a trace tag,
     225             :       // except without the "_dd.p." prefix.
     226          10 :       const auto tag_suffix = key.substr(2);
     227          10 :       std::string tag_name = "_dd.p.";
     228          10 :       append(tag_name, tag_suffix);
     229             :       // The tag value was encoded with all '=' replaced by '~'.  Undo that
     230             :       // transformation.
     231          10 :       std::string decoded_value{value};
     232          10 :       std::replace(decoded_value.begin(), decoded_value.end(), '~', '=');
     233          10 :       result.trace_tags.emplace_back(std::move(tag_name),
     234          10 :                                      std::move(decoded_value));
     235          10 :     } else {
     236             :       // Unrecognized key: append the whole pair to
     237             :       // `additional_datadog_w3c_tracestate`, which will be used if/when we
     238             :       // inject trace context.
     239          17 :       auto& entries = result.additional_datadog_w3c_tracestate;
     240          17 :       if (!entries) {
     241          10 :         entries.emplace();
     242             :       } else {
     243           7 :         *entries += ';';
     244             :       }
     245          17 :       append(*entries, pair);
     246             :     }
     247             : 
     248          44 :     pair_begin = pair_end == end ? end : pair_end + 1;
     249             :   }
     250          30 : }
     251             : 
     252             : // Fill the specified `result` with information parsed from the "tracestate"
     253             : // element of the specified `headers`, if present.
     254             : //
     255             : // `extract_tracestate` populates the `additional_w3c_tracestate` field of
     256             : // `ExtractedData`, in addition to those populated by
     257             : // `parse_datadog_tracestate`.
     258          42 : void extract_tracestate(ExtractedData& result, const DictReader& headers) {
     259          42 :   const auto maybe_tracestate = headers.lookup("tracestate");
     260          42 :   if (!maybe_tracestate) {
     261          12 :     return;
     262             :   }
     263             : 
     264          35 :   const auto tracestate = strip(*maybe_tracestate);
     265          35 :   auto maybe_parsed = parse_tracestate(tracestate);
     266          35 :   if (!maybe_parsed) {
     267             :     // No "dd" entry in `tracestate`, so there's nothing to extract.
     268           5 :     if (!tracestate.empty()) {
     269           4 :       result.additional_w3c_tracestate = std::string{tracestate};
     270             :     }
     271           5 :     return;
     272             :   }
     273             : 
     274          30 :   auto& [datadog_value, other_entries] = *maybe_parsed;
     275          30 :   if (!other_entries.empty()) {
     276           7 :     result.additional_w3c_tracestate = std::move(other_entries);
     277             :   }
     278             : 
     279          30 :   parse_datadog_tracestate(result, datadog_value);
     280          35 : }
     281             : 
     282             : }  // namespace
     283             : 
     284          80 : Expected<ExtractedData> extract_w3c(
     285             :     const DictReader& headers,
     286             :     std::unordered_map<std::string, std::string>& span_tags, Logger&) {
     287          80 :   ExtractedData result;
     288          80 :   result.style = PropagationStyle::W3C;
     289             : 
     290          80 :   if (auto error_tag_value = extract_traceparent(result, headers)) {
     291           7 :     span_tags[tags::internal::w3c_extraction_error] =
     292          14 :         std::move(*error_tag_value);
     293           7 :     return ExtractedData{};
     294          80 :   }
     295             : 
     296             :   // If we didn't get a trace ID from traceparent, don't bother with
     297             :   // tracestate.
     298          73 :   if (!result.trace_id) {
     299          31 :     return result;
     300             :   }
     301             : 
     302          42 :   extract_tracestate(result, headers);
     303             : 
     304          42 :   return result;
     305          80 : }
     306             : 
     307          28 : std::string encode_traceparent(TraceID trace_id, std::uint64_t span_id,
     308             :                                int sampling_priority) {
     309          28 :   std::string result;
     310             :   // version
     311          28 :   result += "00-";
     312             : 
     313             :   // trace ID
     314          28 :   result += trace_id.hex_padded();
     315          28 :   result += '-';
     316             : 
     317             :   // span ID
     318          28 :   result += hex_padded(span_id);
     319          28 :   result += '-';
     320             : 
     321             :   // flags
     322          28 :   result += sampling_priority > 0 ? "01" : "00";
     323             : 
     324          28 :   return result;
     325           0 : }
     326             : 
     327          28 : std::string encode_datadog_tracestate(
     328             :     int sampling_priority, const Optional<std::string>& origin,
     329             :     const std::vector<std::pair<std::string, std::string>>& trace_tags,
     330             :     const Optional<std::string>& additional_datadog_w3c_tracestate) {
     331          28 :   std::string result = "dd=s:";
     332          28 :   result += std::to_string(sampling_priority);
     333             : 
     334          28 :   if (origin) {
     335           8 :     result += ";o:";
     336           8 :     result += *origin;
     337           8 :     std::replace_if(result.end() - origin->size(), result.end(),
     338           8 :                     verboten(0x20, 0x7e, ",;~"), '_');
     339           8 :     std::replace(result.end() - origin->size(), result.end(), '=', '~');
     340             :   }
     341             : 
     342       10054 :   for (const auto& [key, value] : trace_tags) {
     343       10026 :     const StringView prefix = "_dd.p.";
     344       10026 :     if (!starts_with(key, prefix) || key == tags::internal::trace_id_high) {
     345             :       // Either it's not a propagation tag, or it's one of the propagation tags
     346             :       // that need not be included in tracestate.
     347           5 :       continue;
     348             :     }
     349             : 
     350             :     // `key` is "_dd.p.<name>", but we want "t.<name>".
     351       10021 :     result += ";t.";
     352       10021 :     result.append(key, prefix.size());
     353       10021 :     std::replace_if(result.end() - (key.size() - prefix.size()), result.end(),
     354       10021 :                     verboten(0x20, 0x7e, " ,;="), '_');
     355             : 
     356       10021 :     result += ':';
     357       10021 :     result += value;
     358       10021 :     std::replace_if(result.end() - value.size(), result.end(),
     359       10021 :                     verboten(0x20, 0x7e, ",;~"), '_');
     360             :     // `value` might contain equal signs ("="), which is reserved in tracestate.
     361             :     // Replace them with tildes ("~").
     362       10021 :     std::replace(result.end() - value.size(), result.end(), '=', '~');
     363             :   }
     364             : 
     365          28 :   if (additional_datadog_w3c_tracestate) {
     366           6 :     result += ';';
     367           6 :     result += *additional_datadog_w3c_tracestate;
     368             :   }
     369             : 
     370          28 :   const std::size_t max_size = 256;
     371       10003 :   while (result.size() > max_size) {
     372        9975 :     const auto last_semicolon_index = result.rfind(';');
     373             :     // This assumption is safe, because `result` always begins with
     374             :     // "dd=s:<int>", and that's fewer than `max_size` characters for any
     375             :     // `<int>`.
     376        9975 :     assert(last_semicolon_index != std::string::npos);
     377        9975 :     result.resize(last_semicolon_index);
     378             :   }
     379             : 
     380          28 :   return result;
     381           0 : }
     382             : 
     383          28 : std::string encode_tracestate(
     384             :     int sampling_priority, const Optional<std::string>& origin,
     385             :     const std::vector<std::pair<std::string, std::string>>& trace_tags,
     386             :     const Optional<std::string>& additional_datadog_w3c_tracestate,
     387             :     const Optional<std::string>& additional_w3c_tracestate) {
     388             :   std::string result = encode_datadog_tracestate(
     389          28 :       sampling_priority, origin, trace_tags, additional_datadog_w3c_tracestate);
     390             : 
     391          28 :   if (additional_w3c_tracestate) {
     392           4 :     result += ',';
     393           4 :     result += *additional_w3c_tracestate;
     394             :   }
     395             : 
     396          28 :   return result;
     397           0 : }
     398             : 
     399             : }  // namespace tracing
     400             : }  // namespace datadog

Generated by: LCOV version 1.16