
| Current Path : /usr/local/lib/python3.8/dist-packages/sentry_sdk/ |
Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
| Current File : //usr/local/lib/python3.8/dist-packages/sentry_sdk/tracing_utils.py |
import re
import contextlib
import json
import math
from numbers import Real
import sentry_sdk
from sentry_sdk.utils import (
capture_internal_exceptions,
Dsn,
logger,
to_base64,
to_string,
from_base64,
)
from sentry_sdk._compat import PY2
from sentry_sdk._types import MYPY
if PY2:
from collections import Mapping
else:
from collections.abc import Mapping
if MYPY:
import typing
from typing import Generator
from typing import Optional
from typing import Any
from typing import Dict
from typing import Union
# Pattern for the value of a `sentry-trace` header: an optional 32-digit
# trace id, an optional 16-digit span id and an optional sampled flag,
# with an optional dash before the second and third parts. Only lowercase hex
# digits are accepted. Because every component is optional, an empty (or
# whitespace-only) string also matches, with all three groups set to None.
SENTRY_TRACE_REGEX = re.compile(
    "^[ \t]*"  # whitespace
    "([0-9a-f]{32})?"  # trace_id
    "-?([0-9a-f]{16})?"  # span_id
    "-?([01])?"  # sampled
    "[ \t]*$"  # whitespace
)
# This is a normal base64 regex, modified to reflect the fact that we strip the
# trailing = or == off.
#
# Note that this is a plain pattern string, not a compiled regex, and that it
# carries no `^`/`$` anchors of its own; code using it has to add them.
base64_stripped = (
    # any of the characters in the base64 "alphabet", in multiples of 4
    "([a-zA-Z0-9+/]{4})*"
    # either nothing or 2 or 3 base64-alphabet characters (see
    # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding for
    # why there's never only 1 extra character)
    "([a-zA-Z0-9+/]{2,3})?"
)
# comma-delimited list of entries of the form `xxx=yyy`
#
# NOTE: `[^=]` excludes only the equals sign, so a key or value in this
# pattern may itself contain commas - the check is deliberately loose.
tracestate_entry = "[^=]+=[^=]+"
# The adjacent string literals below are joined into a single literal before
# `.format()` is applied, so `{te}` is substituted in the complete pattern.
TRACESTATE_ENTRIES_REGEX = re.compile(
    # one or more xxxxx=yyyy entries
    "^({te})+"
    # each entry except the last must be followed by a comma
    "(,|$)".format(te=tracestate_entry)
)
# this doesn't check that the value is valid, just that there's something there
# of the form `sentry=xxxx`
#
# The only capturing group (group 1) is the `sentry=...` entry itself, without
# any surrounding commas; the text before and after it is matched by
# non-capturing groups.
SENTRY_TRACESTATE_ENTRY_REGEX = re.compile(
    # either sentry is the first entry or there's stuff immediately before it,
    # ending in a comma (this prevents matching something like `coolsentry=xxx`)
    "(?:^|.+,)"
    # sentry's part, not including the potential comma
    "(sentry=[^,]*)"
    # either there's a comma and another vendor's entry or we end
    "(?:,.+|$)"
)
class EnvironHeaders(Mapping):  # type: ignore
    """
    Read-only mapping over the entries of `environ` whose keys start with
    `prefix` (presumably a WSGI-style environ; verify against callers).

    Lookups accept header-style names: dashes are turned into underscores,
    the name is upper-cased and `prefix` is prepended before `environ` is
    consulted. Iteration yields the matching keys with `prefix` removed.
    """

    def __init__(
        self,
        environ,  # type: typing.Mapping[str, str]
        prefix="HTTP_",  # type: str
    ):
        # type: (...) -> None
        self.environ = environ
        self.prefix = prefix

    def __getitem__(self, key):
        # type: (str) -> Optional[Any]
        # Translate e.g. "sentry-trace" into "<prefix>SENTRY_TRACE"; a missing
        # key surfaces as the KeyError raised by the underlying mapping.
        environ_key = self.prefix + key.replace("-", "_").upper()
        return self.environ[environ_key]

    def __len__(self):
        # type: () -> int
        # Count only the entries that __iter__ would expose.
        count = 0
        for _ in self:
            count += 1
        return count

    def __iter__(self):
        # type: () -> Generator[str, None, None]
        prefix_length = len(self.prefix)
        for raw_key in self.environ:
            # Non-string keys can never be header names, so skip them.
            if isinstance(raw_key, str):
                normalized = raw_key.replace("-", "_").upper()
                if normalized.startswith(self.prefix):
                    yield normalized[prefix_length:]
def has_tracing_enabled(options):
    # type: (Dict[str, Any]) -> bool
    """
    Tell whether tracing has been configured.

    Returns True when `options` holds a non-None value for either
    `traces_sample_rate` or `traces_sampler`, and False otherwise. A sample
    rate of 0 therefore still counts as "enabled".
    """
    tracing_keys = ("traces_sample_rate", "traces_sampler")
    return any(options.get(key) is not None for key in tracing_keys)
def is_valid_sample_rate(rate):
    # type: (Any) -> bool
    """
    Validate a sample rate.

    A rate is accepted when it is a boolean or a real number in the closed
    interval [0, 1]. Anything else is rejected with a logged warning that
    explains why. Returns True for an acceptable rate and False otherwise.
    """
    # Booleans are instances of Real, so the isinstance check admits them.
    # NaN is also a Real and has to be ruled out explicitly.
    is_number = isinstance(rate, Real)
    if not is_number or math.isnan(rate):
        logger.warning(
            "[Tracing] Given sample rate is invalid. Sample rate must be a boolean or a number between 0 and 1. Got {rate} of type {type}.".format(
                rate=rate, type=type(rate)
            )
        )
        return False

    # True becomes 1.0 and False becomes 0.0 here.
    as_float = float(rate)
    if 0 <= as_float <= 1:
        return True

    logger.warning(
        "[Tracing] Given sample rate is invalid. Sample rate must be between 0 and 1. Got {rate}.".format(
            rate=as_float
        )
    )
    return False
@contextlib.contextmanager
def record_sql_queries(
    hub,  # type: sentry_sdk.Hub
    cursor,  # type: Any
    query,  # type: Any
    params_list,  # type: Any
    paramstyle,  # type: Optional[str]
    executemany,  # type: bool
):
    # type: (...) -> Generator[Span, None, None]
    """
    Context manager that records a SQL query as a breadcrumb and as a span.

    The query is formatted with `_format_sql`, a breadcrumb of category
    "query" is added to `hub`, and a span with op "db" is started and yielded
    to the caller. Query parameters and the paramstyle are attached only when
    the client's `_experiments["record_sql_params"]` option is truthy;
    `db.executemany` is attached whenever `executemany` is truthy.
    """
    # TODO: Bring back capturing of params by default
    client = hub.client
    capture_params = client and client.options["_experiments"].get(
        "record_sql_params", False
    )

    if not capture_params:
        params_list = None
        paramstyle = None
    else:
        # Treat "no parameters" uniformly, however the driver expressed it.
        if not params_list or params_list == [None]:
            params_list = None
        if paramstyle == "pyformat":
            paramstyle = "format"

    query = _format_sql(cursor, query)

    extra = {}
    if params_list is not None:
        extra["db.params"] = params_list
    if paramstyle is not None:
        extra["db.paramstyle"] = paramstyle
    if executemany:
        extra["db.executemany"] = True

    # Adding the breadcrumb is wrapped separately from the span, so that the
    # wrapper covers only the breadcrumb call and not the caller's code.
    with capture_internal_exceptions():
        hub.add_breadcrumb(message=query, category="query", data=extra)

    with hub.start_span(op="db", description=query) as span:
        for key, value in extra.items():
            span.set_data(key, value)
        yield span
def maybe_create_breadcrumbs_from_span(hub, span):
    # type: (sentry_sdk.Hub, Span) -> None
    """
    Add a breadcrumb to `hub` describing `span`, if its op is one we track.

    Spans with op "redis", "http" or "subprocess" produce one breadcrumb each;
    spans with any other op are ignored.
    """
    op = span.op

    if op == "redis":
        crumb = dict(
            message=span.description,
            type="redis",
            category="redis",
            data=span._tags,
        )
    elif op == "http":
        crumb = dict(type="http", category="httplib", data=span._data)
    elif op == "subprocess":
        crumb = dict(
            type="subprocess",
            category="subprocess",
            message=span.description,
            data=span._data,
        )
    else:
        # Not an op that maps to a breadcrumb.
        return

    hub.add_breadcrumb(**crumb)
def extract_sentrytrace_data(header):
    # type: (Optional[str]) -> typing.Mapping[str, Union[str, bool, None]]
    """
    Parse a `sentry-trace` header string into its components.

    Returns a dictionary with the keys `trace_id`, `parent_span_id` and
    `parent_sampled`. Each value is None when the header is empty, does not
    match the expected format, or simply omits that component.
    """
    parsed = {
        "trace_id": None,
        "parent_span_id": None,
        "parent_sampled": None,
    }  # type: Dict[str, Union[str, bool, None]]

    if not header:
        return parsed

    # A value wrapped as `00-...-00` has that wrapper removed before matching.
    if header.startswith("00-") and header.endswith("-00"):
        header = header[3:-3]

    match = SENTRY_TRACE_REGEX.match(header)
    if match is None:
        return parsed

    raw_trace_id, raw_span_id, sampled_flag = match.groups()

    # Re-render the ids as zero-padded lowercase hex of fixed width.
    if raw_trace_id:
        parsed["trace_id"] = "{:032x}".format(int(raw_trace_id, 16))
    if raw_span_id:
        parsed["parent_span_id"] = "{:016x}".format(int(raw_span_id, 16))
    if sampled_flag:
        parsed["parent_sampled"] = sampled_flag != "0"

    return parsed
def extract_tracestate_data(header):
    # type: (Optional[str]) -> typing.Mapping[str, Optional[str]]
    """
    Extracts the sentry tracestate value and any third-party data from the given
    tracestate header, returning a dictionary of data.

    The result has two keys:

    - `sentry_tracestate`: the full `sentry=...` entry, or None if the header
      has no such entry or its value is not valid (stripped) base64.
    - `third_party_tracestate`: the remaining entries joined with commas, or
      None if there are none (or they don't look like `xxx=yyy` entries).
    """
    sentry_entry = third_party_entry = None
    before = after = ""

    if header:
        # find sentry's entry, if any
        sentry_match = SENTRY_TRACESTATE_ENTRY_REGEX.search(header)

        if sentry_match:
            sentry_entry = sentry_match.group(1)

            # Cut the header at the position of the matched entry rather than
            # splitting on the entry's text: the same text can occur more than
            # once (e.g. `xsentry=abc,sentry=abc`), in which case a split
            # yields more than two pieces and cannot be unpacked.
            #
            # The commas are stripped so we don't end up with
            # `xxx=yyy,,zzz=qqq` (double commas) when we put the parts back
            # together.
            before = header[: sentry_match.start(1)].strip(",")
            after = header[sentry_match.end(1) :].strip(",")

            # extract sentry's value from its entry and test to make sure it's
            # valid; if it isn't, discard the entire entry so that a new one
            # will be created. Only the leading `sentry=` is removed, so a
            # value which itself contains `sentry=` is validated as-is.
            sentry_value = sentry_entry[len("sentry=") :]
            if not re.search("^{b64}$".format(b64=base64_stripped), sentry_value):
                sentry_entry = None
        else:
            after = header

        # if either part is invalid or empty, remove it before gluing them
        # together
        third_party_entry = (
            ",".join(filter(TRACESTATE_ENTRIES_REGEX.search, [before, after])) or None
        )

    return {
        "sentry_tracestate": sentry_entry,
        "third_party_tracestate": third_party_entry,
    }
def compute_tracestate_value(data):
    # type: (typing.Mapping[str, str]) -> str
    """
    Serialize `data` into a sentry tracestate value.

    The data is dumped to JSON, base64-encoded and returned without its
    trailing `=` padding. If encoding yields nothing, an empty string is
    returned.

    Note: Returns just the base64-encoded data, NOT the full `sentry=...`
    tracestate entry.
    """
    encoded = to_base64(json.dumps(data))
    if not encoded:
        return ""

    # Base64 output is padded with `=` up to a multiple of 4 characters. The
    # tracestate format already uses `=` to separate vendor name and value
    # (`sentry=xxx,dogsaregreat=yyy`), so the padding is dropped to avoid
    # confusion.
    return encoded.rstrip("=")
def compute_tracestate_entry(span):
    # type: (Span) -> Optional[str]
    """
    Build a fresh sentry tracestate entry (including the `sentry=` prefix)
    for the given span.

    The entry encodes the trace id, environment, release and public key, plus
    the user's id/segment and the containing transaction's name when those
    are available. Returns None if there's no client and/or no DSN.
    """
    hub = span.hub or sentry_sdk.Hub.current
    client = hub.client
    scope = hub.scope

    # Without a client and a DSN there is nothing to compute.
    if not (client and client.options.get("dsn")):
        return None

    options = client.options
    user = scope._user

    payload = {
        "trace_id": span.trace_id,
        "environment": options["environment"],
        "release": options.get("release"),
        "public_key": Dsn(options["dsn"]).public_key,
    }

    if user:
        # Only truthy id/segment values are worth propagating.
        user_fields = {
            field: user[field] for field in ("id", "segment") if user.get(field)
        }
        if user_fields:
            payload["user"] = user_fields

    transaction = span.containing_transaction
    if transaction:
        payload["transaction"] = transaction.name

    return "sentry=" + compute_tracestate_value(payload)
def reinflate_tracestate(encoded_tracestate):
    # type: (str) -> typing.Optional[Mapping[str, str]]
    """
    Given a sentry tracestate value in its encoded form, translate it back into
    a dictionary of data.

    Returns the decoded data, or None if `encoded_tracestate` is empty or
    cannot be decoded and parsed. Failures are logged as warnings rather than
    raised.
    """
    inflated_tracestate = None

    if encoded_tracestate:
        # Base64-encoded strings always come out with a length which is a
        # multiple of 4. In order to achieve this, the end is padded with one or
        # more `=` signs. Because the tracestate standard calls for using `=`
        # signs between vendor name and value (`sentry=xxx,dogsaregreat=yyy`),
        # to avoid confusion we strip the `=` when the data is initially
        # encoded. Python's decoding function requires they be put back.
        # Fortunately, it doesn't complain if there are too many, so we just
        # attach two `=` on spec (there will never be more than 2, see
        # https://en.wikipedia.org/wiki/Base64#Decoding_Base64_without_padding).
        tracestate_json = from_base64(encoded_tracestate + "==")

        try:
            # Use an explicit check instead of `assert`: assertions are
            # removed when Python runs with -O, and a failed assert would put
            # an empty error text into the warning below.
            # (presumably `from_base64` returns None when decoding fails -
            # verify against sentry_sdk.utils)
            if tracestate_json is None:
                raise ValueError("tracestate value could not be base64-decoded")
            inflated_tracestate = json.loads(tracestate_json)
        except Exception as err:
            logger.warning(
                (
                    "Unable to attach tracestate data to envelope header: {err}"
                    + "\nTracestate value is {encoded_tracestate}"
                ).format(err=err, encoded_tracestate=encoded_tracestate),
            )

    return inflated_tracestate
def _format_sql(cursor, sql):
# type: (Any, str) -> Optional[str]
real_sql = None
# If we're using psycopg2, it could be that we're
# looking at a query that uses Composed objects. Use psycopg2's mogrify
# function to format the query. We lose per-parameter trimming but gain
# accuracy in formatting.
try:
if hasattr(cursor, "mogrify"):
real_sql = cursor.mogrify(sql)
if isinstance(real_sql, bytes):
real_sql = real_sql.decode(cursor.connection.encoding)
except Exception:
real_sql = None
return real_sql or to_string(sql)
def has_tracestate_enabled(span=None):
    # type: (Optional[Span]) -> bool
    """
    Tell whether tracestate propagation is switched on.

    The client is taken from the span's hub when the span has one, and from
    the current hub otherwise. Returns True only if that client exists and
    its `_experiments["propagate_tracestate"]` option is truthy.
    """
    hub = (span and span.hub) or sentry_sdk.Hub.current
    client = hub.client
    if not client:
        return False

    options = client.options
    if not options:
        return False

    return bool(options["_experiments"].get("propagate_tracestate"))
# Circular imports
if MYPY:
from sentry_sdk.tracing import Span