Skip to content
Snippets Groups Projects
Commit dfa70adc authored by Erik Johnston's avatar Erik Johnston
Browse files

Add in flight request metrics

This tracks CPU and DB usage while requests are in flight, rather than
when we write the response.
parent 08462620
No related branches found
No related tags found
No related merge requests found
...@@ -98,14 +98,76 @@ response_size = metrics.register_counter( ...@@ -98,14 +98,76 @@ response_size = metrics.register_counter(
"response_size", labels=["method", "servlet", "tag"] "response_size", labels=["method", "servlet", "tag"]
) )
# In flight metrics are incremented while the requests are in flight, rather
# than when the response was written.
in_flight_requests_ru_utime = metrics.register_counter(
"in_flight_requests_ru_utime_seconds", labels=["method", "servlet"],
)
in_flight_requests_ru_stime = metrics.register_counter(
"in_flight_requests_ru_stime_seconds", labels=["method", "servlet"],
)
in_flight_requests_db_txn_count = metrics.register_counter(
"in_flight_requests_db_txn_count", labels=["method", "servlet"],
)
# seconds spent waiting for db txns, excluding scheduling time, when processing
# this request
in_flight_requests_db_txn_duration = metrics.register_counter(
"in_flight_requests_db_txn_duration_seconds", labels=["method", "servlet"],
)
# seconds spent waiting for a db connection, when processing this request
in_flight_requests_db_sched_duration = metrics.register_counter(
"in_flight_requests_db_sched_duration_seconds", labels=["method", "servlet"]
)
_in_flight_requests_count = metrics.register_gauge(
"in_flight_requests_count", labels=["method", "servlet"]
)
# The set of all in flight requests.
_in_flight_requests = set()
def _collect_in_flight():
"""Called just before metrics are collected, so we use it to update all
the in flight request metrics
"""
# Map from (method, name) -> int, the number of in flight requests of that
# type
counts = {}
for rm in _in_flight_requests:
rm.update_metrics()
key = (rm.method, rm.name,)
counts[key] = counts.get(key, 0) + 1
for (method, name), count in counts.iteritems():
_in_flight_requests_count.set(count, method, name)
metrics.register_collector(_collect_in_flight)
class RequestMetrics(object): class RequestMetrics(object):
def start(self, time_msec, name): def start(self, time_msec, name, method):
self.start = time_msec self.start = time_msec
self.start_context = LoggingContext.current_context() self.start_context = LoggingContext.current_context()
self.name = name self.name = name
self.method = method
self._request_stats = _RequestStats.from_context(self.start_context)
_in_flight_requests.add(self)
def stop(self, time_msec, request): def stop(self, time_msec, request):
_in_flight_requests.discard(self)
context = LoggingContext.current_context() context = LoggingContext.current_context()
tag = "" tag = ""
...@@ -147,3 +209,91 @@ class RequestMetrics(object): ...@@ -147,3 +209,91 @@ class RequestMetrics(object):
) )
response_size.inc_by(request.sentLength, request.method, self.name, tag) response_size.inc_by(request.sentLength, request.method, self.name, tag)
# We always call this at the end to ensure that we update the metrics
# regardless of whether a call to /metrics while the request was in
# flight.
self.update_metrics()
def update_metrics(self):
"""Updates the in flight metrics with values from this request.
"""
diff = self._request_stats.update()
in_flight_requests_ru_utime.inc_by(
diff.ru_utime, self.method, self.name,
)
in_flight_requests_ru_stime.inc_by(
diff.ru_stime, self.method, self.name,
)
in_flight_requests_db_txn_count.inc_by(
diff.db_txn_count, self.method, self.name,
)
in_flight_requests_db_txn_duration.inc_by(
diff.db_txn_duration_ms / 1000., self.method, self.name,
)
in_flight_requests_db_sched_duration.inc_by(
diff.db_sched_duration_ms / 1000., self.method, self.name,
)
class _RequestStats(object):
"""Keeps tracks of various metrics for an in flight request.
"""
__slots__ = [
"context", "ru_utime", "ru_stime",
"db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms",
]
def __init__(self, context, ru_utime, ru_stime, db_txn_count,
db_txn_duration_ms, db_sched_duration_ms):
self.context = context
self.ru_utime = ru_utime
self.ru_stime = ru_stime
self.db_txn_count = db_txn_count
self.db_txn_duration_ms = db_txn_duration_ms
self.db_sched_duration_ms = db_sched_duration_ms
@staticmethod
def from_context(context):
ru_utime, ru_stime = context.get_resource_usage()
return _RequestStats(
context,
ru_utime, ru_stime,
context.db_txn_count,
context.db_txn_duration_ms,
context.db_sched_duration_ms,
)
def update(self):
"""Updates the current values and returns the difference between the
old and new values.
Returns:
_RequestStats: The difference between the old and new values
"""
new = _RequestStats.from_context(self.context)
diff = _RequestStats(
self.context,
new.ru_utime - self.ru_utime,
new.ru_stime - self.ru_stime,
new.db_txn_count - self.db_txn_count,
new.db_txn_duration_ms - self.db_txn_duration_ms,
new.db_sched_duration_ms - self.db_sched_duration_ms,
)
self.ru_utime = new.ru_utime
self.ru_stime = new.ru_stime
self.db_txn_count = new.db_txn_count
self.db_txn_duration_ms = new.db_txn_duration_ms
self.db_sched_duration_ms = new.db_sched_duration_ms
return diff
...@@ -85,7 +85,9 @@ class SynapseRequest(Request): ...@@ -85,7 +85,9 @@ class SynapseRequest(Request):
def _started_processing(self, servlet_name): def _started_processing(self, servlet_name):
self.start_time = int(time.time() * 1000) self.start_time = int(time.time() * 1000)
self.request_metrics = RequestMetrics() self.request_metrics = RequestMetrics()
self.request_metrics.start(self.start_time, name=servlet_name) self.request_metrics.start(
self.start_time, name=servlet_name, method=self.method,
)
self.site.access_logger.info( self.site.access_logger.info(
"%s - %s - Received request: %s %s", "%s - %s - Received request: %s %s",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment