| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- from typing import Optional
- from ray.dashboard.consts import COMPONENT_METRICS_TAG_KEYS
- class NullMetric:
- """Mock metric class to be used in case of prometheus_client import error."""
- def set(self, *args, **kwargs):
- pass
- def observe(self, *args, **kwargs):
- pass
- def inc(self, *args, **kwargs):
- pass
- try:
- from prometheus_client import CollectorRegistry, Counter, Gauge, Histogram
- # The metrics in this class should be kept in sync with
- # python/ray/tests/test_metrics_agent.py
- class DashboardPrometheusMetrics:
- def __init__(self, registry: Optional[CollectorRegistry] = None):
- self.registry: CollectorRegistry = registry or CollectorRegistry(
- auto_describe=True
- )
- # Buckets: 5ms, 10ms, 25ms, 50ms, 75ms
- # 100ms, 250ms, 500ms, 750ms
- # 1s, 2.5s, 5s, 7.5s, 10s
- # 20s, 40s, 60s
- # used for API duration
- histogram_buckets_s = [
- 0.005,
- 0.01,
- 0.025,
- 0.05,
- 0.075,
- 0.1,
- 0.25,
- 0.5,
- 0.75,
- 1,
- 2.5,
- 5,
- 7.5,
- 10,
- 20,
- 40,
- 60,
- ]
- self.metrics_request_duration = Histogram(
- "dashboard_api_requests_duration_seconds",
- "Total duration in seconds per endpoint",
- ("endpoint", "http_status", "Version", "SessionName", "Component"),
- unit="seconds",
- namespace="ray",
- registry=self.registry,
- buckets=histogram_buckets_s,
- )
- self.metrics_request_count = Counter(
- "dashboard_api_requests_count",
- "Total requests count per endpoint",
- (
- "method",
- "endpoint",
- "http_status",
- "Version",
- "SessionName",
- "Component",
- ),
- unit="requests",
- namespace="ray",
- registry=self.registry,
- )
- self.metrics_event_loop_tasks = Gauge(
- "dashboard_event_loop_tasks",
- "Number of tasks currently pending in the event loop's queue.",
- tuple(COMPONENT_METRICS_TAG_KEYS),
- unit="tasks",
- namespace="ray",
- registry=self.registry,
- )
- self.metrics_event_loop_lag = Gauge(
- "dashboard_event_loop_lag",
- "Event loop lag in seconds.",
- tuple(COMPONENT_METRICS_TAG_KEYS),
- unit="seconds",
- namespace="ray",
- registry=self.registry,
- )
- self.metrics_dashboard_cpu = Gauge(
- "component_cpu",
- "Dashboard CPU percentage usage.",
- tuple(COMPONENT_METRICS_TAG_KEYS),
- unit="percentage",
- namespace="ray",
- registry=self.registry,
- )
- self.metrics_dashboard_mem_uss = Gauge(
- "component_uss",
- "USS usage of all components on the node.",
- tuple(COMPONENT_METRICS_TAG_KEYS),
- unit="mb",
- namespace="ray",
- registry=self.registry,
- )
- self.metrics_dashboard_mem_rss = Gauge(
- "component_rss",
- "RSS usage of all components on the node.",
- tuple(COMPONENT_METRICS_TAG_KEYS),
- unit="mb",
- namespace="ray",
- registry=self.registry,
- )
- except ImportError:
- class DashboardPrometheusMetrics(object):
- def __getattr__(self, attr):
- return NullMetric()
|