diff --git a/.gitignore b/.gitignore index f74d1ab..7899570 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ /stubs -/vendor +vendor composer.lock diff --git a/example/opentelemetry/README.md b/example/opentelemetry/README.md new file mode 100644 index 0000000..15d0149 --- /dev/null +++ b/example/opentelemetry/README.md @@ -0,0 +1,45 @@ +# Example for Relay OpenTelemetry instrumentation + +This example demonstrates how to monitor Redis using [OpenTelemetry](https://opentelemetry.io/) and +[Uptrace](https://uptrace.dev/get/open-source-apm.html). It requires Docker to start Redis Server and Uptrace. + +**Step 1**. Download the example using Git: + +```shell +git clone https://github.com/cachewerk/relay.git +cd relay/example/opentelemetry +``` + +**Step 2**. Start the services using Docker: + +```shell +docker-compose up -d +``` + +**Step 3**. Make sure Redis and Uptrace are running: + +```shell +docker-compose logs redis-server +docker-compose logs uptrace +``` + +**Step 4**. Install dependencies and run the Relay example: + +```shell +composer install +php main.php +``` + +**Step 5**. Follow the link from the CLI to view the trace: + +```shell +php main.php +trace: http://localhost:14318/traces/ee029d8782242c8ed38b16d961093b35 +``` + +![Relay trace](./image/relay-trace.png) + +You can also open Uptrace UI at [http://localhost:14318](http://localhost:14318) to view available +spans, logs, and metrics. + +See [Monitoring Relay Redis client with OpenTelemetry](https://uptrace.dev/blog/posts/relay-cache-opentelemetry.html). 
diff --git a/example/opentelemetry/composer.json b/example/opentelemetry/composer.json new file mode 100644 index 0000000..daacfc8 --- /dev/null +++ b/example/opentelemetry/composer.json @@ -0,0 +1,19 @@ +{ + "name": "relay/example-opentelemetry", + "authors": [ + { + "name": "Vladimir Mihailenco", + "email": "vladimir.webdev@gmail.com" + } + ], + "require": { + "cachewerk/relay": "^0.5.1", + "uptrace/uptrace": "0.1.2" + }, + "autoload": { + "psr-4": { + "CacheWerk\\Relay\\": "../../src/" + } + }, + "minimum-stability": "dev" +} diff --git a/example/opentelemetry/config/alertmanager.yml b/example/opentelemetry/config/alertmanager.yml new file mode 100644 index 0000000..ac3e340 --- /dev/null +++ b/example/opentelemetry/config/alertmanager.yml @@ -0,0 +1,53 @@ +# See https://prometheus.io/docs/alerting/latest/configuration/ for details. + +global: + # The smarthost and SMTP sender used for mail notifications. + smtp_smarthost: "mailhog:1025" + smtp_from: "alertmanager@example.com" + smtp_require_tls: false + +receivers: + - name: "team-X" + email_configs: + - to: "some-receiver@example.com" + send_resolved: true + +# The root route on which each incoming alert enters. +route: + # The labels by which incoming alerts are grouped together. For example, + # multiple alerts coming in for cluster=A and alertname=LatencyHigh would + # be batched into a single group. + group_by: ["alertname", "cluster", "service"] + + # When a new group of alerts is created by an incoming alert, wait at + # least 'group_wait' to send the initial notification. + # This way ensures that you get multiple alerts for the same group that start + # firing shortly after another are batched together on the first + # notification. + group_wait: 30s + + # When the first notification was sent, wait 'group_interval' to send a batch + # of new alerts that started firing for that group. + group_interval: 5m + + # If an alert has successfully been sent, wait 'repeat_interval' to + # resend them. 
+ repeat_interval: 3h + + # A default receiver + receiver: team-X + + # All the above attributes are inherited by all child routes and can + # be overwritten on each. + + # The child route trees. + routes: + # This route matches error alerts created from spans or logs. + - matchers: + - alert_kind="error" + group_interval: 24h + receiver: team-X + +# The directory from which notification templates are read. +templates: + - "/etc/alertmanager/template/*.tmpl" diff --git a/example/opentelemetry/config/otel-collector.yaml b/example/opentelemetry/config/otel-collector.yaml new file mode 100644 index 0000000..b44dd1f --- /dev/null +++ b/example/opentelemetry/config/otel-collector.yaml @@ -0,0 +1,68 @@ +extensions: + health_check: + pprof: + endpoint: 0.0.0.0:1777 + zpages: + endpoint: 0.0.0.0:55679 + +receivers: + otlp: + protocols: + grpc: + http: + hostmetrics: + collection_interval: 10s + scrapers: + cpu: + disk: + load: + filesystem: + memory: + network: + paging: + redis: + endpoint: "redis-server:6379" + collection_interval: 10s + jaeger: + protocols: + grpc: + +processors: + resourcedetection: + detectors: ["system"] + batch: + send_batch_size: 10000 + timeout: 10s + +exporters: + logging: + logLevel: debug + otlp: + endpoint: uptrace:14317 + tls: + insecure: true + headers: { "uptrace-dsn": "http://project2_secret_token@localhost:14317/2" } + +service: + # telemetry: + # logs: + # level: DEBUG + pipelines: + traces: + receivers: [otlp, jaeger] + processors: [batch] + exporters: [otlp, logging] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [otlp] + metrics/hostmetrics: + receivers: [hostmetrics, redis] + processors: [batch, resourcedetection] + exporters: [otlp] + logs: + receivers: [otlp] + processors: [batch] + exporters: [otlp] + + extensions: [health_check, pprof, zpages] diff --git a/example/opentelemetry/config/vector.toml b/example/opentelemetry/config/vector.toml new file mode 100644 index 0000000..10db91d --- /dev/null +++ 
b/example/opentelemetry/config/vector.toml @@ -0,0 +1,39 @@ +[sources.syslog_logs] +type = "demo_logs" +format = "syslog" +interval = 0.1 + +[sources.apache_common_logs] +type = "demo_logs" +format = "apache_common" +interval = 0.1 + +[sources.apache_error_logs] +type = "demo_logs" +format = "apache_error" +interval = 0.1 + +[sources.json_logs] +type = "demo_logs" +format = "json" +interval = 0.1 + +# Parse Syslog logs +# See the Vector Remap Language reference for more info: https://vrl.dev +[transforms.parse_logs] +type = "remap" +inputs = ["syslog_logs"] +source = ''' +. = parse_syslog!(string!(.message)) +''' + +# Export data to Uptrace. +[sinks.uptrace] +type = "http" +inputs = ["parse_logs", "apache_common_logs", "apache_error_logs", "json_logs"] +encoding.codec = "json" +framing.method = "newline_delimited" +compression = "gzip" +uri = "http://uptrace:14318/api/v1/vector/logs" +#uri = "https://api.uptrace.dev/api/v1/vector/logs" +headers.uptrace-dsn = "http://project2_secret_token@localhost:14317/2" diff --git a/example/opentelemetry/docker-compose.yml b/example/opentelemetry/docker-compose.yml new file mode 100644 index 0000000..4b47375 --- /dev/null +++ b/example/opentelemetry/docker-compose.yml @@ -0,0 +1,81 @@ +version: "3" + +services: + clickhouse: + image: clickhouse/clickhouse-server:22.10 + restart: on-failure + environment: + CLICKHOUSE_DB: uptrace + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "localhost:8123/ping"] + interval: 1s + timeout: 1s + retries: 30 + volumes: + - ch_data:/var/lib/clickhouse + ports: + - "8123:8123" + - "9000:9000" + + uptrace: + image: "uptrace/uptrace:1.2.4" + #image: "uptrace/uptrace-dev:latest" + restart: on-failure + volumes: + - uptrace_data:/var/lib/uptrace + - ./uptrace.yml:/etc/uptrace/uptrace.yml + #environment: + # - DEBUG=2 + ports: + - "14317:14317" + - "14318:14318" + depends_on: + clickhouse: + condition: service_healthy + + otel-collector: + image: otel/opentelemetry-collector-contrib:0.59.0 + 
restart: on-failure + volumes: + - ./config/otel-collector.yaml:/etc/otelcol-contrib/config.yaml + ports: + - "4317:4317" + - "4318:4318" + + redis-server: + image: redis + ports: + - "6379:6379" + redis-cli: + image: redis + + vector: + image: timberio/vector:0.24.X-alpine + volumes: + - ./config/vector.toml:/etc/vector/vector.toml:ro + + alertmanager: + image: prom/alertmanager:v0.24.0 + restart: on-failure + volumes: + - ./config/alertmanager.yml:/etc/alertmanager/config.yml + - alertmanager_data:/alertmanager + ports: + - 9093:9093 + command: + - "--config.file=/etc/alertmanager/config.yml" + - "--storage.path=/alertmanager" + + mailhog: + image: mailhog/mailhog:v1.0.1 + restart: on-failure + ports: + - "8025:8025" + +volumes: + uptrace_data: + driver: local + ch_data: + driver: local + alertmanager_data: + driver: local diff --git a/example/opentelemetry/image/metrics.png b/example/opentelemetry/image/metrics.png new file mode 100644 index 0000000..3927aa0 Binary files /dev/null and b/example/opentelemetry/image/metrics.png differ diff --git a/example/opentelemetry/image/relay-trace.png b/example/opentelemetry/image/relay-trace.png new file mode 100644 index 0000000..704eb0c Binary files /dev/null and b/example/opentelemetry/image/relay-trace.png differ diff --git a/example/opentelemetry/main.php b/example/opentelemetry/main.php new file mode 100644 index 0000000..516684c --- /dev/null +++ b/example/opentelemetry/main.php @@ -0,0 +1,64 @@ +setDsn('http://project2_secret_token@localhost:14318/2'); +$conf->setServiceName('myservice'); +$conf->setServiceVersion('1.0.0'); + +$uptrace = new Uptrace\Distro($conf); +$tracerProvider = $uptrace->createTracerProvider(); +$tracer = $tracerProvider->getTracer('relay/example-opentelemetry'); + +$redis = new RelayOpenTelemetry(function() { + return new Relay\Relay; +}, $tracerProvider); + +$redis->connect('127.0.0.1', 6379); + +$span = handleRequest($tracer, $redis); +echo $uptrace->traceUrl($span) . 
PHP_EOL; + +for ($i = 0; $i <= 1000000; $i++) { + handleRequest($tracer, $redis); + sleep(1); +} + +// Send buffered spans and free resources. +$tracerProvider->shutdown(); +// Runs a sample Redis workload (counter update, MULTI/EXEC, pipeline, SCAN) inside one 'handle-request' span and returns that span after ending it. +function handleRequest($tracer, $redis) { + $span = $tracer->spanBuilder('handle-request')->startSpan(); + $spanScope = $span->activate(); + + $value = $redis->get('counter'); // read the same key that is written below so the counter actually increments + $redis->set('counter', (int)$value + 1); + + $redis->multi() + ->set('key1', 'val1') + ->get('key1') + ->set('key2', 'val2') + ->get('key2') + ->exec(); + + $pipe = $redis->pipeline(); + for ($i = 0; $i <= 100; $i++) { + $pipe->set('key' . $i, ''); + } + $pipe->exec(); + + $it = NULL; + do { + // Scan for some keys + $arr_keys = $redis->scan($it); + } while ($it > 0); + + $spanScope->detach(); + $span->end(); + + return $span; +} diff --git a/example/opentelemetry/uptrace.yml b/example/opentelemetry/uptrace.yml new file mode 100644 index 0000000..33d393a --- /dev/null +++ b/example/opentelemetry/uptrace.yml @@ -0,0 +1,265 @@ +## +## Uptrace configuration file. +## See https://uptrace.dev/get/config.html for details. +## +## You can use environment variables anywhere in this file, for example: +## +## foo: $FOO +## bar: ${BAR} +## baz: ${BAZ:default} +## +## To escape `$`, use `$$`, for example: +## +## foo: $$FOO_BAR +## + +## +## ClickHouse database credentials. +## +ch: + # Connection string for a ClickHouse database. For example: + # clickhouse://<user>:<password>@<host>:<port>/<database>?sslmode=disable + # + # See https://clickhouse.uptrace.dev/guide/golang-clickhouse.html#options + dsn: "clickhouse://default:@clickhouse:9000/uptrace?sslmode=disable" + + # Maximum query execution time. + max_execution_time: 30s + +## +## A list of pre-configured projects. Each project is fully isolated. +## +projects: + # Conventionally, the first project is used to monitor Uptrace itself. + - id: 1 + name: Uptrace + # Token grants write access to the project. Keep it secret. 
+ token: project1_secret_token + pinned_attrs: + - service + - host.name + - deployment.environment + # Group spans by deployment.environment attribute. + group_by_env: false + # Group funcs spans by service.name attribute. + group_funcs_by_service: false + + # Other projects can be used to monitor your applications. + # To monitor micro-services or multiple related services, use a single project. + - id: 2 + name: My project + token: project2_secret_token + pinned_attrs: + - service + - host.name + - deployment.environment + # Group spans by deployment.environment attribute. + group_by_env: false + # Group funcs spans by service.name attribute. + group_funcs_by_service: false + +## +## Create metrics from spans and events. +## +metrics_from_spans: + - name: uptrace.tracing.spans + description: Total number of spans including logs and events + instrument: counter + unit: 1 + value: span.count + attrs: [span.system, service.name, host.name, span.status_code] + + - name: uptrace.tracing.spans_duration + description: Spans duration + instrument: histogram + unit: microseconds + value: span.duration / 1000 + attrs: [span.system, service.name, host.name] + where: span.duration > 0 + + - name: uptrace.tracing.error_rate + description: Spans error rate + instrument: gauge + unit: percents + value: span.error_count / span.count + attrs: [span.system, service.name, host.name] + +## +## Alerting rules for monitoring metrics. +## +## See https://uptrace.dev/get/alerting.html for details. +## +alerting: + rules: + - name: Redis is working + metrics: + - uptrace.tracing.spans as $spans + query: + - $spans > 0 + - where system = 'db:redis' + for: 5m + annotations: + summary: "Got {{ $values.spans }} spans from Redis" + + - name: Redis is failing + metrics: + - uptrace.tracing.spans as $spans + query: + - $spans{status='error'} > 10 + - where system = 'db:redis' + for: 5m + + # Create alerts from error logs and span events. 
+ create_alerts_from_spans: + enabled: true + labels: + alert_kind: error + +## +## To require authentication, uncomment one of the following sections. +## +auth: + # users: + # - username: uptrace + # password: uptrace + # - username: admin + # password: admin + + # Cloudflare Zero Trust Access (Identity) + # See https://developers.cloudflare.com/cloudflare-one/identity/ for more info. + # cloudflare: + # # The base URL of the Cloudflare Zero Trust team. + # - team_url: https://myteam.cloudflareaccess.com + # # The Application Audience (AUD) Tag for this application. + # # You can retrieve this from the Cloudflare Zero Trust 'Access' Dashboard. + # audience: bea6df23b944e4a0cd178609ba1bb64dc98dfe1f66ae7b918e563f6cf28b37e0 + + # OpenID Connect (Single Sign-On) + # oidc: + # # The ID is used in API endpoints, for example, in redirect URL + # # `http://<uptrace-host>/api/v1/sso/<oidc-id>/callback`. + # - id: keycloak + # # Display name for the button in the login form. + # # Defaults to 'OpenID Connect' + # display_name: Keycloak + # # The base URL for the OIDC provider. + # issuer_url: http://localhost:8080/realms/uptrace + # # The OAuth 2.0 Client ID + # client_id: uptrace + # # The OAuth 2.0 Client Secret + # client_secret: ogbhd8Q0X0e5AZFGSG3m9oirPvnetqkA + # # Additional OAuth 2.0 scopes to request from the OIDC provider. + # # Defaults to 'profile'. 'openid' is requested by default and need not be specified. + # scopes: + # - profile + # # The OIDC UserInfo claim to use as the user's username. + # # Defaults to 'preferred_username'. + # claim: preferred_username + +## +## AlertManager client configuration. +## See https://uptrace.dev/get/alerting.html for details. +## +## Note that this is NOT an AlertManager config and you need to configure AlertManager separately. +## See https://prometheus.io/docs/alerting/latest/configuration/ for details. +## +alertmanager_client: + # AlertManager API endpoints that Uptrace uses to manage alerts. 
+ urls: + - "http://alertmanager:9093/api/v2/alerts" + +## +## Various options to tweak ClickHouse schema. +## For changes to take effect, you need to reset the ClickHouse database with `ch reset`. +## +ch_schema: + # Compression codec, for example, LZ4, ZSTD(3), or Default. + compression: ZSTD(3) + + # Whether to use ReplicatedMergeTree instead of MergeTree. + replicated: false + + # Cluster name for Distributed tables and ON CLUSTER clause. + #cluster: uptrace1 + + spans: + storage_policy: "default" + # Delete spans data after 30 days. + ttl_delete: 30 DAY + + metrics: + storage_policy: "default" + # Delete metrics data after 90 days. + ttl_delete: 90 DAY + +## +## Addresses on which Uptrace receives gRPC and HTTP requests. +## +listen: + # OTLP/gRPC API. + grpc: + addr: ":14317" + # tls: + # cert_file: config/tls/uptrace.crt + # key_file: config/tls/uptrace.key + + # OTLP/HTTP API and Uptrace API with UI. + http: + addr: ":14318" + # tls: + # cert_file: config/tls/uptrace.crt + # key_file: config/tls/uptrace.key + +## +## Various options for Uptrace UI. +## +site: + # Overrides public URL for Vue-powered UI in case you put Uptrace behind a proxy. + #addr: 'https://uptrace.mydomain.com' + +## +## Spans processing options. +## +spans: + # The size of the Go chan used to buffer incoming spans. + # If the buffer is full, Uptrace starts to drop spans. + #buffer_size: 100000 + + # The number of spans to insert in a single query. + #batch_size: 10000 + +## +## Metrics processing options. +## +metrics: + # List of attributes to drop for being noisy. + drop_attrs: + - telemetry.sdk.language + - telemetry.sdk.name + - telemetry.sdk.version + + # The size of the Go chan used to buffer incoming measures. + # If the buffer is full, Uptrace starts to drop measures. + #buffer_size: 100000 + + # The number of measures to insert in a single query. + #batch_size: 10000 + +## +## SQLite/PostgreSQL db that is used to store metadata such as metric names, dashboards, alerts, +## and so on. 
+## +db: + # Either sqlite or postgres. + driver: sqlite + # Database connection string. + # + # Uptrace automatically creates SQLite database file in the current working directory. + # Make sure the directory is writable by Uptrace process. + dsn: "file:uptrace.sqlite3?_pragma=foreign_keys(1)&_pragma=busy_timeout(1000)" + +# Secret key that is used to sign JWT tokens etc. +secret_key: 102c1a557c314fc28198acd017960843 + +# Enable to log HTTP requests and database queries. +debug: false