feat(observability): add local grafana+prom stack for metrics insights

Julian Tölle 2020-11-22 20:04:56 +01:00
parent 6b1640b753
commit d0a9b0a07c
8 changed files with 295 additions and 0 deletions

@@ -0,0 +1,86 @@
# Adapted from https://github.com/stefanprodan/dockprom/blob/master/docker-compose.yml
version: "3.4"

services:
  prometheus:
    image: prom/prometheus:v2.33.4
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus_data:/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=200h"
      - "--web.enable-lifecycle"
    ports:
      - 9090:9090
    networks:
      - observability
      - api

  loki:
    image: grafana/loki:2.5.0
    command: ["-config.file=/etc/loki/loki.yaml"]
    ports:
      - "3100" # loki needs to be exposed so it receives logs
    volumes:
      - ./loki/loki.yaml:/etc/loki/loki.yaml
    networks:
      - observability

  promtail:
    image: grafana/promtail:2.5.0
    command: ["-config.file=/etc/promtail.yaml"]
    volumes:
      - ./promtail/promtail.yaml:/etc/promtail.yaml
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/log/journal/:/var/log/journal/
      - /run/log/journal/:/run/log/journal/
      - /etc/machine-id:/etc/machine-id
    ports:
      - "3102"
    networks:
      - observability
    depends_on:
      - loki

  tempo:
    image: grafana/tempo:1.3.2
    command: ["-config.file=/etc/tempo.yaml"]
    volumes:
      - ./tempo/tempo.yaml:/etc/tempo.yaml
      - tempo_data:/tmp/tempo
    ports:
      - "55680" # OpenTelemetry
      - "3101" # tempo
    networks:
      - observability
      - api

  grafana:
    image: grafana/grafana-oss:8.4.2
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning
    environment:
      - GF_SECURITY_ADMIN_USER=listory
      - GF_SECURITY_ADMIN_PASSWORD=listory
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_SERVER_HTTP_PORT=2345
    ports:
      - 2345:2345
    networks:
      - observability

volumes:
  prometheus_data: {}
  tempo_data: {}

networks:
  observability: {}
  api:
    external: true
    name: listory_web
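
Prometheus above scrapes api:9464 and joins the external listory_web network, so the application stack must be reachable on that network under the service name "api" and expose a metrics endpoint on port 9464. A minimal sketch of the application-side compose wiring, with purely illustrative names since that file is not part of this commit:

  # sketch only – service and image names are hypothetical
  services:
    api:
      image: listory            # hypothetical application image
      networks:
        - web                   # the service name "api" becomes the DNS name Prometheus scrapes

  networks:
    web:
      name: listory_web         # the external network referenced above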

@@ -0,0 +1,12 @@
apiVersion: 1

providers:
  - name: "Prometheus"
    orgId: 1
    folder: ""
    type: file
    disableDeletion: false
    editable: true
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards

@@ -0,0 +1,41 @@
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    orgId: 1
    url: http://prometheus:9090
    basicAuth: false
    isDefault: false
    version: 1
    editable: false

  - name: Tempo
    type: tempo
    access: proxy
    orgId: 1
    url: http://tempo:3101
    basicAuth: false
    isDefault: false
    version: 1
    editable: true
    apiVersion: 1
    uid: tempo

  - name: Loki
    type: loki
    access: proxy
    orgId: 1
    url: http://loki:3100
    basicAuth: false
    isDefault: false
    version: 1
    editable: false
    apiVersion: 1
    jsonData:
      derivedFields:
        - datasourceUid: tempo
          matcherRegex: '"traceId":"([A-Za-z0-9]+)"'
          name: TraceID
          url: $${__value.raw}
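
The derivedFields entry above turns a traceId found in log lines into a link to the Tempo data source (uid tempo). For the matcherRegex to fire, the application has to log its trace ID as a JSON field with exactly that name; a hypothetical log line it would match:

  {"level":"info","message":"GET /links 200","traceId":"0af7651916cd43dd8448eb211c80319c"}

The trace ID value here is made up; the real one comes from the application's OpenTelemetry context.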

@@ -0,0 +1,66 @@
auth_enabled: false

server:
  http_listen_port: 3100

ingester:
  lifecycler:
    address: 127.0.0.1
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 1h # Any chunk not receiving new logs in this time will be flushed
  max_chunk_age: 1h # All chunks will be flushed when they hit this age, default is 1h
  chunk_target_size: 1048576 # Loki will attempt to build chunks up to 1MB, flushing first if chunk_idle_period or max_chunk_age is reached first
  chunk_retain_period: 30s # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
  max_transfer_retries: 0 # Chunk transfers disabled
  wal:
    dir: /loki/wal

schema_config:
  configs:
    - from: 2020-10-24
      store: boltdb-shipper
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 24h

storage_config:
  boltdb_shipper:
    active_index_directory: /tmp/loki/boltdb-shipper-active
    cache_location: /tmp/loki/boltdb-shipper-cache
    cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space
    shared_store: filesystem
  filesystem:
    directory: /tmp/loki/chunks

compactor:
  working_directory: /tmp/loki/boltdb-shipper-compactor
  shared_store: filesystem

limits_config:
  reject_old_samples: true
  reject_old_samples_max_age: 168h

chunk_store_config:
  max_look_back_period: 0s

table_manager:
  retention_deletes_enabled: false
  retention_period: 0s

ruler:
  storage:
    type: local
    local:
      directory: /tmp/loki/rules
  rule_path: /tmp/loki/rules-temp
  ring:
    kvstore:
      store: inmemory
  enable_api: true
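
This Loki config keeps the WAL in /loki/wal and chunks/index under /tmp/loki, but the compose file only defines named volumes for Prometheus and Tempo, so Loki's data is lost whenever its container is re-created. If persistence is wanted, a sketch of the extra compose wiring (volume names are illustrative):

  loki:
    volumes:
      - ./loki/loki.yaml:/etc/loki/loki.yaml
      - loki_wal:/loki          # hypothetical volume for the WAL
      - loki_data:/tmp/loki     # hypothetical volume for chunks and index

  volumes:
    loki_wal: {}
    loki_data: {}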

@@ -0,0 +1,17 @@
global:
  scrape_interval: 15s

# Scrape configurations for the application and the observability stack itself.
scrape_configs:
  - job_name: "listory"
    metrics_path: "/metrics"
    static_configs:
      - targets: ["api:9464"]

  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "tempo"
    static_configs:
      # Tempo serves /metrics on its HTTP port (3101, see tempo.yaml)
      - targets: ["tempo:3101"]

@@ -0,0 +1,30 @@
server:
  http_listen_port: 3102

clients:
  - url: http://loki:3100/loki/api/v1/push

positions:
  filename: /tmp/positions.yaml

target_config:
  sync_period: 10s

scrape_configs:
  - job_name: listory
    journal:
      labels:
        job: listory
    relabel_configs:
      # services
      - source_labels:
          - __journal__systemd_unit
        target_label: unit
      # docker containers
      - source_labels:
          - __journal_container_name
        target_label: container # use whatever label you like
      - source_labels:
          - container
        action: keep
        regex: listory-.* # only keep api logs
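
The journal scrape above relies on the __journal_container_name field, which is only present when containers log through Docker's journald logging driver (it maps the CONTAINER_NAME journal field), and the keep rule then only passes containers named listory-*. A sketch of the matching application-side service options, with an illustrative container name:

  api:
    container_name: listory-api   # must match the keep regex above
    logging:
      driver: journald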

@@ -0,0 +1,36 @@
server:
  http_listen_port: 3101

distributor:
  receivers:
    otlp:
      protocols:
        http:

ingester:
  trace_idle_period: 10s # the length of time after a trace has not received spans to consider it complete and flush it
  max_block_bytes: 1_000_000 # cut the head block when it hits this size or ...
  max_block_duration: 5m # this much time passes

compactor:
  compaction:
    compaction_window: 1h # blocks in this time window will be compacted together
    max_block_bytes: 100_000_000 # maximum size of compacted blocks
    block_retention: 1h
    compacted_block_retention: 10m

storage:
  trace:
    backend: local # backend configuration to use
    block:
      bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives
      index_downsample_bytes: 1000 # number of bytes per index record
      encoding: zstd # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd
    wal:
      path: /tmp/tempo/wal # where to store the wal locally
      encoding: none # wal encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd
    local:
      path: /tmp/tempo/blocks
    pool:
      max_workers: 100 # the worker pool mainly drives querying, but is also used for polling the blocklist
      queue_depth: 10000
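
The receiver block enables OTLP over HTTP on the receiver's default port, while docker-compose.yml publishes 55680 as the OpenTelemetry port; depending on the Tempo/collector version those defaults may not line up. If the listener should be pinned to the published port, the endpoint can be set explicitly; a sketch (the port is taken from the compose file, the option name from the embedded OTLP receiver):

  distributor:
    receivers:
      otlp:
        protocols:
          http:
            endpoint: "0.0.0.0:55680"   # match the port published in docker-compose.yml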