diff --git a/docker-compose.yml b/docker-compose.yml
index 4b8ad81..a78e2bb 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,11 @@
 version: "3.4"
 
+# Required for promtail scraping
+x-logging: &default-logging
+  driver: "journald"
+  options:
+    tag: "{{.Name}}"
+
 services:
   db:
     image: postgres:13.5
@@ -38,6 +44,7 @@ services:
     networks:
       - web
       - db
+    logging: *default-logging
 
   frontend:
     build:
diff --git a/observability/docker-compose.yml b/observability/docker-compose.yml
new file mode 100644
index 0000000..6037ca0
--- /dev/null
+++ b/observability/docker-compose.yml
@@ -0,0 +1,88 @@
+# Adapted from https://github.com/stefanprodan/dockprom/blob/master/docker-compose.yml
+
+version: "3.4"
+
+services:
+  prometheus:
+    image: prom/prometheus:v2.33.4
+    volumes:
+      - ./prometheus:/etc/prometheus
+      - prometheus_data:/prometheus
+    command:
+      - "--config.file=/etc/prometheus/prometheus.yml"
+      - "--storage.tsdb.path=/prometheus"
+      - "--storage.tsdb.retention.time=200h"
+      - "--web.enable-lifecycle"
+    ports:
+      - "9090:9090"
+    networks:
+      - observability
+      - api
+
+  loki:
+    image: grafana/loki:2.5.0
+    command: ["-config.file=/etc/loki/loki.yaml"]
+    ports:
+      - "3100" # loki needs to be exposed so it receives logs
+    volumes:
+      - ./loki/loki.yaml:/etc/loki/loki.yaml
+    networks:
+      - observability
+
+  promtail:
+    image: grafana/promtail:2.5.0
+    command: ["-config.file=/etc/promtail.yaml"]
+    volumes:
+      - ./promtail/promtail.yaml:/etc/promtail.yaml
+      - /var/lib/docker/containers:/var/lib/docker/containers:ro
+
+      - /var/log/journal/:/var/log/journal/
+      - /run/log/journal/:/run/log/journal/
+      - /etc/machine-id:/etc/machine-id
+    ports:
+      - "3102"
+    networks:
+      - observability
+    depends_on:
+      - loki
+
+  tempo:
+    image: grafana/tempo:1.3.2
+    command: ["-config.file=/etc/tempo.yaml"]
+    volumes:
+      - ./tempo/tempo.yaml:/etc/tempo.yaml
+      - tempo_data:/tmp/tempo
+    ports:
+      - "55680" # OpenTelemetry
+      - "3101" # tempo
+    networks:
+      - observability
+      - api
+
+  grafana:
+    image: grafana/grafana-oss:8.4.2
+    volumes:
+      - ./grafana/provisioning:/etc/grafana/provisioning
+    # NOTE(review): fixed admin credentials plus anonymous Admin access look
+    # intended for local development only; confirm port 2345 is never exposed publicly.
+    environment:
+      - GF_SECURITY_ADMIN_USER=listory
+      - GF_SECURITY_ADMIN_PASSWORD=listory
+      - GF_AUTH_ANONYMOUS_ENABLED=true
+      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
+      - GF_USERS_ALLOW_SIGN_UP=false
+      - GF_SERVER_HTTP_PORT=2345
+    ports:
+      - "2345:2345"
+    networks:
+      - observability
+
+volumes:
+  prometheus_data: {}
+  tempo_data: {}
+
+networks:
+  observability: {}
+  api:
+    external: true
+    name: listory_web
diff --git a/observability/grafana/provisioning/dashboards/dashboard.yml b/observability/grafana/provisioning/dashboards/dashboard.yml
new file mode 100644
index 0000000..ff7db10
--- /dev/null
+++ b/observability/grafana/provisioning/dashboards/dashboard.yml
@@ -0,0 +1,12 @@
+apiVersion: 1
+
+providers:
+  - name: "Prometheus"
+    orgId: 1
+    folder: ""
+    type: file
+    disableDeletion: false
+    editable: true
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
diff --git a/observability/grafana/provisioning/datasources/datasource.yml b/observability/grafana/provisioning/datasources/datasource.yml
new file mode 100644
index 0000000..d23bab8
--- /dev/null
+++ b/observability/grafana/provisioning/datasources/datasource.yml
@@ -0,0 +1,41 @@
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    orgId: 1
+    url: http://prometheus:9090
+    basicAuth: false
+    isDefault: false
+    version: 1
+    editable: false
+
+  - name: Tempo
+    type: tempo
+    access: proxy
+    orgId: 1
+    url: http://tempo:3101
+    basicAuth: false
+    isDefault: false
+    version: 1
+    editable: true
+    apiVersion: 1
+    uid: tempo
+
+  - name: Loki
+    type: loki
+    access: proxy
+    orgId: 1
+    url: http://loki:3100
+    basicAuth: false
+    isDefault: false
+    version: 1
+    editable: false
+    apiVersion: 1
+    jsonData:
+      derivedFields:
+        - datasourceUid: tempo
+          matcherRegex: '"traceId":"([A-Za-z0-9]+)"'
+          name: TraceID
+          url: $${__value.raw}
diff --git a/observability/loki/loki.yaml b/observability/loki/loki.yaml
new file mode 100644
index 0000000..1c8f95e
--- /dev/null
+++ b/observability/loki/loki.yaml
@@ -0,0 +1,66 @@
+auth_enabled: false
+
+server:
+  http_listen_port: 3100
+
+ingester:
+  lifecycler:
+    address: 127.0.0.1
+    ring:
+      kvstore:
+        store: inmemory
+      replication_factor: 1
+    final_sleep: 0s
+  chunk_idle_period: 1h # Any chunk not receiving new logs in this time will be flushed
+  max_chunk_age: 1h # All chunks will be flushed when they hit this age, default is 1h
+  chunk_target_size: 1048576 # Loki will attempt to build chunks up to 1MB (1048576 bytes), flushing first if chunk_idle_period or max_chunk_age is reached first
+  chunk_retain_period: 30s # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m)
+  max_transfer_retries: 0 # Chunk transfers disabled
+
+  wal:
+    dir: /loki/wal
+
+schema_config:
+  configs:
+    - from: 2020-10-24
+      store: boltdb-shipper
+      object_store: filesystem
+      schema: v11
+      index:
+        prefix: index_
+        period: 24h
+
+storage_config:
+  boltdb_shipper:
+    active_index_directory: /tmp/loki/boltdb-shipper-active
+    cache_location: /tmp/loki/boltdb-shipper-cache
+    cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space
+    shared_store: filesystem
+  filesystem:
+    directory: /tmp/loki/chunks
+
+compactor:
+  working_directory: /tmp/loki/boltdb-shipper-compactor
+  shared_store: filesystem
+
+limits_config:
+  reject_old_samples: true
+  reject_old_samples_max_age: 168h
+
+chunk_store_config:
+  max_look_back_period: 0s
+
+table_manager:
+  retention_deletes_enabled: false
+  retention_period: 0s
+
+ruler:
+  storage:
+    type: local
+    local:
+      directory: /tmp/loki/rules
+  rule_path: /tmp/loki/rules-temp
+  ring:
+    kvstore:
+      store: inmemory
+  enable_api: true
diff --git a/observability/prometheus/prometheus.yml b/observability/prometheus/prometheus.yml
new file mode 100644
index 0000000..00de50c
--- /dev/null
+++ b/observability/prometheus/prometheus.yml
@@ -0,0 +1,17 @@
+global:
+  scrape_interval: 15s
+
+# Scrape jobs: the listory API, Prometheus itself, and Tempo.
+scrape_configs:
+  - job_name: "listory"
+    metrics_path: "/metrics"
+    static_configs:
+      - targets: ["api:9464"]
+
+  - job_name: "prometheus"
+    static_configs:
+      - targets: ["localhost:9090"]
+
+  - job_name: "tempo"
+    static_configs:
+      - targets: ["tempo:3101"] # must match server.http_listen_port in tempo/tempo.yaml (3100 is Loki)
diff --git a/observability/promtail/promtail.yaml b/observability/promtail/promtail.yaml
new file mode 100644
index 0000000..f9dc97e
--- /dev/null
+++ b/observability/promtail/promtail.yaml
@@ -0,0 +1,30 @@
+server:
+  http_listen_port: 3102
+
+clients:
+  - url: http://loki:3100/loki/api/v1/push
+
+positions:
+  filename: /tmp/positions.yaml
+
+target_config:
+  sync_period: 10s
+
+scrape_configs:
+  - job_name: listory
+    journal:
+      labels:
+        job: listory
+    relabel_configs:
+      # services
+      - source_labels:
+          - __journal__systemd_unit
+        target_label: unit
+      # docker containers
+      - source_labels:
+          - __journal_container_name
+        target_label: container # use whatever label you like
+      - source_labels:
+          - container
+        action: keep
+        regex: listory-.* # only keep logs from containers named listory-*
diff --git a/observability/tempo/tempo.yaml b/observability/tempo/tempo.yaml
new file mode 100644
index 0000000..66b0b6d
--- /dev/null
+++ b/observability/tempo/tempo.yaml
@@ -0,0 +1,36 @@
+server:
+  http_listen_port: 3101
+
+distributor:
+  receivers:
+    otlp:
+      protocols:
+        http:
+
+ingester:
+  trace_idle_period: 10s # the length of time after a trace has not received spans to consider it complete and flush it
+  max_block_bytes: 1_000_000 # cut the head block when it hits this size or ...
+  max_block_duration: 5m # this much time passes
+
+compactor:
+  compaction:
+    compaction_window: 1h # blocks in this time window will be compacted together
+    max_block_bytes: 100_000_000 # maximum size of compacted blocks
+    block_retention: 1h
+    compacted_block_retention: 10m
+
+storage:
+  trace:
+    backend: local # backend configuration to use
+    block:
+      bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives
+      index_downsample_bytes: 1000 # number of bytes per index record
+      encoding: zstd # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd
+    wal:
+      path: /tmp/tempo/wal # where to store the wal locally
+      encoding: none # wal encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd
+    local:
+      path: /tmp/tempo/blocks
+    pool:
+      max_workers: 100 # the worker pool mainly drives querying, but is also used for polling the blocklist
+      queue_depth: 10000