From 4d6eae83988f12355a5af90a649ff6ef5fa3d4a4 Mon Sep 17 00:00:00 2001
From: Supan Adit Pratama
Date: Thu, 19 Jun 2025 15:31:20 +0700
Subject: [PATCH] wip: full-fledged monitoring system

---
 docker/alloy/compose.yaml        |  27 ++
 docker/alloy/config.alloy        | 102 +++++
 docker/cadvisor/compose.yaml     |  21 +
 docker/grafana/compose.yaml      | 217 +---------
 docker/loki/compose.yaml         |  24 ++
 docker/loki/loki.yaml            |  58 +++
 docker/loki/promtail.yaml        |  22 ++
 docker/minio/compose.yaml        |   2 +-
 docker/prometheus/compose.yaml   |  26 ++
 docker/prometheus/prometheus.yml | 647 +++++++++++++++++++++++++++++++
 docker/pyroscope/compose.yaml    |  17 +
 docker/pyroscope/pyroscope.yml   |   6 +
 docker/tempo/compose.yaml        |  39 ++
 docker/tempo/tempo.yaml          |  95 +++++
 docker/thanos/bucket.yaml        |   9 +
 docker/thanos/compose.yaml       |  60 +++
 16 files changed, 1169 insertions(+), 203 deletions(-)
 create mode 100644 docker/alloy/compose.yaml
 create mode 100644 docker/alloy/config.alloy
 create mode 100644 docker/cadvisor/compose.yaml
 create mode 100644 docker/loki/compose.yaml
 create mode 100644 docker/loki/loki.yaml
 create mode 100644 docker/loki/promtail.yaml
 create mode 100644 docker/prometheus/compose.yaml
 create mode 100644 docker/prometheus/prometheus.yml
 create mode 100644 docker/pyroscope/compose.yaml
 create mode 100644 docker/pyroscope/pyroscope.yml
 create mode 100644 docker/tempo/compose.yaml
 create mode 100644 docker/tempo/tempo.yaml
 create mode 100644 docker/thanos/bucket.yaml
 create mode 100644 docker/thanos/compose.yaml

diff --git a/docker/alloy/compose.yaml b/docker/alloy/compose.yaml
new file mode 100644
index 0000000..b73e6ef
--- /dev/null
+++ b/docker/alloy/compose.yaml
@@ -0,0 +1,27 @@
+networks:
+  default:
+    name: eigen-monitoring
+    external: true
+
+services:
+  alloy:
+    image: grafana/alloy:latest
+    restart: on-failure
+    volumes:
+      - ./config.alloy:/etc/alloy/config.alloy
+    environment:
+      REMOTE_WRITE_HOST: 172.10.10.6:30291
+      LOKI_HOST: 172.10.10.6:30501
+      TEMPO_HOST: 172.10.10.6:30641
+      PYROSCOPE_HOST: pyroscope:4040
+    command:
+      - run
+      - /etc/alloy/config.alloy
+      - --storage.path=/var/lib/alloy/data
+      - --server.http.listen-addr=0.0.0.0:12345
+      - --stability.level=experimental
+    ports:
+      - "12345:12345"
+      - "4319:4319"
+      - "4017:4017"
+      - "4018:4018"
\ No newline at end of file
diff --git a/docker/alloy/config.alloy b/docker/alloy/config.alloy
new file mode 100644
index 0000000..14894cb
--- /dev/null
+++ b/docker/alloy/config.alloy
@@ -0,0 +1,102 @@
+logging {
+  level = "debug"
+
+  // Forward internal logs to the local Loki instance.
+  write_to = [loki.relabel.alloy_logs.receiver]
+}
+
+loki.relabel "alloy_logs" {
+  rule {
+    target_label = "instance"
+    replacement  = constants.hostname
+  }
+
+  rule {
+    target_label = "job"
+    replacement  = "integrations/self"
+  }
+
+  forward_to = [loki.write.loki.receiver]
+}
+
+tracing {
+  // Write all spans. Don't do this in production!
+  sampling_fraction = 1.0
+
+  // Forward internal spans to the local Tempo instance.
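+  // (The receiver referenced below is defined by the
+  // otelcol.exporter.otlp "tempo" block further down, which reads the
+  // TEMPO_HOST environment variable for its endpoint.)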
+ write_to = [otelcol.exporter.otlp.tempo.input] +} + +prometheus.exporter.self "alloy" {} +prometheus.scrape "alloy" { + targets = prometheus.exporter.self.alloy.targets + forward_to = [prometheus.remote_write.mimir.receiver] +} + +pyroscope.scrape "default" { + targets = [ + {"__address__" = "localhost:12345", "service_name" = "alloy"}, + ] + forward_to = [pyroscope.write.pyroscope.receiver] +} + +prometheus.remote_write "mimir" { + endpoint { + url = string.format( + "http://%s/api/v1/receive", + coalesce(sys.env("REMOTE_WRITE_HOST"), "localhost:9009"), + ) + } +} + +loki.write "loki" { + endpoint { + url = string.format( + "http://%s/loki/api/v1/push", + coalesce(sys.env("LOKI_HOST"), "localhost:3100"), + ) + } +} + +otelcol.receiver.otlp "default" { + grpc { + endpoint = "alloy:4017" + } + + http { + endpoint = "alloy:4018" + } + + output { + metrics = [otelcol.exporter.otlp.tempo.input] + logs = [otelcol.exporter.otlp.tempo.input] + traces = [otelcol.exporter.otlp.tempo.input] + } +} + +otelcol.exporter.otlp "tempo" { + client { + endpoint = coalesce(sys.env("TEMPO_HOST"), "localhost:4317") + + wait_for_ready = true + + tls { + insecure = true + } + } + + sending_queue { + enabled = false + num_consumers = 100 + queue_size = 10000 + } +} + +pyroscope.write "pyroscope" { + endpoint { + url = string.format( + "http://%s", + coalesce(sys.env("PYROSCOPE_HOST"), "localhost:4040"), + ) + } +} diff --git a/docker/cadvisor/compose.yaml b/docker/cadvisor/compose.yaml new file mode 100644 index 0000000..cef8a73 --- /dev/null +++ b/docker/cadvisor/compose.yaml @@ -0,0 +1,21 @@ +networks: + default: + name: eigen-monitoring + external: true + +services: + cadvisor: + image: "gcr.io/cadvisor/cadvisor:v0.49.1" + container_name: cadvisor + privileged: true + devices: + - "/dev/kmsg:/dev/kmsg" + volumes: + - "/:/rootfs:ro" + - "/var/run:/var/run:ro" + - "/sys:/sys:ro" + - "/var/lib/docker/:/var/lib/docker:ro" + - "/dev/disk/:/dev/disk:ro" + ports: + - "9070:8080" + restart: always \ No newline at end of file diff --git a/docker/grafana/compose.yaml b/docker/grafana/compose.yaml index 1c70d09..bce5f80 100644 --- a/docker/grafana/compose.yaml +++ b/docker/grafana/compose.yaml @@ -1,213 +1,26 @@ +networks: + default: + name: eigen-monitoring + external: true + services: grafana: image: grafana/grafana:latest restart: unless-stopped + labels: + - "traefik.enable=true" + - "traefik.http.services.grafana.loadbalancer.server.port=3000" + - "traefik.http.routers.grafana.rule=Host(`grafana.eigen.research`)" + - "traefik.http.routers.grafana.tls=true" + - "traefik.http.routers.grafana.service=grafana" + - "traefik.http.routers.grafana.entryPoints=web,websecure" volumes: - - ./data/grafana:/var/lib/grafana + - ./.data:/var/lib/grafana environment: - GF_SECURITY_ADMIN_PASSWORD=ZTWTWXeZhFs4wg6vMr7M - GF_USERS_ALLOW_SIGN_UP=false - GF_SERVER_DOMAIN=old.grafana.eigen.co.id - - GF_SERVER_ROOT_URL=https://old.grafana.eigen.co.id + - GF_SERVER_ROOT_URL=http://grafana.eigen.research - GF_INSTALL_PLUGINS=grafana-pyroscope-app - GF_FEATURE_TOGGLES_ENABLE=traceToProfiles tracesEmbeddedFlameGraph - - GF_SMTP_ENABLED=false - ports: - - "3000:3000" - - pyroscope: - image: grafana/pyroscope:latest - environment: - JAEGER_AGENT_HOST: distributor - JAEGER_SAMPLER_TYPE: const - JAEGER_SAMPLER_PARAM: 1 - command: ["-config.file=/etc/pyroscope.yml"] - ports: - - "4040:4040" - volumes: - - ./config/pyroscope.yml:/etc/pyroscope.yml - - memcached: - image: bitnami/memcached:latest - container_name: memcached - ports: - - 
"11211:11211" - environment: - - MEMCACHED_CACHE_SIZE=128 - - MEMCACHED_THREADS=4 - - init-tempo: - image: &tempoImage grafana/tempo:latest - user: root - entrypoint: - - "chown" - - "10001:10001" - - "/var/tempo" - volumes: - - ./data/tempo:/var/tempo - - tempo: - image: *tempoImage - command: [ "-config.file=/etc/tempo.yaml" ] - volumes: - - ./config/tempo-standalone.yaml:/etc/tempo.yaml - - ./data/tempo:/var/tempo - ports: - - "14268:14268" # jaeger ingest - - "3200:3200" # tempo - - "9095:9095" # tempo grpc - - "9411:9411" # zipkin - depends_on: - - init-tempo - - memcached - - alloy: - image: grafana/alloy:latest - restart: on-failure - volumes: - - ./config/config.alloy:/etc/alloy/config.alloy - environment: - REMOTE_WRITE_HOST: 172.10.10.6:30291 - LOKI_HOST: 172.10.10.6:30501 - TEMPO_HOST: 172.10.10.6:30641 - PYROSCOPE_HOST: pyroscope:4040 - depends_on: - # - thanos-receiver - # - loki - # - tempo - - pyroscope - command: - - run - - /etc/alloy/config.alloy - - --storage.path=/var/lib/alloy/data - - --server.http.listen-addr=0.0.0.0:12345 - - --stability.level=experimental - ports: - - "12345:12345" - - "4319:4319" - - "4017:4017" - - "4018:4018" - - loki: - image: grafana/loki:3.0.0 - container_name: loki - volumes: - - ./config/loki:/mnt/config - ports: - - "3100:3100" - command: -config.file=/mnt/config/loki-config.yaml - - promtail: - image: grafana/promtail:3.0.0 - container_name: promtail - volumes: - - ./config/loki:/mnt/config - - /var/log:/var/log - depends_on: - - loki - command: -config.file=/mnt/config/promtail-config.yaml - - prometheus: - image: prom/prometheus:latest - restart: unless-stopped - user: root - volumes: - - ./config/prometheus.yml:/etc/prometheus/prometheus.yml - - ./data/prometheus:/prometheus - # Credentials - - ./credentials/kubernetes-eigen-core:/credentials/kubernetes-eigen-core - - ./credentials/kubernetes-eigen-internal:/credentials/kubernetes-eigen-internal - - ./credentials/kubernetes-eigen-external:/credentials/kubernetes-eigen-external - - ./credentials/kubernetes-pmps-local:/credentials/kubernetes-pmps-local - - ./credentials/kubernetes-ifgf-jakarta:/credentials/kubernetes-ifgf-jakarta - command: - - '--config.file=/etc/prometheus/prometheus.yml' - - '--storage.tsdb.retention.time=60d' - - '--storage.tsdb.min-block-duration=2h' - - '--storage.tsdb.max-block-duration=2h' - - '--storage.tsdb.path=/prometheus' - - '--web.console.libraries=/etc/prometheus/console_libraries' - - '--web.console.templates=/etc/prometheus/consoles' - - '--web.enable-lifecycle' - - '--web.enable-admin-api' - - '--web.enable-remote-write-receiver' - - '--enable-feature=native-histograms' - ports: - - "9090:9090" - #depends_on: - # - thanos-receiver - - thanos-query: - image: thanosio/thanos:v0.36.1 - user: root - command: - - query - - --http-address=:19192 - - --grpc-address=:19092 - - --endpoint=thanos-store:19090 - - --endpoint=thanos-receiver:10907 - #- --query.auto-downsampling - #- --query.max-concurrent-select=10 - #- --query.max-concurrent=50 - #- --query.timeout=1440m - #- --query.partial-response - - thanos-store: - image: thanosio/thanos:v0.36.1 - user: root - command: - - store - - --data-dir=/data - - --objstore.config-file=/etc/thanos/bucket.yaml - - --http-address=:19191 - - --grpc-address=:19090 - volumes: - - ./data/thanos/store:/data - - ./config/bucket.yaml:/etc/thanos/bucket.yaml - - thanos-compactor: - image: thanosio/thanos:v0.36.1 - user: root - command: - - compact - - --data-dir=/data - - --objstore.config-file=/etc/thanos/bucket.yaml - - --wait 
- - --wait-interval=5m - volumes: - - ./data/thanos/compactor:/data - - ./config/bucket.yaml:/etc/thanos/bucket.yaml - - thanos-receiver: - image: thanosio/thanos:v0.36.1 - user: root - command: - - receive - - --grpc-address=:10907 - - --http-address=:10909 - - --tsdb.path=/data - - --receive.local-endpoint=127.0.0.1:10907 - - --objstore.config-file=/etc/thanos/bucket.yaml - - --label=receive_instance_id="thanos-receiver-1" - - --remote-write.address=:10908 - ports: - - "10908:10908" - volumes: - - ./data/thanos/receiver:/data/default-tenant - - ./config/bucket.yaml:/etc/thanos/bucket.yaml - - cadvisor: - image: "gcr.io/cadvisor/cadvisor:v0.49.1" - container_name: cadvisor - privileged: true - devices: - - "/dev/kmsg:/dev/kmsg" - volumes: - - "/:/rootfs:ro" - - "/var/run:/var/run:ro" - - "/sys:/sys:ro" - - "/var/lib/docker/:/var/lib/docker:ro" - - "/dev/disk/:/dev/disk:ro" - ports: - - "9070:8080" - restart: always \ No newline at end of file + - GF_SMTP_ENABLED=false \ No newline at end of file diff --git a/docker/loki/compose.yaml b/docker/loki/compose.yaml new file mode 100644 index 0000000..f1ae608 --- /dev/null +++ b/docker/loki/compose.yaml @@ -0,0 +1,24 @@ +networks: + default: + name: eigen-monitoring + external: true + +services: + loki: + image: grafana/loki:3.0.0 + container_name: loki + volumes: + - ./loki.yaml:/mnt/config/loki.yaml + ports: + - "3100:3100" + command: -config.file=/mnt/config/loki.yaml + + promtail: + image: grafana/promtail:3.0.0 + container_name: promtail + volumes: + - ./promtail.yaml:/mnt/config/promtail.yaml + - /var/log:/var/log + depends_on: + - loki + command: -config.file=/mnt/config/promtail.yaml diff --git a/docker/loki/loki.yaml b/docker/loki/loki.yaml new file mode 100644 index 0000000..0aa75f6 --- /dev/null +++ b/docker/loki/loki.yaml @@ -0,0 +1,58 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + instance_addr: 127.0.0.1 + path_prefix: /tmp/loki + storage: + filesystem: + chunks_directory: /tmp/loki/chunks + rules_directory: /tmp/loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +storage_config: + aws: + s3: https://eigen:secret@api.minio.eigen.co.id:443 + s3forcepathstyle: true + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/index_cache + cache_ttl: 24h + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +ruler: + alertmanager_url: http://localhost:9093 +# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration +# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/ +# +# Statistics help us better understand how Loki is used, and they show us performance +# levels for most users. This helps us prioritize features and documentation. +# For more information on what's sent, look at +# https://github.com/grafana/loki/blob/main/pkg/analytics/stats.go +# Refer to the buildReport method to see what goes into a report. 
+# +# If you would like to disable reporting, uncomment the following lines: +#analytics: +# reporting_enabled: false diff --git a/docker/loki/promtail.yaml b/docker/loki/promtail.yaml new file mode 100644 index 0000000..fda5e0d --- /dev/null +++ b/docker/loki/promtail.yaml @@ -0,0 +1,22 @@ + server: + http_listen_port: 9080 + grpc_listen_port: 0 + + positions: + filename: /tmp/positions.yaml + + clients: + - url: http://loki:3100/loki/api/v1/push + + scrape_configs: + - job_name: system + static_configs: + - targets: + - localhost + labels: + job: varlogs + __path__: /var/log/*log + + limits_config: + readline_rate_enabled: true + max_line_size: 256Kb diff --git a/docker/minio/compose.yaml b/docker/minio/compose.yaml index 8011f4d..7e4ee54 100644 --- a/docker/minio/compose.yaml +++ b/docker/minio/compose.yaml @@ -27,7 +27,7 @@ services: - "./.data:/data" environment: MINIO_ROOT_USER: eigen - MINIO_ROOT_PASSWORD: Eigen3m! + MINIO_ROOT_PASSWORD: secret MINIO_SERVER_URL: http://minio:9000 MINIO_BROWSER_REDIRECT_URL: http://console.eigen.research MINIO_SITE_REGION: ap-indonesia-1 diff --git a/docker/prometheus/compose.yaml b/docker/prometheus/compose.yaml new file mode 100644 index 0000000..70e1c5f --- /dev/null +++ b/docker/prometheus/compose.yaml @@ -0,0 +1,26 @@ +networks: + default: + name: eigen-monitoring + external: true + +services: + prometheus: + image: prom/prometheus:latest + restart: unless-stopped + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml + - ./.data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.retention.time=60d' + - '--storage.tsdb.min-block-duration=2h' + - '--storage.tsdb.max-block-duration=2h' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--web.enable-lifecycle' + - '--web.enable-admin-api' + - '--web.enable-remote-write-receiver' + - '--enable-feature=native-histograms' + ports: + - "9090:9090" \ No newline at end of file diff --git a/docker/prometheus/prometheus.yml b/docker/prometheus/prometheus.yml new file mode 100644 index 0000000..4634058 --- /dev/null +++ b/docker/prometheus/prometheus.yml @@ -0,0 +1,647 @@ +global: + scrape_interval: 15s + external_labels: + cluster: "id-prometheus-1" + +scrape_configs: + # - job_name: "kubernetes-eigen-core-metrics-state" + # metrics_path: /metrics + # scheme: http + # static_configs: + # - targets: ["172.10.10.3:30080"] + # labels: + # cluster: "kubernetes-eigen-core" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.3:.*" + # target_label: instance + # replacement: "eigen-master-1" + # - job_name: "kubernetes-eigen-core-metrics-kubelet" + # scheme: https + # metrics_path: /metrics + # tls_config: + # ca_file: /credentials/kubernetes-eigen-core/ca.crt + # cert_file: /credentials/kubernetes-eigen-core/client.crt + # key_file: /credentials/kubernetes-eigen-core/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.3:10250"] + # labels: + # cluster: "kubernetes-eigen-core" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.3:.*" + # target_label: instance + # replacement: "eigen-master-1" + # - job_name: "kubernetes-eigen-core-metrics-resource" + # scheme: https + # metrics_path: /metrics/resource + # tls_config: + # ca_file: /credentials/kubernetes-eigen-core/ca.crt + # cert_file: /credentials/kubernetes-eigen-core/client.crt + # key_file: 
/credentials/kubernetes-eigen-core/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.3:10250"] + # labels: + # cluster: "kubernetes-eigen-core" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.3:.*" + # target_label: instance + # replacement: "eigen-master-1" + # - job_name: "kubernetes-eigen-core-metrics-cadvisor" + # scheme: https + # metrics_path: /metrics/cadvisor + # tls_config: + # ca_file: /credentials/kubernetes-eigen-core/ca.crt + # cert_file: /credentials/kubernetes-eigen-core/client.crt + # key_file: /credentials/kubernetes-eigen-core/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.3:10250"] + # labels: + # cluster: "kubernetes-eigen-core" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.3:.*" + # target_label: instance + # replacement: "eigen-master-1" + + # - job_name: "kubernetes-eigen-internal-metrics-state" + # metrics_path: /metrics + # scheme: http + # static_configs: + # - targets: ["172.10.10.4:30080"] + # labels: + # cluster: "kubernetes-eigen-internal" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.4:.*" + # target_label: instance + # replacement: "eigen-master-2" + # - job_name: "kubernetes-eigen-internal-metrics-kubelet" + # scheme: https + # metrics_path: /metrics + # tls_config: + # ca_file: /credentials/kubernetes-eigen-internal/ca.crt + # cert_file: /credentials/kubernetes-eigen-internal/client.crt + # key_file: /credentials/kubernetes-eigen-internal/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.4:10250"] + # labels: + # cluster: "kubernetes-eigen-internal" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.4:.*" + # target_label: instance + # replacement: "eigen-master-2" + # - job_name: "kubernetes-eigen-internal-metrics-resource" + # scheme: https + # metrics_path: /metrics/resource + # tls_config: + # ca_file: /credentials/kubernetes-eigen-internal/ca.crt + # cert_file: /credentials/kubernetes-eigen-internal/client.crt + # key_file: /credentials/kubernetes-eigen-internal/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.4:10250"] + # labels: + # cluster: "kubernetes-eigen-internal" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.4:.*" + # target_label: instance + # replacement: "eigen-master-2" + # - job_name: "kubernetes-eigen-internal-metrics-cadvisor" + # scheme: https + # metrics_path: /metrics/cadvisor + # tls_config: + # ca_file: /credentials/kubernetes-eigen-internal/ca.crt + # cert_file: /credentials/kubernetes-eigen-internal/client.crt + # key_file: /credentials/kubernetes-eigen-internal/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.4:10250"] + # labels: + # cluster: "kubernetes-eigen-internal" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.4:.*" + # target_label: instance + # replacement: "eigen-master-2" + + # - job_name: "kubernetes-eigen-external-metrics-state" + # metrics_path: /metrics + # scheme: http + # static_configs: + # - targets: ["172.10.10.5:30080"] + # labels: + # cluster: "kubernetes-eigen-external" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.5:.*" + # target_label: instance + # replacement: "eigen-master-3" + # - job_name: "kubernetes-eigen-external-metrics-kubelet" + # scheme: https + # metrics_path: /metrics + # 
tls_config: + # ca_file: /credentials/kubernetes-eigen-external/ca.crt + # cert_file: /credentials/kubernetes-eigen-external/client.crt + # key_file: /credentials/kubernetes-eigen-external/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.5:10250"] + # labels: + # cluster: "kubernetes-eigen-external" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.5:.*" + # target_label: instance + # replacement: "eigen-master-3" + # - job_name: "kubernetes-eigen-external-metrics-resource" + # scheme: https + # metrics_path: /metrics/resource + # tls_config: + # ca_file: /credentials/kubernetes-eigen-external/ca.crt + # cert_file: /credentials/kubernetes-eigen-external/client.crt + # key_file: /credentials/kubernetes-eigen-external/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.5:10250"] + # labels: + # cluster: "kubernetes-eigen-external" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.5:.*" + # target_label: instance + # replacement: "eigen-master-3" + # - job_name: "kubernetes-eigen-external-metrics-cadvisor" + # scheme: https + # metrics_path: /metrics/cadvisor + # tls_config: + # ca_file: /credentials/kubernetes-eigen-external/ca.crt + # cert_file: /credentials/kubernetes-eigen-external/client.crt + # key_file: /credentials/kubernetes-eigen-external/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.10.5:10250"] + # labels: + # cluster: "kubernetes-eigen-external" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.10.5:.*" + # target_label: instance + # replacement: "eigen-master-3" + + # - job_name: "kubernetes-ifgf-jakarta-metrics-state" + # metrics_path: /metrics + # scheme: http + # static_configs: + # - targets: ["139.162.15.217:30081"] + # labels: + # cluster: "kubernetes-ifgf-jakarta" + # relabel_configs: + # - source_labels: [__address__] + # regex: "139.162.15.217:.*" + # target_label: instance + # replacement: "ifgf-sg-1" + # - job_name: "kubernetes-ifgf-jakarta-metrics-kubelet" + # scheme: https + # metrics_path: /metrics + # authorization: + # type: Bearer + # credentials: c3VHMFR1VHQrM2FhSmpxRmYwZnQ0UkdjSXRhZ0NpcEtvYUxPWUtLaGFkUT0K + # # credentials_file: /credentials/kubernetes-ifgf-jakarta/token + # tls_config: + # ca_file: /credentials/kubernetes-ifgf-jakarta/ca.crt + # insecure_skip_verify: true + # static_configs: + # - targets: ["139.162.15.217:16443"] + # labels: + # cluster: "kubernetes-ifgf-jakarta" + # relabel_configs: + # - source_labels: [__address__] + # regex: "139.162.15.217:.*" + # target_label: instance + # replacement: "ifgf-sg-1" + # - job_name: "kubernetes-ifgf-jakarta-metrics-resource" + # scheme: https + # metrics_path: /metrics/resource + # authorization: + # type: Bearer + # credentials: c3VHMFR1VHQrM2FhSmpxRmYwZnQ0UkdjSXRhZ0NpcEtvYUxPWUtLaGFkUT0K + # # credentials_file: /credentials/kubernetes-ifgf-jakarta/token + # tls_config: + # ca_file: /credentials/kubernetes-ifgf-jakarta/ca.crt + # insecure_skip_verify: true + # static_configs: + # - targets: ["139.162.15.217:10250"] + # labels: + # cluster: "kubernetes-ifgf-jakarta" + # relabel_configs: + # - source_labels: [__address__] + # regex: "139.162.15.217:.*" + # target_label: instance + # replacement: "ifgf-sg-1" + # - job_name: "kubernetes-ifgf-jakarta-metrics-cadvisor" + # scheme: https + # metrics_path: /metrics/cadvisor + # authorization: + # type: Bearer + # credentials: 
c3VHMFR1VHQrM2FhSmpxRmYwZnQ0UkdjSXRhZ0NpcEtvYUxPWUtLaGFkUT0K + # # credentials_file: /credentials/kubernetes-ifgf-jakarta/token + # tls_config: + # ca_file: /credentials/kubernetes-ifgf-jakarta/ca.crt + # insecure_skip_verify: true + # static_configs: + # - targets: ["139.162.15.217:10250"] + # labels: + # cluster: "kubernetes-ifgf-jakarta" + # relabel_configs: + # - source_labels: [__address__] + # regex: "139.162.15.217:.*" + # target_label: instance + # replacement: "ifgf-sg-1" + + # - job_name: "kubernetes-pmps-local-metrics-state" + # metrics_path: /metrics + # scheme: http + # static_configs: + # - targets: + # [ + # "172.10.11.2:30080", + # "172.10.11.3:30080", + # "172.10.11.4:30080", + # "172.10.11.5:30080", + # "172.10.11.6:30080", + # "172.10.11.7:30080", + # ] + # labels: + # cluster: "kubernetes-pmps-local" + # relabel_configs: + # # Master 1 + # - action: replace + # source_labels: [host_ip, internal_ip] + # regex: "172.10.11.2:.*|.*172.10.11.2.*|.*172.10.11.2.*" + # target_label: instance + # replacement: "pmps-master-1" + # # Master 2 + # - action: replace + # source_labels: [host_ip, internal_ip] + # regex: "172.10.11.3:.*|.*172.10.11.3.*|.*172.10.11.3.*" + # target_label: instance + # replacement: "pmps-master-2" + # # Worker 1 + # - action: replace + # source_labels: [host_ip, internal_ip] + # regex: "172.10.11.4:.*|.*172.10.11.4.*|.*172.10.11.4.*" + # target_label: instance + # replacement: "pmps-worker-1" + # # Worker 2 + # - action: replace + # source_labels: [host_ip, internal_ip] + # regex: "172.10.11.5:.*|.*172.10.11.5.*|.*172.10.11.5.*" + # target_label: instance + # replacement: "pmps-worker-2" + # # Worker 3 + # - action: replace + # source_labels: [host_ip, internal_ip] + # regex: "172.10.11.6:.*|.*172.10.11.6.*|.*172.10.11.6.*" + # target_label: instance + # replacement: "pmps-worker-3" + # # Worker 4 + # - action: replace + # source_labels: [host_ip, internal_ip] + # regex: "172.10.11.7:.*|.*172.10.11.7.*|.*172.10.11.7.*" + # target_label: instance + # replacement: "pmps-worker-4" + # - job_name: "kubernetes-pmps-local-metrics-kubelet" + # scheme: https + # metrics_path: /metrics + # tls_config: + # ca_file: /credentials/kubernetes-pmps-local/ca.crt + # cert_file: /credentials/kubernetes-pmps-local/client.crt + # key_file: /credentials/kubernetes-pmps-local/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.11.2:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.3:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.4:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.5:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.6:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.7:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.11.2:.*" + # target_label: instance + # replacement: "pmps-master-1" + # - source_labels: [__address__] + # regex: "172.10.11.3:.*" + # target_label: instance + # replacement: "pmps-master-2" + # - source_labels: [__address__] + # regex: "172.10.11.4:.*" + # target_label: instance + # replacement: "pmps-worker-1" + # - source_labels: [__address__] + # regex: "172.10.11.5:.*" + # target_label: instance + # replacement: "pmps-worker-2" + # - source_labels: [__address__] + # regex: "172.10.11.6:.*" + # target_label: instance + # replacement: "pmps-worker-3" + # - 
source_labels: [__address__] + # regex: "172.10.11.7:.*" + # target_label: instance + # replacement: "pmps-worker-4" + # - job_name: "kubernetes-pmps-local-metrics-resource" + # scheme: https + # metrics_path: /metrics/resource + # tls_config: + # ca_file: /credentials/kubernetes-pmps-local/ca.crt + # cert_file: /credentials/kubernetes-pmps-local/client.crt + # key_file: /credentials/kubernetes-pmps-local/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.11.2:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.3:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.4:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.5:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.6:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.7:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.11.2:.*" + # target_label: instance + # replacement: "pmps-master-1" + # - source_labels: [__address__] + # regex: "172.10.11.3:.*" + # target_label: instance + # replacement: "pmps-master-2" + # - source_labels: [__address__] + # regex: "172.10.11.4:.*" + # target_label: instance + # replacement: "pmps-worker-1" + # - source_labels: [__address__] + # regex: "172.10.11.5:.*" + # target_label: instance + # replacement: "pmps-worker-2" + # - source_labels: [__address__] + # regex: "172.10.11.6:.*" + # target_label: instance + # replacement: "pmps-worker-3" + # - source_labels: [__address__] + # regex: "172.10.11.7:.*" + # target_label: instance + # replacement: "pmps-worker-4" + # - job_name: "kubernetes-pmps-local-metrics-cadvisor" + # scheme: https + # metrics_path: /metrics/cadvisor + # tls_config: + # ca_file: /credentials/kubernetes-pmps-local/ca.crt + # cert_file: /credentials/kubernetes-pmps-local/client.crt + # key_file: /credentials/kubernetes-pmps-local/client.key + # insecure_skip_verify: true + # static_configs: + # - targets: ["172.10.11.2:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.3:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.4:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.5:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.6:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # - targets: ["172.10.11.7:10250"] + # labels: + # cluster: "kubernetes-pmps-local" + # relabel_configs: + # - source_labels: [__address__] + # regex: "172.10.11.2:.*" + # target_label: instance + # replacement: "pmps-master-1" + # - source_labels: [__address__] + # regex: "172.10.11.3:.*" + # target_label: instance + # replacement: "pmps-master-2" + # - source_labels: [__address__] + # regex: "172.10.11.4:.*" + # target_label: instance + # replacement: "pmps-worker-1" + # - source_labels: [__address__] + # regex: "172.10.11.5:.*" + # target_label: instance + # replacement: "pmps-worker-2" + # - source_labels: [__address__] + # regex: "172.10.11.6:.*" + # target_label: instance + # replacement: "pmps-worker-3" + # - source_labels: [__address__] + # regex: "172.10.11.7:.*" + # target_label: instance + # replacement: "pmps-worker-4" + + # - job_name: "tempo" + # static_configs: + # - targets: + # - "tempo:3200" + + # - job_name: minio-job + # metrics_path: /minio/v2/metrics/cluster + # scheme: http + # 
static_configs: + # - targets: ["172.10.10.2:5000"] + # - job_name: "ifgf-semarang" + # static_configs: + # - targets: ["165.232.160.64:31110"] + # - job_name: "phillipworks" + # static_configs: + # - targets: ["54.151.227.26:9100"] + + # - job_name: "eigen" + # static_configs: + # - targets: ["172.10.10.2:9100"] + # labels: + # instance: "eigen-storage-1" + # - targets: ["172.10.10.3:9100"] + # labels: + # cluster: "kubernetes-eigen-core" + # instance: "eigen-master-1" + # - targets: ["172.10.10.4:9100"] + # labels: + # cluster: "kubernetes-eigen-internal" + # instance: "eigen-master-2" + # - targets: ["172.10.10.5:9100"] + # labels: + # cluster: "kubernetes-eigen-external" + # instance: "eigen-master-3" + # - targets: ["172.10.10.10:9100"] + # labels: + # instance: "eigen-docker-1" + + - job_name: "cadvisor" + static_configs: + - targets: ["cadvisor:8080"] + labels: + instance: "eigen-storage-1" + - targets: ["172.10.10.10:9070"] + labels: + instance: "eigen-docker-1" + + # - job_name: "traefik" + # static_configs: + # - targets: ["172.10.10.10:8082"] + # labels: + # instance: "eigen-docker-1" + + # - job_name: "pipamas" + # static_configs: + # - targets: ["172.10.11.2:9100"] + # labels: + # cluster: "kubernetes-pmps-local" + # instance: "pmps-master-1" + # - targets: ["172.10.11.3:9100"] + # labels: + # cluster: "kubernetes-pmps-local" + # instance: "pmps-master-2" + # - targets: ["172.10.11.4:9100"] + # labels: + # cluster: "kubernetes-pmps-local" + # instance: "pmps-worker-1" + # - targets: ["172.10.11.5:9100"] + # labels: + # cluster: "kubernetes-pmps-local" + # instance: "pmps-worker-2" + # - targets: ["172.10.11.6:9100"] + # labels: + # cluster: "kubernetes-pmps-local" + # instance: "pmps-worker-3" + # - targets: ["172.10.11.7:9100"] + # labels: + # cluster: "kubernetes-pmps-local" + # instance: "pmps-worker-4" + + # - job_name: "postgresql-exporter" + # static_configs: + # - targets: ["172.10.10.4:30187"] + # labels: + # namespace: "eigen-erp-test" + # kubernetes_namespace: "eigen-erp-test" + # cluster: "kubernetes-eigen-internal" + # instance: "eigen-master-2" + # release: "postgresql" + # - targets: ["172.10.11.6:30187"] + # labels: + # namespace: "erp-db-postgresql" + # kubernetes_namespace: "erp-db-postgresql" + # cluster: "kubernetes-pmps-local" + # instance: "pmps-worker-3" + # release: "postgresql" + # - targets: ["172.10.10.5:30189"] + # labels: + # namespace: "weplay-pos-testing" + # kubernetes_namespace: "weplay-pos-testing" + # cluster: "kubernetes-eigen-external" + # instance: "eigen-master-3" + # release: "postgresql" + # - targets: ["172.10.10.5:30188"] + # labels: + # namespace: "wg-testing" + # kubernetes_namespace: "wg-testing" + # cluster: "kubernetes-eigen-external" + # instance: "eigen-master-3" + # release: "postgresql" + # - targets: ["172.10.10.5:30187"] + # labels: + # namespace: "ijem-testing" + # kubernetes_namespace: "ijem-testing" + # cluster: "kubernetes-eigen-external" + # instance: "eigen-master-3" + # release: "postgresql" + + # - job_name: "pipamas-tracking-system" + # static_configs: + # - targets: ["10.1.0.101:9100"] + - job_name: "process-exporter" + static_configs: + - targets: ["172.10.10.2:9256"] + labels: + cluster: "eigen-storage-1" + instance: "172.10.10.2" + - job_name: "node-exporter" + static_configs: + - targets: ["10.1.0.101:9100"] + labels: + cluster: "pipamas-tracking-system" + instance: "10.1.0.101" + - targets: ["54.151.227.26:9100"] + labels: + cluster: "phillipworks" + instance: "54.151.227.26" + # - job_name: "ifgf-bandung" + # 
static_configs: + # - targets: ["172.105.126.186:9100"] + # - job_name: "ifgf-jakarta" + # static_configs: + # - targets: ["139.162.15.217:9100"] + # labels: + # cluster: "kubernetes-ifgf-jakarta" + # instance: "ifgf-sg-1" + # - job_name: "maja" + # static_configs: + # - targets: ["147.93.29.222:9100"] + # - job_name: "ifgf-global" + # static_configs: + # - targets: ["192.53.116.11:9100"] + #- job_name: "benchmark-maja-production" + # static_configs: + # - targets: ['34.87.148.13:9100'] +remote_write: + # - url: "http://thanos-receiver:10908/api/v1/receive" + #write_relabel_configs: + # - source_labels: [__name__] + # regex: ".*" + # action: keep + #queue_config: + # batch_send_deadline: 5s + # max_samples_per_send: 500 + # capacity: 2500 + # min_shards: 1 + # max_shards: 100 + - url: "http://172.10.10.6:30291/api/v1/receive" +#feature_gates: +# enable_native_histograms: true diff --git a/docker/pyroscope/compose.yaml b/docker/pyroscope/compose.yaml new file mode 100644 index 0000000..b162cea --- /dev/null +++ b/docker/pyroscope/compose.yaml @@ -0,0 +1,17 @@ +networks: + default: + name: eigen-monitoring + external: true + +services: + pyroscope: + image: grafana/pyroscope:latest + environment: + JAEGER_AGENT_HOST: distributor + JAEGER_SAMPLER_TYPE: const + JAEGER_SAMPLER_PARAM: 1 + command: ["-config.file=/etc/pyroscope.yml"] + ports: + - "4040:4040" + volumes: + - ./pyroscope.yml:/etc/pyroscope.yml \ No newline at end of file diff --git a/docker/pyroscope/pyroscope.yml b/docker/pyroscope/pyroscope.yml new file mode 100644 index 0000000..4741c7b --- /dev/null +++ b/docker/pyroscope/pyroscope.yml @@ -0,0 +1,6 @@ +tracing: + enabled: true + profiling_enabled: true + +pyroscopedb: + max_block_duration: 5m diff --git a/docker/tempo/compose.yaml b/docker/tempo/compose.yaml new file mode 100644 index 0000000..f2b1b4f --- /dev/null +++ b/docker/tempo/compose.yaml @@ -0,0 +1,39 @@ +networks: + default: + name: eigen-monitoring + external: true + +services: + memcached: + image: bitnami/memcached:latest + container_name: memcached + ports: + - "11211:11211" + environment: + - MEMCACHED_CACHE_SIZE=128 + - MEMCACHED_THREADS=4 + + init-tempo: + image: &tempoImage grafana/tempo:latest + user: root + entrypoint: + - "chown" + - "10001:10001" + - "/var/tempo" + volumes: + - ./.data:/var/tempo + + tempo: + image: *tempoImage + command: ["-config.file=/etc/tempo.yaml"] + volumes: + - ./tempo.yaml:/etc/tempo.yaml + - ./.data:/var/tempo + ports: + - "14268:14268" # jaeger ingest + - "3200:3200" # tempo + - "9095:9095" # tempo grpc + - "9411:9411" # zipkin + depends_on: + - init-tempo + - memcached diff --git a/docker/tempo/tempo.yaml b/docker/tempo/tempo.yaml new file mode 100644 index 0000000..cf33f01 --- /dev/null +++ b/docker/tempo/tempo.yaml @@ -0,0 +1,95 @@ +stream_over_http_enabled: true +server: + http_listen_port: 3200 + log_level: info + +cache: + background: + writeback_goroutines: 5 + caches: + - roles: + - frontend-search + memcached: + host: memcached:11211 + +query_frontend: + search: + duration_slo: 5s + throughput_bytes_slo: 1.073741824e+09 + metadata_slo: + duration_slo: 5s + throughput_bytes_slo: 1.073741824e+09 + trace_by_id: + duration_slo: 100ms + metrics: + max_duration: 120h # maximum duration of a metrics query, increase for local setups + query_backend_after: 5m + duration_slo: 5s + throughput_bytes_slo: 1.073741824e+09 + +distributor: + receivers: # this configuration will listen on all ports and protocols that tempo is capable of. 
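+    # Of these receivers, only OTLP (gRPC 4317 / HTTP 4318) appears to be
+    # exercised by the Alloy exporter added in this change; the rest are
+    # optional extras carried over from the upstream example config.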
+    jaeger:                          # the receivers all come from the OpenTelemetry collector. more configuration information can
+      protocols:                     # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver
+        thrift_http:                 #
+          endpoint: "tempo:14268"    # for a production deployment you should only enable the receivers you need!
+        grpc:
+          endpoint: "tempo:14250"
+        thrift_binary:
+          endpoint: "tempo:6832"
+        thrift_compact:
+          endpoint: "tempo:6831"
+    zipkin:
+      endpoint: "tempo:9411"
+    otlp:
+      protocols:
+        grpc:
+          endpoint: "tempo:4317"
+        http:
+          endpoint: "tempo:4318"
+    opencensus:
+      endpoint: "tempo:55678"
+
+ingester:
+  max_block_duration: 5m             # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
+
+compactor:
+  compaction:
+    block_retention: 24h             # overall Tempo trace retention. set for demo purposes
+
+metrics_generator:
+  registry:
+    external_labels:
+      source: tempo
+      cluster: docker-compose
+  storage:
+    path: /var/tempo/generator/wal
+    remote_write:
+      - url: http://prometheus:9090/api/v1/write
+        send_exemplars: true
+  traces_storage:
+    path: /var/tempo/generator/traces
+  processor:
+    local_blocks:
+      filter_server_spans: false
+      flush_to_storage: true
+
+storage:
+  trace:
+    backend: s3
+    s3:
+      bucket: tempo
+      endpoint: api.minio.eigen.co.id
+      access_key: eigen
+      secret_key: secret
+      insecure: false
+    wal:
+      path: /var/tempo/wal           # where to store the wal locally
+    local:
+      path: /var/tempo/blocks
+
+overrides:
+  defaults:
+    metrics_generator:
+      processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator
+      generate_native_histograms: both
diff --git a/docker/thanos/bucket.yaml b/docker/thanos/bucket.yaml
new file mode 100644
index 0000000..6cf459b
--- /dev/null
+++ b/docker/thanos/bucket.yaml
@@ -0,0 +1,9 @@
+type: S3
+config:
+  bucket: "thanos"
+  endpoint: "minio:9000"
+  access_key: "eigen"
+  secret_key: "secret"
+  insecure: false
+  trace:
+    enable: true
diff --git a/docker/thanos/compose.yaml b/docker/thanos/compose.yaml
new file mode 100644
index 0000000..46729cb
--- /dev/null
+++ b/docker/thanos/compose.yaml
@@ -0,0 +1,60 @@
+networks:
+  default:
+    name: eigen-monitoring
+    external: true
+
+services:
+  thanos-query:
+    image: thanosio/thanos:v0.36.1
+    command:
+      - query
+      - --http-address=:19192
+      - --grpc-address=:19092
+      - --endpoint=thanos-store:19090
+      - --endpoint=thanos-receiver:10907
+      #- --query.auto-downsampling
+      #- --query.max-concurrent-select=10
+      #- --query.max-concurrent=50
+      #- --query.timeout=1440m
+      #- --query.partial-response
+
+  thanos-store:
+    image: thanosio/thanos:v0.36.1
+    command:
+      - store
+      - --data-dir=/data
+      - --objstore.config-file=/etc/thanos/bucket.yaml
+      - --http-address=:19191
+      - --grpc-address=:19090
+    volumes:
+      - ./.data/store:/data
+      - ./bucket.yaml:/etc/thanos/bucket.yaml
+
+  thanos-compactor:
+    image: thanosio/thanos:v0.36.1
+    command:
+      - compact
+      - --data-dir=/data
+      - --objstore.config-file=/etc/thanos/bucket.yaml
+      - --wait
+      - --wait-interval=5m
+    volumes:
+      - ./.data/compactor:/data
+      - ./bucket.yaml:/etc/thanos/bucket.yaml
+
+  thanos-receiver:
+    image: thanosio/thanos:v0.36.1
+    command:
+      - receive
+      - --grpc-address=:10907
+      - --http-address=:10909
+      - --tsdb.path=/data
+      - --receive.local-endpoint=127.0.0.1:10907
+      - --objstore.config-file=/etc/thanos/bucket.yaml
+      - --label=receive_instance_id="thanos-receiver-1"
+      - --remote-write.address=:10908
+    ports:
+      - "10908:10908"
+    volumes:
+      - ./.data/receiver:/data/default-tenant
+      - ./bucket.yaml:/etc/thanos/bucket.yaml
\ No newline at end of file
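Usage note (a minimal sketch, not part of the diff above): any application that wants to ship telemetry into this stack only needs to join the external eigen-monitoring network and point a standard OpenTelemetry SDK at the Alloy OTLP receiver published by docker/alloy/compose.yaml. The service name and image below are illustrative assumptions.

networks:
  default:
    name: eigen-monitoring
    external: true

services:
  demo-app:
    image: ghcr.io/example/demo-app:latest   # placeholder image, assumption
    environment:
      # Standard OpenTelemetry SDK variables; Alloy's otelcol.receiver.otlp
      # listens on alloy:4017 (gRPC) and alloy:4018 (HTTP) in this stack.
      OTEL_EXPORTER_OTLP_ENDPOINT: http://alloy:4017
      OTEL_EXPORTER_OTLP_PROTOCOL: grpc
      OTEL_SERVICE_NAME: demo-app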