wip: full-fledged monitoring system

main
Supan Adit Pratama 2025-06-19 15:31:20 +07:00
parent 05c4bc5400
commit 4d6eae8398
16 changed files with 1169 additions and 203 deletions

27
docker/alloy/compose.yaml Normal file

@@ -0,0 +1,27 @@
networks:
default:
name: eigen-monitoring
external: true
services:
alloy:
image: grafana/alloy:latest
restart: on-failure
volumes:
- ./config.alloy:/etc/alloy/config.alloy
environment:
REMOTE_WRITE_HOST: 172.10.10.6:30291
LOKI_HOST: 172.10.10.6:30501
TEMPO_HOST: 172.10.10.6:30641
PYROSCOPE_HOST: pyroscope:4040
command:
- run
- /etc/alloy/config.alloy
- --storage.path=/var/lib/alloy/data
- --server.http.listen-addr=0.0.0.0:12345
- --stability.level=experimental
ports:
- "12345:12345"
- "4319:4319"
- "4017:4017"
- "4018:4018"

102
docker/alloy/config.alloy Normal file

@@ -0,0 +1,102 @@
logging {
level = "debug"
// Forward internal logs to the local Loki instance.
write_to = [loki.relabel.alloy_logs.receiver]
}
loki.relabel "alloy_logs" {
rule {
target_label = "instance"
replacement = constants.hostname
}
rule {
target_label = "job"
replacement = "integrations/self"
}
forward_to = [loki.write.loki.receiver]
}
tracing {
// Write all spans. Don't do this in production!
sampling_fraction = 1.0
// Forward internal spans to the local Tempo instance.
write_to = [otelcol.exporter.otlp.tempo.input]
}
prometheus.exporter.self "alloy" {}
prometheus.scrape "alloy" {
targets = prometheus.exporter.self.alloy.targets
forward_to = [prometheus.remote_write.mimir.receiver]
}
pyroscope.scrape "default" {
targets = [
{"__address__" = "localhost:12345", "service_name" = "alloy"},
]
forward_to = [pyroscope.write.pyroscope.receiver]
}
prometheus.remote_write "mimir" {
endpoint {
url = string.format(
"http://%s/api/v1/receive",
coalesce(sys.env("REMOTE_WRITE_HOST"), "localhost:9009"),
)
}
}
loki.write "loki" {
endpoint {
url = string.format(
"http://%s/loki/api/v1/push",
coalesce(sys.env("LOKI_HOST"), "localhost:3100"),
)
}
}
otelcol.receiver.otlp "default" {
grpc {
endpoint = "alloy:4017"
}
http {
endpoint = "alloy:4018"
}
output {
metrics = [otelcol.exporter.otlp.tempo.input]
logs = [otelcol.exporter.otlp.tempo.input]
traces = [otelcol.exporter.otlp.tempo.input]
}
}
otelcol.exporter.otlp "tempo" {
client {
endpoint = coalesce(sys.env("TEMPO_HOST"), "localhost:4317")
wait_for_ready = true
tls {
insecure = true
}
}
sending_queue {
enabled = false
num_consumers = 100
queue_size = 10000
}
}
pyroscope.write "pyroscope" {
endpoint {
url = string.format(
"http://%s",
coalesce(sys.env("PYROSCOPE_HOST"), "localhost:4040"),
)
}
}
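
Applications on the eigen-monitoring network can ship telemetry into the otelcol.receiver.otlp block above on alloy:4017 (gRPC) or alloy:4018 (HTTP). A minimal sketch of a hypothetical instrumented service wired up through the standard OpenTelemetry SDK environment variables (the service name and image are placeholders, not part of this commit):

networks:
  default:
    name: eigen-monitoring
    external: true
services:
  my-app:
    image: my-app:latest                               # placeholder image
    environment:
      OTEL_SERVICE_NAME: my-app                        # hypothetical service name
      OTEL_EXPORTER_OTLP_ENDPOINT: http://alloy:4018   # Alloy's OTLP HTTP receiver
      OTEL_EXPORTER_OTLP_PROTOCOL: http/protobuf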


@@ -0,0 +1,21 @@
networks:
default:
name: eigen-monitoring
external: true
services:
cadvisor:
image: "gcr.io/cadvisor/cadvisor:v0.49.1"
container_name: cadvisor
privileged: true
devices:
- "/dev/kmsg:/dev/kmsg"
volumes:
- "/:/rootfs:ro"
- "/var/run:/var/run:ro"
- "/sys:/sys:ro"
- "/var/lib/docker/:/var/lib/docker:ro"
- "/dev/disk/:/dev/disk:ro"
ports:
- "9070:8080"
restart: always


@@ -1,213 +1,26 @@
networks:
default:
name: eigen-monitoring
external: true
services:
grafana:
image: grafana/grafana:latest
restart: unless-stopped
labels:
- "traefik.enable=true"
- "traefik.http.services.grafana.loadbalancer.server.port=3000"
- "traefik.http.routers.grafana.rule=Host(`grafana.eigen.research`)"
- "traefik.http.routers.grafana.tls=true"
- "traefik.http.routers.grafana.service=grafana"
- "traefik.http.routers.grafana.entryPoints=web,websecure"
volumes:
- ./data/grafana:/var/lib/grafana
- ./.data:/var/lib/grafana
environment:
- GF_SECURITY_ADMIN_PASSWORD=ZTWTWXeZhFs4wg6vMr7M
- GF_USERS_ALLOW_SIGN_UP=false
- GF_SERVER_DOMAIN=old.grafana.eigen.co.id
- GF_SERVER_ROOT_URL=https://old.grafana.eigen.co.id
- GF_SERVER_ROOT_URL=http://grafana.eigen.research
- GF_INSTALL_PLUGINS=grafana-pyroscope-app
- GF_FEATURE_TOGGLES_ENABLE=traceToProfiles tracesEmbeddedFlameGraph
- GF_SMTP_ENABLED=false
ports:
- "3000:3000"
pyroscope:
image: grafana/pyroscope:latest
environment:
JAEGER_AGENT_HOST: distributor
JAEGER_SAMPLER_TYPE: const
JAEGER_SAMPLER_PARAM: 1
command: ["-config.file=/etc/pyroscope.yml"]
ports:
- "4040:4040"
volumes:
- ./config/pyroscope.yml:/etc/pyroscope.yml
memcached:
image: bitnami/memcached:latest
container_name: memcached
ports:
- "11211:11211"
environment:
- MEMCACHED_CACHE_SIZE=128
- MEMCACHED_THREADS=4
init-tempo:
image: &tempoImage grafana/tempo:latest
user: root
entrypoint:
- "chown"
- "10001:10001"
- "/var/tempo"
volumes:
- ./data/tempo:/var/tempo
tempo:
image: *tempoImage
command: [ "-config.file=/etc/tempo.yaml" ]
volumes:
- ./config/tempo-standalone.yaml:/etc/tempo.yaml
- ./data/tempo:/var/tempo
ports:
- "14268:14268" # jaeger ingest
- "3200:3200" # tempo
- "9095:9095" # tempo grpc
- "9411:9411" # zipkin
depends_on:
- init-tempo
- memcached
alloy:
image: grafana/alloy:latest
restart: on-failure
volumes:
- ./config/config.alloy:/etc/alloy/config.alloy
environment:
REMOTE_WRITE_HOST: 172.10.10.6:30291
LOKI_HOST: 172.10.10.6:30501
TEMPO_HOST: 172.10.10.6:30641
PYROSCOPE_HOST: pyroscope:4040
depends_on:
# - thanos-receiver
# - loki
# - tempo
- pyroscope
command:
- run
- /etc/alloy/config.alloy
- --storage.path=/var/lib/alloy/data
- --server.http.listen-addr=0.0.0.0:12345
- --stability.level=experimental
ports:
- "12345:12345"
- "4319:4319"
- "4017:4017"
- "4018:4018"
loki:
image: grafana/loki:3.0.0
container_name: loki
volumes:
- ./config/loki:/mnt/config
ports:
- "3100:3100"
command: -config.file=/mnt/config/loki-config.yaml
promtail:
image: grafana/promtail:3.0.0
container_name: promtail
volumes:
- ./config/loki:/mnt/config
- /var/log:/var/log
depends_on:
- loki
command: -config.file=/mnt/config/promtail-config.yaml
prometheus:
image: prom/prometheus:latest
restart: unless-stopped
user: root
volumes:
- ./config/prometheus.yml:/etc/prometheus/prometheus.yml
- ./data/prometheus:/prometheus
# Credentials
- ./credentials/kubernetes-eigen-core:/credentials/kubernetes-eigen-core
- ./credentials/kubernetes-eigen-internal:/credentials/kubernetes-eigen-internal
- ./credentials/kubernetes-eigen-external:/credentials/kubernetes-eigen-external
- ./credentials/kubernetes-pmps-local:/credentials/kubernetes-pmps-local
- ./credentials/kubernetes-ifgf-jakarta:/credentials/kubernetes-ifgf-jakarta
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.retention.time=60d'
- '--storage.tsdb.min-block-duration=2h'
- '--storage.tsdb.max-block-duration=2h'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--web.enable-lifecycle'
- '--web.enable-admin-api'
- '--web.enable-remote-write-receiver'
- '--enable-feature=native-histograms'
ports:
- "9090:9090"
#depends_on:
# - thanos-receiver
thanos-query:
image: thanosio/thanos:v0.36.1
user: root
command:
- query
- --http-address=:19192
- --grpc-address=:19092
- --endpoint=thanos-store:19090
- --endpoint=thanos-receiver:10907
#- --query.auto-downsampling
#- --query.max-concurrent-select=10
#- --query.max-concurrent=50
#- --query.timeout=1440m
#- --query.partial-response
thanos-store:
image: thanosio/thanos:v0.36.1
user: root
command:
- store
- --data-dir=/data
- --objstore.config-file=/etc/thanos/bucket.yaml
- --http-address=:19191
- --grpc-address=:19090
volumes:
- ./data/thanos/store:/data
- ./config/bucket.yaml:/etc/thanos/bucket.yaml
thanos-compactor:
image: thanosio/thanos:v0.36.1
user: root
command:
- compact
- --data-dir=/data
- --objstore.config-file=/etc/thanos/bucket.yaml
- --wait
- --wait-interval=5m
volumes:
- ./data/thanos/compactor:/data
- ./config/bucket.yaml:/etc/thanos/bucket.yaml
thanos-receiver:
image: thanosio/thanos:v0.36.1
user: root
command:
- receive
- --grpc-address=:10907
- --http-address=:10909
- --tsdb.path=/data
- --receive.local-endpoint=127.0.0.1:10907
- --objstore.config-file=/etc/thanos/bucket.yaml
- --label=receive_instance_id="thanos-receiver-1"
- --remote-write.address=:10908
ports:
- "10908:10908"
volumes:
- ./data/thanos/receiver:/data/default-tenant
- ./config/bucket.yaml:/etc/thanos/bucket.yaml
cadvisor:
image: "gcr.io/cadvisor/cadvisor:v0.49.1"
container_name: cadvisor
privileged: true
devices:
- "/dev/kmsg:/dev/kmsg"
volumes:
- "/:/rootfs:ro"
- "/var/run:/var/run:ro"
- "/sys:/sys:ro"
- "/var/lib/docker/:/var/lib/docker:ro"
- "/dev/disk/:/dev/disk:ro"
ports:
- "9070:8080"
restart: always

24
docker/loki/compose.yaml Normal file

@@ -0,0 +1,24 @@
networks:
default:
name: eigen-monitoring
external: true
services:
loki:
image: grafana/loki:3.0.0
container_name: loki
volumes:
- ./loki.yaml:/mnt/config/loki.yaml
ports:
- "3100:3100"
command: -config.file=/mnt/config/loki.yaml
promtail:
image: grafana/promtail:3.0.0
container_name: promtail
volumes:
- ./promtail.yaml:/mnt/config/promtail.yaml
- /var/log:/var/log
depends_on:
- loki
command: -config.file=/mnt/config/promtail.yaml
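
Promtail above only tails /var/log on the monitoring host. Other containers could also push logs straight to this Loki through Docker's Loki logging driver; a minimal sketch, assuming the grafana/loki-docker-driver plugin is installed on the host (the service is a placeholder):

services:
  my-app:
    image: my-app:latest    # placeholder image
    logging:
      driver: loki          # provided by the loki-docker-driver plugin
      options:
        loki-url: http://localhost:3100/loki/api/v1/push  # published Loki port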

58
docker/loki/loki.yaml Normal file

@@ -0,0 +1,58 @@
auth_enabled: false
server:
http_listen_port: 3100
grpc_listen_port: 9096
common:
instance_addr: 127.0.0.1
path_prefix: /tmp/loki
storage:
filesystem:
chunks_directory: /tmp/loki/chunks
rules_directory: /tmp/loki/rules
replication_factor: 1
ring:
kvstore:
store: inmemory
storage_config:
aws:
s3: https://eigen:secret@api.minio.eigen.co.id:443
s3forcepathstyle: true
tsdb_shipper:
active_index_directory: /loki/index
cache_location: /loki/index_cache
cache_ttl: 24h
query_range:
results_cache:
cache:
embedded_cache:
enabled: true
max_size_mb: 100
schema_config:
configs:
- from: 2020-10-24
store: tsdb
object_store: filesystem
schema: v13
index:
prefix: index_
period: 24h
ruler:
alertmanager_url: http://localhost:9093
# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
#
# Statistics help us better understand how Loki is used, and they show us performance
# levels for most users. This helps us prioritize features and documentation.
# For more information on what's sent, look at
# https://github.com/grafana/loki/blob/main/pkg/analytics/stats.go
# Refer to the buildReport method to see what goes into a report.
#
# If you would like to disable reporting, uncomment the following lines:
#analytics:
# reporting_enabled: false
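
Note that the only schema_config period above uses object_store: filesystem, so the aws S3 client in storage_config is not actually exercised for chunk storage yet. A minimal sketch of the extra schema period that would cut chunks over to the configured MinIO endpoint (the cut-over date is a placeholder and must lie in the future at deploy time):

schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h
    - from: 2025-07-01        # placeholder cut-over date
      store: tsdb
      object_store: s3        # uses the aws/s3 block from storage_config
      schema: v13
      index:
        prefix: index_
        period: 24h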

22
docker/loki/promtail.yaml Normal file

@@ -0,0 +1,22 @@
server:
http_listen_port: 9080
grpc_listen_port: 0
positions:
filename: /tmp/positions.yaml
clients:
- url: http://loki:3100/loki/api/v1/push
scrape_configs:
- job_name: system
static_configs:
- targets:
- localhost
labels:
job: varlogs
__path__: /var/log/*log
limits_config:
readline_rate_enabled: true
max_line_size: 256Kb


@@ -27,7 +27,7 @@ services:
- "./.data:/data"
environment:
MINIO_ROOT_USER: eigen
- MINIO_ROOT_PASSWORD: Eigen3m!
+ MINIO_ROOT_PASSWORD: secret
MINIO_SERVER_URL: http://minio:9000
MINIO_BROWSER_REDIRECT_URL: http://console.eigen.research
MINIO_SITE_REGION: ap-indonesia-1


@@ -0,0 +1,26 @@
networks:
default:
name: eigen-monitoring
external: true
services:
prometheus:
image: prom/prometheus:latest
restart: unless-stopped
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
- ./.data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.retention.time=60d'
- '--storage.tsdb.min-block-duration=2h'
- '--storage.tsdb.max-block-duration=2h'
- '--storage.tsdb.path=/prometheus'
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--web.enable-lifecycle'
- '--web.enable-admin-api'
- '--web.enable-remote-write-receiver'
- '--enable-feature=native-histograms'
ports:
- "9090:9090"


@@ -0,0 +1,647 @@
global:
scrape_interval: 15s
external_labels:
cluster: "id-prometheus-1"
scrape_configs:
# - job_name: "kubernetes-eigen-core-metrics-state"
# metrics_path: /metrics
# scheme: http
# static_configs:
# - targets: ["172.10.10.3:30080"]
# labels:
# cluster: "kubernetes-eigen-core"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.3:.*"
# target_label: instance
# replacement: "eigen-master-1"
# - job_name: "kubernetes-eigen-core-metrics-kubelet"
# scheme: https
# metrics_path: /metrics
# tls_config:
# ca_file: /credentials/kubernetes-eigen-core/ca.crt
# cert_file: /credentials/kubernetes-eigen-core/client.crt
# key_file: /credentials/kubernetes-eigen-core/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.3:10250"]
# labels:
# cluster: "kubernetes-eigen-core"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.3:.*"
# target_label: instance
# replacement: "eigen-master-1"
# - job_name: "kubernetes-eigen-core-metrics-resource"
# scheme: https
# metrics_path: /metrics/resource
# tls_config:
# ca_file: /credentials/kubernetes-eigen-core/ca.crt
# cert_file: /credentials/kubernetes-eigen-core/client.crt
# key_file: /credentials/kubernetes-eigen-core/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.3:10250"]
# labels:
# cluster: "kubernetes-eigen-core"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.3:.*"
# target_label: instance
# replacement: "eigen-master-1"
# - job_name: "kubernetes-eigen-core-metrics-cadvisor"
# scheme: https
# metrics_path: /metrics/cadvisor
# tls_config:
# ca_file: /credentials/kubernetes-eigen-core/ca.crt
# cert_file: /credentials/kubernetes-eigen-core/client.crt
# key_file: /credentials/kubernetes-eigen-core/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.3:10250"]
# labels:
# cluster: "kubernetes-eigen-core"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.3:.*"
# target_label: instance
# replacement: "eigen-master-1"
# - job_name: "kubernetes-eigen-internal-metrics-state"
# metrics_path: /metrics
# scheme: http
# static_configs:
# - targets: ["172.10.10.4:30080"]
# labels:
# cluster: "kubernetes-eigen-internal"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.4:.*"
# target_label: instance
# replacement: "eigen-master-2"
# - job_name: "kubernetes-eigen-internal-metrics-kubelet"
# scheme: https
# metrics_path: /metrics
# tls_config:
# ca_file: /credentials/kubernetes-eigen-internal/ca.crt
# cert_file: /credentials/kubernetes-eigen-internal/client.crt
# key_file: /credentials/kubernetes-eigen-internal/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.4:10250"]
# labels:
# cluster: "kubernetes-eigen-internal"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.4:.*"
# target_label: instance
# replacement: "eigen-master-2"
# - job_name: "kubernetes-eigen-internal-metrics-resource"
# scheme: https
# metrics_path: /metrics/resource
# tls_config:
# ca_file: /credentials/kubernetes-eigen-internal/ca.crt
# cert_file: /credentials/kubernetes-eigen-internal/client.crt
# key_file: /credentials/kubernetes-eigen-internal/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.4:10250"]
# labels:
# cluster: "kubernetes-eigen-internal"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.4:.*"
# target_label: instance
# replacement: "eigen-master-2"
# - job_name: "kubernetes-eigen-internal-metrics-cadvisor"
# scheme: https
# metrics_path: /metrics/cadvisor
# tls_config:
# ca_file: /credentials/kubernetes-eigen-internal/ca.crt
# cert_file: /credentials/kubernetes-eigen-internal/client.crt
# key_file: /credentials/kubernetes-eigen-internal/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.4:10250"]
# labels:
# cluster: "kubernetes-eigen-internal"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.4:.*"
# target_label: instance
# replacement: "eigen-master-2"
# - job_name: "kubernetes-eigen-external-metrics-state"
# metrics_path: /metrics
# scheme: http
# static_configs:
# - targets: ["172.10.10.5:30080"]
# labels:
# cluster: "kubernetes-eigen-external"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.5:.*"
# target_label: instance
# replacement: "eigen-master-3"
# - job_name: "kubernetes-eigen-external-metrics-kubelet"
# scheme: https
# metrics_path: /metrics
# tls_config:
# ca_file: /credentials/kubernetes-eigen-external/ca.crt
# cert_file: /credentials/kubernetes-eigen-external/client.crt
# key_file: /credentials/kubernetes-eigen-external/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.5:10250"]
# labels:
# cluster: "kubernetes-eigen-external"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.5:.*"
# target_label: instance
# replacement: "eigen-master-3"
# - job_name: "kubernetes-eigen-external-metrics-resource"
# scheme: https
# metrics_path: /metrics/resource
# tls_config:
# ca_file: /credentials/kubernetes-eigen-external/ca.crt
# cert_file: /credentials/kubernetes-eigen-external/client.crt
# key_file: /credentials/kubernetes-eigen-external/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.5:10250"]
# labels:
# cluster: "kubernetes-eigen-external"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.5:.*"
# target_label: instance
# replacement: "eigen-master-3"
# - job_name: "kubernetes-eigen-external-metrics-cadvisor"
# scheme: https
# metrics_path: /metrics/cadvisor
# tls_config:
# ca_file: /credentials/kubernetes-eigen-external/ca.crt
# cert_file: /credentials/kubernetes-eigen-external/client.crt
# key_file: /credentials/kubernetes-eigen-external/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.10.5:10250"]
# labels:
# cluster: "kubernetes-eigen-external"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.10.5:.*"
# target_label: instance
# replacement: "eigen-master-3"
# - job_name: "kubernetes-ifgf-jakarta-metrics-state"
# metrics_path: /metrics
# scheme: http
# static_configs:
# - targets: ["139.162.15.217:30081"]
# labels:
# cluster: "kubernetes-ifgf-jakarta"
# relabel_configs:
# - source_labels: [__address__]
# regex: "139.162.15.217:.*"
# target_label: instance
# replacement: "ifgf-sg-1"
# - job_name: "kubernetes-ifgf-jakarta-metrics-kubelet"
# scheme: https
# metrics_path: /metrics
# authorization:
# type: Bearer
# credentials: c3VHMFR1VHQrM2FhSmpxRmYwZnQ0UkdjSXRhZ0NpcEtvYUxPWUtLaGFkUT0K
# # credentials_file: /credentials/kubernetes-ifgf-jakarta/token
# tls_config:
# ca_file: /credentials/kubernetes-ifgf-jakarta/ca.crt
# insecure_skip_verify: true
# static_configs:
# - targets: ["139.162.15.217:16443"]
# labels:
# cluster: "kubernetes-ifgf-jakarta"
# relabel_configs:
# - source_labels: [__address__]
# regex: "139.162.15.217:.*"
# target_label: instance
# replacement: "ifgf-sg-1"
# - job_name: "kubernetes-ifgf-jakarta-metrics-resource"
# scheme: https
# metrics_path: /metrics/resource
# authorization:
# type: Bearer
# credentials: c3VHMFR1VHQrM2FhSmpxRmYwZnQ0UkdjSXRhZ0NpcEtvYUxPWUtLaGFkUT0K
# # credentials_file: /credentials/kubernetes-ifgf-jakarta/token
# tls_config:
# ca_file: /credentials/kubernetes-ifgf-jakarta/ca.crt
# insecure_skip_verify: true
# static_configs:
# - targets: ["139.162.15.217:10250"]
# labels:
# cluster: "kubernetes-ifgf-jakarta"
# relabel_configs:
# - source_labels: [__address__]
# regex: "139.162.15.217:.*"
# target_label: instance
# replacement: "ifgf-sg-1"
# - job_name: "kubernetes-ifgf-jakarta-metrics-cadvisor"
# scheme: https
# metrics_path: /metrics/cadvisor
# authorization:
# type: Bearer
# credentials: c3VHMFR1VHQrM2FhSmpxRmYwZnQ0UkdjSXRhZ0NpcEtvYUxPWUtLaGFkUT0K
# # credentials_file: /credentials/kubernetes-ifgf-jakarta/token
# tls_config:
# ca_file: /credentials/kubernetes-ifgf-jakarta/ca.crt
# insecure_skip_verify: true
# static_configs:
# - targets: ["139.162.15.217:10250"]
# labels:
# cluster: "kubernetes-ifgf-jakarta"
# relabel_configs:
# - source_labels: [__address__]
# regex: "139.162.15.217:.*"
# target_label: instance
# replacement: "ifgf-sg-1"
# - job_name: "kubernetes-pmps-local-metrics-state"
# metrics_path: /metrics
# scheme: http
# static_configs:
# - targets:
# [
# "172.10.11.2:30080",
# "172.10.11.3:30080",
# "172.10.11.4:30080",
# "172.10.11.5:30080",
# "172.10.11.6:30080",
# "172.10.11.7:30080",
# ]
# labels:
# cluster: "kubernetes-pmps-local"
# relabel_configs:
# # Master 1
# - action: replace
# source_labels: [host_ip, internal_ip]
# regex: "172.10.11.2:.*|.*172.10.11.2.*|.*172.10.11.2.*"
# target_label: instance
# replacement: "pmps-master-1"
# # Master 2
# - action: replace
# source_labels: [host_ip, internal_ip]
# regex: "172.10.11.3:.*|.*172.10.11.3.*|.*172.10.11.3.*"
# target_label: instance
# replacement: "pmps-master-2"
# # Worker 1
# - action: replace
# source_labels: [host_ip, internal_ip]
# regex: "172.10.11.4:.*|.*172.10.11.4.*|.*172.10.11.4.*"
# target_label: instance
# replacement: "pmps-worker-1"
# # Worker 2
# - action: replace
# source_labels: [host_ip, internal_ip]
# regex: "172.10.11.5:.*|.*172.10.11.5.*|.*172.10.11.5.*"
# target_label: instance
# replacement: "pmps-worker-2"
# # Worker 3
# - action: replace
# source_labels: [host_ip, internal_ip]
# regex: "172.10.11.6:.*|.*172.10.11.6.*|.*172.10.11.6.*"
# target_label: instance
# replacement: "pmps-worker-3"
# # Worker 4
# - action: replace
# source_labels: [host_ip, internal_ip]
# regex: "172.10.11.7:.*|.*172.10.11.7.*|.*172.10.11.7.*"
# target_label: instance
# replacement: "pmps-worker-4"
# - job_name: "kubernetes-pmps-local-metrics-kubelet"
# scheme: https
# metrics_path: /metrics
# tls_config:
# ca_file: /credentials/kubernetes-pmps-local/ca.crt
# cert_file: /credentials/kubernetes-pmps-local/client.crt
# key_file: /credentials/kubernetes-pmps-local/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.11.2:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.3:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.4:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.5:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.6:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.7:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.11.2:.*"
# target_label: instance
# replacement: "pmps-master-1"
# - source_labels: [__address__]
# regex: "172.10.11.3:.*"
# target_label: instance
# replacement: "pmps-master-2"
# - source_labels: [__address__]
# regex: "172.10.11.4:.*"
# target_label: instance
# replacement: "pmps-worker-1"
# - source_labels: [__address__]
# regex: "172.10.11.5:.*"
# target_label: instance
# replacement: "pmps-worker-2"
# - source_labels: [__address__]
# regex: "172.10.11.6:.*"
# target_label: instance
# replacement: "pmps-worker-3"
# - source_labels: [__address__]
# regex: "172.10.11.7:.*"
# target_label: instance
# replacement: "pmps-worker-4"
# - job_name: "kubernetes-pmps-local-metrics-resource"
# scheme: https
# metrics_path: /metrics/resource
# tls_config:
# ca_file: /credentials/kubernetes-pmps-local/ca.crt
# cert_file: /credentials/kubernetes-pmps-local/client.crt
# key_file: /credentials/kubernetes-pmps-local/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.11.2:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.3:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.4:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.5:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.6:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.7:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.11.2:.*"
# target_label: instance
# replacement: "pmps-master-1"
# - source_labels: [__address__]
# regex: "172.10.11.3:.*"
# target_label: instance
# replacement: "pmps-master-2"
# - source_labels: [__address__]
# regex: "172.10.11.4:.*"
# target_label: instance
# replacement: "pmps-worker-1"
# - source_labels: [__address__]
# regex: "172.10.11.5:.*"
# target_label: instance
# replacement: "pmps-worker-2"
# - source_labels: [__address__]
# regex: "172.10.11.6:.*"
# target_label: instance
# replacement: "pmps-worker-3"
# - source_labels: [__address__]
# regex: "172.10.11.7:.*"
# target_label: instance
# replacement: "pmps-worker-4"
# - job_name: "kubernetes-pmps-local-metrics-cadvisor"
# scheme: https
# metrics_path: /metrics/cadvisor
# tls_config:
# ca_file: /credentials/kubernetes-pmps-local/ca.crt
# cert_file: /credentials/kubernetes-pmps-local/client.crt
# key_file: /credentials/kubernetes-pmps-local/client.key
# insecure_skip_verify: true
# static_configs:
# - targets: ["172.10.11.2:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.3:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.4:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.5:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.6:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# - targets: ["172.10.11.7:10250"]
# labels:
# cluster: "kubernetes-pmps-local"
# relabel_configs:
# - source_labels: [__address__]
# regex: "172.10.11.2:.*"
# target_label: instance
# replacement: "pmps-master-1"
# - source_labels: [__address__]
# regex: "172.10.11.3:.*"
# target_label: instance
# replacement: "pmps-master-2"
# - source_labels: [__address__]
# regex: "172.10.11.4:.*"
# target_label: instance
# replacement: "pmps-worker-1"
# - source_labels: [__address__]
# regex: "172.10.11.5:.*"
# target_label: instance
# replacement: "pmps-worker-2"
# - source_labels: [__address__]
# regex: "172.10.11.6:.*"
# target_label: instance
# replacement: "pmps-worker-3"
# - source_labels: [__address__]
# regex: "172.10.11.7:.*"
# target_label: instance
# replacement: "pmps-worker-4"
# - job_name: "tempo"
# static_configs:
# - targets:
# - "tempo:3200"
# - job_name: minio-job
# metrics_path: /minio/v2/metrics/cluster
# scheme: http
# static_configs:
# - targets: ["172.10.10.2:5000"]
# - job_name: "ifgf-semarang"
# static_configs:
# - targets: ["165.232.160.64:31110"]
# - job_name: "phillipworks"
# static_configs:
# - targets: ["54.151.227.26:9100"]
# - job_name: "eigen"
# static_configs:
# - targets: ["172.10.10.2:9100"]
# labels:
# instance: "eigen-storage-1"
# - targets: ["172.10.10.3:9100"]
# labels:
# cluster: "kubernetes-eigen-core"
# instance: "eigen-master-1"
# - targets: ["172.10.10.4:9100"]
# labels:
# cluster: "kubernetes-eigen-internal"
# instance: "eigen-master-2"
# - targets: ["172.10.10.5:9100"]
# labels:
# cluster: "kubernetes-eigen-external"
# instance: "eigen-master-3"
# - targets: ["172.10.10.10:9100"]
# labels:
# instance: "eigen-docker-1"
- job_name: "cadvisor"
static_configs:
- targets: ["cadvisor:8080"]
labels:
instance: "eigen-storage-1"
- targets: ["172.10.10.10:9070"]
labels:
instance: "eigen-docker-1"
# - job_name: "traefik"
# static_configs:
# - targets: ["172.10.10.10:8082"]
# labels:
# instance: "eigen-docker-1"
# - job_name: "pipamas"
# static_configs:
# - targets: ["172.10.11.2:9100"]
# labels:
# cluster: "kubernetes-pmps-local"
# instance: "pmps-master-1"
# - targets: ["172.10.11.3:9100"]
# labels:
# cluster: "kubernetes-pmps-local"
# instance: "pmps-master-2"
# - targets: ["172.10.11.4:9100"]
# labels:
# cluster: "kubernetes-pmps-local"
# instance: "pmps-worker-1"
# - targets: ["172.10.11.5:9100"]
# labels:
# cluster: "kubernetes-pmps-local"
# instance: "pmps-worker-2"
# - targets: ["172.10.11.6:9100"]
# labels:
# cluster: "kubernetes-pmps-local"
# instance: "pmps-worker-3"
# - targets: ["172.10.11.7:9100"]
# labels:
# cluster: "kubernetes-pmps-local"
# instance: "pmps-worker-4"
# - job_name: "postgresql-exporter"
# static_configs:
# - targets: ["172.10.10.4:30187"]
# labels:
# namespace: "eigen-erp-test"
# kubernetes_namespace: "eigen-erp-test"
# cluster: "kubernetes-eigen-internal"
# instance: "eigen-master-2"
# release: "postgresql"
# - targets: ["172.10.11.6:30187"]
# labels:
# namespace: "erp-db-postgresql"
# kubernetes_namespace: "erp-db-postgresql"
# cluster: "kubernetes-pmps-local"
# instance: "pmps-worker-3"
# release: "postgresql"
# - targets: ["172.10.10.5:30189"]
# labels:
# namespace: "weplay-pos-testing"
# kubernetes_namespace: "weplay-pos-testing"
# cluster: "kubernetes-eigen-external"
# instance: "eigen-master-3"
# release: "postgresql"
# - targets: ["172.10.10.5:30188"]
# labels:
# namespace: "wg-testing"
# kubernetes_namespace: "wg-testing"
# cluster: "kubernetes-eigen-external"
# instance: "eigen-master-3"
# release: "postgresql"
# - targets: ["172.10.10.5:30187"]
# labels:
# namespace: "ijem-testing"
# kubernetes_namespace: "ijem-testing"
# cluster: "kubernetes-eigen-external"
# instance: "eigen-master-3"
# release: "postgresql"
# - job_name: "pipamas-tracking-system"
# static_configs:
# - targets: ["10.1.0.101:9100"]
- job_name: "process-exporter"
static_configs:
- targets: ["172.10.10.2:9256"]
labels:
cluster: "eigen-storage-1"
instance: "172.10.10.2"
- job_name: "node-exporter"
static_configs:
- targets: ["10.1.0.101:9100"]
labels:
cluster: "pipamas-tracking-system"
instance: "10.1.0.101"
- targets: ["54.151.227.26:9100"]
labels:
cluster: "phillipworks"
instance: "54.151.227.26"
# - job_name: "ifgf-bandung"
# static_configs:
# - targets: ["172.105.126.186:9100"]
# - job_name: "ifgf-jakarta"
# static_configs:
# - targets: ["139.162.15.217:9100"]
# labels:
# cluster: "kubernetes-ifgf-jakarta"
# instance: "ifgf-sg-1"
# - job_name: "maja"
# static_configs:
# - targets: ["147.93.29.222:9100"]
# - job_name: "ifgf-global"
# static_configs:
# - targets: ["192.53.116.11:9100"]
#- job_name: "benchmark-maja-production"
# static_configs:
# - targets: ['34.87.148.13:9100']
remote_write:
# - url: "http://thanos-receiver:10908/api/v1/receive"
#write_relabel_configs:
# - source_labels: [__name__]
# regex: ".*"
# action: keep
#queue_config:
# batch_send_deadline: 5s
# max_samples_per_send: 500
# capacity: 2500
# min_shards: 1
# max_shards: 100
- url: "http://172.10.10.6:30291/api/v1/receive"
#feature_gates:
# enable_native_histograms: true
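
remote_write endpoints also accept per-endpoint filtering before samples leave Prometheus. A minimal sketch of dropping a metric family on the active receiver above (the regex is a placeholder; the commented-out queue_config knobs tune batching in the same place):

remote_write:
  - url: "http://172.10.10.6:30291/api/v1/receive"
    write_relabel_configs:
      - source_labels: [__name__]
        regex: "go_gc_duration_.*"   # placeholder: metric names to drop
        action: drop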


@@ -0,0 +1,17 @@
networks:
default:
name: eigen-monitoring
external: true
services:
pyroscope:
image: grafana/pyroscope:latest
environment:
JAEGER_AGENT_HOST: distributor
JAEGER_SAMPLER_TYPE: const
JAEGER_SAMPLER_PARAM: 1
command: ["-config.file=/etc/pyroscope.yml"]
ports:
- "4040:4040"
volumes:
- ./pyroscope.yml:/etc/pyroscope.yml


@@ -0,0 +1,6 @@
tracing:
enabled: true
profiling_enabled: true
pyroscopedb:
max_block_duration: 5m

39
docker/tempo/compose.yaml Normal file

@@ -0,0 +1,39 @@
networks:
default:
name: eigen-monitoring
external: true
services:
memcached:
image: bitnami/memcached:latest
container_name: memcached
ports:
- "11211:11211"
environment:
- MEMCACHED_CACHE_SIZE=128
- MEMCACHED_THREADS=4
init-tempo:
image: &tempoImage grafana/tempo:latest
user: root
entrypoint:
- "chown"
- "10001:10001"
- "/var/tempo"
volumes:
- ./.data:/var/tempo
tempo:
image: *tempoImage
command: ["-config.file=/etc/tempo.yaml"]
volumes:
- ./tempo.yaml:/etc/tempo.yaml
- ./.data:/var/tempo
ports:
- "14268:14268" # jaeger ingest
- "3200:3200" # tempo
- "9095:9095" # tempo grpc
- "9411:9411" # zipkin
depends_on:
- init-tempo
- memcached

95
docker/tempo/tempo.yaml Normal file

@@ -0,0 +1,95 @@
stream_over_http_enabled: true
server:
http_listen_port: 3200
log_level: info
cache:
background:
writeback_goroutines: 5
caches:
- roles:
- frontend-search
memcached:
host: memcached:11211
query_frontend:
search:
duration_slo: 5s
throughput_bytes_slo: 1.073741824e+09
metadata_slo:
duration_slo: 5s
throughput_bytes_slo: 1.073741824e+09
trace_by_id:
duration_slo: 100ms
metrics:
max_duration: 120h # maximum duration of a metrics query, increase for local setups
query_backend_after: 5m
duration_slo: 5s
throughput_bytes_slo: 1.073741824e+09
distributor:
receivers: # this configuration will listen on all ports and protocols that tempo is capable of.
jaeger: # the receivers all come from the OpenTelemetry Collector. more configuration information can
protocols: # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver
thrift_http: #
endpoint: "tempo:14268" # for a production deployment you should only enable the receivers you need!
grpc:
endpoint: "tempo:14250"
thrift_binary:
endpoint: "tempo:6832"
thrift_compact:
endpoint: "tempo:6831"
zipkin:
endpoint: "tempo:9411"
otlp:
protocols:
grpc:
endpoint: "tempo:4317"
http:
endpoint: "tempo:4318"
opencensus:
endpoint: "tempo:55678"
ingester:
max_block_duration: 5m # cut the headblock when this much time passes. this is being set for demo purposes and should probably be left alone normally
compactor:
compaction:
block_retention: 24h # overall Tempo trace retention. set for demo purposes
metrics_generator:
registry:
external_labels:
source: tempo
cluster: docker-compose
storage:
path: /var/tempo/generator/wal
remote_write:
- url: http://prometheus:9090/api/v1/write
send_exemplars: true
traces_storage:
path: /var/tempo/generator/traces
processor:
local_blocks:
filter_server_spans: false
flush_to_storage: true
storage:
trace:
backend: s3
s3:
bucket: tempo
endpoint: api.minio.eigen.co.id
access_key: eigen
secret_key: Eigen3m!
insecure: false
wal:
path: /var/tempo/wal # where to store the wal locally
local:
path: /var/tempo/blocks
overrides:
defaults:
metrics_generator:
processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator
generate_native_histograms: both
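
The metrics_generator above remote-writes service-graph and span metrics to http://prometheus:9090/api/v1/write, which works only because the Prometheus compose file earlier starts the server with --web.enable-remote-write-receiver. Scraping Tempo's own operational metrics back into Prometheus is then a plain job against port 3200, matching the commented-out job in prometheus.yml:

scrape_configs:
  - job_name: "tempo"
    static_configs:
      - targets: ["tempo:3200"]   # Tempo serves its own /metrics here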


@@ -0,0 +1,9 @@
type: S3
config:
bucket: "thanos"
endpoint: "minio:9000"
access_key: "eigen"
secret_key: "secret"
insecure: true # assumption: the local MinIO serves plain HTTP on minio:9000 (see MINIO_SERVER_URL above), so TLS must be off here
trace:
enable: true


@@ -0,0 +1,60 @@
networks:
default:
name: eigen-monitoring
external: true
services:
thanos-query:
image: thanosio/thanos:v0.36.1
command:
- query
- --http-address=:19192
- --grpc-address=:19092
- --endpoint=thanos-store:19090
- --endpoint=thanos-receiver:10907
#- --query.auto-downsampling
#- --query.max-concurrent-select=10
#- --query.max-concurrent=50
#- --query.timeout=1440m
#- --query.partial-response
thanos-store:
image: thanosio/thanos:v0.36.1
command:
- store
- --data-dir=/data
- --objstore.config-file=/etc/thanos/bucket.yaml
- --http-address=:19191
- --grpc-address=:19090
volumes:
- ./.data/store:/data
- ./bucket.yaml:/etc/thanos/bucket.yaml
thanos-compactor:
image: thanosio/thanos:v0.36.1
command:
- compact
- --data-dir=/data
- --objstore.config-file=/etc/thanos/bucket.yaml
- --wait
- --wait-interval=5m
volumes:
- ./.data/compactor:/data
- ./bucket.yaml:/etc/thanos/bucket.yaml
thanos-receiver:
image: thanosio/thanos:v0.36.1
command:
- receive
- --grpc-address=:10907
- --http-address=:10909
- --tsdb.path=/data
- --receive.local-endpoint=127.0.0.1:10907
- --objstore.config-file=/etc/thanos/bucket.yaml
- --label=receive_instance_id="thanos-receiver-1"
- --remote-write.address=:10908
ports:
- "10908:10908"
volumes:
- ./.data/receiver:/data/default-tenant
- ./bucket.yaml:/etc/thanos/bucket.yaml
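
To feed this receiver, a Prometheus instance on the same network would remote-write to port 10908, mirroring the commented-out URL in prometheus.yml above; a minimal sketch:

remote_write:
  - url: "http://thanos-receiver:10908/api/v1/receive"   # Thanos receive endpoint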