Observability — Metrics, Logs, Traces

Metrics answer "what is wrong." Logs answer "why." Traces answer "where."

When to use

Metrics: dashboards, alerting, capacity planning — always
Logs: debugging specific request failures (structured JSON only)
Traces: distributed systems where a single request spans multiple services

Tradeoffs

High-cardinality metrics are expensive (one time series per label combination), so keep unbounded values such as user IDs or request IDs out of metric labels
Verbose unstructured logs have high storage cost and are hard to query
Tracing adds per-request overhead and requires instrumentation across all services

Go
Python

// requestCount is a counter vector named "http_requests_total" that is
// partitioned by the "method" and "status" labels.
var requestCount = promauto.NewCounterVec(prometheus.CounterOpts{
    Name: "http_requests_total",
    // Help text is shown alongside the metric on the scrape endpoint; it
    // matches the description used by the Python counter of the same name.
    Help: "Total requests",
}, []string{"method", "status"})

// HandleRequest looks up the user named by the "id" query parameter and
// writes it to w as JSON.
//
// The handler body runs inside a "handle_request" span. Both outcomes
// increment requestCount with the request method and a status label: "500"
// when the lookup fails, "200" otherwise. Lookup and encoding failures are
// logged through slog with the user ID and the error attached.
func HandleRequest(w http.ResponseWriter, r *http.Request) {
    ctx, span := tracer.Start(r.Context(), "handle_request")
    defer span.End()

    // Parse the query once so the lookup and the log lines always agree.
    userID := r.URL.Query().Get("id")

    user, err := getUser(ctx, userID)
    if err != nil {
        slog.ErrorContext(ctx, "user fetch failed",
            "user_id", userID, "error", err)
        requestCount.WithLabelValues(r.Method, "500").Inc()
        http.Error(w, "error", http.StatusInternalServerError)
        return
    }

    requestCount.WithLabelValues(r.Method, "200").Inc()

    // Without an explicit type net/http sniffs the body and labels it text/plain.
    w.Header().Set("Content-Type", "application/json")
    if err := json.NewEncoder(w).Encode(user); err != nil {
        // Part of the response may already have been written, so the status
        // can no longer be changed; the failure is logged instead of dropped.
        slog.ErrorContext(ctx, "response encode failed",
            "user_id", userID, "error", err)
    }
}

from prometheus_client import Counter
from opentelemetry import trace
import structlog

# Counter of requests, labelled by method and status.
request_count = Counter(
    name="http_requests_total",
    documentation="Total requests",
    labelnames=["method", "status"],
)
# Tracer obtained under this module's name.
tracer = trace.get_tracer(__name__)
# Logger obtained from structlog; used for keyword-style event logging below.
log = structlog.get_logger()

def handle_request(method: str, user_id: str):
    """Fetch a user inside a "handle_request" span and record the outcome.

    Args:
        method: Value recorded in the counter's ``method`` label.
        user_id: Identifier passed to ``get_user`` and attached to the
            error log event.

    Returns:
        Whatever ``get_user`` returned when that value is truthy; ``None``
        otherwise.
    """
    with tracer.start_as_current_span("handle_request"):
        fetched = get_user(user_id)
        succeeded = bool(fetched)

        # Log first, then count, so the failure path keeps its original order.
        if not succeeded:
            log.error("user_fetch_failed", user_id=user_id)

        outcome = "200" if succeeded else "500"
        request_count.labels(method=method, status=outcome).inc()

        return fetched if succeeded else None

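Gotcha: Unstructured logs are grep-able but not queryable, so always emit structured JSON. log.Printf("user %s failed", id) produces a plain log line that can only be searched as text. {"level":"error","user_id":"id","error":"msg"} is an observable event whose fields can be filtered and aggregated.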