roam/daily/2021-08-30.org
2021-09-01 16:57:39 -04:00

3.7 KiB

2021-08-30

Researching recent site outages

Researching recent Control Panel outages due to OOM-killed pods.

Checking the log configuration, we don't appear to be capturing request durations or request sizes.

  # Runtime configuration. Every setting can be overridden from the
  # environment; the value after ":-" is used when the variable is unset or
  # empty.
  ENVIRONMENT=${ENVIRONMENT:-development}
  SERVICE=${SERVICE:-aweber-classic}
  STATSD_HOST=${STATSD_HOST:-statsd}
  STATSD_PORT=${STATSD_PORT:-8125}

  # Scratch file used to hold a fetched server-status response.
  TMPFILE=$(mktemp) || { echo "mktemp failed" >&2; exit 1; }
  # Clean up the scratch file when the shell exits, even if the run stops
  # before reaching its final cleanup step.
  trap 'rm -f -- "$TMPFILE"' EXIT

  # Look up one statistic in the fetched status file and report it as a
  # StatsD-style metric line.
  # Globals:   SERVICE, ENVIRONMENT, TMPFILE (all read)
  # Arguments: $1 - statistic name as it appears before ": " in TMPFILE
  #            $2 - metric path appended to applications.SERVICE.ENVIRONMENT.
  #            $3 - metric type suffix placed after "|"
  # Outputs:   "Capturing metric <fullpath>:<value>|<type>" on stdout;
  #            a warning on stderr when the statistic is not found
  # Returns:   0 on success, 1 if TMPFILE is unreadable or the statistic
  #            is missing
  capture() {
      local stat="$1"
      local path="$2"
      local type="$3"
      local fullpath="applications.${SERVICE}.${ENVIRONMENT}.${path}"
      # Declare separately from the assignment so a failing awk is not masked
      # by the exit status of "local".
      local value
      # Match the "<stat>: " prefix literally (no regex interpolation) and take
      # the first matching line only.
      value=$(awk -v key="${stat}: " \
          'index($0, key) == 1 { print substr($0, length(key) + 1); exit }' \
          "$TMPFILE") || return 1
      # Without a value the metric line would be the malformed "<path>:|<type>".
      if [ -z "$value" ]; then
          echo "No value for ${stat} in ${TMPFILE}; skipping ${fullpath}" >&2
          return 1
      fi
      local metric="${fullpath}:${value}|${type}"
      echo "Capturing metric ${metric}"
      # Sending is disabled; the function only prints the metric for now.
      # echo "${metric}" | nc -w 1 -c ${STATSD_HOST} ${STATSD_PORT}
  }

  # Report one statistic under the "counters.apache." prefix with metric
  # type "c" by delegating to capture.
  # Arguments: $1 - statistic name to look up
  #            $2 - metric name appended after "counters.apache."
  # NOTE(review): callers pass point-in-time readings such as load averages
  # and worker counts; confirm that the counter type is intended for them.
  counter() {
      local stat_name=$1 metric_name=$2
      capture "${stat_name}" "counters.apache.${metric_name}" "c"
  }


  # Names of the control-panel pods in the "cp" namespace, excluding
  # feature-branch and sensu pods. --no-headers keeps the "NAME" header row
  # from being iterated as if it were a pod.
  CONTROL_PANEL_PODS=$(kubectl get pods -n cp -l app=control-panel \
          --no-headers -o custom-columns=NAME:.metadata.name \
      | grep -Ev 'feature-branch|sensu')

  # Fetch the server-status report from every pod and record its statistics.
  # NOTE(review): every pod reports under the same metric path; confirm that
  # is intended rather than a per-pod path.
  # Pod names cannot contain whitespace, so word-splitting the list is intended.
  for pod in $CONTROL_PANEL_PODS
  do
      # Look the pod up in the same namespace it was listed from, and read the
      # IP field directly instead of scraping the YAML dump.
      POD_IP=$(kubectl get pod "$pod" -n cp -o jsonpath='{.status.podIP}')
      if [ -z "$POD_IP" ]; then
          echo "No pod IP for ${pod}; skipping" >&2
          continue
      fi
      echo "Fetching status from ${pod} (${POD_IP})"
      # The URL is quoted so "?" is not treated as a glob character. --fail
      # keeps HTTP error pages out of TMPFILE, and --max-time stops one
      # unresponsive pod from stalling the whole run.
      if ! curl --silent --show-error --fail --max-time 10 \
              "http://${POD_IP}/server-status?auto" > "$TMPFILE"; then
          echo "Could not fetch server-status from ${pod}; skipping" >&2
          continue
      fi
      # Each entry is <server-status key>:<metric name>.
      for mapping in \
          Load1:load_1 \
          Load5:load_5 \
          Load15:load_15 \
          CPUUser:cpu_user \
          CPUSystem:cpu_system \
          CPUChildrenUser:cpu_children_user \
          CPUChildrenSystem:cpu_children_system \
          CPULoad:cpu_load \
          ReqPerSec:requests_per_second \
          BytesPerSec:bytes_per_second \
          BytesPerReq:bytes_per_request \
          DurationPerReq:duration_per_request \
          BusyWorkers:busy_workers \
          IdleWorkers:idle_workers
      do
          counter "${mapping%%:*}" "${mapping#*:}"
      done
  done

  # -f: do not complain if the scratch file has already been removed.
  rm -f -- "$TMPFILE"
Pod: {}
10.51.12.43
Pod: {}
10.51.27.62
Pod: {}
10.51.20.19
Pod: {}
10.51.23.32
Pod: {}
10.51.13.57
Pod: {}
10.51.19.22
Pod: {}
10.51.21.18
Pod: {}
10.51.15.47
Capturing metric applications.aweber-classic.development.counters.apache.load_1:1.96|c
Capturing metric applications.aweber-classic.development.counters.apache.load_5:2.02|c
Capturing metric applications.aweber-classic.development.counters.apache.load_15:1.91|c
Capturing metric applications.aweber-classic.development.counters.apache.cpu_user:42.35|c
Capturing metric applications.aweber-classic.development.counters.apache.cpu_system:55.37|c
Capturing metric applications.aweber-classic.development.counters.apache.cpu_children_user:42393.1|c
Capturing metric applications.aweber-classic.development.counters.apache.cpu_children_system:9040.76|c
Capturing metric applications.aweber-classic.development.counters.apache.cpu_load:27.1413|c
Capturing metric applications.aweber-classic.development.counters.apache.requests_per_second:4.08449|c
Capturing metric applications.aweber-classic.development.counters.apache.bytes_per_second:18589.1|c
Capturing metric applications.aweber-classic.development.counters.apache.bytes_per_request:4551.15|c
Capturing metric applications.aweber-classic.development.counters.apache.duration_per_request:367.236|c
Capturing metric applications.aweber-classic.development.counters.apache.busy_workers:13|c
Capturing metric applications.aweber-classic.development.counters.apache.idle_workers:3|c