How to calculate the average number of Elasticsearch documents per day over recent days

Calculate the average number of Elasticsearch documents per day over recent days using a shell script.

Shell script

A simple shell script that uses the curl, xargs, awk and bc utilities.

#!/bin/bash
# Calculate the average number of Elasticsearch documents per day over the
# last n days.
#
# Every setting below keeps its default unless a variable of the same name is
# already present in the environment, for example:
#   number_of_days=4 elasticsearch_server=es.example.org ./script.sh

# initialize settings (environment value wins, otherwise the default applies)
number_of_days="${number_of_days:-14}"
elasticsearch_server="${elasticsearch_server:-192.0.2.20}"
elasticsearch_port="${elasticsearch_port:-9200}"
elasticsearch_protocol="${elasticsearch_protocol:-http}"
elasticsearch_indices="${elasticsearch_indices:-*}"
elasticsearch_date_separator="${elasticsearch_date_separator:-.}"

# number_of_days drives the day loop and date arithmetic, so reject anything
# that is not a positive integer before doing any work.
if ! [[ "$number_of_days" =~ ^[1-9][0-9]*$ ]]; then
  printf 'number_of_days must be a positive integer, got: %s\n' "$number_of_days" >&2
  exit 1
fi

# initialize global counters
sum_global=0 # documents summed over every day that returned data
ele_global=0 # number of days that returned data

# Per-weekday accumulators. Slots run from 1 to 7, matching the weekday
# names defined at the end of this section; slot 0 is intentionally unused.

# document totals and number of sampled days for each week day
declare -a sum_weekday=([1]=0 [2]=0 [3]=0 [4]=0 [5]=0 [6]=0 [7]=0)
declare -a ele_weekday=([1]=0 [2]=0 [3]=0 [4]=0 [5]=0 [6]=0 [7]=0)

# MIN and MAX daily totals for each week day
declare -a min_weekday=([1]=0 [2]=0 [3]=0 [4]=0 [5]=0 [6]=0 [7]=0)
declare -a max_weekday=([1]=0 [2]=0 [3]=0 [4]=0 [5]=0 [6]=0 [7]=0)

# display names, keyed by the same 1..7 slot numbers
declare -a weekday=(
  [1]="Monday"
  [2]="Tuesday"
  [3]="Wednesday"
  [4]="Thursday"
  [5]="Friday"
  [6]="Saturday"
  [7]="Sunday"
)

# Settings summary: label/value pairs, printed two per row because printf
# re-applies its format string until all arguments are consumed.
summary_rows=(
  "Variable" "Value"
  "Server" "$elasticsearch_server"
  "Number of days" "$number_of_days"
  "Indices" "$elasticsearch_indices"
)

printf "\n"
printf "%16s %16s\n" "${summary_rows[@]}"
printf "\n"

# Column headings for the per-day listing that follows.
printf "%12s %16s\n" "Date" "Documents"

#######################################
# Fold one day's document total into the global and per-weekday statistics.
# Globals:   sum_global, ele_global, sum_weekday, ele_weekday,
#            min_weekday, max_weekday (all updated)
# Arguments: $1 - weekday slot as printed by `date +%u` (1..7)
#            $2 - document total for that day (non-negative integer)
#######################################
record_day_total() {
  local dow=$1 total=$2

  sum_global=$((sum_global + total))
  ele_global=$((ele_global + 1))

  sum_weekday[$dow]=$((${sum_weekday[$dow]:-0} + total))
  ele_weekday[$dow]=$((${ele_weekday[$dow]:-0} + 1))

  # The first sample for a weekday seeds both MIN and MAX. Using the sample
  # count (rather than "MIN is still 0") as the marker keeps a genuine
  # 0-document day from being overwritten by a later, larger sample.
  if ((ele_weekday[$dow] == 1)); then
    min_weekday[$dow]=$total
    max_weekday[$dow]=$total
    return
  fi

  if ((total < min_weekday[$dow])); then
    min_weekday[$dow]=$total
  fi

  if ((total > max_weekday[$dow])); then
    max_weekday[$dow]=$total
  fi
}

# Loop-invariant part of every request URL.
base_url="${elasticsearch_protocol}://${elasticsearch_server}:${elasticsearch_port}"

# Walk from the oldest day to yesterday so the listing is chronological.
for ((n = number_of_days; n >= 1; n--)); do
  # A single date call yields day, month, year and weekday from the same
  # instant; separate calls could disagree if the script runs across midnight.
  read -r day month year day_of_the_week \
    < <(date --date "$n days ago" +"%d %m %Y %u")

  index_pattern="${elasticsearch_indices}${year}${elasticsearch_date_separator}${month}${elasticsearch_date_separator}${day}"

  # List the indices matching that day, fetch each index's document count and
  # add the counts up. printf "%.0f" keeps large totals in plain decimal form
  # (some awk implementations switch `print` to exponent notation for big
  # numbers); nothing is printed when no index matched, so the day is skipped.
  sum_internal="$(curl --silent "${base_url}/_cat/indices/${index_pattern}?h=index" \
    | xargs -I {} curl --silent "${base_url}/_cat/count/{}?h=count" \
    | awk '{ sum += $1 } END { if (NR > 0) printf "%.0f\n", sum }')"

  if [[ -n "$sum_internal" ]]; then
    printf "%12s %16s\n" "${day}.${month}.${year}" "${sum_internal}"
    record_day_total "$day_of_the_week" "$sum_internal"
  fi
done

printf "\n"

# Per-weekday summary table: one row per weekday slot 1..7.
printf "%11s %17s %5s %16s %16s\n" "Weekday" "AVG documents" "Days" "MIN documents" "MAX documents"

for d in {1..7}; do
  days="${ele_weekday[$d]}"

  # Cells stay blank unless at least one day was sampled for this weekday.
  avg=""
  lowest=""
  highest=""
  if [[ -n "$days" ]] && ((days > 0)); then
    avg=$((sum_weekday[$d] / days)) # integer average, fraction discarded
    lowest="${min_weekday[$d]}"
    highest="${max_weekday[$d]}"
  fi

  printf "%11s %17s %5s %16s %16s\n" "${weekday[$d]}" "$avg" "$days" "$lowest" "$highest"
done

echo
# Overall average across every day that returned data, computed by bc with
# two decimal places. When no day returned data the divisor would be zero and
# bc would fail, so report "n/a" instead.
if ((${ele_global:-0} > 0)); then
  average="$(echo "scale=2; ${sum_global}/${ele_global}" | bc)"
else
  average="n/a"
fi
echo "Average documents per day: ${average}"
echo

Usage

Report for the last 4 days.


        Variable            Value
          Server       192.0.2.20
  Number of days                4
         Indices                *

        Date        Documents
  18.05.2020          5209896
  19.05.2020         14996353
  20.05.2020          5344200
  21.05.2020          5786398

    Weekday     AVG documents  Days    MIN documents    MAX documents
     Monday           5209896     1          5209896          5209896
    Tuesday          14996353     1         14996353         14996353
  Wednesday           5344200     1          5344200          5344200
   Thursday           5786398     1          5786398          5786398
     Friday                       0                                  
   Saturday                       0                                  
     Sunday                       0                                  

Average documents per day: 7834211.75

Report for the last 15 days.


        Variable            Value
          Server       192.0.2.20
  Number of days               15 
         Indices                *

        Date        Documents
  07.05.2020          4810659
  08.05.2020          4985672
  09.05.2020          4163362
  10.05.2020          3966883
  11.05.2020          5494458
  12.05.2020          4556809
  13.05.2020          4576365
  14.05.2020          4280290
  15.05.2020          3968434
  16.05.2020          3329636
  17.05.2020          3570905
  18.05.2020          5209896
  19.05.2020         14996353
  20.05.2020          5344200
  21.05.2020          5786398

    Weekday     AVG documents  Days    MIN documents    MAX documents
     Monday           5352177     2          5209896          5494458
    Tuesday           9776581     2          4556809         14996353
  Wednesday           4960282     2          4576365          5344200
   Thursday           4959115     3          4280290          5786398
     Friday           4477053     2          3968434          4985672
   Saturday           3746499     2          3329636          4163362
     Sunday           3768894     2          3570905          3966883

Average documents per day: 5269354.66