How to solve pipeline mysteries

I have already described how to inspect a pipeline, but today I will show you how to write a shell script that helps you reconstruct a whole pipeline when all you have is the ID of a process that is part of that pipeline.

Shell script

Use the following shell script to inspect a whole pipeline.

#!/usr/bin/env bash
# pipeline solver
#
# Usage:
#   pipelinesolver.sh $ProcessID
#

# Global parameters
#
# setting_display_every_file_descriptor
#   1 - display every file descriptor
#   0 - display only pipes
setting_display_every_file_descriptor=1

# setting_inspect_previous_commands
#   1 - inspect previous and further commands (writing to a pipe and reading from a pipe)
#   0 - inspect only further commands (reading from a pipe)
setting_inspect_previous_commands=1

# setting_debug
#   1 - debug on
#   0 - debug off
setting_debug=0

# Global variables
#
# global_checked_processes - array that contains already checked pids
declare -g -a global_checked_processes=()

# function: debug
# Print a diagnostic line on stdout, but only while debugging is switched on.
#
# parameters:
#   prefix  | text placed at the start of the line
#   message | text placed after the "(debug)" marker
# globals:
#   setting_debug (read) | 1 enables the output; 0, unset or empty disables it
function debug() {
  # Default to 0 so that an unset or empty setting_debug means "off" instead
  # of making the numeric test complain about a missing integer.
  if [[ "${setting_debug:-0}" -eq 1 ]]; then
    printf '%s (debug) %s\n' "${1:-}" "${2:-}"
  fi
}

# function: _pipesolver_already_checked
# Succeeds when the given pid is an element of global_checked_processes.
# Elements are compared as whole strings, so pid 123 is not mistaken for 1234.
#
# parameters:
#  pid | (required) | process ID to look up
#
function _pipesolver_already_checked() {
  local candidate="$1"
  local known_pid

  for known_pid in "${global_checked_processes[@]}"; do
    if [[ "$known_pid" == "$candidate" ]]; then
      return 0
    fi
  done
  return 1
}

# function: pipesolver
# Display the command line and the file descriptors of a process. For every
# descriptor that points to a pipe, list the processes lsof reports on that
# pipe and recurse into those that have not been inspected yet.
#
# parameters:
#  pid          | (required) | process ID
#  previous_pid |            | previous process ID (defaults to 0)
#  shift        |            | line shift (formatting), number of dashes in
#               |            | the line prefix (defaults to 2)
#
# globals:
#  setting_display_every_file_descriptor | read
#  setting_inspect_previous_commands     | read
#  global_checked_processes              | read and appended to
#
# returns:
#  0 when the pid is empty (nothing is done) or the process was inspected,
#  1 when the pid is not a number or has no /proc entry
#
function pipesolver() {
  # nothing to inspect without a process id
  if [[ -z "${1:-}" ]]; then
    return 0
  fi

  local pid="$1"
  local previous_pid="${2:-0}"
  local shift="${3:-2}"

  # the pid is used to build /proc paths, so accept digits only
  if [[ ! "$pid" =~ ^[0-9]+$ ]]; then
    echo "pipesolver: process ID must be a number: ${pid}" >&2
    return 1
  fi

  # add pid to global_checked_processes if it is not there
  if ! _pipesolver_already_checked "$pid"; then
    global_checked_processes+=("$pid")
  fi

  # the process may be gone already (or may never have existed)
  if [[ ! -d "/proc/${pid}" ]]; then
    echo "pipesolver: process ${pid} does not exist" >&2
    return 1
  fi

  # define prefix for this pid, for example "29909 -->" for shift 2
  local dashes
  printf -v dashes '%*s' "$shift" ''
  local prefix="${pid} ${dashes// /-}>"

  # debug
  debug "$prefix" "pid: ${pid}"
  debug "$prefix" "previous_pid: ${previous_pid}"
  debug "$prefix" "shift: ${shift}"
  debug "$prefix" "elements in global_checked_processes: ${#global_checked_processes[@]}"
  debug "$prefix" "last element in global_checked_processes: ${global_checked_processes[-1]}"

  # display command (arguments in cmdline are separated by NUL bytes)
  local commandline
  commandline="$(tr '\0' ' ' <"/proc/${pid}/cmdline")"
  echo "${prefix} Command: ${commandline}"

  # Everything below is local on purpose: the function calls itself from
  # inside both loops and must not overwrite the state of its callers.
  local file_descriptor file_descriptor_destination is_pipe pipe_id
  local endpoint endpoint_pid endpoint_fd endpoint_mode
  local -a file_descriptors=()
  local -a pipe_endpoints=()

  # list file descriptors in numeric order
  mapfile -t file_descriptors < <(
    find "/proc/${pid}/fd" -mindepth 1 -maxdepth 1 -printf '%f\n' | sort -n
  )

  # inspect each file descriptor
  for file_descriptor in "${file_descriptors[@]}"; do
    # get file descriptor destination
    # for example "pipe:[23450122]" for "/proc/23877/fd/0 -> pipe:[23450122]"
    # A descriptor can be closed between the listing and this lookup; skip it
    # then, which is why the error output of find is discarded here.
    file_descriptor_destination="$(find "/proc/${pid}/fd/${file_descriptor}" -printf '%l\n' 2>/dev/null)" || continue

    is_pipe=0
    if [[ "$file_descriptor_destination" == pipe:* ]]; then
      is_pipe=1
    fi

    # display file descriptor (pipes are always displayed)
    if [[ "$setting_display_every_file_descriptor" -eq 1 ]] || ((is_pipe == 1)); then
      echo "${prefix} File descriptor ${file_descriptor} points to ${file_descriptor_destination}"
    fi

    # inspect destination (pipe only)
    if ((is_pipe == 0)); then
      continue
    fi

    # get pipe id, "pipe:[23450122]" becomes "23450122"
    pipe_id="${file_descriptor_destination#pipe:\[}"
    pipe_id="${pipe_id%\]}"

    # get pipe endpoints
    # for lsof output:
    #   grep      19567 milosz    1w     FIFO               0,12       0t0   23699554 pipe
    #   grep      19568 milosz    0r     FIFO               0,12       0t0   23699554 pipe
    # you will get:
    #   19567:1:w
    #   19568:0:r
    # The pipe id is handed to awk with -v and compared as a string.
    mapfile -t pipe_endpoints < <(
      lsof -w -n -P -d0-1024 |
        awk -v pipe_id="$pipe_id" '
          $NF == "pipe" && ($(NF-1) "") == (pipe_id "") {
            print $2 ":" substr($4, 1, length($4) - 1) ":" substr($4, length($4))
          }'
    )

    # parse endpoints
    for endpoint in "${pipe_endpoints[@]}"; do
      IFS=: read -r endpoint_pid endpoint_fd endpoint_mode <<<"$endpoint"

      # debug
      debug "$prefix" "endpoint_pid: ${endpoint_pid}"
      debug "$prefix" "endpoint_fd: ${endpoint_fd}"
      debug "$prefix" "endpoint_mode: ${endpoint_mode}"

      # process only "r" (read) and "w" (write)
      if [[ "$endpoint_pid" == "$pid" ]]; then
        case "$endpoint_mode" in
          "r")
            echo "${prefix}      I am reading from it using file descriptor ${endpoint_fd}"
            ;;
          "w")
            echo "${prefix}      I am writing to it using file descriptor ${endpoint_fd} "
            ;;
        esac
      elif [[ "$endpoint_pid" == "$previous_pid" ]]; then
        # the process we came from: report it, but do not go back into it
        case "$endpoint_mode" in
          "r")
            echo "${prefix}      Process ${endpoint_pid} is reading it using its file descriptor ${endpoint_fd}"
            ;;
          "w")
            echo "${prefix}      Process ${endpoint_pid} is writing to it using its file descriptor ${endpoint_fd}"
            ;;
        esac
      else
        case "$endpoint_mode" in
          "r")
            echo "${prefix}      Process ${endpoint_pid} is reading it using its file descriptor ${endpoint_fd}"
            if ! _pipesolver_already_checked "$endpoint_pid"; then
              global_checked_processes+=("$endpoint_pid")
              pipesolver "$endpoint_pid" "$pid" "$((shift + 2))"
            fi
            ;;
          "w")
            echo "${prefix}      Process ${endpoint_pid} is writing to it using its file descriptor ${endpoint_fd}"
            if [[ "$setting_inspect_previous_commands" -eq 1 ]]; then
              if ! _pipesolver_already_checked "$endpoint_pid"; then
                global_checked_processes+=("$endpoint_pid")
                pipesolver "$endpoint_pid" "$pid" "$((shift + 3))"
              fi
            fi
            ;;
        esac
      fi
    done
  done

  return 0
}

# function: main
# Script entry point. Expects exactly one argument, the process ID to start
# the inspection from, and hands it to pipesolver.
#
# parameters:
#   process_id | (required) | process ID that is part of the pipeline
#
# returns:
#   the status of pipesolver, or 2 after printing the usage on stderr when
#   the number of arguments is not exactly one
function main() {
  if [[ "$#" -ne 1 ]]; then
    echo "Usage: $0 process_id" >&2
    return 2
  fi

  pipesolver "$1"
}

main "$@"

Sample usage

I will use the following pipeline to illustrate pipeline solver.

$ tail -f /var/log/auth.log | \
  stdbuf --output=0 grep pam_unix | \
  tee /tmp/auth_milosz.log | \
  grep milosz

Default verbose output.

$ bash pipelinesolver.sh 29909
29909 --> Command: tail -f /var/log/auth.log
29909 --> File descriptor 0 points to /dev/pts/10
29909 --> File descriptor 1 points to pipe:[24816006]
29909 -->      I am writing to it using file descriptor 1
29909 -->      Process 29910 is reading it using its file descriptor 0
29910 ----> Command: grep pam_unix
29910 ----> File descriptor 0 points to pipe:[24816006]
29910 ---->      Process 29909 is writing to it using its file descriptor 1
29910 ---->      I am reading from it using file descriptor 0
29910 ----> File descriptor 1 points to pipe:[24816008]
29910 ---->      I am writing to it using file descriptor 1
29910 ---->      Process 29911 is reading it using its file descriptor 0
29911 ------> Command: tee /tmp/auth_milosz.log
29911 ------> File descriptor 0 points to pipe:[24816008]
29911 ------>      Process 29910 is writing to it using its file descriptor 1
29911 ------>      I am reading from it using file descriptor 0
29911 ------> File descriptor 1 points to pipe:[24816009]
29911 ------>      I am writing to it using file descriptor 1
29911 ------>      Process 29912 is reading it using its file descriptor 0
29912 --------> Command: grep milosz
29912 --------> File descriptor 0 points to pipe:[24816009]
29912 -------->      Process 29911 is writing to it using its file descriptor 1
29912 -------->      I am reading from it using file descriptor 0
29912 --------> File descriptor 1 points to /dev/pts/10
29912 --------> File descriptor 2 points to /dev/pts/10
29911 ------> File descriptor 2 points to /dev/pts/10
29911 ------> File descriptor 3 points to /tmp/auth_milosz.log
29910 ----> File descriptor 2 points to /dev/pts/10
29909 --> File descriptor 2 points to /dev/pts/10
29909 --> File descriptor 3 points to /var/log/auth.log
29909 --> File descriptor 4 points to anon_inode:inotify

Less verbose output (setting_display_every_file_descriptor set to 0).

$ bash pipelinesolver.sh 29909
29909 --> Command: tail -f /var/log/auth.log
29909 --> File descriptor 1 points to pipe:[24816006]
29909 -->      I am writing to it using file descriptor 1
29909 -->      Process 29910 is reading it using its file descriptor 0
29910 ----> Command: grep pam_unix
29910 ----> File descriptor 0 points to pipe:[24816006]
29910 ---->      Process 29909 is writing to it using its file descriptor 1
29910 ---->      I am reading from it using file descriptor 0
29910 ----> File descriptor 1 points to pipe:[24816008]
29910 ---->      I am writing to it using file descriptor 1
29910 ---->      Process 29911 is reading it using its file descriptor 0
29911 ------> Command: tee /tmp/auth_milosz.log
29911 ------> File descriptor 0 points to pipe:[24816008]
29911 ------>      Process 29910 is writing to it using its file descriptor 1
29911 ------>      I am reading from it using file descriptor 0
29911 ------> File descriptor 1 points to pipe:[24816009]
29911 ------>      I am writing to it using file descriptor 1
29911 ------>      Process 29912 is reading it using its file descriptor 0
29912 --------> Command: grep milosz
29912 --------> File descriptor 0 points to pipe:[24816009]
29912 -------->      Process 29911 is writing to it using its file descriptor 1
29912 -------->      I am reading from it using file descriptor 0

Inspect the same pipeline using a different process ID.

$ bash pipelinesolver.sh 29911
29911 --> Command: tee /tmp/auth_milosz.log
29911 --> File descriptor 0 points to pipe:[24816008]
29911 -->      Process 29910 is writing to it using its file descriptor 1
29910 -----> Command: grep pam_unix
29910 -----> File descriptor 0 points to pipe:[24816006]
29910 ----->      Process 29909 is writing to it using its file descriptor 1
29909 --------> Command: tail -f /var/log/auth.log
29909 --------> File descriptor 1 points to pipe:[24816006]
29909 -------->      I am writing to it using file descriptor 1
29909 -------->      Process 29910 is reading it using its file descriptor 0
29910 ----->      I am reading from it using file descriptor 0
29910 -----> File descriptor 1 points to pipe:[24816008]
29910 ----->      I am writing to it using file descriptor 1
29910 ----->      Process 29911 is reading it using its file descriptor 0
29911 -->      I am reading from it using file descriptor 0
29911 --> File descriptor 1 points to pipe:[24816009]
29911 -->      I am writing to it using file descriptor 1
29911 -->      Process 29912 is reading it using its file descriptor 0
29912 ----> Command: grep milosz
29912 ----> File descriptor 0 points to pipe:[24816009]
29912 ---->      Process 29911 is writing to it using its file descriptor 1
29912 ---->      I am reading from it using file descriptor 0

Reconstruct a whole pipeline using a process ID that is part of that pipeline.

$ bash pipelinesolver.sh 29911 | grep Command: | sort | tr -d '-'
29909 > Command: tail f /var/log/auth.log
29910 > Command: grep pam_unix
29911 > Command: tee /tmp/auth_milosz.log
29912 > Command: grep milosz

I love this!

Other applications

You can use this shell script to inspect popular applications.

Redis

1968 --> Command: /usr/bin/redis-server 127.0.0.1:6379
1968 --> File descriptor 0 points to /dev/null
1968 --> File descriptor 1 points to /dev/null
1968 --> File descriptor 2 points to /dev/null
1968 --> File descriptor 3 points to pipe:[36986]
1968 -->      I am reading from it using file descriptor 3
1968 -->      I am writing to it using file descriptor 4
1968 --> File descriptor 4 points to pipe:[36986]
1968 -->      I am reading from it using file descriptor 3
1968 -->      I am writing to it using file descriptor 4
1968 --> File descriptor 5 points to anon_inode:[eventpoll]
1968 --> File descriptor 6 points to socket:[36991]
1968 --> File descriptor 7 points to socket:[36992]
1968 --> File descriptor 8 points to socket:[36993]

Alternative Linux getty

29232 --> Command: /sbin/agetty -o -p -- \u --noclear tty1 linux
29232 --> File descriptor 0 points to /dev/tty1
29232 --> File descriptor 1 points to /dev/tty1
29232 --> File descriptor 2 points to /dev/tty1
29232 --> File descriptor 4 points to anon_inode:inotify

GNU Midnight Commander

32339 --> Command: mc
32339 --> File descriptor 0 points to /dev/pts/11
32339 --> File descriptor 1 points to /dev/pts/11
32339 --> File descriptor 2 points to /dev/pts/11
32339 --> File descriptor 3 points to socket:[24828589]
32339 --> File descriptor 4 points to /dev/tty
32339 --> File descriptor 5 points to /dev/ptmx
32339 --> File descriptor 6 points to /dev/pts/17
32339 --> File descriptor 7 points to pipe:[24828594]
32339 -->      I am reading from it using file descriptor 7
32339 -->      I am writing to it using file descriptor 8
32339 -->      Process 32341 is writing to it using its file descriptor 8
32341 -----> Command: bash -rcfile .bashrc
32341 -----> File descriptor 0 points to /dev/pts/17
32341 -----> File descriptor 1 points to /dev/pts/17
32341 -----> File descriptor 2 points to /dev/pts/17
32341 -----> File descriptor 8 points to pipe:[24828594]
32341 ----->      Process 32339 is reading it using its file descriptor 7
32341 ----->      Process 32339 is writing to it using its file descriptor 8
32341 ----->      I am writing to it using file descriptor 8
32341 -----> File descriptor 255 points to /dev/pts/17
32339 --> File descriptor 8 points to pipe:[24828594]
32339 -->      I am reading from it using file descriptor 7
32339 -->      I am writing to it using file descriptor 8
32339 -->      Process 32341 is writing to it using its file descriptor 8