#!/bin/sh

ZEEK_BUILD=""
DATA_FILE=""
MODE="intf"
INTERFACE=""
SEED_FILE=""

# Path where flamegraph is installed
FLAMEGRAPH_PATH=""
FLAMEGRAPH_PREFIX="benchmark"

usage() {
    usage="\
Usage: $0 -z [zeek binary path] -d [data file path]

  Options:
    -b, --build PATH        The path to a Zeek binary to benchmark
    -d, --data-file PATH    The path to a data file to read from for replay
    -m, --mode MODE         This can be one of three possible values:
                            intf, read, or flamegraph. This controls what
                            mode is used for the benchmark run, and defaults
                            to intf if not passed. The modes are described
                            below.
    -i, --interface INTF    The network interface to use for capturing data.
                            This interface should be completely idle, since
                            tcpreplay will be using it to replay the data.
                            This argument is ignored if the mode is 'file'.
    -f, --flamegraph PATH   The path to the directory where Flamegraph is
                            installed. This argument is required if the mode
                            is 'flamegraph', but is ignored otherwise.
    -o, --output FILE       The file prefix to use as output for Flamegraph.
                            This defaults to 'benchmark'. This argument is ignored
                            if the mode is not 'flamegraph'.
    -s, --seed FILE         (optional) A path to a Zeek random seed file.
                            This is used control the generation of connection
                            IDs and other data so it is consistent between
                            benchmarking runs.

    By default or when 'intf' is passed for the mode argument, the output will
    include CPU, memory, etc statistics from Zeek processing all of the data
    in the data file as if it was reading it live from the network. This mode
    requires an interface to be passed using the -i argument.

    When 'file' is passed for the mode (-m) argument, the output will include
    the runtime and maximum memory usage of Zeek when reading the data file
    directly from disk.

    When 'flamegraph' is passed for the mode (-m) argument, this script will
    output two flamegraphs for the process runtime in svg format. The first
    flamegraph is a standard graph showing the time spent in functions,
    stacked in the normal manner. The second graph is 'stack-reversed'.

    Symbols in Flamegraph outputs may not correctly stack unless the various
    libraries linked into Zeek are built with frame pointers. This includes
    glibc, libpcap, and openssl. Rebuilding those libraries with the
    -fno-omit-frame-pointer compiler flag may provide more accurate output.
    You can set libraries that get preloaded by setting the PRELOAD_LIBS
    varaible in the script.

    This script assumes that it is being run on a system with a large number
    of CPU cores. If being used on a smaller system, modify this script and
    set the ZEEK_CPU and TCPREPLAY_CPU variables to smaller values.
"

    echo "${usage}"
    exit 1
}

while (( "$#" )); do
  case "$1" in
      -d|--data-file)
	  DATA_FILE=$2
	  shift 2
	  ;;
      -b|--build)
	  ZEEK_BUILD=$2
	  shift 2
	  ;;
      -m|--mode)
	  MODE=$2
	  shift 2
	  ;;
      -i|--interface)
	  INTERFACE=$2
	  shift 2
	  ;;
      -f|--flamegraph)
	  FLAMEGRAPH_PATH=$2
	  shift 2
	  ;;
      -o|--output)
	  FLAMEGRAPH_PREFIX=$2
	  shift 2
	  ;;
      -s|--seed)
	  SEED_FILE=$2
	  shift 2
	  ;;
  esac
done

if [ "${MODE}" != "intf" -a "${MODE}" != "file" -a "${MODE}" != "flamegraph" ]; then
    echo "Error: -m argument should be one of 'intf', 'file', or 'flamegraph'"
    echo
    usage
fi

if [ -z "${ZEEK_BUILD}" ]; then
    echo "Error: -b argument is required and should point at a Zeek binary"
    echo
    usage
fi

if [ -z "${DATA_FILE}" ]; then
    echo "Error: -d argument is required and should point at a pcap file to replay"
    echo
    usage
fi

if [ "${MODE}" != "file" -a -z "${INTERFACE}" ]; then
    echo "Error: -i argument is required for the ${MODE} mode and should point to an idle network interface"
    echo
    usage
fi

# Various run-time options
ZEEK_CPU=10
TCPREPLAY_CPU=11
PRELOAD_LIBS=""

ZEEK_ARGS=""
if [ "${MODE}" != "file" ]; then
    ZEEK_ARGS="-i af_packet::${INTERFACE}"
fi

if [ -n "${SEED_FILE}" ]; then
    ZEEK_ARGS="${ZEEK_ARGS} -G ${SEED_FILE}"
fi

if [ "${MODE}" = "intf" ]; then

    TIME_FILE=$(mktemp)

    echo "####### Testing reading data file from a network interface #######"
    echo "Running '${ZEEK_BUILD} ${ZEEK_ARGS}' against ${DATA_FILE}"
    # Start zeek, find it's PID, then wait 10s to let it reach a steady state
    taskset --cpu-list $ZEEK_CPU time -f "%M" -o $TIME_FILE $ZEEK_BUILD $ZEEK_ARGS &
    TIME_PID=$!

    sleep 5
    ZEEK_PID=$(ps -ef | awk -v timepid="${TIME_PID}" '{ if ($3 == timepid) { print $2 } }')
    renice -20 -p $ZEEK_PID > /dev/null
    sleep 5
    echo "Zeek running on PID ${ZEEK_PID}"

    # Start perf stat on the zeek process
    perf stat -p $ZEEK_PID &
    PERF_PID=$!

    # Start replaying the data
    echo "Starting replay"
    taskset --cpu-list $TCPREPLAY_CPU tcpreplay -i $INTERFACE -q $DATA_FILE

    # Capture the average CPU usage of the process
    CPU_USAGE=$(ps -p $ZEEK_PID -o %cpu=)

    # Kill everything
    echo
    kill -2 $ZEEK_PID
    wait $TIME_PID
    wait $PERF_PID

    echo "Maximum memory usage (max_rss): $(head -n 1 ${TIME_FILE}) bytes"
    echo "Average CPU usage: ${CPU_USAGE}%"

    rm $TIME_FILE

elif [ "${MODE}" = "file" ]; then

    TIME_FILE=$(mktemp)

    echo "####### Testing reading the file directly from disk #######"
    taskset --cpu-list $ZEEK_CPU time -f "%e %M" -o $TIME_FILE $ZEEK_BUILD $ZEEK_ARGS -r $DATA_FILE
    TIME_PID=$!
    ZEEK_PID=$(ps -ef | awk -v timepid="${TIME_PID}" '{ if ($3 == timepid) { print $2 } }')
    renice -20 -p $ZEEK_PID > /dev/null
    awk '{print "Time spent: " $1 " seconds\nMax memory usage: " $2 " bytes"}' $TIME_FILE

    rm $TIME_FILE

elif [ "${MODE}" = "flamegraph" ]; then

    echo "####### Generating flamegraph data #######"

    PERF_RECORD_FILE=$(mktemp)
    PERF_COLLAPSED_FILE=$(mktemp)

    # Start zeek under perf record, then sleep for a few seconds to let it actually start up. For runs with
    # shorter amounts of data or with slower traffic, you can add '-c 499' here to get finer-grained results.
    # With big data sets, it just results in the graph getting blown out by waits in the IO loop.
    LD_PRELOAD=${PRELOAD_LIBS} perf record -g -o $PERF_RECORD_FILE -- $ZEEK_BUILD $ZEEK_ARGS &
    PERF_PID=$!

    sleep 5

    ZEEK_PID=$(ps -ef | awk -v perfpid="${PERF_PID}" '{ if ($3 == perfpid) { print $2 } }')
    echo "Zeek running on PID ${ZEEK_PID}"

    # Start replaying the data
    echo "Starting replay"
    taskset --cpu-list $TCPREPLAY_CPU tcpreplay -i $INTERFACE -q $DATA_FILE

    # Kill everything
    echo
    kill -2 $ZEEK_PID
    wait $PERF_PID

    echo
    echo "####### Collapsing perf stack data #######"
    perf script -i $PERF_RECORD_FILE | ${FLAMEGRAPH_PATH}/stackcollapse-perf.pl > $PERF_COLLAPSED_FILE
    echo "####### Building normal flamegraph, writing to ${FLAMEGRAPH_PREFIX}.svg #######"
    cat $PERF_COLLAPSED_FILE | ${FLAMEGRAPH_PATH}/flamegraph.pl > "${FLAMEGRAPH_PREFIX}.svg"
    echo "####### Building reverse flamegraph, writing to ${FLAMEGRAPH_PREFIX}-reversed.svg #######"
    cat $PERF_COLLAPSED_FILE | ${FLAMEGRAPH_PATH}/flamegraph.pl --reverse > "${FLAMEGRAPH_PREFIX}-reversed.svg"

    rm $PERF_RECORD_FILE
    rm $PERF_COLLAPSED_FILE

fi
