#!/bin/sh
#
# Copyright (c) 2006-2007 Simon L. Nielsen <simon@FreeBSD.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# Out-of-band File Verification (OOBFV)
#
# This program generates checksums for a CVS repository on a master
# repository which can then be used to verify that mirrored
# repositories have not been tampered with.  sha256 checksums are used
# for verification of files.

# The "sumfile" is a plain text file with one entry per line.  Each
# line has the format:
#
# filename;file-sha256;rlog-sha256;head-sha256;modtime
#
# - filename Name of ,v file in repository, relative to repository root
# - file-sha256 Checksum of ,v file
# - rlog-sha256 Checksum of "rlog output" (i.e. sha256 of the rlog of
#   filename)
# - head-sha256 Checksum of the "HEAD" version of the file
# - modtime Modification time of the ,v file (output of "stat -f %m")

# TODO's:
# - Check if $TMPDIR has enough space

# Root of the CVS repository; file names in the sumfile are relative to it.
REPO="/home/ncvs"
# Vendor tag; used to build the collection name and the key file names.
VENDOR="freebsd"
COLLECTION="${VENDOR}-repo"
# Expected sha256 of the public key file (compared in publickey_verify).
PUBKEY_KEYPRINT="25cbedae4cdf497e896c92336134276b8bc9911ac716a7fcc3ab153772bf54a4"
# rsync destination used when exporting sums, signatures and the public key.
UPLOAD_PATH="oobfv:public_rsync"

# Field separator used inside sumfile lines.
SEP=";"
# Working directory; the script changes into it before doing any work.
DBDIR="/var/db/oobfv"
LASTTS="lastrun" # Timestamp of last run
TMPTS="currun" # Timestamp of start of current run
# Glob of cvsup log files, handed to do_cvsuplog_repo_run by check_sums.
CVSUP_LOGS="/var/log/cvsup.log*"

# "YES" enables debug() output; also set by the -v option.
VERBOSE="NO"
# "YES" keeps the sumfile under Mercurial control inside ${DBDIR}.
USE_HG="YES"
HG="hg"
HG_CMD="${HG} --noninteractive"

# "Dependent" variables...
SUMFILE="${COLLECTION}-sums"
PRIVKEY="${DBDIR}/key-${VENDOR}"
PUBKEY="${DBDIR}/key-${VENDOR}.pub"
# rsync URL from which the public key, signature and sums are fetched.
DISTSITE="rsync://oobfv.nitro.dk/oobfv"

# No surprises, thanks
LC_ALL="C"; export LC_ALL

###############
# Common / generic operations

# Write a debug message to stderr.  Nothing is printed unless VERBOSE
# is exactly "YES".
debug() {
    case "$VERBOSE" in
    YES)
        echo "$@" 1>&2
        ;;
    esac
}

# Informational message for the user; always printed, always on stderr.
info() {
    >&2 echo "$@"
}

# Error message; printed on stderr, does not terminate the script.
err() {
    >&2 echo "$@"
}

# Unrecoverable error: report the message through err() with a "Fatal:"
# prefix and terminate the (sub)shell with exit status 2.
fatal() {
    set -- "Fatal:" "$@"
    err "$@"
    exit 2
}

# Create a signature for a file.
#
# Arguments: $1 - signature date (stored in the signed line)
#            $2 - file to sign
#            $3 - output file for the signature
# Globals:   PRIVKEY (read)
#
# The signed payload is the line "oobfv|<date>|SHA256|<sha256 of file>".
# Returns 0 on success; any failure is fatal.
sign_create() {
    local SUM SIGFILE SIGLINE SIGDATE F
    SIGDATE=$1
    F=$2
    SIGFILE=$3

    [ -r "${F}" ] || fatal "File to sign ${F} is not readable"
    [ -n "${SIGDATE}" ] || fatal "Signature date not specified"
    [ -n "${SIGFILE}" ] || fatal "Signature file not specified"

    debug "Generating sha256 for ${F}"
    SUM=$(sha256 -q "${F}") || fatal "Could not generate sha256 for ${F}"

    SIGLINE="oobfv|${SIGDATE}|SHA256|${SUM}"
    # Without pipefail the status tested here is that of openssl.
    if ! printf '%s\n' "${SIGLINE}" | \
        openssl rsautl -inkey "${PRIVKEY}" -sign -out "${SIGFILE}"; then
        fatal "Could not sign ${F}"
    fi
    # Explicit success status; a trailing "[ ... ] && fatal" would make
    # the function return 1 even when signing worked.
    return 0
}

# Recover the signed line from a signature file using the public key
# and print it on stdout.
#
# Arguments: $1 - signature file
# Globals:   PUBKEY (read)
#
# Failure to verify is fatal (exit status 2); when called inside a
# command substitution only that subshell exits, so callers must check
# the status.
sign_extract() {
    local F SIGINFO
    F=$1

    SIGINFO=$(openssl rsautl -pubin -inkey "${PUBKEY}" -verify -in "${F}") ||
        fatal "Could not get signature sum from ${F}"
    printf '%s\n' "${SIGINFO}"
}

# Check the signature of a file against its detached signature.
#
# Arguments: $1 - file to check; the signature is read from "$1.sig"
# Globals:   PUBKEY (read)
#
# The signature payload written by sign_create has the form
# "oobfv|<date>|SHA256|<sum>", so the checksum is the last field of the
# recovered line.  A payload that does not have that form is compared
# as a bare checksum.  Returns 0 when the checksum matches; any failure
# is fatal.
sign_check() {
    local F SUM SIGINFO SIGSUM
    F=$1

    SIGINFO=$(openssl rsautl -pubin -inkey "${PUBKEY}" -verify -in "${F}.sig") ||
        fatal "Could not get signature sum from ${F}.sig"
    case "${SIGINFO}" in
    "oobfv|"*"|SHA256|"*)
        SIGSUM=${SIGINFO##*|}
        ;;
    *)
        SIGSUM=${SIGINFO}
        ;;
    esac

    debug "Generating sha256 for ${F}"
    SUM=$(sha256 -q "${F}") || fatal "Could not generate sha256 for ${F}"

    [ "${SUM}" = "${SIGSUM}" ] || fatal "Invalid checksum for ${F}"
    return 0
}

# Run a command on every repository file mentioned in cvsup log files.
#
# Arguments: $1   - command to run; it is invoked as "$CMD <entry>"
#            $2.. - cvsup log files to scan
# Globals:   REPO (read)
#
# Log lines that do not start with exactly one space are dropped, the
# leading action word (and an optional "to ") and a trailing
# " -> Attic" are stripped, and only entries below one of the top-level
# names found in ${REPO} are kept.
#
# NOTE(review): the filter keeps only lines containing ",vcd " after
# the top-level name.  That looks like it may have been meant to match
# names ending in ",v" -- confirm against real cvsup logs.
do_cvsuplog_repo_run() {
    local CMD rdirs log entry

    CMD="$1"; shift

    # Collect the top-level names in a subshell so the caller's working
    # directory is untouched and nothing is printed on stdout (a plain
    # "cd -" would print the directory it returns to).
    rdirs=$(cd "${REPO}" && echo * | tr ' ' '|') ||
        fatal "Could not list top-level directories in ${REPO}"

    for log in "$@"; do
	sed -Ee '/^[^ ]/d' -e '/^  /d' -e 's/^ [^ ]+ (to )?//' \
	    -e 's/ -> Attic$//' "$log" | \
	    grep -E "^(${rdirs}).*,vcd " | while read entry; do
	    $CMD $entry
	done
    done
}

# Run a command on all ,v files in the repository.
#
# Arguments: $1 - command to run; invoked once per file with the file
#                 name (relative to ${REPO}) as its single argument
#            $2 - extra find(1) primaries, split on whitespace
# Globals:   REPO (read), PARTIALDIR (read; restricts the search to
#            that sub-directory when non-empty)
#
# Everything runs in a subshell so the caller's working directory and
# shell options are left alone.
do_repo_run() {
    local CMD FOPTS DIR

    CMD="$1"
    FOPTS="$2"

    if [ -n "$PARTIALDIR" ]; then
	DIR="./${PARTIALDIR}"
    else
	DIR="."
    fi

    (
	# Never walk whatever directory we happen to be in.
	cd "${REPO}" || fatal "Could not change directory to ${REPO}"
	# Globbing is disabled so that patterns inside ${FOPTS} (which is
	# deliberately expanded unquoted) reach find untouched.
	set -f

	debug "Running find on ${REPO} with opts '${FOPTS}'"
	find -E "$DIR" -name '*,v' ${FOPTS} | sed -Ee 's#^\./##' | \
	    while IFS= read -r file; do
	    $CMD "$file"
	done
    )
}

########################################################################
# Misc operations

# Verify the preconditions for a run: ${DBDIR} must exist and, when
# USE_HG is "YES", the ${HG} command must be available.  Any violation
# is fatal.
run_sanity_checks() {
    if [ ! -d "${DBDIR}" ]; then
	fatal "${DBDIR} does not exist"
    fi

    # "command -v" is the portable way to look up a command; which(1)
    # is not guaranteed to exist or to return a usable status.
    if [ "${USE_HG}" = "YES" ] && ! command -v "${HG}" >/dev/null 2>&1; then
	fatal "USE_HG set, but HG could not be found"
    fi
}

########################################################################
# Sumfile operations

# Bootstrap export: upload the public key to ${UPLOAD_PATH}.
#
# Globals: PUBKEY (read), UPLOAD_PATH (read)
# Returns 0 on success; an unreadable key or a failed upload is fatal.
sumfile_export0() {
    [ -r "${PUBKEY}" ] || fatal "Cannot read public key ${PUBKEY}"
    info "Uploading public key..."
    # -L: upload the key file itself even if ${PUBKEY} is a symlink.
    rsync -L "${PUBKEY}" "${UPLOAD_PATH}/$(basename "${PUBKEY}")" ||
        fatal "Cannot upload public key"
    return 0
}

# Export the current sumfile: compress it, sign both the plain and the
# compressed copy, and upload sums and signatures to ${UPLOAD_PATH}.
#
# Globals: SUMFILE, COLLECTION, UPLOAD_PATH (read);
#          EXPTIME, EXPSUMFILE (written)
#
# The exported files are named "${SUMFILE}-<epoch>" so that the
# signature (which carries the same epoch) identifies the matching sums
# file.  Temporary export files are removed even when the upload fails;
# a failed upload is then reported as a fatal error.
sumfile_export() {
    local upload_failed

    EXPTIME=$(date +%s)

    EXPSUMFILE=${SUMFILE}-${EXPTIME}
    rm -f "${EXPSUMFILE}.bz2"
    if ! bzip2 -9 < "${SUMFILE}" > "${EXPSUMFILE}.bz2" ||
        [ ! -r "${EXPSUMFILE}.bz2" ]; then
	fatal "Error compressing ${SUMFILE}"
    fi
    ln -s "${SUMFILE}" "${EXPSUMFILE}" ||
        fatal "Error linking ${SUMFILE} ${EXPSUMFILE}"

    sign_create "$EXPTIME" "${EXPSUMFILE}" "${COLLECTION}-latest.ssl"
    sign_create "$EXPTIME" "${EXPSUMFILE}.bz2" "${COLLECTION}-latest-bz2.ssl"

    # Hack for now...
    chmod 644 "${SUMFILE}"

    info "Uploading..."
    upload_failed=""
    # Publish the signatures only after the sums they refer to are in
    # place, so "latest" never points at a file that was not uploaded.
    if rsync -L "${EXPSUMFILE}" "${EXPSUMFILE}.bz2" "${UPLOAD_PATH}" &&
        rsync "${COLLECTION}-latest.ssl" "${COLLECTION}-latest-bz2.ssl" \
            "${UPLOAD_PATH}"; then
	:
    else
	upload_failed="YES"
    fi
    # tar cf ${SUMFILE}.tar ${SUMFILE}.bz2 ${SUMFILE}.bz2.sig

    # Cleanup
    rm -f "${EXPSUMFILE}.bz2"
    rm -f "${EXPSUMFILE}" "${COLLECTION}-latest.ssl" \
        "${COLLECTION}-latest-bz2.ssl"

    [ -z "${upload_failed}" ] ||
        fatal "Error uploading ${SUMFILE} to ${UPLOAD_PATH}"
    return 0
}

# Check the signature of the local sumfile.  Delegates to sign_check,
# which reads the signature from "${SUMFILE}.sig" and treats any
# mismatch as fatal.
sumfile_checksig() {
    sign_check ${SUMFILE}
}

# Make sure the local public key is the one we expect: its sha256 must
# equal ${PUBKEY_KEYPRINT}.  An unreadable key or a fingerprint
# mismatch is fatal.
publickey_verify() {
    local digest

    if [ ! -r "${PUBKEY}" ]; then
	fatal "Cannot read public key ${PUBKEY}"
    fi

    digest=$(sha256 -q ${PUBKEY})
    if [ "${PUBKEY_KEYPRINT}" != "$digest" ]; then
	fatal "Fingerprint for public key is incorrect"
    fi
    debug "Verified public key"
}

# Fetch the public key from ${DISTSITE} unless a readable copy already
# exists at ${PUBKEY}, then verify its fingerprint.
#
# Globals: PUBKEY, DISTSITE (read)
# A failed download or a failed verification is fatal.
publickey_fetch() {
    local PUBKEY_URL

    # Already have a key; the caller verifies it separately.
    [ -r "${PUBKEY}" ] && return 0

    PUBKEY_URL="${DISTSITE}/$(basename "${PUBKEY}")"
    info "Fetching public key ${PUBKEY_URL}..."
    rsync -L "${PUBKEY_URL}" "${PUBKEY}" ||
        fatal "Could not fetch public key from ${PUBKEY_URL}"
    publickey_verify
}

# Fetch the latest signature and the sum collection it refers to from
# ${DISTSITE}, and verify that the collection matches the signature.
#
# Globals: DISTSITE, COLLECTION, DBDIR, SUMFILE (read);
#          SIG_URL, SIG_BINF, SIGINFO, SIG_MAGIC, SIG_DATE, SIG_MD_TYPE,
#          SIG_SUM, SUMFILE_RPATH, NSUMFILE, NSUMFILE_SUM (written)
#
# The verified sums are left in ${NSUMFILE}
# ("${DBDIR}/${SUMFILE}.<signature date>").  Every failure is fatal.
#
# XXX, I think read could be used to parse sigline
sumfile_fetch() {
    SIG_URL="${DISTSITE}/${COLLECTION}-latest.ssl"
    # Scratch file for the binary signature; created in the temporary
    # directory rather than in the current directory.
    SIG_BINF=$(mktemp "${TMPDIR:-/tmp}/oobfv-sig.XXXXXX") ||
        fatal "Error generating temporary file"

    # Fetch signature
    info "Fetching signature ${SIG_URL}"
    if ! rsync -q "${SIG_URL}" "${SIG_BINF}"; then
	rm -f "${SIG_BINF}"
	fatal "Could not download signature file from ${SIG_URL}"
    fi

    # Check/extract signature.  The scratch file is removed on both the
    # success and the failure path.
    if ! SIGINFO=$(sign_extract "${SIG_BINF}"); then
	rm -f "${SIG_BINF}"
	fatal "Invalid signature from ${SIG_URL}"
    fi
    rm -f "${SIG_BINF}"

    # Format: SIGLINE="oobfv|${SIGDATE}|SHA256|${SUM}"
    SIG_MAGIC=$(printf '%s\n' "${SIGINFO}" | cut -f 1 -d \|)
    if [ "$SIG_MAGIC" != oobfv ]; then
	fatal "Bad signature file; magic oobfv != $SIG_MAGIC"
    fi
    SIG_DATE=$(printf '%s\n' "${SIGINFO}" | cut -f 2 -d \|)
    SIG_MD_TYPE=$(printf '%s\n' "${SIGINFO}" | cut -f 3 -d \|)
    SIG_SUM=$(printf '%s\n' "${SIGINFO}" | cut -f 4 -d \|)

    # The date becomes part of a remote and a local path name; the
    # exporter writes it with "date +%s", so accept digits only.
    case "${SIG_DATE}" in
	''|*[!0-9]*)
	    fatal "Bad signature file; invalid date ${SIG_DATE}"
	    ;;
    esac

    SUMFILE_RPATH="${DISTSITE}/${COLLECTION}-sums-${SIG_DATE}"
    NSUMFILE="${DBDIR}/${SUMFILE}.${SIG_DATE}"

    # Fetch the sum collection
    info "Fetching sum collection ${SUMFILE_RPATH}..."
    rsync "${SUMFILE_RPATH}" "${NSUMFILE}" ||
        fatal "Could not download sum collection from ${SUMFILE_RPATH}"

    # In theory we might have other message-digest types
    case "${SIG_MD_TYPE}" in
	SHA256)
	    NSUMFILE_SUM=$(sha256 -q < "${NSUMFILE}")
	    ;;
	*)
	    fatal "Unsupported checksum type ${SIG_MD_TYPE}"
	    ;;
    esac

    if [ "${NSUMFILE_SUM}" != "${SIG_SUM}" ]; then
	fatal "Invalid checksum on fetched sums file"
    fi

    debug "Signature on sumfile verified successfully"
}

########################################################################
# Sumfile generation operations

# Print the sumfile entry for one ,v file.
#
# Arguments: $1 - name of the ,v file, relative to ${REPO}
# Globals:   REPO, SEP (read)
# Output:    "<name><SEP><file sum><SEP><rlog sum><SEP><HEAD sum><SEP><mtime>"
#
# Any failure is fatal.  For the two pipelines the status that is
# checked is that of sha256 (the last stage).
sum_file() {
    local F FF Vs Fs Hs Ms

    F=$1
    FF=${REPO}/${F}

    [ -r "$FF" ] || fatal "Could not read ,v file $FF"

    debug "Processing $F"

    # NOTE(review): the original comment here read 'We need to string
    # "RCS file:"' -- presumably a reminder to strip the "RCS file:"
    # line from the rlog output before hashing; confirm.
    Vs=$(rlog "$FF" | sha256 -q) ||
        fatal "Failed to generate rlog checksum for $FF ($?)"
    Fs=$(sha256 -q "$FF") ||
        fatal "Failed to generate checksum for $FF ($?)"
    Hs=$(co -ko -q -p "$FF" | sha256 -q) ||
        fatal "Failed to generate checksum for $FF HEAD ($?)"
    Ms=$(stat -f %m "$FF") ||
        fatal "Failed to get modification time for $FF ($?)"

    echo "${F}${SEP}${Fs}${SEP}${Vs}${SEP}${Hs}${SEP}${Ms}"
}

# Generate the sumfile header: a "#" comment line with the generation
# date and host, a "% fmt=" line naming the fields, and a "% gendate="
# line with the epoch time.  Note that the caller sorts these lines.
gen_hdr() {
    printf '%s\n' "# Generated $(date) on $(hostname)"
    printf '%s\n' \
        "% fmt=filename${SEP}file-sha256${SEP}rlog-sha256${SEP}head-sha256${SEP}modtime"
    printf '%s\n' "% gendate=$(date +%s)"
}

# Create sums for a CVS repository
#
# Generates sums for the repository (optionally only for files newer
# than the last run and/or matching ${FILTER}), merges them with an
# existing ${SUMFILE} and installs the result as ${SUMFILE} in the
# current directory.
#
# Globals read:    INCREMENTAL, FILTER, LASTTS, TMPTS, SUMFILE
# Globals written: tmpfile_gen, tmpfile_merged, FINDOPTS, sumfile_new
create_repo_sums() {
    tmpfile_gen=$(mktemp -t tmpsum-gen)
    [ $? -ne 0 ] && fatal "Error generating temporary file"
    tmpfile_merged=$(mktemp -t tmpsum-merged)
    [ $? -ne 0 ] && fatal "Error generating temporary file"

    # Incremental mode only looks at files newer than the timestamp
    # file of the previous successful run.
    # NOTE(review): FINDOPTS is not reset first, so a value already
    # present in the environment is kept -- confirm that is intended.
    if [ "${INCREMENTAL}" = "YES" -a -e ${LASTTS} ]; then
	FINDOPTS="-newer `realpath ${LASTTS}`"
    fi
    if [ -n "${FILTER}" ]; then
	FINDOPTS="${FINDOPTS} -regex ${FILTER}"
    fi

    # We record the start of our run since that's the timestamp which
    # should be used for the next incremental run.
    touch ${TMPTS}

    # Start actual sumfile generation.
    gen_hdr | sort > ${tmpfile_gen}
    # The status tested below is that of sort, the last pipeline stage;
    # a failure inside do_repo_run itself is not seen here.
    do_repo_run sum_file "${FINDOPTS}" | sort >> ${tmpfile_gen}
    if [ $? -ne 0 ]; then
	rm -f ${TMPTS} ${tmpfile_gen} ${tmpfile_merged}
	fatal "Error running summing repository"
    fi

    # Merge new sums with old sumfile, if needed
    if [ -e ${SUMFILE} ]; then
	debug "Merging new and old sumfiles ${tmpfile_gen} ${SUMFILE}"
	# We ignore all "comment" lines in the original file so they
	# can be overwritten by the new file.  uniq only drops identical
	# lines, so an entry whose sums changed is kept next to the old
	# entry for the same file.
	grep -E '^[^#%]' ${SUMFILE} | sort -m - ${tmpfile_gen} | uniq \
	    > ${tmpfile_merged}
	if [ $? -ne 0 ]; then
	    rm -f ${tmpfile_gen} ${tmpfile_merged}
	    fatal "Failed to merge $tmpfile_gen and $SUMFILE to $tmpfile_merged"
	fi

	# Store the old file (at least for now)
	mv ${SUMFILE} ${SUMFILE}.`date +%s`

	sumfile_new="${tmpfile_merged}"
    else
	sumfile_new="${tmpfile_gen}"
    fi

    debug "Installing new sumfiles ${sumfile_new} as ${SUMFILE}"
    mv ${sumfile_new} ${SUMFILE}
    if [ $? -ne 0 ]; then
	rm -f ${tmpfile_gen} ${tmpfile_merged}
	fatal "Failed to install ${sumfile_new} as $SUMFILE"
    fi
    # Only still present when the merged file was the one installed.
    if [ -e "${tmpfile_gen}" ]; then
	rm ${tmpfile_gen}
    fi

    # Touch our timestamp now that we know we installed the new database.
    touch -r ${TMPTS} ${LASTTS}
}

# Set up version control for the sumfile in ${DBDIR} on first use.
# Mercurial is the only backend for now: when USE_HG is "YES" and
# ${DBDIR}/.hg does not exist, a repository is created and ${SUMFILE}
# is added to it.
vcs_init() {
    case "${USE_HG}" in
    YES)
	;;
    *)
	return 0
	;;
    esac

    if [ ! -d ${DBDIR}/.hg ]; then
	info "hg repository not found, initializing"
	(
	    cd ${DBDIR}
	    ${HG_CMD} init
	    ${HG_CMD} add ${SUMFILE}
	)
    fi
}

# Commit the current ${SUMFILE} to the Mercurial repository in
# ${DBDIR} (only when USE_HG is "YES").  The commit message is built
# from the first line of the sumfile with its first two characters
# removed.
vcs_commit() {
    [ "${USE_HG}" = "YES" ] || return 0

    (
	cd ${DBDIR}
	${HG_CMD} commit \
	    -m "Auto commit: $(head -n 1 ${SUMFILE} | cut -c 3-)" ${SUMFILE}
    )
}

# Create sums for a CVS repository and record the result in version
# control: generate/merge the sumfile, make sure the VCS repository
# exists, then commit the new sumfile.
create_sumfile() {
    create_repo_sums
    vcs_init
    vcs_commit
}

########################################################################
# Check mode

# Create status files and prepare them so we can just cat(1) them to
# the user.
#
# Globals written: FMISMATCH_UNK, FMISMATCH_OLD, FMISMATCH_BAD,
#                  FMISMATCH_BAD_HEADOK, FTMPALL (all four names, for
#                  cleanup)
# Failure to create any of the files is fatal.
init_result_tmpfiles() {

    # Mismatched files which we know nothing about.
    FMISMATCH_UNK=$(mktemp -t mismatch_unk) ||
        fatal "Error generating temporary file"

    # Mismatched files where our sum info is out-of-date (i.e. rlog
    # | sha256 is unknown).
    FMISMATCH_OLD=$(mktemp -t mismatch_old) ||
        fatal "Error generating temporary file"

    # Files which don't match our sums.
    FMISMATCH_BAD=$(mktemp -t mismatch_bad) ||
        fatal "Error generating temporary file"

    # Files which don't match our sums for the ,v file, but whose HEAD
    # version is OK.
    FMISMATCH_BAD_HEADOK=$(mktemp -t mismatch_bad_HEADOK) ||
        fatal "Error generating temporary file"

    FTMPALL="${FMISMATCH_UNK} ${FMISMATCH_OLD} ${FMISMATCH_BAD}"
    FTMPALL="${FTMPALL} ${FMISMATCH_BAD_HEADOK}"

    return 0
}

# Check if a file is valid - result will be put in $FMISMATCH_UNK etc.
# init_result_tmpfiles() must have been called before this function.
#
# Arguments: $1 - name of the ,v file, relative to ${REPO}
# Globals:   REPO, SEP, RSUMFILE, FMISMATCH_* (read)
#
# Classification of a file whose ,v checksum is not on record:
#   FMISMATCH_UNK         no sumfile entry for the file at all
#   FMISMATCH_OLD         entries exist, none with our rlog checksum
#   FMISMATCH_BAD_HEADOK  rlog checksum known and HEAD checksum matches
#   FMISMATCH_BAD         rlog checksum known, HEAD checksum differs
#
# This function is optimized for the common case, i.e. that we have a
# valid checksum, which means that we in the common case can skip a
# bunch of operations (forks).
check_file() {
    local l r lg F FF Fs Vs Hs

    F=$1
    FF=${REPO}/${F}

    # Sanity checks
    if [ -z "${FMISMATCH_UNK}" ]; then
	fatal "Internal error: init_result_tmpfiles() not called"
    fi
    [ -r "$FF" ] || fatal "Could not read ,v file $FF"

    # First we check if we already have the sha256 of the file "on record"
    Fs=$(sha256 -q "$FF") ||
        fatal "Failed to generate RCS file checksum for $FF ($?)"
    l=$(look "${F}${SEP}${Fs}${SEP}" "${RSUMFILE}")
    r=$?
    if [ $r -eq 0 ]; then
	debug "OK  : $F"
	# We found our file, no reason to go any further
	return 0
    elif [ $r -gt 1 ]; then
	fatal "Failed to search sumdb for $F ($r)"
    fi

    # The sumfile did not have a correct entry for the file, now we
    # will try to give a more useful answer of why that is.

    # Check if we know anything about $F in the first place...
    l=$(look "${F}${SEP}" "${RSUMFILE}")
    r=$?
    if [ $r -gt 1 ]; then
	fatal "Failed to search sumdb for $F ($r)"
    fi
    # look found no line at all: the file is unknown to the sumfile.
    if [ $r -ne 0 ]; then
	debug "WARN: Nothing is known about $F"
	echo "${F}" >> "${FMISMATCH_UNK}"
	return 0
    fi

    Vs=$(rlog "$FF" | sha256 -q) ||
        fatal "Failed to generate rlog checksum for $FF ($?)"

    # Search for an entry for $F with our local rlog checksum - if we
    # don't find that it's most likely that the sumfile simply is out
    # of date.  Fields are compared literally (third field is the rlog
    # checksum) so the file name is never interpreted as a regex.
    lg=$(printf '%s\n' "$l" | awk -F "${SEP}" -v vs="${Vs}" '$3 == vs')
    if [ -z "$lg" ]; then
	debug "WARN: Could not find up-to-date entry for $F"
	echo "${F}" >> "${FMISMATCH_OLD}"
	return 0
    fi

    Hs=$(co -ko -q -p "$FF" | sha256 -q) ||
        fatal "Failed to generate HEAD checksum for $FF ($?)"

    # HEAD is OK if any entry with our rlog checksum also carries our
    # HEAD checksum (fourth field); the file is reported exactly once.
    if printf '%s\n' "$lg" | \
        awk -F "${SEP}" -v hs="${Hs}" '$4 == hs { ok = 1 } END { exit !ok }'; then
	echo "$F" >> "${FMISMATCH_BAD_HEADOK}"
	debug "ERR : $F (but HEAD OK)"
    else
	echo "$F" >> "${FMISMATCH_BAD}"
	debug "ERR : $F"
    fi
    return 0
}

# Check the local repository against the sumfile and report the result.
#
# Globals: FILTER, CVSUP_INCR, CVSUP_LOGS (read); the FMISMATCH_* files
#          and FTMPALL are created by init_result_tmpfiles and removed
#          again at the end.
#
# File lists go to stdout, explanatory text to stderr (via info).
check_sums() {
    local FINDOPTS FOUNDPROB

    # Start from a clean slate: options left behind by an earlier
    # generate step (e.g. "-newer ...") must not narrow the check.
    FINDOPTS=""
    FOUNDPROB=""

    init_result_tmpfiles

    if [ -n "${FILTER}" ]; then
	FINDOPTS="-regex ${FILTER}"
    fi

    if [ "${CVSUP_INCR}" = "YES" ]; then
	# NOTE(review): this only echoes the file names found in the
	# cvsup logs; the check_file variant is commented out.
	#do_cvsuplog_repo_run check_file ${CVSUP_LOGS}
	do_cvsuplog_repo_run echo ${CVSUP_LOGS}
    else
	do_repo_run check_file "${FINDOPTS}"
    fi

    # Report to user
    # XXX Should be improved, e.g. use less
    if [ -s "${FMISMATCH_BAD}" ]; then
	info "The following files had ,v and HEAD checksum mismatch"
	cat "${FMISMATCH_BAD}"
	FOUNDPROB="YES"
    fi
    if [ -s "${FMISMATCH_BAD_HEADOK}" ]; then
	info "The following files had ,v but not HEAD checksum mismatch"
	cat "${FMISMATCH_BAD_HEADOK}"
	FOUNDPROB="YES"
    fi
    if [ -s "${FMISMATCH_UNK}" ]; then
	info "The following files were found in the local repository, but "
	info "nothing was known about them in the repository checksum file:"
	cat "${FMISMATCH_UNK}"
	FOUNDPROB="YES"
    fi
    if [ -s "${FMISMATCH_OLD}" ]; then
	info "The following files were found but the checksum info for "
	info "the files was out-of-date:"
	cat "${FMISMATCH_OLD}"
	FOUNDPROB="YES"
    fi

    if [ -z "${FOUNDPROB}" ]; then
	info "Did not find any problems in checked repository"
    fi

    # Cleanup (FTMPALL is a space-separated list, split on purpose)
    rm -f ${FTMPALL}
}


########################################################################
# Command line parsing / main

# Print the usage summary on stdout.
usage() {
    echo "Usage: [-vS] [-f filter] <-C|-G>"
}

# Parse the command line and set the mode/option variables.
#
#   -G  GENERATE=YES      -C  CHECK=YES        -E  EXPORT=YES
#   -F  FETCH=YES         -c  CVSUP_INCR=YES   -i  INCREMENTAL=YES
#   -v  VERBOSE=YES       -f <regex>  FILTER   -p <dir>  PARTIALDIR
#   -S  accepted for compatibility; it has no effect
#
# The getopts builtin is used so that option arguments containing
# whitespace survive intact and every option consumes exactly the
# words that belong to it.  An invalid option prints the usage text and
# exits with status 64.
parse_args() {
    local opt

    OPTIND=1
    while getopts civf:p:FEGCS opt; do
	case "$opt" in
	G)
	    GENERATE="YES"
	    ;;
	c)
	    CVSUP_INCR="YES"
	    ;;
	C)
	    CHECK="YES"
	    ;;
	E)
	    EXPORT="YES"
	    ;;
	F)
	    FETCH="YES"
	    ;;
	f)
	    FILTER="$OPTARG"
	    ;;
	p)
	    PARTIALDIR="$OPTARG"
	    ;;
	i)
	    INCREMENTAL="YES"
	    ;;
	v)
	    VERBOSE="YES"
	    ;;
	S)
	    ;;
	*)
	    usage
	    exit 64
	    ;;
	esac
    done
}

parse_args "$@"

# Main sequence: check the environment, then run the requested modes in
# the fixed order generate, export, fetch, check.
run_sanity_checks
RSUMFILE="${DBDIR}/${SUMFILE}"
# All relative names (sumfile, timestamps, export files) are resolved
# against ${DBDIR}; refuse to continue anywhere else.
cd "${DBDIR}" || fatal "Could not change directory to ${DBDIR}"

if [ -n "${GENERATE}" ]; then
    create_sumfile
fi
if [ "${EXPORT}" = "YES" ]; then
    sumfile_export0
    sumfile_export
fi
if [ "${FETCH}" = "YES" ]; then
    publickey_fetch
    publickey_verify
    sumfile_fetch
fi
if [ -n "${CHECK}" ]; then
    check_sums
fi


