#! /usr/bin/ksh
#
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright 2022 Marcel Telka
#


#
# usage [MESSAGE]
#
# Print the command synopsis on stderr and terminate the script.
# With a non-empty MESSAGE the message is reported as an error ahead of
# the synopsis and the exit status is 1; otherwise the exit status is 0.
#
function usage
{
	typeset reason="$1"
	typeset rc=0

	if [[ -n "$reason" ]] ; then
		printf "ERROR: %s\n\n" "$reason" >&2
		rc=1
	fi

	printf "Usage: license-detector [-d] [-l LICENSE] LICENSE_FILE\n" >&2

	exit $rc
}


#
# Command line processing.
#
#   -h          print the synopsis and exit successfully
#   -d          enable debug output (DEBUG=1)
#   -l LICENSE  restrict the template comparison to templates whose file
#               name ends in ".LICENSE"
#
# Exactly one operand, the license file to examine, must follow.
#

# Defaults: debugging off, compare against every template.
DEBUG=0
LICENSE="*"

# The leading ':' in the option string selects silent error reporting:
# getopts sets OPT to '?' for an unknown option and to ':' for a missing
# option argument; in both cases OPTARG holds the offending option letter.
while getopts ":hdl:" OPT ; do
	case "$OPT" in
	"h")		usage ;;
	"d")		DEBUG=1 ;;
	# glob pattern that is later expanded inside the licenses directory
	"l")		LICENSE="*.$OPTARG" ;;
	":")		usage "Option -$OPTARG requires an argument" ;;
	"?")
		# a literal "-?" is a request for help, anything else is an error
		[[ -z "$OPTARG" || "$OPTARG" == "?" ]] && usage
		usage "Unknown option -$OPTARG"
		;;
	esac
done
shift $((OPTIND - 1))

# No operand at all: just show the synopsis (exit status 0).
(($# == 0)) && usage
(($# > 1)) && usage "Too many arguments"

# The operand must be an existing, readable non-directory.
LICENSE_FILE="$1"
[[ -e "$LICENSE_FILE" ]] || usage "$LICENSE_FILE not found"
[[ -d "$LICENSE_FILE" ]] && usage "$LICENSE_FILE is directory"
[[ -r "$LICENSE_FILE" ]] || usage "Unable to read $LICENSE_FILE"

# Directory this script was started from; the license templates are looked
# up relative to it.  $0 is quoted so that paths with whitespace survive.
WS_TOOLS=$(dirname "$0")


#
# Phrase based detection.
#
# The Artistic License family and GPL version 1 are recognized by grepping
# the license file for characteristic phrases.  Every identifier found is
# appended to L; multiple identifiers are joined with " OR ".
#

F="$LICENSE_FILE"

# Start from a clean slate so that values of L or D inherited from the
# environment cannot leak into the reported result.
L=
D=

# Print the identifier of the Artistic License variant found in file $1.
# Nothing is printed when the file does not mention the Artistic License
# or when no known 1.0 variant is recognized.
function detect_artistic
{
	typeset file="$1"
	typeset id=

	grep -q -i "Artistic License" "$file" || return 0

	if grep -q -i "Artistic License.*2" "$file" ; then
		id="Artistic-2.0"
	else
		# The 1.0 variants are told apart by the wording of clause 7 and
		# by the number of the "THIS PACKAGE IS PROVIDED" clause.  The
		# checks run in a fixed order and a later hit overrides an
		# earlier one.
		grep -q "7\. C subroutines" "$file" \
		    && grep -q "10\. THIS PACKAGE IS PROVIDED" "$file" \
		    && id="Artistic-1.0-Perl"
		grep -q "7\. C or perl subroutines" "$file" \
		    && grep -q "10\. THIS PACKAGE IS PROVIDED" "$file" \
		    && id="Artistic-1.0-cl8"
		grep -q "7\. C or perl subroutines" "$file" \
		    && grep -q "9\. THIS PACKAGE IS PROVIDED" "$file" \
		    && id="Artistic-1.0"
	fi

	[[ -n "$id" ]] && printf "%s\n" "$id"
	return 0
}

# Print the GPL version 1 identifier for file $1, or nothing when the file
# does not carry the "Version 1, February 1989" line right after the
# license title.
function detect_gpl1
{
	typeset file="$1"
	typeset id="GPL-1.0-only"

	grep -A 1 "GNU GENERAL PUBLIC LICENSE" "$file" \
	    | grep -q "Version 1, February 1989" || return 0

	# The "or (at your option) any" wording shortly after the grant
	# sentence switches the identifier to the -or-later form.
	grep -A 2 "GNU General Public License as published by the" "$file" \
	    | grep -q "or (at your option) any" && id="GPL-1.0-or-later"

	printf "%s\n" "$id"
	return 0
}

D=$(detect_artistic "$F")
if [[ -n "$D" ]] ; then
	[[ -n "$L" ]] && L="$L OR "
	L="$L$D"
fi

D=$(detect_gpl1 "$F")
if [[ -n "$D" ]] ; then
	[[ -n "$L" ]] && L="$L OR "
	L="$L$D"
fi

#
# Template based detection.
#
# Every selected template in $WS_TOOLS/licenses is normalized by a
# generated shell pipeline, the license file is normalized by the very
# same pipeline, and the two results are compared with diff.  Identifiers
# of matching templates are appended to L, joined with " OR ".
#

# The temporary file holds the generated normalization pipeline.
TMPFILE=$(mktemp -q)
[[ -z "$TMPFILE" ]] && printf "ERROR: Temporary file creation failed\n" >&2 && exit 1
# Do not leave the temporary file behind, even when interrupted.
trap 'rm -f "$TMPFILE"' EXIT
trap 'exit 1' HUP INT TERM

# matched[ID] is set to the template path once ID has been detected
typeset -A matched
for l in "$WS_TOOLS"/licenses/$LICENSE ; do
	[[ -f "$l" ]] || continue
	# skip filters
	[[ "$l" != "${l%.filter}" ]] && continue

	# extract license identifier
	license_id="${l##*/}"
	license_id="${license_id#header.}"
	license_id="${license_id#license.}"
	# sanity check, this should never happen
	[[ -z "$license_id" ]] && continue

	# make sure we do not match one license twice
	# (the braces are required: without them the subscript is not parsed
	# and the test would always be true)
	[[ -n "${matched[$license_id]}" ]] && continue

	# First stage of the pipeline: convert line endings, drop form feeds,
	# empty whitespace-only lines, skip lines starting with '#', join the
	# remaining lines with spaces while keeping empty lines as separators,
	# then squeeze and trim whitespace and delete empty lines.
	cat <<#EOF > "$TMPFILE"
		dos2unix -ascii \\
			| tr -d '\\014' 2>/dev/null \\
			| LC_ALL=C sed -E -e 's/^[[:space:]]+\$//g' \\
			| awk '/^#/{next}/^\$/{\$0="\n"}1' ORS=' ' \\
			| LC_ALL=C sed -E -e 's/[[:space:]]+/ /g' -e 's/^ //' -e 's/ \$//' -e '/^\$/d' \\
	EOF
	# Remove some reStructuredText markup
	if [[ "${F%.rst}" != "$F" ]] ; then
		cat <<#EOF >> "$TMPFILE"
				| sed -e '/^\*\*\$/d' -e 's/^\*\*\([^*]\)/\1/' -e 's/\([^*]\)\*\*\$/\1/' -e 's/\([^*]\)\*\*\([^*]\)/\1\2/g' \\
		EOF
	fi
	# Punctuation - Quotes
	printf '\t| sed -e %cs/“/"/g%c -e %cs/”/"/g%c \\\n' "'" "'" "'" "'" >> "$TMPFILE"
	# Apply filter if any
	[[ -x "$l.filter" ]] && printf '\t| LC_ALL=C %s \\\n' "$l.filter" >> "$TMPFILE"
	# Final stage: lowercase, unify the URL scheme, then put one word per
	# line and let fmt refill the lines.
	cat <<#EOF >> "$TMPFILE"
			| LC_ALL=C tr '[:upper:]' '[:lower:]' \\
			| sed -e 's|http://|https://|g' \\
			| tr ' ' '\\n' | fmt
	EOF

	# The diff output is discarded unless debugging is enabled.
	REDIRECT="/dev/null"

	if ((DEBUG)) ; then
		REDIRECT="/dev/stdout"
		printf "[DBG] TEMPLATE %s\n" "${l##*/}"
		. "$TMPFILE" < "$l"
		printf "[DBG] FILE\n"
		. "$TMPFILE" < "$F"
		printf "[DBG] DIFFS\n"
	fi

	# Any difference between the normalized texts means no match.
	diff -i <(. "$TMPFILE" < "$l") <(. "$TMPFILE" < "$F") > "$REDIRECT" || continue

	matched[$license_id]="$l"

	[[ -n "$L" ]] && L="$L OR "
	L="$L$license_id"
done

rm -f "$TMPFILE"

# Report the result.  This list must stay the last command: its status is
# the exit status of the script, which is non-zero when nothing was detected.
[[ -n "$L" ]] && printf "%s\n" "$L"