#!/bin/sh
# $Id: indent-html,v 1.20 2022/10/01 13:48:41 tom Exp $
# vi:ts=4 sw=4
# -----------------------------------------------------------------------------
# Copyright 2011-2021,2022 by Thomas E. Dickey
#
#                         All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the
# sale, use or other dealings in this Software without prior written
# authorization.
# -----------------------------------------------------------------------------

# Run this script and every command it starts in the "C" locale.
LANG=C
LC_ALL=C
LC_CTYPE=C
export LANG LC_ALL LC_CTYPE

# Fixed-string grep command; a non-empty $FGREP from the environment wins.
FGREP=${FGREP:-grep -F}

# Defaults; the command-line options processed further down override them.
# doit: "no" (option -n) leaves the html-files untouched.
doit=yes
# show: "yes" (option -v) reports a diff or "unchanged" for each file.
show=no
# form: formatting option handed to tidy (-i, or "-wrap 4096" for -u).
form=-i
# wide: becomes "-wrap 132" with option -w.
wide=

# Private scratch directory.  Stop right away if it cannot be created,
# otherwise the paths derived below would point into the root directory.
temp=$(mktemp -d) || {
	echo "$0: cannot create temporary directory" >&2
	exit 1
}
# Single quotes defer the expansion of $temp until the trap fires, and the
# inner double quotes keep the path intact.  Only the exit trap cleans up;
# signals are turned into an exit (which runs the exit trap) so that the
# script stops instead of resuming without its scratch directory.
trap 'rm -rf "$temp"' 0
trap 'exit 1' 1 2 3 15

# Scratch files: tidy's result for the current html-file, the tidy
# configuration, and the awk post-processing script.
output=$temp/output
config=$temp/config
script=$temp/script

# Choose the tidy setup from the first line of "tidy -v".
#
# When that line contains "version", any one character, and then a digit
# 1-4, $config is created empty; the main loop tests "-s $config" and then
# runs tidy without a configuration file or awk filter.
#
# In every other case (including tidy printing nothing) $config enables
# "vertical-space" and $script receives an awk filter which
#   - collapses runs of empty lines into a single empty line,
#   - appends a line holding only </p>, </li> or </h1>..</h9> to the
#     previous line when that line ends with </a>,
#   - drops an empty line at the very end of the input.
#
# $config and $script are quoted so that a temporary directory whose name
# contains blanks or glob characters still works.
case $(tidy -v 2>/dev/null | head -n 1) in
*version?[1234]*)
	touch "$config"
	;;
*)
	cat >"$config" <<EOF
vertical-space: yes
EOF
	cat >"$script" <<'EOF'
BEGIN { last = ""; }
/^$/ {
	if ( $0 != last ) print last;
	last = $0;
	next;
}
/^[ \t]*<\/(p|li|h[1-9])>$/ {
	if ( match(last, "<[/]a>$") ) {
		gsub("^[ \t]*","");
		last = last $0;
		next;
	}
}
{
	if ( NR > 1 ) {
		print last;
	}
	last = $0;
}
END {
	if ( last != "" ) print last;
}
EOF
	;;
esac

# Write the option summary to standard output, then terminate the script
# with exit status 1.
usage() {
	printf '%s\n' \
		"usage: $0 [options] [html-files]" \
		"" \
		"options:" \
		"  -i   indent (default)" \
		"  -n   no-op" \
		"  -U   text uses UTF-8; use that in the charset" \
		"  -u   unindent (overrides -i)" \
		"  -v   verbose, showing diff" \
		"  -w   wrap at 132 columns (default: 80)"
	exit 1
}

# Normalize the command line with getopt(1).
#
# getopt's status has to be taken from the assignment: after
#	set -- `getopt ...`
# $? is the status of "set" (0), not of getopt, so an invalid option would
# slip through unnoticed.
if args=$(getopt 'inUuvw' $*)
then
	# Deliberately unquoted: getopt prints a blank-separated word list.
	set -- $args
else
	usage
fi

# A single remaining word means nothing was given on the command line
# (getopt is expected to leave just the "--" separator in that case).
if test $# = 1
then
	usage
fi

# Command used to install a reformatted file over the original.
# NOTE(review): "copy" is not a POSIX utility, presumably a site-local
# tool; it is still preferred when installed, otherwise fall back to cp.
if command -v copy >/dev/null 2>&1
then
	copy_cmd="copy -v"
else
	copy_cmd="cp -v"
fi

UTF8=no
# Walk the normalized argument list.  Option words adjust the settings;
# each html-file is processed as soon as it is reached, using the settings
# collected so far.  "$@" avoids splitting/globbing the words a second time.
for name in "$@"
do
	case $name in
	-i)
		form=-i
		;;
	-n)
		doit=no
		;;
	-U)
		UTF8=yes
		;;
	-u)
		form="-wrap 4096"
		;;
	-v)
		show=yes
		;;
	-w)
		wide="-wrap 132"
		;;
	--)
		;;
	*.htm|*.html|*.html.in)
		# Ask tidy for a strict doctype unless the file uses markup
		# (font color, framesets, base target) matched by the greps.
		DTD="--doctype strict"
		$FGREP '<font color='  "$name" >/dev/null && DTD=
		$FGREP '<frameset'     "$name" >/dev/null && DTD=
		$FGREP '<base target=' "$name" >/dev/null && DTD=

		# Character encoding option: -U forces UTF-8, otherwise it is
		# chosen from what file(1) reports.
		opts=
		what=$(file "$name")
		if [ "$UTF8" = yes ]
		then
			opts="-utf8"
		else
			case "$what" in
			"$name":*XML*)
				# $name is quoted so it is matched literally.
				opts="-utf8"
				;;
			*)
				opts="-ascii"
				;;
			esac
		fi

		# $opts, $DTD, $wide and $form are left unquoted on purpose:
		# each may be empty or hold two words.
		if [ -s "$config" ]
		then
			tidy -config "$config" $opts $DTD $wide $form < "$name"  2>/dev/null | \
				awk -f "$script" >"$output"
		else
			tidy $opts $DTD $wide $form < "$name"  2>/dev/null >"$output"
		fi
		if cmp -s "$name" "$output"
		then
			test "$show" = yes && echo "... unchanged $name"
		elif [ ! -s "$output" ]
		then
			# tidy (or the filter) produced nothing, e.g. because it
			# failed; never replace the original with an empty file.
			echo "... failed $name (no output from tidy)" >&2
		else
			test "$show" = yes && diff -u "$name" "$output" | sed -e "s,$output,UPDATE/${name##*/},"
			test "$doit" = yes && $copy_cmd "$output" "$name"
		fi
		rm -f "$output"
		;;
	*)
		echo "... skip $name"
		;;
	esac
done
