#!/bin/sh
# $Id: indent-html,v 1.18 2020/01/15 01:03:44 tom Exp $
# vi:ts=4 sw=4

# Run everything that follows in the C locale.
LANG=C; export LANG
LC_ALL=C; export LC_ALL
LC_CTYPE=C; export LC_CTYPE

# Defaults; the command-line options handled later in this file override them.
doit=yes	# "yes": replace files whose formatting changed; "no": leave them
show=no		# "yes": report unchanged files and show a diff of changed ones
form=-i		# layout option passed to tidy
wide=		# optional wrap-width option passed to tidy

# All intermediate files live in a private directory.  Stop at once if it
# cannot be created: with an empty $temp the paths below would point at "/".
temp=$(mktemp -d) || {
	echo "$0: cannot create a temporary directory" >&2
	exit 1
}

# Remove the work directory whenever the shell exits.  The trap text is
# single-quoted so that $temp is expanded (and quoted) when the trap runs.
trap 'rm -rf -- "$temp"' 0
# A signal must end the script (which in turn fires the exit trap) instead
# of letting it carry on after its work directory has been removed.
trap 'exit 129' 1
trap 'exit 130' 2
trap 'exit 131' 3
trap 'exit 143' 15

output=$temp/output		# reformatted copy of the file being processed
config=$temp/config		# tidy configuration; empty means "not needed"
script=$temp/script		# awk filter applied to tidy's output

# Look at the first line of tidy's version banner.  When it matches
# "version" followed by any character and one of the digits 1-4, the
# configuration file is left empty.  Anything else (including an empty
# banner because tidy could not be run) enables "vertical-space" and
# generates an awk filter that:
#   - squeezes each run of blank lines into a single blank line and drops
#     blank lines at the end of the input;
#   - joins a line holding only a closing </p>, </li> or </h1>..</h9> tag
#     onto the previous line when that line ends with </a>.
# NOTE(review): presumably this compensates for layout changes in newer
# tidy releases -- confirm against the tidy versions in use.
case $(tidy -v 2>/dev/null | head -n 1) in
*version?[1234]*)
	: >"$config"
	;;
*)
	cat >"$config" <<EOF
vertical-space: yes
EOF
	cat >"$script" <<'EOF'
BEGIN { last = ""; }
/^$/ {
	if ( $0 != last ) print last;
	last = $0;
	next;
}
/^[ \t]*<\/(p|li|h[1-9])>$/ {
	if ( match(last, "<[/]a>$") ) {
		gsub("^[ \t]*","");
		last = last $0;
		next;
	}
}
{
	if ( NR > 1 ) {
		print last;
	}
	last = $0;
}
END {
	if ( last != "" ) print last;
}
EOF
	;;
esac

# Print the command-line synopsis on standard output, one line per
# printf argument, then terminate the script with exit status 1.
usage() {
	printf '%s\n' \
		"usage: $0 [options] [html-files]" \
		"" \
		"options:" \
		"  -i   indent (default)" \
		"  -n   no-op" \
		"  -u   unindent (overrides -i)" \
		"  -v   verbose, showing diff" \
		"  -w   wrap at 132 columns (default: 80)"
	exit 1
}

# parse_options ARG...
#
# Apply every option found in ARG... to the settings doit, show, form and
# wide.  Options may be clustered ("-nv") and may appear anywhere in the
# list, so a trailing "-n" still prevents files from being rewritten; a
# "--" argument ends option processing.  An unknown option letter is
# reported on standard error and then usage is called.
#
# This replaces "set -- `getopt ...`; test $?", which tested the exit
# status of "set" rather than of getopt and split file names at whitespace.
parse_options() {
	for po_arg in "$@"
	do
		case $po_arg in
		--)
			break
			;;
		-?*)
			# Peel off one option letter at a time.
			po_flags=${po_arg#-}
			while test -n "$po_flags"
			do
				po_rest=${po_flags#?}
				po_flag=${po_flags%"$po_rest"}
				po_flags=$po_rest
				case $po_flag in
				i)
					form=-i
					;;
				n)
					doit=no
					;;
				u)
					form="-wrap 4096"
					;;
				v)
					show=yes
					;;
				w)
					wide="-wrap 132"
					;;
				*)
					echo "$0: illegal option -- $po_flag" >&2
					usage
					;;
				esac
			done
			;;
		esac
	done
}

# indent_file FILE
#
# Reformat FILE with tidy into $output, then, depending on $show and $doit,
# report the difference and/or replace FILE with the result.  Reads the
# settings form, wide, show, doit and the paths config, script, output.
indent_file() {
	name=$1

	# Keep a leading "-" from being taken as an option by the tools below.
	case $name in
	-*)
		name=./$name
		;;
	esac

	# Ask tidy for a strict doctype unless the file contains one of these
	# constructs.
	DTD="--doctype strict"
	grep -F -e '<font color=' -e '<frameset' -e '<base target=' "$name" >/dev/null && DTD=

	# Choose tidy's encoding option from what file(1) reports.  The name is
	# quoted so that it is compared literally rather than used as a pattern.
	case "$(file "$name")" in
	"$name":*XML*)
		opts="-utf8"
		;;
	*)
		opts="-ascii"
		;;
	esac

	# $opts, $DTD, $wide and $form are deliberately unquoted: each holds
	# zero or more whitespace-separated tidy arguments.
	if test -s "$config"
	then
		tidy -config "$config" $opts $DTD $wide $form <"$name" 2>/dev/null |
			awk -f "$script" >"$output"
	else
		tidy $opts $DTD $wide $form <"$name" 2>/dev/null >"$output"
	fi

	if test ! -s "$output"
	then
		# No output means tidy could not be run, gave up on the document,
		# or the file could not be read.  Never overwrite the original
		# with an empty result.
		echo "... failed $name" >&2
	elif cmp -s "$name" "$output"
	then
		if test "$show" = yes
		then
			echo "... unchanged $name"
		fi
	else
		if test "$show" = yes
		then
			# Show the temporary file under a friendlier name in the diff.
			diff -u "$name" "$output" | sed -e "s,$output,UPDATE/${name##*/},"
		fi
		if test "$doit" = yes
		then
			# NOTE(review): "copy" is not a standard utility; presumably a
			# site-local tool (or "cp" was intended) -- confirm.
			copy -v "$output" "$name"
		fi
	fi
	rm -f "$output"
}

# With no arguments at all there is nothing to do: show the synopsis.
if test $# = 0
then
	usage
fi

parse_options "$@"

# Second pass: everything that is not an option is a candidate file.
# After "--" every argument is treated as a file name.
seen_dashes=no
for name in "$@"
do
	if test "$seen_dashes" = no
	then
		case $name in
		--)
			seen_dashes=yes
			continue
			;;
		-?*)
			continue
			;;
		esac
	fi
	case $name in
	*.htm|*.html|*.html.in)
		indent_file "$name"
		;;
	*)
		echo "... skip $name"
		;;
	esac
done
