#!/usr/bin/env perl
# $Id: check-manpage,v 1.70 2022/07/14 19:51:01 tom Exp $
# -----------------------------------------------------------------------------
# Copyright 2002-2021,2022 by Thomas E. Dickey
#
#                         All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the
# sale, use or other dealings in this Software without prior written
# authorization.
# -----------------------------------------------------------------------------
# Scan a directory tree, looking for nroff (man/ms) files, to check their
# syntax as well as verify that their macros are consistent.

# If checknr were portable and handled manpages, it would be useful.

# TODO: utp notes that "echo .pm |nroff -man" gives list of predefinitions
# that could be filtered.  Solaris nroff gives a useful list with the
# predefined characters, groff - not so useful.  To get that, start with
# "man groff_char" for a list.
#
# For example, this is groff syntax
#	=       \[==]        equivalence     u2261       +
# but utp would say this:
#	=       \(==         identically equal
#
# Solaris recognizes these,
#	\(`` \('' \(** \(aa \(*b \(br \(bs \(bu \(da \(de \(dg \(em \(ga
#	\(hy \(lq \(mi \(or \(pd \(rg \(rn \(rq \(sl \(su \(ts \(ua \(ul
# since they are used in macros under
#	/usr/share/lib/tmac/an
#
# but the device tables (in the "charset" sections) recognize far more.
#
# Also, the traditional escapes mentioned in groff_char appear to work:
#	They include `\\', `\'', `\`', `\-', `\.', and `\e'; see groff(7).

use warnings;
use strict;
use diagnostics;

use Getopt::Std;

$| = 1;

our ( $opt_d, $opt_r, $opt_v, $opt_w, $opt_x );
our %predef;
our @predef = (
    ".de bP",
    '.ie n  .IP \(bu 4',
    '.el    .IP \(bu 2',
    '..',
    '.de NE',
    '.fi',
    '.ft R',
    '.ie n  .in -4',
    '.el    .in -2',
    '..',
    '.de NS',
    '.ie n  .sp',
    '.el    .sp .5',
    '.ie n  .in +4',
    '.el    .in +2',
    '.nf',
    '.ft C			\" Courier',
    '..',
    ".ie \\n(.g .ds AQ \\(aq",
    ".el       .ds AQ '",
    ".ie \\n(.g .ds `` \\(lq",
    ".el       .ds `` ``",
    ".ie \\n(.g .ds '' \\(rq",
    ".el       .ds '' ''",

    # ded.man
    '.de Es',
    '.ne \\\\$1',
    '.nr mE \\\\n(.f',
    '.RS 5n',
    '.sp .7',
    '.nf',
    '.nh',
    '.ta 9n 17n 25n 33n 41n 49n',
    '.ft CW',
    '..',
    '.de Eh',
    '.ft \\\\n(mE',
    '.fi',
    '.hy \\\\n(HY',
    '.RE',
    '.sp .7',
    '..',

    # dialog.1, dialog.3
    ".de ES",
    ".ne 8",
    ".IP",
    "..",
    ".de Ex",
    ".RS +7",
    ".PP",
    ".nf",
    ".ft CW",
    "..",
    ".de Ee",
    ".fi",
    ".ft R",
    ".RE",
    "..",

    # xterm.man
    ".de iP",
    ".br",
    ".if n .sp",
    "..",

    # cdk
    ".de It",
    ".br",
    '.ie \\\\n(.$>=3 .ne \\\\$3',
    ".el .ne 3",
    '.IP "\\\\$1" \\\\$2',
    "..",
    ".de XX",
    "..",

    # libXcursor
    ".de TA",
    ".ie n  .ta 0.8i 1.6i 2.4i 3.2i",
    ".el    .ta 0.5i 1.0i 1.5i 2.0i",
    "..",
    ".de PS",
    ".sp",
    ".ns",
    ".TP \\\\\$1",
    ".na",
    ".nf",
    ".ie n  .ta 0.8i 3.0i",
    ".el    .ta 0.5i 2.0i",
    "..",
    ".de PE",
    ".br",
    ".ad",
    ".fi",
    ".sp",
    ".TA",
    "..",
    ".de QS",
    ".in +.2i",
    ".nf",
    ".na",
    ".ie n  .ta 1.0i 3.0i",
    ".el    .ta 0.6i 2.0i",
    "..",
    ".de QC",
    ".QS",
    ".ie n  .ta 2.0i 3.0i",
    ".el    .ta 1.6i 2.6i",
    ".ft CR",
    "..",
    ".de QE",
    ".in -.2i",
    ".ft",
    ".fi",
    ".ad",
    ".TA",
    "..",
);

# man2html
our @predef_m2h = (

    # OPTION FLAG MACRO		.Of -x [arg]
    '.de Of',
    '.ie \\\n(.$==1      \%[\|\fB\\\$1\fR\|]',
    '.el .if \\\n(.$==2  \%[\|\fB\\\\$1\fR\0\fI\fI\\\\$2\fR\|]',
    '..',

    # SYNOPSIS START MACRO	.Ss name
    '.de Ss',
    '.na',
    '.nr aA \w\\\\$1\\\\0u',
    '.in +\\\\n(aAu',
    '\'ti -\\\\n(aAu',
    '.ta  \\\\n(aAu',
    '\&\fB\\\\$1\fR\t\c',
    '..',

    # SYNOPSIS END MACRO		.Se
    '.de Se',
    '.ad',
    '.in',
    '..',

    # bullet consistently narrow
    '.de b2',
    '.ie n  .IP \(bu 2',
    '.el    .IP \(bu 2',
    '..',
);

# libX11
our $assume_X11 = 0;
our @predef_X11 = (
    '.de Ds',
    '.nf',
    '.\\\\$1D \\\\$2 \\\\$1',
    '.ft CW',
    '.\\".ps \\\\n(PS',
    '.\\".if \\\\n(VS>=40 .vs \\\\n(VSu',
    '.\\".if \\\\n(VS<=39 .vs \\\\n(VSp',
    '..',
    '.de De',
    '.ce 0',
    '.if \\\\n(BD .DF',
    '.nr BD 0',
    '.in \\\\n(OIu',
    '.if \\\\n(TM .ls 2',
    '.sp \\\\n(DDu',
    '.fi',
    '..',
    '.de IN		\\" send an index entry to the stderr',
    '..',
    '.de Pn',
    '.ie t \\\\$1\\fB\\^\\\\$2\\^\\fR\\\\$3',
    '.el \\\\$1\\fI\\^\\\\$2\\^\\fP\\\\$3',
    '..',
    '.de ZN',
    '.ie t \\fB\\^\\\\$1\\^\\fR\\\\$2',
    '.el \\fI\\^\\\\$1\\^\\fP\\\\$2',
    '..',
    '.de hN',
    '.ie t <\\fB\\\\$1\\fR>\\\\$2',
    '.el <\\fI\\\\$1\\fP>\\\\$2',
    '..',

    # adapted from NS/NE, probably different from groff
    '.de EX',
    '.sp',
    '.nf',
    '.ft CW',
    '..',
    '.de EE',
    '.ft R',
    '.fi',
    '.sp',
    '..',
);

# libXt
our $assume_Xt = 0;
our @predef_Xt = (
    '.de De',
    '.ce 0',
    '.fi',
    '..',
    '.de Ds',
    '.nf',
    '.in +0.4i',
    '.ft CW',
    '..',
    '.de IN		\" send an index entry to the stderr',
    '..',
    '.de Pn',
    '.ie t \\\$1\fB\^\\\$2\^\fR\\\$3',
    '.el \\\$1\fI\^\\\$2\^\fP\\\$3',
    '..',
    '.de ZN',
    '.ie t \fB\^\\\$1\^\fR\\\$2',
    '.el \fI\^\\\$1\^\fP\\\$2',
    '..',
    '.de ny',
    '..',
);

sub read_file($) {
    my $path = shift;
    my @result;
    if ( open my $fh, $path ) {
        @result = <$fh>;
        close $fh;
        for my $n ( 0 .. $#result ) {
            chomp $result[$n];
        }
    }
    return @result;
}

sub IsComment($) {
    my $text   = shift;
    my $result = 0;
    $result = 1 if ( $text =~ /^\s*\.\s*\\"/ );
    return $result;
}

sub IsCommand($) {
    my $text   = shift;
    my $result = 0;
    $result = 1 if ( $text =~ /^\.\s*[[:alpha:]][[:alnum:]]([^[:alnum:]])?/ );
    return $result;
}

sub CommandName($) {
    my $text = shift;
    $text =~ s/^.\s*([[:alpha:]][[:alnum:]]).*/$1/;
    return $text;
}

sub CommandArgs($) {
    my $text = shift;
    $text =~ s/^.\s*([[:alpha:]][[:alnum:]])\s*//;
    return $text;
}

sub MacroName($) {
    my $text = shift;
    $text =~ s/^(\.de)\s*([^\s]{1,2})\s.*/$1 $2/;
    return $text;
}

sub StringName($) {
    my $text = shift;
    $text =~ s/^.*\s(\.ds)\s*([^\s]{1,2})\s.*/$1 $2/;
    return $text;
}

sub macro_name($) {
    my $text   = shift;
    my $result = "";
    if ( $text =~ /^\.\s*[^\s]{1,2}\b/ ) {
        $result = $text;
        $result =~ s/^\.\s*//;
        $result =~ s/\s.*$//;
    }
    return $result;
}

# Scan the file, looking for mismatches between parameter counts
sub check_pcounts($$$) {
    my $path  = $_[0];
    my $type  = $_[1];
    my @lines = @{ $_[2] };
    my $asis  = 0;

    for my $n ( 0 .. $#lines ) {
        $asis |= 1 if ( $lines[$n] =~ /^\.nf/ );
        $asis |= 2 if ( $lines[$n] =~ /^\.na/ );
        $asis |= 4 if ( $lines[$n] =~ /^\.TS/ );
        $asis &= ~4 if ( $lines[$n] =~ /^\.TE/ );
        $asis &= ~2 if ( $lines[$n] =~ /^\.ad/ );
        $asis &= ~1 if ( $lines[$n] =~ /^\.fi/ );
        if (    ( $opt_v or not &IsComment( $lines[$n] ) )
            and $asis == 0
            and ( $n == 0 or $lines[ $n - 1 ] !~ /^\s*\.\s*SH\s+NAME\b/ )
            and ( length $lines[$n] ) > $opt_w )
        {
            my $strip = $lines[$n];
            $strip =~ s/\\f[RIBP]//g;
            $strip =~ s/\\././g;
            if ( $strip ne $lines[$n] ) {
                printf "%s:%s: line longer than %d columns (%d formatted)\n",
                  $path, $n + 1, $opt_w, length($strip);
            }
            else {
                printf "%s:%s: line longer than %d columns (%d)\n", $path,
                  $n + 1, $opt_w, length($strip);
            }
        }
        if (    not &IsComment( $lines[$n] )
            and ( $asis & 5 ) == 0
            and index( $lines[$n], "." ) > 1 )
        {
            my $col = index( $lines[$n], ". " );
            printf "%s:%s:%s: embedded sentence ending\n", $path,
              $n + 1, $col + 1
              if (  $col > 1
                and ( substr( $lines[$n], $col - 1, 1 ) !~ /[[:upper:]]/ )
                and ( substr( $lines[$n], $col - 2, 2 ) ne ".." ) );
        }
        my $minc = -1;
        my $maxc = -1;
        my $name = &macro_name( $lines[$n] );
        if ( $lines[$n] =~ /^\.\s*(BR|BI|IB|IR|RB|RI|It)\b/ ) {
            $minc = 2;
            $maxc = 6;
        }
        elsif ( $lines[$n] =~ /^\.\s*/ ) {
            if ( $predef{ ".de " . $name } ) {
                my %obj = %{ $predef{ ".de " . $name } };
                $minc = $obj{MINC};
                $maxc = $obj{MAXC};
            }
        }
        if ( $maxc > 0 ) {
            my $actual = 0;
            my $text   = $lines[$n];
            $text =~ s/^\.\s*[[:alpha:]]+\b\s*//;
            $text =~ s/\s*$//;
            $text =~ s/\s+/ /g;
            if ( ( index $text, "\\\"" ) >= 0 ) {
                printf "%s:%s: argument contains escaped quote\n", $path,
                  $n + 1;
                next;
            }
            while ( $text ne "" ) {
                my $l = $text;
                $actual++;
                if ( $l =~ /^"/ ) {
                    my $s = substr $l, 1;
                    my $n = index $s, '"';
                    $text = substr $text, $n + 2;
                }
                else {
                    $l =~ s/\s.*//;
                    $text = substr $text, length($l);
                }
                $text =~ s/^\s+//;
                last if ( $text eq "" );
            }
            if ( $actual > $maxc or $minc > $actual ) {
                printf "%s:%d: have %d parameter%s for $name, expected %s %d\n",
                  $path, $n + 1, $actual, ( ( $actual == 1 ) ? "" : "s" ),
                  ( $actual > $maxc ) ? "no more than" : "at least",
                  ( $actual > $maxc ) ? $maxc          : $minc;
            }
        }
    }
}

# Scan the file, looking for places to optimize font-switching.  Warn about
# cases where a bold/italic font is left dangling at the end of a line.
sub check_fonting($$$) {
    my $path    = $_[0];
    my $type    = $_[1];
    my @lines   = @{ $_[2] };
    my $changes = 0;

    for my $n ( 0 .. $#lines ) {
        my $font = "R";
        my $safe = 0;
        if ( $lines[$n] =~ /^\.B\b/ ) {
            $font = "B";
            $safe = 1;
        }
        elsif ( $lines[$n] =~ /^\.I\b/ ) {
            $font = "I";
            $safe = 1;
        }
        elsif ( $lines[$n] =~ /^\./ ) {
            next;    # ignore other cases
        }
        next unless ( $lines[$n] =~ /\\f[RIBP]/ );
        my $update = $lines[$n];
        $update =~ s/\\f[RP](\\f[BI])/$1/g;
        if ( $update ne $lines[$n] ) {
            $update =~ s/\\fP/\\fR/g;
            printf "%s:%d: shorten %d to %d\n", $path, $n + 1,
              length( $lines[$n] ), length($update);
            printf "<\t%s\n", $lines[$n];
            printf ">\t%s\n", $update;
            ++$changes;
        }
        if ( $safe == 0 ) {
            my $dangle = $update;
            $dangle =~ s/^.*(\\f[RIBP])/$1/;
            if ( $dangle =~ /^\\f[BI]/ ) {
                printf "%s:%d: dangling font: %s\n", $path, $n + 1, $lines[$n];
            }
        }
        $lines[$n] = $update if ( $opt_x and $changes > 0 );
    }
    if ( $opt_x and $changes > 0 ) {
        my $newfile = $path . ".new";
        open( my $fh, ">", $newfile ) or die "cannot open $newfile $!";
        for my $n ( 0 .. $#lines ) {
            printf $fh "%s\n", $lines[$n];
        }
        close $fh;
        rename $newfile, $path;
    }
}

# Scan the file, looking for fake quotes:
#	``quote''
#	`quote'
sub check_dquotes($$$) {
    my $path  = $_[0];
    my $type  = $_[1];
    my @lines = @{ $_[2] };
    for my $n ( 0 .. $#lines ) {
        next if ( $lines[$n] =~ /^\./ );
        my $ref = $lines[$n];
        $ref =~ s/\\\*\(``//g;
        $ref =~ s/\\\*\(''//g;
        if ( $ref =~ /``[^']*''/ ) {
            printf "%s:%d: ``fake quotes''\n", $path, $n + 1;
        }
        elsif ( $ref =~ /`[^']*'/ ) {
            printf "%s:%d: `fake quotes'\n", $path, $n + 1;
        }
    }
}

# Scan the file, looking for strings defined using ".ds", and for references
# to strings.  groff will warn about undefined strings if the reference is
# correctly formatted, but ignore some misformatted cases:
sub check_strings($$$) {
    my $path  = $_[0];
    my $type  = $_[1];
    my @lines = @{ $_[2] };
    my %where;
    my %usage;
    my %builtin;
    my $state = 0;

    # The macros are preferred to the builtin, since the latter interfere
    # with editing (because the word boundaries are merged).
    if ( not $opt_v ) {
        $builtin{lq} = 0;
        $builtin{rq} = 0;
        $builtin{R}  = 0;
    }

    for my $n ( 0 .. $#lines ) {
        if ( $lines[$n] =~ /^\.de\s/ ) {
            $state = 1;
        }
        elsif ( $state != 0 ) {
            $state = 0 if ( $lines[$n] =~ /^\.\./ );
        }
        elsif ( $lines[$n] =~ /\.ds\s+../ ) {
            my $name = $lines[$n];
            $name =~ s/^.*\.ds\s+//;
            $name =~ s/\s.*//;
            printf "string %d{%s}:%s\n", $n + 1, $name, $lines[$n] if ($opt_d);
            $where{$name} = $n + 1;
            $usage{$name} = 0;
            my $value = $lines[$n];
            $value =~ s/^\.ds\s+//;

            if ( $value =~ /^tk\s+X Toolkit$/ ) {
                if ( $assume_Xt++ == 0 ) {
                    %predef =
                      &find_macros( "<predefined>", \@predef_Xt, \%predef );
                }
            }
            elsif ( $assume_Xt > 0 ) {

                # ignore the X11 case
            }
            elsif ( $value =~ /^xT\s+X Toolkit Intrinsics.*/ ) {
                if ( $assume_X11++ == 0 ) {
                    %predef =
                      &find_macros( "<predefined>", \@predef_X11, \%predef );
                }
            }
            $usage{$name} = 1 if ( $assume_Xt or $assume_X11 );
        }
        elsif ( $type eq "ms" and $lines[$n] =~ /^\.\[\]\s/ ) {
            my $name = $lines[$n];
            $name =~ s/^.*\.\[\]\s+//;
            $name =~ s/\s.*//;
            printf "string %d{%s}:%s\n", $n + 1, $name, $lines[$n] if ($opt_d);
            $where{$name} = $n + 1;
            $usage{$name} = 0;
        }
        else {
            my $text = $lines[$n];
            my $name;
            my $find;
            my $s;
            if ( $text =~ /^\.[[:alpha:]].*/ ) {
                $find = '\\*';
            }
            elsif ( $text =~ /^\..*/ ) {
                next;
            }
            else {
                $find = '\*';
            }
            printf "XXX %s\n", $text if ($opt_d);
            while ( ( $s = ( index $text, $find ) ) >= 0 ) {
                $text = substr $text, $s + length($find);
                printf "GOT %s\n", $text if ($opt_d);
                if ( $text =~ /^\(..*/ ) {
                    $name = substr $text, 1, 2;
                }
                else {
                    $name = substr $text, 0, 1;
                }
                if ( defined $usage{$name} ) {
                    $usage{$name}++;
                }
                elsif ( defined $builtin{$name} ) {
                    $builtin{$name}++;
                }
                else {
                    printf "%s:%d: undefined string %s\n", $path, $n + 1, $name;
                }
            }
        }
    }
    for my $key ( sort keys %usage ) {
        printf "%s:%d: unused string definition '%s'\n", $path, $where{$key},
          $key
          if ( $usage{$key} == 0 );
    }
}

# Scan the file, looking for patterns like
#	\fBfoo(1)\fP
# which should be
#	\fBfoo\fP(1)
sub check_externs($$$) {
    my $path  = $_[0];
    my $type  = $_[1];
    my @lines = @{ $_[2] };
    for my $n ( 0 .. $#lines ) {
        if (
            $lines[$n] =~ /\\f[BI]
			   [[:alnum:]_-]+
			   \([1-9][[:alnum:]]*\)
			   \\f[PR]/x
          )
        {
            printf "%s:%d: BAD link: %s\n", $path, $n + 1, $lines[$n];
        }
        if (
            $lines[$n] =~ /\\fI
			   [[:alnum:]_-]+
			   \\f[PR]
			   \([1-9][[:alnum:]]*\)/x
          )
        {
            printf "%s:%d: BAD link: %s\n", $path, $n + 1, $lines[$n];
        }
    }
}

# Return a hash on "de XX" or "ds XX", which in turn points to hashes with
# these keys:
#   LINE - the beginning line-number of the macro/string definition
#   DATA - an array of the contents of the macro/string definition.
#   MINC - minimum expected number of parameters
#   MAXC - maximum expected number of parameters
sub find_macros($$$) {
    my $path   = $_[0];
    my @lines  = @{ $_[1] };
    my %result = %{ $_[2] };
    my $state  = 0;
    my $first  = -1;
    my $named  = "";
    my $prior  = "";
    my $cname  = "";
    my $lists  = 0;

    for my $n ( 0 .. $#lines ) {
        my $saved = $state;
        if ( $lines[$n] =~ /^\.de\s/ ) {
            $named = &MacroName( $lines[$n] );
            $state = 1;
            $first = $n;
        }
        elsif ( $lines[$n] =~ /^\.\.(\s.*$)?/ ) {
            if ( $state == 1 ) {
                printf "\tMACRO($named): %d..%d\n", $first + 1, $n + 1
                  if ($opt_v);
                my @result;
                my $minc = -1;
                my $maxc = -1;
                for my $r ( $first .. $n ) {
                    my $text = $lines[$r];
                    my $eqls = ( $text =~ /\.\$[!=]=/ );
                    $result[ $r - $first ] = $text;
                    while ( $text =~ /\$\d/ ) {
                        $text =~ s/^[^\$]\$*//;
                        if ( $text =~ /^\d+/ ) {
                            my $value = $text;
                            $text =~ s/^\d+//;
                            $value =~ s/[^\d].*//;
                            $minc = $value
                              if (  $value > $minc
                                and $text ne ""
                                and not $eqls );
                            $maxc = $value if ( $value > $maxc );
                        }
                    }
                }
                my %obj;
                $obj{LINE}      = $first;
                $obj{DATA}      = \@result;
                $obj{MINC}      = $minc;
                $obj{MAXC}      = $maxc;
                $result{$named} = \%obj;
            }
            $state = 0;
            $named = "";
        }
        elsif ( $lines[$n] =~ /\s\.nr\s+LL\b/ ) {

            # ignore ctlseqs.ms changing line-length
            $state = 0;
            $named = "";
        }
        elsif ( $lines[$n] =~ /^\.ie\s.*\.ds\s/ ) {
            if ( $state == 0 ) {
                $named = &StringName( $lines[$n] );
                $state = 2;
                $first = $n;
            }
        }
        elsif ( $lines[$n] =~ /^\.el\s.*\.ds\s/ ) {
            if ( $state == 2 ) {
                printf "\tSTRING($named): %d..%d\n", $first + 1, $n + 1
                  if ($opt_v);
                my @result;
                for my $r ( $first .. $n ) {
                    $result[ $r - $first ] = $lines[$r];
                }
                my %obj;
                $obj{LINE}      = $first;
                $obj{DATA}      = \@result;
                $result{$named} = \%obj;
                $state          = 0;
                $named          = "";
            }
            elsif ( $state != 1 ) {
                printf "%s:%d: unexpected state\n", $path, $n + 1;
            }
        }
        elsif ( $state != 1 and $lines[$n] =~ /^\.(ie|el)\b/ ) {
            printf "%s:%d: unexpected state\n", $path, $n + 1;
        }
        elsif ( $lines[$n] =~ /^\.St/ ) {
            $lists++;
        }
        elsif ( $lines[$n] =~ /^\.Ed/ ) {
            $lists--;
        }
        if ( $state == 2 and $first != $n ) {
            printf "%s:%d:expected .el after this\n", $path, $n + 1;
        }
        if ( $state == 0 ) {
            if ( &IsCommand( $lines[$n] ) ) {
                $cname = &CommandName( $lines[$n] );
                if ( $cname eq "IP" ) {
                    printf "%s:%d:expected .iP first\n", $path, $n + 1
                      if (  $result{".de iP"}
                        and ( $lists > 0 )
                        and
                        ( $n <= 0 or ( $prior ne "iP" and $prior ne "St" ) ) );
                }
                elsif ( $cname eq "iP" ) {
                    printf "%s:%d:redundant .iP\n", $path, $n + 1
                      if ( $prior eq "St" or $prior eq "sP" );
                }
                $prior = $cname;
            }
            elsif ( $lines[$n] !~ /^\./ ) {
                $prior = "";
            }
        }
        printf "%s:%d:%s\n", $path, $n + 1, $lines[$n]
          if ( $opt_v and ( $state != 0 and $saved == 0 ) );
    }
    return %result;
}

sub trimmed($) {
    my $text = shift;
    $text =~ s/\s*$//;
    $text =~ s/\s+/ /g;
    return $text;
}

sub Mismatched($$) {
    my $cmp    = shift;
    my $ref    = shift;
    my $result = 0;
    $result = 1 if ( &trimmed($cmp) ne &trimmed($ref) );
    return $result;
}

sub only_so($) {
    my @data   = @{ $_[0] };
    my $result = 0;
    $result = 1 if ( $#data == 0 and $data[0] =~ /^\.so\b/ );
    return $result;
}

sub do_file($) {
    my $path = shift;
    my $type = "";
    $type = "man" if ( $path =~ /\.man$|\.[1-9]$|\.[1-9][[:alpha:]]*$/ );
    $type = "ms"  if ( $path =~ /\.ms$/ );
    %predef = &find_macros( "<predefined>", \@predef_m2h, \%predef )
      if ( $path =~ /man2html/ );
    if ( $type ne "" ) {
        my @lines = &read_file($path);
        system("tbl $path | groff -$type -w all -z") unless &only_so( \@lines );
        my %empty;
        my %macro = &find_macros( $path, \@lines, \%empty );
        &check_dquotes( $path, $type, \@lines );
        &check_strings( $path, $type, \@lines );
        &check_externs( $path, $type, \@lines );
        &check_pcounts( $path, $type, \@lines );
        &check_fonting( $path, $type, \@lines );

        for my $name ( sort keys %macro ) {

            my %cmp = %{ $macro{$name} };
            if ( $predef{$name} ) {
                my @cmp = @{ $cmp{DATA} };
                my %ref = %{ $predef{$name} };
                my @ref = @{ $ref{DATA} };
                if ( $#cmp != $#ref ) {
                    printf "%s:%d: have %d lines, expected %d\n",
                      $path, $cmp{LINE} + 1, $#cmp + 1, $#ref + 1;
                    for my $n ( 0 .. $#ref ) {
                        printf "\t%s\n", $ref[$n];
                    }
                }
                else {
                    for my $n ( 0 .. $#cmp ) {
                        if ( &Mismatched( $cmp[$n], $ref[$n] ) ) {
                            my $tag = sprintf( "%s:%d: expected ",
                                $path, $cmp{LINE} + 1 + $n );
                            printf "%s\"%s\"\n", $tag, $ref[$n];
                            $tag =~ s/./ /g;
                            $tag =~ s/^.........../... have ->/;
                            printf "%s\"%s\"\n", $tag, $cmp[$n];
                        }
                    }
                }
            }
            elsif ( $type eq "man" ) {
                printf "%s:%d: missing $name\n", $path, $cmp{LINE} + 1;
            }
        }
    }
}

sub ignore_dir($) {
    my $path   = shift;
    my $result = 0;
    if ($opt_r) {
        $result = 1 if ( $path =~ /\b\.git$/ );
        $result = 1 if ( $path =~ /\b\.svn$/ );
        $result = 1 if ( $path =~ /\bCVS$/ );
        $result = 1 if ( $path =~ /\bRCS$/ );
        $result = 1 if ( $path =~ /\bSCCS$/ );
    }
    return $result;
}

sub do_tree($) {
    my $path = shift;
    if ( -l $path ) {

        # ignore
    }
    elsif ( -d $path ) {
        if ( &ignore_dir($path) ) {

            # ignore
        }
        elsif ( opendir( my $dh, $path ) ) {
            my @list = sort readdir($dh);
            closedir $dh;
            for my $n ( 0 .. $#list ) {
                chomp $list[$n];
                &do_tree( sprintf "%s/%s", $path, $list[$n] )
                  unless ( $list[$n] =~ /^\.(\.)?$/ );
            }
        }
    }
    elsif ( -f $path ) {
        &do_file($path);
    }
}

sub main::HELP_MESSAGE() {
    printf STDERR <<EOF
Usage: $0 [options]

Options:
 -d       debug
 -r       recur on directories
 -v       verbose
 -w COLS  line-length (default: 80)
 -x       update files where fonting can be shortened
EOF
      ;
    exit 1;
}

$Getopt::Std::STANDARD_HELP_VERSION = 1;
&getopts('drvw:x') || &main::HELP_MESSAGE;
$opt_w = 80 unless ($opt_w);

%predef = &find_macros( "<predefined>", \@predef, \%predef );

if ( $#ARGV >= 0 ) {
    while ( $#ARGV >= 0 ) {
        &do_tree( shift @ARGV );
    }
}
else {
    &do_tree(".");
}

1;
