#!/usr/bin/env perl
# $Id: count-nroff,v 1.5 2017/03/05 20:37:11 tom Exp $
# Scan a directory-tree or file(s), counting nroff files and adjusting for
# comments
# -----------------------------------------------------------------------------
# Copyright 2017 by Thomas E. Dickey
#
#                         All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
# ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the sale,
# use or other dealings in this Software without prior written authorization.
# -----------------------------------------------------------------------------

use warnings;
use strict;
use diagnostics;

use Getopt::Std;

# Command-line switches (-d debug, -v verbose), set by getopts.
our $opt_d;
our $opt_v;

# File counters: every regular file visited, and the subset accepted as nroff.
our ( $total_files, $nroff_files ) = ( 0, 0 );

# Tallies keyed by CHARS, WORDS and LINES:
#   %nroff  every line of every accepted file
#   %macro  lines starting with "." that are not comments
#   %notes  comment lines (starting with .\")
#   %plain  all remaining lines
our ( %nroff, %macro, %notes, %plain );

# Read a whole file into memory.
#
# Parameter:
#   $path   name of the file to read
#
# Returns the file's lines (terminators retained) as a list; the list is
# empty if the file cannot be opened, in which case a warning is printed.
sub read_file($) {
    my $path = shift;
    my @result;

    # Three-argument open: the path is always treated as a plain file name,
    # never as a mode prefix (">name") or a command ("name |").
    if ( open my $fh, '<', $path ) {
        @result = <$fh>;
        close $fh;
    }
    else {
        warn "can't open $path: $!\n";
    }
    return @result;
}

# Decide whether a list of lines looks like plain text.
#
# Parameter:
#   $_[0]   reference to an array of lines
#
# Returns 1 if there is at least one line and every line consists solely of
# printable characters and tabs followed by one or more CR/LF characters;
# returns 0 otherwise (including for an empty list).
sub is_text($) {
    my ($lines) = @_;

    return 0 unless @{$lines};

    for my $line ( @{$lines} ) {
        return 0 unless ( $line =~ /^[[:print:]\t]*[\r\n]+$/ );
    }
    return 1;
}

# Add the character, word and line counts of one line to a tally.
#
# Parameters:
#   $_[0]   reference to a hash holding running CHARS/WORDS/LINES totals
#   $_[1]   the line of text to count (its terminator counts as a character)
#
# Returns the updated tally as a key/value list.  The caller's hash is not
# modified, so the result has to be assigned back.
sub count_it($$) {
    my ( $tally, $text ) = @_;
    my %hash = %{$tally};

    # split ' ' skips leading whitespace, so indented lines do not produce a
    # phantom empty first field and a blank line yields no words at all.
    my @words = split ' ', $text;

    $hash{CHARS} += length $text;

    # Count the words themselves; $#words is the last index, which is one
    # too few (and -1 for a blank line).
    $hash{WORDS} += scalar @words;
    $hash{LINES} += 1;
    return %hash;
}

# Examine one file and, if it looks like nroff source, add it to the tallies.
#
# Parameter:
#   $path   name of the file to examine
#
# Every call increments $total_files.  The file is counted as nroff (and
# $nroff_files incremented) only if its name ends in ".man", ".ms" or a
# manual-section suffix (a digit plus an optional letter), its content passes
# is_text, and its first line does not start with "#".  Each line of an
# accepted file is added to %nroff and to exactly one of %notes, %macro or
# %plain.
sub count_nroff($) {
    my $path = shift;

    $total_files++;
    return unless ( $path =~ /\.(man|ms|(\d[[:alpha:]]?))$/ );

    my @lines = read_file($path);
    return unless ( is_text( \@lines ) );

    # is_text guarantees at least one line; a leading "#" suggests a script
    # or other non-nroff file that merely has a numeric suffix.
    return if ( $lines[0] =~ /^\s*[#]/ );

    # print, not printf: the path must not be used as a format string.
    print "** $path\n" if ($opt_v);
    $nroff_files++;

    for my $n ( 0 .. $#lines ) {
        my $line = $lines[$n];
        my $type;

        %nroff = count_it( \%nroff, $line );
        if ( $line =~ /^\.\\"/ ) {

            # comment line
            %notes = count_it( \%notes, $line );
            $type = "*";
        }
        elsif ( $line =~ /^\./ ) {

            # request or macro call
            %macro = count_it( \%macro, $line );
            $type = ".";
        }
        else {
            %plain = count_it( \%plain, $line );
            $type = " ";
        }
        printf "%6d%s %s", $n + 1, $type, $line if ($opt_d);
    }
}

# Process one path: recurse into directories, count regular files.
#
# Parameter:
#   $path   file or directory name
#
# Symbolic links are ignored.  Directory entries are visited in sorted
# order, skipping ".", ".." and any entry whose name starts with ".git" or
# ".svn".  Regular files are passed to count_nroff.  Dies if a directory
# cannot be opened.
sub doit($) {
    my $path = shift;

    if ( -l $path ) {

        # ignore
    }
    elsif ( -d $path ) {

        # A lexical handle keeps each level of the recursion independent of
        # the others, unlike a shared bareword handle.
        opendir( my $dh, $path ) || die "can't opendir $path: $!";
        my @list = sort readdir($dh);
        closedir $dh;

        for my $entry (@list) {
            next if ( $entry eq "." );
            next if ( $entry eq ".." );
            next if ( $entry =~ /^\.(git|svn).*/ );

            # The "&" form avoids a "called too early to check prototype"
            # warning for this recursive call.
            &doit( sprintf( "%s/%s", $path, $entry ) );
        }
    }
    elsif ( -f $path ) {
        count_nroff($path);
    }
}

# Print one summary line for a given unit of measure.
#
# Parameters:
#   $name   label to print ("chars", "words" or "lines")
#   $NAME   key into the tally hashes ("CHARS", "WORDS" or "LINES")
#
# The line shows the overall total from %nroff followed by the breakdown
# from %macro, %notes and %plain.
sub show_it($$) {
    my $name = shift;
    my $NAME = shift;

    # A tally that was never updated has no entry for $NAME; report it as
    # zero rather than passing undef to printf.
    printf "%7d %s (%d macro, %d notes, %d plain)\n",
      $nroff{$NAME} || 0, $name,
      $macro{$NAME} || 0, $notes{$NAME} || 0, $plain{$NAME} || 0;
}

# Print the usage message on standard output and exit with status 1.
# Called when option parsing fails; the name is the hook Getopt::Std looks
# for when handling --help.
sub main::HELP_MESSAGE() {

    # print, not printf: the text interpolates $0, which must not be
    # interpreted as a format string.
    print <<EOF;
Usage: $0 [options] [file1 [file2 [...]]]

Options:
    -d   debug, show lines read and their type (macro, note, plain)
    -v   verbose, print names of files and directories scanned
EOF
    exit 1;
}

# Opt in to Getopt::Std's standard --help/--version handling, then parse the
# switches; on a parsing error show the usage message, which exits.
$Getopt::Std::STANDARD_HELP_VERSION = 1;
getopts('dv') or main::HELP_MESSAGE();

# Debug output implies verbose output.
if ($opt_d) {
    $opt_v = 1;
}

# With no arguments, scan the current directory.
push @ARGV, "." unless @ARGV;
while ( defined( my $target = shift @ARGV ) ) {
    doit($target);
}

# TODO report per-file or summary only
# TODO report number of files
# TODO report number of bytes/words/lines
# TODO report amount of comment and markup
# TODO report amount of macros (and uses of macros)

# Summary: how many files were accepted, then one line per unit of measure.
printf "%d/%d files are nroff format\n", $nroff_files, $total_files;

for my $unit (qw(chars words lines)) {
    show_it( $unit, uc($unit) );
}

1;
