#!/usr/bin/perl -w
# $Id: report-terminfo,v 1.31 2022/05/14 23:34:35 tom Exp $
# -----------------------------------------------------------------------------
# Copyright 2016-2020,2022 by Thomas E. Dickey
#
#                         All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the
# sale, use or other dealings in this Software without prior written
# authorization.
# -----------------------------------------------------------------------------
# Analyze one or more terminfo descriptions to get some useful statistics.
# This assumes that you have tic, toe and infocmp.

use strict;

use Getopt::Std;
use POSIX qw(ceil floor);

$| = 1;

our ( $opt_a, $opt_D, $opt_d, $opt_e, $opt_s, $opt_u, $opt_v, $opt_x );

our %canonical_names;
our %all_names;
our %all_caps;

our $total_full_cur = 0;
our $total_part_cur = 0;
our $total_no_cur   = 0;
our $total_ctl_cur  = 0;
our $total_ansi     = 0;
our $total_values   = 0;

our $total_switches = 0;
our $ansi_switches  = 0;

our $total_start_csi = 0;
our $total_start_ss3 = 0;
our $total_stay_csi  = 0;
our $total_stay_ss3  = 0;

our $total_none_tabs = 0;    # xt
our $total_soft_tabs = 0;
our $total_hard_tabs = 0;    # cbt, hts

our %pair_keys;
our %total_num_colors;
our %total_num_pairs;
our %total_extensions;
our %total_num_AX;
our %total_num_RGB;
our %total_num_U8;
our %total_num_XT;
our %total_num_kmous;
our %total_num_XM;
our %total_num_fkeys;

sub failed($) {
    my $text = shift;
    printf STDERR "? %s\n", $text;
    exit 1;
}

sub ncurses_version($) {
    my $name   = shift;
    my $result = 0;
    if ( open my $fh, "\"$name\" -V 2>/dev/null |" ) {
        my @data = <$fh>;
        close $fh;
        $result = 1 if ( $#data == 0 && $data[0] =~ /^ncurses/ );
    }
    return $result;
}

sub utility($) {
    my $name = shift;
    $name .= "6" if ( &ncurses_version( $name . "6" ) );
    return $name;
}

sub resolve_uses($$) {
    my $term  = shift;
    my $level = shift;
    my %result;
    if ( defined $all_names{$term} ) {
        my %values = %{ $all_names{$term} };
        my @values = sort keys %values;
        for my $n ( 0 .. $#values ) {
            my $name = $values[$n];
            if ( $name =~ /^use\d*$/ ) {
                my %merge = %{ &resolve_uses( $values{$name}, $level + 1 ) };
                my @merge = sort keys %merge;
                for my $k ( 0 .. $#merge ) {
                    my $merge = $merge[$k];
                    $result{$merge} = $merge{$merge};
                    printf " %d.%d:merge %s=%s\n", $level,
                      &count_hash( \%result ), $merge, $result{$merge}
                      if ($opt_D);
                }
            }
            else {
                $result{$name} = $values{$name};
                printf " %d.%d:recpy %s=%s\n", $level, &count_hash( \%result ),
                  $name, $result{$name}
                  if ($opt_D);
            }
        }
    }
    else {
        printf STDERR "? found no entry for '%s'\n", $term;
    }
    return \%result;
}

sub count_blocks($) {
    my %hash   = %{ $_[0] };
    my $result = 0;
    for my $name ( keys %hash ) {
        $result++ if ( $name =~ /\+/ );
    }
    return $result;
}

sub count_hash($) {
    my %hash = %{ $_[0] };
    my @hash = keys %hash;
    return ( $#hash + 1 );
}

sub csi_length($) {
    my $value  = shift;
    my $result = 0;
    $result = 2 if ( $value =~ /^\\E\[/ );
    $result = 4 if ( $value =~ /^\\233/ );
    return $result;
}

sub ss3_length($) {
    my $value  = shift;
    my $result = 0;
    $result = 2 if ( $value =~ /^\\EO/ );
    $result = 4 if ( $value =~ /^\\217/ );
    return $result;
}

sub csi_key($) {
    my $result = 0;
    $result = 1 if ( $_[0] and ( &csi_length( $_[0] ) > 0 ) );
    return $result;
}

sub ss3_key($) {
    my $result = 0;
    $result = 1 if ( $_[0] and ( &ss3_length( $_[0] ) > 0 ) );
    return $result;
}

sub ansi_key($) {
    my $value  = shift;
    my $result = 0;
    my $check  = 0;
    if ( ( $check = &csi_length($value) ) > 0 ) {
        $result = 1 if ( substr $value, $check =~ /^[ABCD]$/ );
    }
    elsif ( ( $check = &ss3_length($value) ) > 0 ) {
        $result = 1 if ( substr $value, $check =~ /^[ABCD]$/ );
    }
    return $result;
}

sub control_key($) {
    my $value  = shift;
    my $result = 0;
    $result = 1 if ( $value and ( $value =~ /^\^[[:print:]]$/ ) );
    return $result;
}

sub pair_keys($$) {
    my $result = 0;
    if ( $_[0] and $_[1] ) {
        $result = 1 if ( $_[0] eq $_[1] );
    }
    return $result;
}

sub analyze_entry($) {
    my $term = shift;
    printf "%s\n", $term if ($opt_v);
    if ( $term !~ /\+/ ) {
        printf "%s before %d\n", $term, &count_hash( $all_names{$term} )
          if ($opt_D);
        my %values = %{ &resolve_uses( $term, 1 ) };
        printf "%s before %d\n", $term, &count_hash( \%values ) if ($opt_D);
        my $is_control  = 0;
        my $is_ansi     = 0;
        my $is_switched = 0;

        $pair_keys{ich1}++  if ( &pair_keys( $values{ich1},  $values{kich1} ) );
        $pair_keys{dch1}++  if ( &pair_keys( $values{dch1},  $values{kdch1} ) );
        $pair_keys{il1}++   if ( &pair_keys( $values{il1},   $values{kil1} ) );
        $pair_keys{dl1}++   if ( &pair_keys( $values{dl1},   $values{kdl1} ) );
        $pair_keys{cub1}++  if ( &pair_keys( $values{cub1},  $values{kcub1} ) );
        $pair_keys{cuf1}++  if ( &pair_keys( $values{cuf1},  $values{kcuf1} ) );
        $pair_keys{cud1}++  if ( &pair_keys( $values{cud1},  $values{kcud1} ) );
        $pair_keys{cuu1}++  if ( &pair_keys( $values{cuu1},  $values{kcuu1} ) );
        $pair_keys{cbt}++   if ( &pair_keys( $values{cbt},   $values{kcbt} ) );
        $pair_keys{home}++  if ( &pair_keys( $values{home},  $values{khome} ) );
        $pair_keys{clear}++ if ( &pair_keys( $values{clear}, $values{kclr} ) );

        if (    $values{kcub1}
            and $values{kcud1}
            and $values{kcuf1}
            and $values{kcuu1} )
        {
            $total_full_cur++;
            if (    &control_key( $values{kcub1} )
                and &control_key( $values{kcud1} )
                and &control_key( $values{kcuf1} )
                and &control_key( $values{kcuu1} ) )
            {
                $is_control = 1;
            }
            elsif ( &ansi_key( $values{kcub1} )
                and &ansi_key( $values{kcud1} )
                and &ansi_key( $values{kcuf1} )
                and &ansi_key( $values{kcuu1} ) )
            {
                printf "...%s is ANSI\n", $term if ($opt_v);
                $is_ansi = 1;
            }
        }
        elsif (&control_key( $values{kcub1} )
            or &control_key( $values{kcud1} )
            or &control_key( $values{kcuf1} )
            or &control_key( $values{kcuu1} ) )
        {
            if ($opt_v) {
                my $msg = "";
                $msg .= " bak"  if ( $values{kcub1} );
                $msg .= " down" if ( $values{kcud1} );
                $msg .= " fwd"  if ( $values{kcuf1} );
                $msg .= " up"   if ( $values{kcuu1} );
                printf "...%s has some cursor-keys:$msg\n", $term;
            }
            $total_part_cur++;
        }
        else {
            printf "...%s has no cursor-keys\n", $term if ($opt_v);
            $total_no_cur++;
        }
        if ( $values{smkx} and $values{rmkx} ) {
            printf "...%s switches keypad mode\n", $term if ($opt_v);
            $is_switched = 1;
        }
        if (    &csi_key( $values{kcub1} )
            and &csi_key( $values{kcud1} )
            and &csi_key( $values{kcuf1} )
            and &csi_key( $values{kcuu1} ) )
        {
            if ($is_switched) {
                $total_start_csi++;
            }
            else {
                $total_stay_csi++;
            }
        }
        elsif ( &ss3_key( $values{kcub1} )
            and &ss3_key( $values{kcud1} )
            and &ss3_key( $values{kcuf1} )
            and &ss3_key( $values{kcuu1} ) )
        {
            if ($is_switched) {
                $total_start_ss3++;
            }
            else {
                $total_stay_ss3++;
            }
        }

        $total_ctl_cur  += $is_control;
        $total_ansi     += $is_ansi;
        $total_switches += $is_switched;
        $ansi_switches  += $is_switched if ($is_ansi);

        # count tab support (none, soft, hard)
        if ( $values{xt} ) {
            ++$total_none_tabs;
        }
        elsif ( $values{cbt} and $values{hts} ) {
            ++$total_hard_tabs;
        }
        else {
            ++$total_soft_tabs;
        }

        # count colors
        if ( $values{colors} ) {
            my $value = $values{colors};
            if ( $value ne "@" ) {
                $value = oct $value if ( $value =~ /^0x/ );
                $total_num_colors{$value}++;
            }
        }
        if ( $values{pairs} ) {
            my $value = $values{pairs};
            if ( $value ne "@" ) {
                $value = oct $value if ( $value =~ /^0x/ );
                $total_num_pairs{$value}++;
            }
        }

        # count extensions
        my $extension = "";

        # extensions that ncurses uses
        foreach my $checkext ( "AX", "RGB", "XT", "U8" ) {
            if ( defined $values{$checkext} ) {
                my $value = $values{$checkext};
                $extension .= " $checkext" if ( $value ne "@" );
            }
        }

        # extensions that screen might use
        foreach my $checkext ( "Z0", "Z1", "WS", "CS", "C8" ) {
            if ( defined $values{$checkext} ) {
                my $value = $values{$checkext};
                $extension .= " $checkext" if ( $value ne "@" );
            }
        }
        $extension =~ s/^\s+//;
        $total_extensions{$extension}++ if ( $extension ne "" );

        # count mouse
        if ( $values{kmous} ) {
            my $value = $values{kmous};
            if ( $value ne "@" ) {
                $total_num_kmous{$value}++;
            }
        }
        if ( $values{XM} ) {
            my $value = $values{XM};
            if ( $value ne "@" ) {
                $total_num_XM{$value}++;
            }
        }

        # count function-keys
        my $num_fkeys = 0;
        for my $key ( sort keys %values ) {
            next unless ( $key =~ /^k/ );
            next         if ( $values{$key} eq "" );
            next         if ( $values{$key} eq "@" );
            $num_fkeys++ if ( $key ne "kmous" );
        }
        $total_num_fkeys{$num_fkeys}++ if ($num_fkeys);
    }
}

sub parse_name($) {
    my $text = shift;
    my $name = $text;
    $name =~ s/^\s+//;
    $name =~ s/,\s*$//;
    $name =~ s/[#=@].*$//;
    return $name;
}

sub parse_value($) {
    my $text  = shift;
    my $name  = &parse_name($text);
    my $value = $text;
    $value =~ s/^\s+//;
    $value = substr $value, length($name);
    $value =~ s/,\s*$//;
    if ( $value eq "" ) {
        $value = "true";
    }
    else {
        $value =~ s/^[#=]//;
    }
    return $value;
}

sub copyhash($) {
    my %hash = %{ $_[0] };
    my %result;
    for my $n ( keys %hash ) {
        $result{$n} = $hash{$n};
    }
    return \%result;
}

# We can have multiple use-clauses
sub bump($) {
    my $use = shift;
    my $num = $use;
    $num =~ s/^use//;
    if ( $num eq "" ) {
        $num = 1;
    }
    else {
        $num++;
    }
    $use = "use$num";
    return $use;
}

sub list_entry($) {
    my $term = shift;
    my %result;
    if ( open my $fh, &utility("infocmp") . " -1$opt_x \"$term\"|" ) {
        my @data = <$fh>;
        close($fh);
        for my $n ( 0 .. $#data ) {
            chomp $data[$n];
            next if ( $data[$n] =~ /^#/ );
            if ( $data[$n] =~ /^\s/ ) {
                my $name = &parse_name( $data[$n] );
                $name = &bump($name)
                  while ( $name =~ /^use\d*$/ and $result{$name} );
                $result{$name} = &parse_value( $data[$n] );
                $all_caps{$name} += 1;
                $total_values++;
            }
        }
    }
    return \%result;
}

sub list_all_entries() {
    if ($opt_s) {
        my $prog = &utility("tic");
        if ( open my $fh, "$prog -1I$opt_x \"$opt_s\" |" ) {
            my @data = <$fh>;
            close $fh;

            my @aliases;
            my %values;
            for my $n ( 0 .. $#data ) {
                chomp $data[$n];
                next if ( $data[$n] =~ /^#/ );
                printf "%05d:%s\n", $n, $data[$n] if ($opt_D);
                if ( $data[$n] =~ /^\s/ ) {
                    my $name = &parse_name( $data[$n] );
                    $name = &bump($name)
                      while ( $name =~ /^use\d*$/ and $values{$name} );
                    $values{$name} = &parse_value( $data[$n] );
                    $all_caps{$name} += 1;
                    $total_values++;
                }
                else {
                    if (%values) {
                        for my $a ( 0 .. $#aliases ) {
                            printf "flush %s...\n", $aliases[$a] if ($opt_v);
                            $all_names{ $aliases[$a] } = &copyhash( \%values );
                        }
                        $canonical_names{ $aliases[0] } = &copyhash( \%values );
                        undef %values;
                    }
                    my @fields = split /\|/, $data[$n];
                    if ( $#fields > 0 ) {
                        delete $fields[$#fields]
                          if ( $fields[$#fields] =~ /\s/ );
                    }
                    @aliases = @fields;
                }
            }
            if (%values) {
                for my $a ( 0 .. $#aliases ) {
                    printf "end-flush %s...\n", $aliases[$a] if ($opt_v);
                    $all_names{ $aliases[$a] } = &copyhash( \%values );
                }
                $canonical_names{ $aliases[0] } = &copyhash( \%values );
                undef %values;
            }
        }
        else {
            &failed("cannot open pipe to $prog");
        }
    }
    else {
        my $prog = &utility("toe");
        if ( open my $fh, "$prog -a|" ) {
            my @data = sort <$fh>;
            close $fh;
            my %scanned;
            for my $n ( 0 .. $#data ) {
                chomp $data[$n];
                $data[$n] =~ s/\s.*//;
                my $name = $data[$n];
                next if ( $scanned{$name} );
                $scanned{$name}         = 1;
                $all_names{$name}       = &list_entry($name);
                $canonical_names{$name} = &copyhash( $all_names{$name} );
            }
        }
        else {
            &failed("cannot open pipe to $prog");
        }
    }
}

sub percent($$) {
    my $num = shift;
    my $den = shift;
    return ( $den > 0 ? sprintf( "%.0f", 100.0 * $num / $den ) : "?" );
}

sub show_bucket_numbers($$) {
    my %data = %{ $_[0] };
    my $name = $_[1];
    if (%data) {
        printf "\n";
        my $total = 0;
        for my $value ( sort keys %data ) {
            $total += $data{$value};
        }
        for my $value ( sort { $a <=> $b } keys %data ) {
            printf "%8d entries with %d $name (%s%%)\n", $data{$value}, $value,
              &percent( $data{$value}, $total );
        }
        printf "%8d entries with $name\n", $total;
    }
}

sub show_bucket_percent($$) {
    my %data = %{ $_[0] };
    my $name = $_[1];
    if (%data) {
        my @keys = sort { $a <=> $b } keys %data;
        if ( $#keys < 20 ) {
            &show_bucket_numbers( \%data, $name );
        }
        else {
            printf "\n";
            my $total = 0;
            my %blocks;
            my $chunk = ceil( $keys[$#keys] / 10 );
            for my $value (@keys) {
                $total += $data{$value};
                my $lo    = $chunk * floor( $value / $chunk );
                my $hi    = $chunk - 1 + $lo;
                my $block = sprintf( "%4d-%4d", $lo, $hi );
                $blocks{$block} += $data{$value};
            }
            for my $value ( sort keys %blocks ) {
                next unless ( $blocks{$value} );
                my $block = $value;
                $block =~ s/\s//g;
                printf "%8d entries with %s $name (%s%%)\n", $blocks{$value},
                  $block,
                  &percent( $blocks{$value}, $total );
            }
            printf "%8d entries with $name\n", $total;
        }
    }
}

sub show_bucket_strings($$) {
    my %data = %{ $_[0] };
    my $name = $_[1];
    if (%data) {
        printf "\n";
        my $total = 0;
        for my $value ( sort keys %data ) {
            $total += $data{$value};
        }
        for my $value ( sort keys %data ) {
            printf "%8d entries with \"%s\" $name (%s%%)\n", $data{$value},
              $value,
              &percent( $data{$value}, $total );
        }
        printf "%8d entries with $name\n", $total;
    }
}

sub show_pair_keys() {
    my @keys = sort keys %pair_keys;
    printf "\n" if ( $#keys >= 0 );
    for my $key ( 0 .. $#keys ) {
        my $display  = $keys[$key];
        my $keyboard = "k" . $display;
        $keyboard = "kclr" if ( $keyboard eq "kclear" );
        my $count = $pair_keys{$display};
        printf "%8d entries with %s == %s\n", $count, $display, $keyboard;
    }
}

sub report_terminfo() {
    my $num_primary = &count_hash( \%canonical_names );
    my $num_alias   = &count_hash( \%all_names );
    my $num_blocks  = &count_blocks( \%canonical_names );

    printf "%8d primary terminal names\n", $num_primary;
    printf "%8d terminal aliases\n",       $num_alias - $num_primary;
    printf "%8d (total)\n",                $num_alias;
    printf "\n";

    printf "%8d different descriptions\n", $num_primary - $num_blocks;
    printf "%8d building blocks\n",        $num_blocks;
    printf "\n";
    printf "%8d instances of capabilities\n", $total_values;
    printf "%8d distinct capabilities\n",     &count_hash( \%all_caps );
    printf "\n";
    printf "%8d terminals have all cursor-keys\n",  $total_full_cur;
    printf "%8d terminals have some cursor-keys\n", $total_part_cur;
    printf "%8d terminals have no cursor-keys\n",   $total_no_cur;
    printf "\n";
    printf "%8d terminals use control cursor-keys\n", $total_ctl_cur;
    printf "%8d terminals use ANSI cursor-keys\n",    $total_ansi;
    printf "\n";
    printf "%8d terminals switch key-modes\n",  $total_switches;
    printf "%8d ANSI terminals switch modes\n", $ansi_switches;
    printf "\n";
    printf "%8d terminals start with CSI cursor\n", $total_start_csi;
    printf "%8d terminals start with SS3 cursor\n", $total_start_ss3;
    printf "%8d terminals stay with CSI cursor\n",  $total_stay_csi;
    printf "%8d terminals stay with SS3 cursor\n",  $total_stay_ss3;
    printf "\n";
    printf "%8d terminals use no tabs\n",   $total_none_tabs;
    printf "%8d terminals use soft tabs\n", $total_soft_tabs;
    printf "%8d terminals use hard tabs\n", $total_hard_tabs;
    &show_bucket_numbers( \%total_num_colors, "colors" );
    &show_bucket_numbers( \%total_num_pairs,  "color-pairs" );
    &show_bucket_strings( \%total_extensions, "extension" );
    &show_bucket_strings( \%total_num_kmous,  "mouse" );
    &show_bucket_strings( \%total_num_XM,     "extended mouse" );
    &show_bucket_percent( \%total_num_fkeys, "function-keys" );
    &show_pair_keys;
}

sub main::HELP_MESSAGE() {
    printf STDERR <<EOF
Usage: $0 [options] [term1 [term2 [...]]]

Options:

  -a         analyze "all" terminal descriptions
  -D         debug
  -d dir     use this directory for terminal database
  -e         permit environment with TERMINFO and TERMINFO_DIRS
  -s source  use this file for terminal descriptions
  -u         count only canonical names, ignore aliases
  -v         verbose (shows progress)
  -x         count ncurses user-defined capabilities
EOF
      ;
    exit;
}

&getopts('aDd:es:uvx') || main::HELP_MESSAGE;

delete $ENV{TERMCAP}       unless $opt_e;
delete $ENV{TERMPATH}      unless $opt_e;
delete $ENV{TERMINFO}      unless $opt_e;
delete $ENV{TERMINFO_DIRS} unless $opt_e;

$opt_a = 1   if ($opt_s);
$opt_x = "x" if ($opt_x);
$opt_x = "" unless ($opt_x);

$ENV{TERMINFO} = $opt_d if ($opt_d);

if ($opt_a) {
    &list_all_entries;
}
elsif ( $#ARGV >= 0 ) {
    while ( $#ARGV >= 0 ) {
        my $name = shift @ARGV;
        $all_names{$name} = &list_entry($name);
    }
}
else {
    $all_names{ $ENV{TERM} } = &list_entry( $ENV{TERM} );
}

if ($opt_u) {
    for my $name ( sort keys %canonical_names ) {
        &analyze_entry($name);
    }
}
else {
    for my $name ( sort keys %all_names ) {
        &analyze_entry($name);
    }
}

&failed("no terminals found") unless ( &count_hash( \%canonical_names ) > 0 );

&report_terminfo;

1;
