#!/usr/bin/perl -w
# $Id: analyze-curses-symbols,v 1.12 2015/02/21 01:37:30 tom Exp $
# -----------------------------------------------------------------------------
# Copyright 2015 by Thomas E. Dickey
#
#                         All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the
# sale, use or other dealings in this Software without prior written
# authorization.
# -----------------------------------------------------------------------------
#
# Count symbol usage for the given executables, and summarize their usage with
# categories for these types:
#	ncurses (if using ncurses extension)
#	curses (if not using ncurses extension)
#	wide(curses/ncurses)
#	termcap
#	terminfo
#	slang (perhaps check on wide/whatever)
#
# TODO
# + check and give percentage of total applications from Debian apt using the
# given library.
# + give percentage of total symbols from the library
# + give percentage of applications using no-libs, curses-libs or X-libs
# + distinguish between wide/normal

use strict;

$| = 1;

use Getopt::Std;

our ( $opt_d, $opt_l, $opt_q, $opt_r, $opt_s, $opt_v );

our %all_libs;
our %count_by_type;

our %ll_symbols = qw(
  PC	tc
  UP	tc
  BC	tc
  ospeed	tc
  tgetent	tc
  tgetflag	tc
  tgetnum	tc
  tgetstr	tc
  tgoto	tc
  tputs	tc
  setupterm	ti
  setterm	ti
  set_curterm	ti
  del_curterm	ti
  restartterm	ti
  tparm	ti
  tputs	ti
  putp	ti
  vidputs	ti
  vidattr	ti
  vid_puts	ti
  vid_attr	ti
  mvcur	ti
  tigetflag	ti
  tigetnum	ti
  tigetstr	ti
  tiparm	ti
);

our %extended_symbols = qw(
  assume_default_colors	n5
  curses_version	n5
  define_key	n5
  get_escdelay	n5
  getmouse	n5
  has_key	n5
  has_mouse	n5
  is_cleared	n5
  is_idcok	n5
  is_idlok	n5
  is_immedok	n5
  is_keypad	n5
  is_leaveok	n5
  is_nodelay	n5
  is_notimeout	n5
  is_pad	n5
  is_scrollok	n5
  is_subwin	n5
  is_syncok	n5
  is_term_resized	n5
  key_defined	n5
  keybound	n5
  keyok	n5
  mcprint	n5
  mouse_trafo	n5
  mouseinterval	n5
  mousemask	n5
  nofilter	n5
  resize_term	n5
  resizeterm	n5
  set_escdelay	n5
  set_tabsize	n5
  ungetmouse	n5
  use_default_colors	n5
  use_extended_names	n5
  use_legacy_coding	n5
  use_screen	n5
  use_window	n5
  wenclose	n5
  wgetparent	n5
  wgetscrreg	n5
  wmouse_trafo	n5
  wresize	n5
);

sub read_pipe($) {
    my $pipe = shift;
    my $fh;
    my @data;
    if ( open( $fh, "$pipe|" ) ) {
        @data = <$fh>;
        close $fh;
    }
    else {
        warn "can't read from $pipe: $!";
    }
    return @data;
}

# using "nm", obtain a list of unresolved symbols from the given application.
sub read_app_syms($) {
    my $path = shift;
    my %counts;

    printf "...reading %s\n", $path if ($opt_v);
    my @data = &read_pipe("nm -PD $path");
    my $k    = 0;
    for my $n ( 0 .. $#data ) {
        my $value = $data[$n];
        chomp $value;
        next unless ( $value =~ /\b[U]\b/ );
        $value =~ s/\s.*//;
        $counts{$value} = 0;
    }
    return %counts;
}

# using "nm -D", obtain a list of symbols from the given library.
sub read_lib_syms($) {
    my $path = shift;
    my %counts;
    if (%all_libs) {
        %counts = %{ $all_libs{$path} } if ( $all_libs{$path} );
    }
    if ( not %counts ) {
        printf "...reading %s\n", $path if ($opt_v);
        my @data = &read_pipe("nm -PD $path");
        my $k    = 0;
        for my $n ( 0 .. $#data ) {
            my $value = $data[$n];
            chomp $value;
            next unless ( $value =~ /\b[TDB]\b/ );
            $value =~ s/\s.*//;
            $counts{$value} = 0;
        }
        $all_libs{$path} = \%counts;
    }
    return %counts;
}

# using "ldd", obtain a list of shared libraries used by the given application.
sub read_ldd($) {
    my $path = shift;
    my @data = &read_pipe("ldd $path");
    for my $n ( 0 .. $#data ) {
        chomp $data[$n];
        $data[$n] = "" unless ( $data[$n] =~ /=>\s+\// );
        $data[$n] =~ s/^.*=>\s+//;
        $data[$n] =~ s/\s\(.*$//;
    }
    return @data;
}

sub is_extended($) {
    my $sym    = shift;
    my $result = "?";
    if ( $extended_symbols{$sym} ) {
        $result = $extended_symbols{$sym};
    }
    elsif ( $sym =~ /_sp$/ ) {
        $result = "nc";
    }
    return $result;
}

sub analyze_sym($) {
    my $sym    = shift;
    my $result = "?";
    if ( $sym =~ /^_nc_/ ) {
        $result = "*n";
    }
    elsif ( $sym =~ /^SL/ ) {
        $result = "s";
    }
    elsif ( $ll_symbols{$sym} ) {
        $result = $ll_symbols{$sym};
    }
    return $result;
}

sub lib_is_terminal($) {
    my $lib    = shift;
    my $result = 0;
    if (   $lib =~ /lib(n)?curses/
        or $lib =~ /libtinfo/
        or $lib =~ /libterm/
        or $lib =~ /libslang/ )
    {
        $result = 1;
    }
    return $result;
}

sub lib_depends_on_terminal($) {
    my $path   = shift;
    my $result = 0;
    my @libs   = &read_ldd($path);
    for my $n ( 0 .. $#libs ) {
        if ( &lib_is_terminal( $libs[$n] ) ) {
            $result = 1;
            last;
        }
    }
    return $result;
}

sub merge_syms($$) {
    my %current = %{ $_[0] };
    my %updates = %{ $_[1] };
    my %result;
    foreach my $sym ( keys %current ) {
        $result{$sym} = $current{$sym};
    }
    foreach my $sym ( keys %updates ) {
        $result{$sym} = $updates{$sym};
    }
    return %result;
}

sub analyze_file($$) {
    my $path  = shift;
    my $recur = shift;
    if ( -d $path ) {
        if ( -l $path ) {
            printf "link:%s\n", $path if ($opt_v);
        }
        elsif ( $opt_r or ( $recur == 0 ) ) {
            &analyze_dir( $path, $recur + 1 );
        }
    }
    else {
        printf "file:%s\n", $path if ($opt_v);
        my @libs = &read_ldd($path);
        my %libs;
        my %syms;
        my $type;
        my $found = 0;
        for my $n ( 0 .. $#libs ) {
            my $lib = $libs[$n];
            if ( $lib !~ /^\/.+\/lib.+/ ) {

                # ignore
            }
            elsif ( &lib_is_terminal($lib) ) {
                my %counts = &read_lib_syms($lib);
                $libs{$lib} = \%counts;
                $found++;
            }
            elsif ( &lib_depends_on_terminal($lib) ) {
                my %counts = &read_app_syms($lib);
                %syms = &merge_syms( \%syms, \%counts );
                $found++;
            }
            if ( $lib =~ /libncurses/ ) {
                $type = "ncurses";
            }
            elsif ( $lib =~ /libslang/ ) {
                $type = "slang" unless ($type);
            }
        }
        if ($found) {
            my %counts = &read_app_syms($path);
            %counts = &merge_syms( \%syms, \%counts );

            $type = "";
            if ( defined $counts{"initscr"} or defined $counts{"newterm"} ) {
                $type = "c";
            }
            if ( defined $counts{"tgetent"} ) {
                $type .= "+" if ( $type ne "" );
                $type .= "tc";
            }
            if ( $type eq "" ) {
                for my $sym ( keys %counts ) {
                    my $test = &analyze_sym($sym);
                    if ( $test ne "?" ) {
                        $type = $test;
                        last;
                    }
                }
            }
            if ( $type ne "" ) {
                for my $sym ( keys %counts ) {
                    my $test = &is_extended($sym);
                    if ( $test ne "?" ) {
                        $type =~ s/\bc\b/nc/;
                        last;
                    }
                }
            }
            $type = "i" if ( $type eq "" );
            $count_by_type{$type} += 1;
            printf "%s\t%s\n", $type, $path unless ($opt_q);

            foreach my $sym ( keys %counts ) {
                foreach my $lib ( keys %libs ) {
                    my %data = %{ $libs{$lib} };
                    if ( defined $data{$sym} ) {
                        $data{$sym} += 1;
                        $libs{$lib}     = \%data;
                        $all_libs{$lib} = \%data;
                        last;
                    }
                }
            }
        }
        else {
            printf "%s\t%s\n", "?", $path unless ($opt_q);
            $count_by_type{"?"} += 1;
        }
    }
}

sub analyze_dir($$) {
    my $path  = shift;
    my $recur = shift;
    printf "dir:%s\n", $path if ($opt_v);
    if ( opendir( my $dh, $path ) ) {
        my @entries = sort readdir($dh);
        closedir $dh;
        for my $n ( 0 .. $#entries ) {
            next if ( $entries[$n] =~ /^\.\.?$/ );
            &analyze_file( $path . "/" . $entries[$n], $recur + 1 );
        }
    }
    else {
        warn "can't opendir $path: $!";
    }
}

sub main::HELP_MESSAGE() {
    printf STDERR <<EOF
Usage: $0 [options]

Options:

-d         debug, shows parsed values
-l         show report of symbols used from libraries
-q         quiet, do not show filenames and their types
-r         recur into subdirectories
-s         show summary of application types
-v         verbose, shows files opened
EOF
      ;
    exit;
}

&getopts('dlqrsv') || main::HELP_MESSAGE;

if ( $#ARGV >= 0 ) {
    while ( $#ARGV >= 0 ) {
        &analyze_file( shift @ARGV, 0 );
    }
}
else {
    while ( !eof(STDIN) ) {
        last unless defined( $_ = <STDIN> );
        chomp $_;
        &analyze_file( $_, 0 );
    }
}

if ($opt_l) {
    for my $lib ( sort keys %all_libs ) {
        printf "lib: %s\n", $lib;
        my %counts = %{ $all_libs{$lib} };
        for my $sym ( sort keys %counts ) {
            printf "\t%d\t%s\n", $counts{$sym}, $sym if ( $counts{$sym} > 0 );
        }
    }
}

if ($opt_s) {
    my @types = ( sort keys %count_by_type );
    my $count = 0;
    for my $n ( 0 .. $#types ) {
        $count += $count_by_type{ $types[$n] };
    }
    printf "%d files\n", $count;
    for my $n ( 0 .. $#types ) {
        printf "%d\t%s\n", $count_by_type{ $types[$n] }, $types[$n];
    }
}

1;
