#!/usr/bin/perl -w
# $Id: report-linux-consolefonts,v 1.55 2016/11/01 00:00:55 tom Exp $
# -----------------------------------------------------------------------------
# Copyright 2016 by Thomas E. Dickey
#
#                         All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the
# sale, use or other dealings in this Software without prior written
# authorization.
# -----------------------------------------------------------------------------
# For each "psf" font listed in /usr/share/consolefonts, make a report showing
# which ISO-8859-x variants are covered, as well as the line-drawing used by
# ncurses.
# -----------------------------------------------------------------------------
# TODO: if Perl has encoding, report codes in order to show gaps
# TODO: derive the hashes from Perl encoding
# TODO: iso-8859-5 is the only fully-supported code other than ascii (show ranking)

use strict;
use Getopt::Std;
use Encode 'encode_utf8';

# Autoflush the currently selected output handle (STDOUT unless changed), so
# report lines appear as soon as they are printed.
$| = 1;

# NOTE(review): the uses of %all_files and %all_fonts are outside this part
# of the file; presumably they collect per-file/per-font results -- confirm.
our %all_files;
our %all_fonts;

# Directory holding the console fonts to report on (see the header comment).
our $font_dir = "/usr/share/consolefonts";

# Command-line option flags (Getopt::Std is loaded above).  Uses visible in
# this part of the file:
#   $opt_d  echo each line read from psfgettable
#   $opt_f  print the "...folded" list for glyphs with several Unicode values
#   $opt_F  in filter_folds, drop codes which %same_glyph maps to the first one
#   $opt_v  print the name of each font file as it is read
# $opt_m, $opt_s and $opt_t are used elsewhere.
our ( $opt_d, $opt_f, $opt_F, $opt_m, $opt_s, $opt_t, $opt_v );

# console fonts use a feature of Unicode: some glyphs are repeated.
#
# The list below is a flat sequence of (key, value) pairs, both written as
# "U+xxxx" strings.  Each key is a code which can share a glyph with the code
# given as its value, e.g., no-break space (U+00a0) with space (U+0020), soft
# hyphen (U+00ad) with hyphen-minus (U+002d), and Cyrillic (U+04xx) or Greek
# (U+03xx) letters with their Latin look-alikes.  The last row maps Cyrillic
# letters to Greek ones.
#
# filter_folds() consults this table when $opt_F is set.
our %same_glyph = (
    "U+00a0", "U+0020", "U+00ad", "U+002d", "U+0410", "U+0041",
    "U+0391", "U+0041", "U+0412", "U+0042", "U+0392", "U+0042",
    "U+0421", "U+0043", "U+0415", "U+0045", "U+0395", "U+0045",
    "U+041d", "U+0048", "U+0397", "U+0048", "U+0406", "U+0049",
    "U+0399", "U+0049", "U+0408", "U+004a", "U+041a", "U+004b",
    "U+039a", "U+004b", "U+041c", "U+004d", "U+039c", "U+004d",
    "U+039d", "U+004e", "U+041e", "U+004f", "U+039f", "U+004f",
    "U+0420", "U+0050", "U+03a1", "U+0050", "U+0405", "U+0053",
    "U+0422", "U+0054", "U+03a4", "U+0054", "U+0425", "U+0058",
    "U+03a7", "U+0058", "U+0396", "U+005a", "U+0430", "U+0061",
    "U+0441", "U+0063", "U+0435", "U+0065", "U+0456", "U+0069",
    "U+0458", "U+006a", "U+043e", "U+006f", "U+0440", "U+0070",
    "U+0455", "U+0073", "U+0445", "U+0078", "U+0443", "U+0079",
    "U+03bc", "U+00b5", "U+0401", "U+00cb", "U+0407", "U+00cf",
    "U+0110", "U+00d0", "U+0451", "U+00eb", "U+0457", "U+00ef",
    "U+0413", "U+0393", "U+041f", "U+03a0", "U+0424", "U+03a6",
);

# Eleven light box-drawing characters, keyed by "U+xxxx" code with the literal
# glyph as the value: horizontal and vertical lines, the four corners, the
# four tees and the crossing.
# NOTE(review): presumably the minimum needed for VT100-style line-drawing;
# the code which uses this table is outside this part of the file -- confirm.
our %vt100_minimal = (
    "U+2500", "─", "U+2502", "│", "U+250c", "┌", "U+2510", "┐",
    "U+2514", "└", "U+2518", "┘", "U+251c", "├", "U+2524", "┤",
    "U+252c", "┬", "U+2534", "┴", "U+253c", "┼",
);

# Line-drawing and other special graphic characters (per the header comment,
# the line-drawing used by ncurses), keyed by "U+xxxx" code with the literal
# glyph as the value.  Besides the light box-drawing set, this includes
# scan-lines, arrows, a few symbols which also occur in ISO-8859-x (degree,
# plus/minus, middle dot, pound sterling), and the heavy and double-line
# box-drawing sets.
#
# is_iso8859() and read_font() only test whether a code has a true value
# here; every value in the table is a non-empty string other than "0".
our %line_drawing = (
    "U+250c", "┌", "U+2514", "└", "U+2510", "┐", "U+2518", "┘",
    "U+251c", "├", "U+2524", "┤", "U+2534", "┴", "U+252c", "┬",
    "U+2500", "─", "U+2502", "│", "U+253c", "┼", "U+23ba", "⎺",
    "U+23bd", "⎽", "U+25c6", "◆", "U+2592", "▒", "U+00b0", "°",
    "U+00b1", "±",  "U+00b7", "·",  "U+2190", "←", "U+2192", "→",
    "U+2193", "↓", "U+2191", "↑", "U+2603", "☃", "U+25ae", "▮",
    "U+23bb", "⎻", "U+23bc", "⎼", "U+2264", "≤", "U+2265", "≥",
    "U+03c0", "π",  "U+2260", "≠", "U+00a3", "£",  "U+250f", "┏",
    "U+2517", "┗", "U+2513", "┓", "U+251b", "┛", "U+2523", "┣",
    "U+252b", "┫", "U+253b", "┻", "U+2533", "┳", "U+2501", "━",
    "U+2503", "┃", "U+254b", "╋", "U+2554", "╔", "U+255a", "╚",
    "U+2557", "╗", "U+255d", "╝", "U+2563", "╣", "U+2560", "╠",
    "U+2569", "╩", "U+2566", "╦", "U+2550", "═", "U+2551", "║",
    "U+256c", "╬",
);

# The printable US-ASCII characters, U+0020 (space) through U+007e (tilde),
# keyed by "U+xxxx" code with the character itself as the value.
#
# Lookups must use "defined" (as is_iso8859 does) rather than a plain truth
# test, because the value for U+0030 is the string '0', which is false in
# Perl.
our %us_ascii = (
    "U+0020", ' ',  "U+0021", '!', "U+0022", '"', "U+0023", '#',
    "U+0024", '$',  "U+0025", '%', "U+0026", '&', "U+0027", '\'',
    "U+0028", '(',  "U+0029", ')', "U+002a", '*', "U+002b", '+',
    "U+002c", ',',  "U+002d", '-', "U+002e", '.', "U+002f", '/',
    "U+0030", '0',  "U+0031", '1', "U+0032", '2', "U+0033", '3',
    "U+0034", '4',  "U+0035", '5', "U+0036", '6', "U+0037", '7',
    "U+0038", '8',  "U+0039", '9', "U+003a", ':', "U+003b", ';',
    "U+003c", '<',  "U+003d", '=', "U+003e", '>', "U+003f", '?',
    "U+0040", '@',  "U+0041", 'A', "U+0042", 'B', "U+0043", 'C',
    "U+0044", 'D',  "U+0045", 'E', "U+0046", 'F', "U+0047", 'G',
    "U+0048", 'H',  "U+0049", 'I', "U+004a", 'J', "U+004b", 'K',
    "U+004c", 'L',  "U+004d", 'M', "U+004e", 'N', "U+004f", 'O',
    "U+0050", 'P',  "U+0051", 'Q', "U+0052", 'R', "U+0053", 'S',
    "U+0054", 'T',  "U+0055", 'U', "U+0056", 'V', "U+0057", 'W',
    "U+0058", 'X',  "U+0059", 'Y', "U+005a", 'Z', "U+005b", '[',
    "U+005c", '\\', "U+005d", ']', "U+005e", '^', "U+005f", '_',
    "U+0060", '`',  "U+0061", 'a', "U+0062", 'b', "U+0063", 'c',
    "U+0064", 'd',  "U+0065", 'e', "U+0066", 'f', "U+0067", 'g',
    "U+0068", 'h',  "U+0069", 'i', "U+006a", 'j', "U+006b", 'k',
    "U+006c", 'l',  "U+006d", 'm', "U+006e", 'n', "U+006f", 'o',
    "U+0070", 'p',  "U+0071", 'q', "U+0072", 'r', "U+0073", 's',
    "U+0074", 't',  "U+0075", 'u', "U+0076", 'v', "U+0077", 'w',
    "U+0078", 'x',  "U+0079", 'y', "U+007a", 'z', "U+007b", '{',
    "U+007c", '|',  "U+007d", '}', "U+007e", '~',
);

our %iso_8859_1 = (
    "U+00a0", ' ', "U+00a1", '¡', "U+00a2", '¢', "U+00a3", '£',
    "U+00a4", '¤', "U+00a5", '¥', "U+00a6", '¦', "U+00a7", '§',
    "U+00a8", '¨', "U+00a9", '©', "U+00aa", 'ª', "U+00ab", '«',
    "U+00ac", '¬', "U+00ad", '­', "U+00ae", '®', "U+00af", '¯',
    "U+00b0", '°', "U+00b1", '±', "U+00b2", '²', "U+00b3", '³',
    "U+00b4", '´', "U+00b5", 'µ', "U+00b6", '¶', "U+00b7", '·',
    "U+00b8", '¸', "U+00b9", '¹', "U+00ba", 'º', "U+00bb", '»',
    "U+00bc", '¼', "U+00bd", '½', "U+00be", '¾', "U+00bf", '¿',
    "U+00c0", 'À', "U+00c1", 'Á', "U+00c2", 'Â', "U+00c3", 'Ã',
    "U+00c4", 'Ä', "U+00c5", 'Å', "U+00c6", 'Æ', "U+00c7", 'Ç',
    "U+00c8", 'È', "U+00c9", 'É', "U+00ca", 'Ê', "U+00cb", 'Ë',
    "U+00cc", 'Ì', "U+00cd", 'Í', "U+00ce", 'Î', "U+00cf", 'Ï',
    "U+00d0", 'Ð', "U+00d1", 'Ñ', "U+00d2", 'Ò', "U+00d3", 'Ó',
    "U+00d4", 'Ô', "U+00d5", 'Õ', "U+00d6", 'Ö', "U+00d7", '×',
    "U+00d8", 'Ø', "U+00d9", 'Ù', "U+00da", 'Ú', "U+00db", 'Û',
    "U+00dc", 'Ü', "U+00dd", 'Ý', "U+00de", 'Þ', "U+00df", 'ß',
    "U+00e0", 'à', "U+00e1", 'á', "U+00e2", 'â', "U+00e3", 'ã',
    "U+00e4", 'ä', "U+00e5", 'å', "U+00e6", 'æ', "U+00e7", 'ç',
    "U+00e8", 'è', "U+00e9", 'é', "U+00ea", 'ê', "U+00eb", 'ë',
    "U+00ec", 'ì', "U+00ed", 'í', "U+00ee", 'î', "U+00ef", 'ï',
    "U+00f0", 'ð', "U+00f1", 'ñ', "U+00f2", 'ò', "U+00f3", 'ó',
    "U+00f4", 'ô', "U+00f5", 'õ', "U+00f6", 'ö', "U+00f7", '÷',
    "U+00f8", 'ø', "U+00f9", 'ù', "U+00fa", 'ú', "U+00fb", 'û',
    "U+00fc", 'ü', "U+00fd", 'ý', "U+00fe", 'þ', "U+00ff", 'ÿ',
);

our %iso_8859_2 = (
    "U+00a0", " ", "U+0104", "Ą", "U+02d8", "˘", "U+0141", "Ł",
    "U+00a4", "¤", "U+013d", "Ľ", "U+015a", "Ś", "U+00a7", "§",
    "U+00a8", "¨", "U+0160", "Š", "U+015e", "Ş", "U+0164", "Ť",
    "U+0179", "Ź", "U+00ad", "­", "U+017d", "Ž", "U+017b", "Ż",
    "U+00b0", "°", "U+0105", "ą", "U+02db", "˛", "U+0142", "ł",
    "U+00b4", "´", "U+013e", "ľ", "U+015b", "ś", "U+02c7", "ˇ",
    "U+00b8", "¸", "U+0161", "š", "U+015f", "ş", "U+0165", "ť",
    "U+017a", "ź", "U+02dd", "˝", "U+017e", "ž", "U+017c", "ż",
    "U+0154", "Ŕ", "U+00c1", "Á", "U+00c2", "Â", "U+0102", "Ă",
    "U+00c4", "Ä", "U+0139", "Ĺ", "U+0106", "Ć", "U+00c7", "Ç",
    "U+010c", "Č", "U+00c9", "É", "U+0118", "Ę", "U+00cb", "Ë",
    "U+011a", "Ě", "U+00cd", "Í", "U+00ce", "Î", "U+010e", "Ď",
    "U+0110", "Đ", "U+0143", "Ń", "U+0147", "Ň", "U+00d3", "Ó",
    "U+00d4", "Ô", "U+0150", "Ő", "U+00d6", "Ö", "U+00d7", "×",
    "U+0158", "Ř", "U+016e", "Ů", "U+00da", "Ú", "U+0170", "Ű",
    "U+00dc", "Ü", "U+00dd", "Ý", "U+0162", "Ţ", "U+00df", "ß",
    "U+0155", "ŕ", "U+00e1", "á", "U+00e2", "â", "U+0103", "ă",
    "U+00e4", "ä", "U+013a", "ĺ", "U+0107", "ć", "U+00e7", "ç",
    "U+010d", "č", "U+00e9", "é", "U+0119", "ę", "U+00eb", "ë",
    "U+011b", "ě", "U+00ed", "í", "U+00ee", "î", "U+010f", "ď",
    "U+0111", "đ", "U+0144", "ń", "U+0148", "ň", "U+00f3", "ó",
    "U+00f4", "ô", "U+0151", "ő", "U+00f6", "ö", "U+00f7", "÷",
    "U+0159", "ř", "U+016f", "ů", "U+00fa", "ú", "U+0171", "ű",
    "U+00fc", "ü", "U+00fd", "ý", "U+0163", "ţ", "U+02d9", "˙",
);

our %iso_8859_3 = (
    "U+00a0", " ", "U+0126", "Ħ", "U+02d8", "˘", "U+00a3", "£",
    "U+00a4", "¤", "U+0124", "Ĥ", "U+00a7", "§", "U+00a8", "¨",
    "U+0130", "İ", "U+015e", "Ş", "U+011e", "Ğ", "U+0134", "Ĵ",
    "U+00ad", "­", "U+017b", "Ż", "U+00b0", "°", "U+0127", "ħ",
    "U+00b2", "²", "U+00b3", "³", "U+00b4", "´", "U+00b5", "µ",
    "U+0125", "ĥ", "U+00b7", "·", "U+00b8", "¸", "U+0131", "ı",
    "U+015f", "ş", "U+011f", "ğ", "U+0135", "ĵ", "U+00bd", "½",
    "U+017c", "ż", "U+00c0", "À", "U+00c1", "Á", "U+00c2", "Â",
    "U+00c4", "Ä", "U+010a", "Ċ", "U+0108", "Ĉ", "U+00c7", "Ç",
    "U+00c8", "È", "U+00c9", "É", "U+00ca", "Ê", "U+00cb", "Ë",
    "U+00cc", "Ì", "U+00cd", "Í", "U+00ce", "Î", "U+00cf", "Ï",
    "U+00d1", "Ñ", "U+00d2", "Ò", "U+00d3", "Ó", "U+00d4", "Ô",
    "U+0120", "Ġ", "U+00d6", "Ö", "U+00d7", "×", "U+011c", "Ĝ",
    "U+00d9", "Ù", "U+00da", "Ú", "U+00db", "Û", "U+00dc", "Ü",
    "U+016c", "Ŭ", "U+015c", "Ŝ", "U+00df", "ß", "U+00e0", "à",
    "U+00e1", "á", "U+00e2", "â", "U+00e4", "ä", "U+010b", "ċ",
    "U+0109", "ĉ", "U+00e7", "ç", "U+00e8", "è", "U+00e9", "é",
    "U+00ea", "ê", "U+00eb", "ë", "U+00ec", "ì", "U+00ed", "í",
    "U+00ee", "î", "U+00ef", "ï", "U+00f1", "ñ", "U+00f2", "ò",
    "U+00f3", "ó", "U+00f4", "ô", "U+0121", "ġ", "U+00f6", "ö",
    "U+00f7", "÷", "U+011d", "ĝ", "U+00f9", "ù", "U+00fa", "ú",
    "U+00fb", "û", "U+00fc", "ü", "U+016d", "ŭ", "U+015d", "ŝ",
    "U+02d9", "˙",

);

our %iso_8859_4 = (
    "U+00a0", " ", "U+0104", "Ą", "U+0138", "ĸ", "U+0156", "Ŗ",
    "U+00a4", "¤", "U+0128", "Ĩ", "U+013b", "Ļ", "U+00a7", "§",
    "U+00a8", "¨", "U+0160", "Š", "U+0112", "Ē", "U+0122", "Ģ",
    "U+0166", "Ŧ", "U+00ad", "­", "U+017d", "Ž", "U+00af", "¯",
    "U+00b0", "°", "U+0105", "ą", "U+02db", "˛", "U+0157", "ŗ",
    "U+00b4", "´", "U+0129", "ĩ", "U+013c", "ļ", "U+02c7", "ˇ",
    "U+00b8", "¸", "U+0161", "š", "U+0113", "ē", "U+0123", "ģ",
    "U+0167", "ŧ", "U+014a", "Ŋ", "U+017e", "ž", "U+014b", "ŋ",
    "U+0100", "Ā", "U+00c1", "Á", "U+00c2", "Â", "U+00c3", "Ã",
    "U+00c4", "Ä", "U+00c5", "Å", "U+00c6", "Æ", "U+012e", "Į",
    "U+010c", "Č", "U+00c9", "É", "U+0118", "Ę", "U+00cb", "Ë",
    "U+0116", "Ė", "U+00cd", "Í", "U+00ce", "Î", "U+012a", "Ī",
    "U+0110", "Đ", "U+0145", "Ņ", "U+014c", "Ō", "U+0136", "Ķ",
    "U+00d4", "Ô", "U+00d5", "Õ", "U+00d6", "Ö", "U+00d7", "×",
    "U+00d8", "Ø", "U+0172", "Ų", "U+00da", "Ú", "U+00db", "Û",
    "U+00dc", "Ü", "U+0168", "Ũ", "U+016a", "Ū", "U+00df", "ß",
    "U+0101", "ā", "U+00e1", "á", "U+00e2", "â", "U+00e3", "ã",
    "U+00e4", "ä", "U+00e5", "å", "U+00e6", "æ", "U+012f", "į",
    "U+010d", "č", "U+00e9", "é", "U+0119", "ę", "U+00eb", "ë",
    "U+0117", "ė", "U+00ed", "í", "U+00ee", "î", "U+012b", "ī",
    "U+0111", "đ", "U+0146", "ņ", "U+014d", "ō", "U+0137", "ķ",
    "U+00f4", "ô", "U+00f5", "õ", "U+00f6", "ö", "U+00f7", "÷",
    "U+00f8", "ø", "U+0173", "ų", "U+00fa", "ú", "U+00fb", "û",
    "U+00fc", "ü", "U+0169", "ũ", "U+016b", "ū", "U+02d9", "˙",
);

our %iso_8859_5 = (
    "U+00a0", " ",  "U+0401", "Ё", "U+0402", "Ђ", "U+0403", "Ѓ",
    "U+0404", "Є",  "U+0405", "Ѕ", "U+0406", "І", "U+0407", "Ї",
    "U+0408", "Ј",  "U+0409", "Љ", "U+040a", "Њ", "U+040b", "Ћ",
    "U+040c", "Ќ",  "U+00ad", "­", "U+040e", "Ў", "U+040f", "Џ",
    "U+0410", "А",  "U+0411", "Б", "U+0412", "В", "U+0413", "Г",
    "U+0414", "Д",  "U+0415", "Е", "U+0416", "Ж", "U+0417", "З",
    "U+0418", "И",  "U+0419", "Й", "U+041a", "К", "U+041b", "Л",
    "U+041c", "М",  "U+041d", "Н", "U+041e", "О", "U+041f", "П",
    "U+0420", "Р",  "U+0421", "С", "U+0422", "Т", "U+0423", "У",
    "U+0424", "Ф",  "U+0425", "Х", "U+0426", "Ц", "U+0427", "Ч",
    "U+0428", "Ш",  "U+0429", "Щ", "U+042a", "Ъ", "U+042b", "Ы",
    "U+042c", "Ь",  "U+042d", "Э", "U+042e", "Ю", "U+042f", "Я",
    "U+0430", "а",  "U+0431", "б", "U+0432", "в", "U+0433", "г",
    "U+0434", "д",  "U+0435", "е", "U+0436", "ж", "U+0437", "з",
    "U+0438", "и",  "U+0439", "й", "U+043a", "к", "U+043b", "л",
    "U+043c", "м",  "U+043d", "н", "U+043e", "о", "U+043f", "п",
    "U+0440", "р",  "U+0441", "с", "U+0442", "т", "U+0443", "у",
    "U+0444", "ф",  "U+0445", "х", "U+0446", "ц", "U+0447", "ч",
    "U+0448", "ш",  "U+0449", "щ", "U+044a", "ъ", "U+044b", "ы",
    "U+044c", "ь",  "U+044d", "э", "U+044e", "ю", "U+044f", "я",
    "U+2116", "№", "U+0451", "ё", "U+0452", "ђ", "U+0453", "ѓ",
    "U+0454", "є",  "U+0455", "ѕ", "U+0456", "і", "U+0457", "ї",
    "U+0458", "ј",  "U+0459", "љ", "U+045a", "њ", "U+045b", "ћ",
    "U+045c", "ќ",  "U+00a7", "§", "U+045e", "ў", "U+045f", "џ",
);

our %iso_8859_6 = (
    "U+00a0", " ", "U+00a4", "¤", "U+060c", "،", "U+00ad", "­",
    "U+061b", "؛", "U+061f", "؟", "U+0621", "ء", "U+0622", "آ",
    "U+0623", "أ", "U+0624", "ؤ", "U+0625", "إ", "U+0626", "ئ",
    "U+0627", "ا", "U+0628", "ب", "U+0629", "ة", "U+062a", "ت",
    "U+062b", "ث", "U+062c", "ج", "U+062d", "ح", "U+062e", "خ",
    "U+062f", "د", "U+0630", "ذ", "U+0631", "ر", "U+0632", "ز",
    "U+0633", "س", "U+0634", "ش", "U+0635", "ص", "U+0636", "ض",
    "U+0637", "ط", "U+0638", "ظ", "U+0639", "ع", "U+063a", "غ",
    "U+0640", "ـ", "U+0641", "ف", "U+0642", "ق", "U+0643", "ك",
    "U+0644", "ل", "U+0645", "م", "U+0646", "ن", "U+0647", "ه",
    "U+0648", "و", "U+0649", "ى", "U+064a", "ي", "U+064b", "ً",
    "U+064c", "ٌ", "U+064d", "ٍ", "U+064e", "َ", "U+064f", "ُ",
    "U+0650", "ِ", "U+0651", "ّ", "U+0652", "ْ",
);

our %iso_8859_7 = (
    "U+00a0", " ",  "U+2018", "‘", "U+2019", "’", "U+00a3", "£",
    "U+20ac", "€", "U+20af", "₯", "U+00a6", "¦",  "U+00a7", "§",
    "U+00a8", "¨",  "U+00a9", "©",  "U+037a", "ͺ",  "U+00ab", "«",
    "U+00ac", "¬",  "U+00ad", "­",  "U+2015", "―", "U+00b0", "°",
    "U+00b1", "±",  "U+00b2", "²",  "U+00b3", "³",  "U+0384", "΄",
    "U+0385", "΅",  "U+0386", "Ά",  "U+00b7", "·",  "U+0388", "Έ",
    "U+0389", "Ή",  "U+038a", "Ί",  "U+00bb", "»",  "U+038c", "Ό",
    "U+00bd", "½",  "U+038e", "Ύ",  "U+038f", "Ώ",  "U+0390", "ΐ",
    "U+0391", "Α",  "U+0392", "Β",  "U+0393", "Γ",  "U+0394", "Δ",
    "U+0395", "Ε",  "U+0396", "Ζ",  "U+0397", "Η",  "U+0398", "Θ",
    "U+0399", "Ι",  "U+039a", "Κ",  "U+039b", "Λ",  "U+039c", "Μ",
    "U+039d", "Ν",  "U+039e", "Ξ",  "U+039f", "Ο",  "U+03a0", "Π",
    "U+03a1", "Ρ",  "U+03a3", "Σ",  "U+03a4", "Τ",  "U+03a5", "Υ",
    "U+03a6", "Φ",  "U+03a7", "Χ",  "U+03a8", "Ψ",  "U+03a9", "Ω",
    "U+03aa", "Ϊ",  "U+03ab", "Ϋ",  "U+03ac", "ά",  "U+03ad", "έ",
    "U+03ae", "ή",  "U+03af", "ί",  "U+03b0", "ΰ",  "U+03b1", "α",
    "U+03b2", "β",  "U+03b3", "γ",  "U+03b4", "δ",  "U+03b5", "ε",
    "U+03b6", "ζ",  "U+03b7", "η",  "U+03b8", "θ",  "U+03b9", "ι",
    "U+03ba", "κ",  "U+03bb", "λ",  "U+03bc", "μ",  "U+03bd", "ν",
    "U+03be", "ξ",  "U+03bf", "ο",  "U+03c0", "π",  "U+03c1", "ρ",
    "U+03c2", "ς",  "U+03c3", "σ",  "U+03c4", "τ",  "U+03c5", "υ",
    "U+03c6", "φ",  "U+03c7", "χ",  "U+03c8", "ψ",  "U+03c9", "ω",
    "U+03ca", "ϊ",  "U+03cb", "ϋ",  "U+03cc", "ό",  "U+03cd", "ύ",
    "U+03ce", "ώ",
);

our %iso_8859_8 = (
    "U+00a0", " ", "U+00a2", "¢", "U+00a3", "£",  "U+00a4", "¤",
    "U+00a5", "¥", "U+00a6", "¦", "U+00a7", "§",  "U+00a8", "¨",
    "U+00a9", "©", "U+00d7", "×", "U+00ab", "«",  "U+00ac", "¬",
    "U+00ad", "­", "U+00ae", "®", "U+00af", "¯",  "U+00b0", "°",
    "U+00b1", "±", "U+00b2", "²", "U+00b3", "³",  "U+00b4", "´",
    "U+00b5", "µ", "U+00b6", "¶", "U+00b7", "·",  "U+00b8", "¸",
    "U+00b9", "¹", "U+00f7", "÷", "U+00bb", "»",  "U+00bc", "¼",
    "U+00bd", "½", "U+00be", "¾", "U+2017", "‗", "U+05d0", "א",
    "U+05d1", "ב", "U+05d2", "ג", "U+05d3", "ד",  "U+05d4", "ה",
    "U+05d5", "ו", "U+05d6", "ז", "U+05d7", "ח",  "U+05d8", "ט",
    "U+05d9", "י", "U+05da", "ך", "U+05db", "כ",  "U+05dc", "ל",
    "U+05dd", "ם", "U+05de", "מ", "U+05df", "ן",  "U+05e0", "נ",
    "U+05e1", "ס", "U+05e2", "ע", "U+05e3", "ף",  "U+05e4", "פ",
    "U+05e5", "ץ", "U+05e6", "צ", "U+05e7", "ק",  "U+05e8", "ר",
    "U+05e9", "ש", "U+05ea", "ת", "U+200e", "‎", "U+200f", "‏",
);

our %iso_8859_9 = (
    "U+00a0", " ", "U+00a1", "¡", "U+00a2", "¢", "U+00a3", "£",
    "U+00a4", "¤", "U+00a5", "¥", "U+00a6", "¦", "U+00a7", "§",
    "U+00a8", "¨", "U+00a9", "©", "U+00aa", "ª", "U+00ab", "«",
    "U+00ac", "¬", "U+00ad", "­", "U+00ae", "®", "U+00af", "¯",
    "U+00b0", "°", "U+00b1", "±", "U+00b2", "²", "U+00b3", "³",
    "U+00b4", "´", "U+00b5", "µ", "U+00b6", "¶", "U+00b7", "·",
    "U+00b8", "¸", "U+00b9", "¹", "U+00ba", "º", "U+00bb", "»",
    "U+00bc", "¼", "U+00bd", "½", "U+00be", "¾", "U+00bf", "¿",
    "U+00c0", "À", "U+00c1", "Á", "U+00c2", "Â", "U+00c3", "Ã",
    "U+00c4", "Ä", "U+00c5", "Å", "U+00c6", "Æ", "U+00c7", "Ç",
    "U+00c8", "È", "U+00c9", "É", "U+00ca", "Ê", "U+00cb", "Ë",
    "U+00cc", "Ì", "U+00cd", "Í", "U+00ce", "Î", "U+00cf", "Ï",
    "U+011e", "Ğ", "U+00d1", "Ñ", "U+00d2", "Ò", "U+00d3", "Ó",
    "U+00d4", "Ô", "U+00d5", "Õ", "U+00d6", "Ö", "U+00d7", "×",
    "U+00d8", "Ø", "U+00d9", "Ù", "U+00da", "Ú", "U+00db", "Û",
    "U+00dc", "Ü", "U+0130", "İ", "U+015e", "Ş", "U+00df", "ß",
    "U+00e0", "à", "U+00e1", "á", "U+00e2", "â", "U+00e3", "ã",
    "U+00e4", "ä", "U+00e5", "å", "U+00e6", "æ", "U+00e7", "ç",
    "U+00e8", "è", "U+00e9", "é", "U+00ea", "ê", "U+00eb", "ë",
    "U+00ec", "ì", "U+00ed", "í", "U+00ee", "î", "U+00ef", "ï",
    "U+011f", "ğ", "U+00f1", "ñ", "U+00f2", "ò", "U+00f3", "ó",
    "U+00f4", "ô", "U+00f5", "õ", "U+00f6", "ö", "U+00f7", "÷",
    "U+00f8", "ø", "U+00f9", "ù", "U+00fa", "ú", "U+00fb", "û",
    "U+00fc", "ü", "U+0131", "ı", "U+015f", "ş", "U+00ff", "ÿ",
);

our %iso_8859_10 = (
    "U+00a0", " ", "U+0104", "Ą",  "U+0112", "Ē", "U+0122", "Ģ",
    "U+012a", "Ī", "U+0128", "Ĩ",  "U+0136", "Ķ", "U+00a7", "§",
    "U+013b", "Ļ", "U+0110", "Đ",  "U+0160", "Š", "U+0166", "Ŧ",
    "U+017d", "Ž", "U+00ad", "­",  "U+016a", "Ū", "U+014a", "Ŋ",
    "U+00b0", "°", "U+0105", "ą",  "U+0113", "ē", "U+0123", "ģ",
    "U+012b", "ī", "U+0129", "ĩ",  "U+0137", "ķ", "U+00b7", "·",
    "U+013c", "ļ", "U+0111", "đ",  "U+0161", "š", "U+0167", "ŧ",
    "U+017e", "ž", "U+2015", "―", "U+016b", "ū", "U+014b", "ŋ",
    "U+0100", "Ā", "U+00c1", "Á",  "U+00c2", "Â", "U+00c3", "Ã",
    "U+00c4", "Ä", "U+00c5", "Å",  "U+00c6", "Æ", "U+012e", "Į",
    "U+010c", "Č", "U+00c9", "É",  "U+0118", "Ę", "U+00cb", "Ë",
    "U+0116", "Ė", "U+00cd", "Í",  "U+00ce", "Î", "U+00cf", "Ï",
    "U+00d0", "Ð", "U+0145", "Ņ",  "U+014c", "Ō", "U+00d3", "Ó",
    "U+00d4", "Ô", "U+00d5", "Õ",  "U+00d6", "Ö", "U+0168", "Ũ",
    "U+00d8", "Ø", "U+0172", "Ų",  "U+00da", "Ú", "U+00db", "Û",
    "U+00dc", "Ü", "U+00dd", "Ý",  "U+00de", "Þ", "U+00df", "ß",
    "U+0101", "ā", "U+00e1", "á",  "U+00e2", "â", "U+00e3", "ã",
    "U+00e4", "ä", "U+00e5", "å",  "U+00e6", "æ", "U+012f", "į",
    "U+010d", "č", "U+00e9", "é",  "U+0119", "ę", "U+00eb", "ë",
    "U+0117", "ė", "U+00ed", "í",  "U+00ee", "î", "U+00ef", "ï",
    "U+00f0", "ð", "U+0146", "ņ",  "U+014d", "ō", "U+00f3", "ó",
    "U+00f4", "ô", "U+00f5", "õ",  "U+00f6", "ö", "U+0169", "ũ",
    "U+00f8", "ø", "U+0173", "ų",  "U+00fa", "ú", "U+00fb", "û",
    "U+00fc", "ü", "U+00fd", "ý",  "U+00fe", "þ", "U+0138", "ĸ",
);

our %iso_8859_11 = (
    "U+00a0", " ",  "U+0e01", "ก", "U+0e02", "ข", "U+0e03", "ฃ",
    "U+0e04", "ค", "U+0e05", "ฅ", "U+0e06", "ฆ", "U+0e07", "ง",
    "U+0e08", "จ", "U+0e09", "ฉ", "U+0e0a", "ช", "U+0e0b", "ซ",
    "U+0e0c", "ฌ", "U+0e0d", "ญ", "U+0e0e", "ฎ", "U+0e0f", "ฏ",
    "U+0e10", "ฐ", "U+0e11", "ฑ", "U+0e12", "ฒ", "U+0e13", "ณ",
    "U+0e14", "ด", "U+0e15", "ต", "U+0e16", "ถ", "U+0e17", "ท",
    "U+0e18", "ธ", "U+0e19", "น", "U+0e1a", "บ", "U+0e1b", "ป",
    "U+0e1c", "ผ", "U+0e1d", "ฝ", "U+0e1e", "พ", "U+0e1f", "ฟ",
    "U+0e20", "ภ", "U+0e21", "ม", "U+0e22", "ย", "U+0e23", "ร",
    "U+0e24", "ฤ", "U+0e25", "ล", "U+0e26", "ฦ", "U+0e27", "ว",
    "U+0e28", "ศ", "U+0e29", "ษ", "U+0e2a", "ส", "U+0e2b", "ห",
    "U+0e2c", "ฬ", "U+0e2d", "อ", "U+0e2e", "ฮ", "U+0e2f", "ฯ",
    "U+0e30", "ะ", "U+0e31", "ั", "U+0e32", "า", "U+0e33", "ำ",
    "U+0e34", "ิ", "U+0e35", "ี", "U+0e36", "ึ", "U+0e37", "ื",
    "U+0e38", "ุ", "U+0e39", "ู", "U+0e3a", "ฺ", "U+0e3f", "฿",
    "U+0e40", "เ", "U+0e41", "แ", "U+0e42", "โ", "U+0e43", "ใ",
    "U+0e44", "ไ", "U+0e45", "ๅ", "U+0e46", "ๆ", "U+0e47", "็",
    "U+0e48", "่", "U+0e49", "้", "U+0e4a", "๊", "U+0e4b", "๋",
    "U+0e4c", "์", "U+0e4d", "ํ", "U+0e4e", "๎", "U+0e4f", "๏",
    "U+0e50", "๐", "U+0e51", "๑", "U+0e52", "๒", "U+0e53", "๓",
    "U+0e54", "๔", "U+0e55", "๕", "U+0e56", "๖", "U+0e57", "๗",
    "U+0e58", "๘", "U+0e59", "๙", "U+0e5a", "๚", "U+0e5b", "๛",
);

our %iso_8859_13 = (
    "U+00a0", " ",  "U+201d", "”", "U+00a2", "¢", "U+00a3", "£",
    "U+00a4", "¤",  "U+201e", "„", "U+00a6", "¦", "U+00a7", "§",
    "U+00d8", "Ø",  "U+00a9", "©",  "U+0156", "Ŗ", "U+00ab", "«",
    "U+00ac", "¬",  "U+00ad", "­",  "U+00ae", "®", "U+00c6", "Æ",
    "U+00b0", "°",  "U+00b1", "±",  "U+00b2", "²", "U+00b3", "³",
    "U+201c", "“", "U+00b5", "µ",  "U+00b6", "¶", "U+00b7", "·",
    "U+00f8", "ø",  "U+00b9", "¹",  "U+0157", "ŗ", "U+00bb", "»",
    "U+00bc", "¼",  "U+00bd", "½",  "U+00be", "¾", "U+00e6", "æ",
    "U+0104", "Ą",  "U+012e", "Į",  "U+0100", "Ā", "U+0106", "Ć",
    "U+00c4", "Ä",  "U+00c5", "Å",  "U+0118", "Ę", "U+0112", "Ē",
    "U+010c", "Č",  "U+00c9", "É",  "U+0179", "Ź", "U+0116", "Ė",
    "U+0122", "Ģ",  "U+0136", "Ķ",  "U+012a", "Ī", "U+013b", "Ļ",
    "U+0160", "Š",  "U+0143", "Ń",  "U+0145", "Ņ", "U+00d3", "Ó",
    "U+014c", "Ō",  "U+00d5", "Õ",  "U+00d6", "Ö", "U+00d7", "×",
    "U+0172", "Ų",  "U+0141", "Ł",  "U+015a", "Ś", "U+016a", "Ū",
    "U+00dc", "Ü",  "U+017b", "Ż",  "U+017d", "Ž", "U+00df", "ß",
    "U+0105", "ą",  "U+012f", "į",  "U+0101", "ā", "U+0107", "ć",
    "U+00e4", "ä",  "U+00e5", "å",  "U+0119", "ę", "U+0113", "ē",
    "U+010d", "č",  "U+00e9", "é",  "U+017a", "ź", "U+0117", "ė",
    "U+0123", "ģ",  "U+0137", "ķ",  "U+012b", "ī", "U+013c", "ļ",
    "U+0161", "š",  "U+0144", "ń",  "U+0146", "ņ", "U+00f3", "ó",
    "U+014d", "ō",  "U+00f5", "õ",  "U+00f6", "ö", "U+00f7", "÷",
    "U+0173", "ų",  "U+0142", "ł",  "U+015b", "ś", "U+016b", "ū",
    "U+00fc", "ü",  "U+017c", "ż",  "U+017e", "ž", "U+2019", "’",
);

our %iso_8859_14 = (
    "U+00a0", " ",  "U+1e02", "Ḃ", "U+1e03", "ḃ", "U+00a3", "£",
    "U+010a", "Ċ",  "U+010b", "ċ",  "U+1e0a", "Ḋ", "U+00a7", "§",
    "U+1e80", "Ẁ", "U+00a9", "©",  "U+1e82", "Ẃ", "U+1e0b", "ḋ",
    "U+1ef2", "Ỳ", "U+00ad", "­",  "U+00ae", "®",  "U+0178", "Ÿ",
    "U+1e1e", "Ḟ", "U+1e1f", "ḟ", "U+0120", "Ġ",  "U+0121", "ġ",
    "U+1e40", "Ṁ", "U+1e41", "ṁ", "U+00b6", "¶",  "U+1e56", "Ṗ",
    "U+1e81", "ẁ", "U+1e57", "ṗ", "U+1e83", "ẃ", "U+1e60", "Ṡ",
    "U+1ef3", "ỳ", "U+1e84", "Ẅ", "U+1e85", "ẅ", "U+1e61", "ṡ",
    "U+00c0", "À",  "U+00c1", "Á",  "U+00c2", "Â",  "U+00c3", "Ã",
    "U+00c4", "Ä",  "U+00c5", "Å",  "U+00c6", "Æ",  "U+00c7", "Ç",
    "U+00c8", "È",  "U+00c9", "É",  "U+00ca", "Ê",  "U+00cb", "Ë",
    "U+00cc", "Ì",  "U+00cd", "Í",  "U+00ce", "Î",  "U+00cf", "Ï",
    "U+0174", "Ŵ",  "U+00d1", "Ñ",  "U+00d2", "Ò",  "U+00d3", "Ó",
    "U+00d4", "Ô",  "U+00d5", "Õ",  "U+00d6", "Ö",  "U+1e6a", "Ṫ",
    "U+00d8", "Ø",  "U+00d9", "Ù",  "U+00da", "Ú",  "U+00db", "Û",
    "U+00dc", "Ü",  "U+00dd", "Ý",  "U+0176", "Ŷ",  "U+00df", "ß",
    "U+00e0", "à",  "U+00e1", "á",  "U+00e2", "â",  "U+00e3", "ã",
    "U+00e4", "ä",  "U+00e5", "å",  "U+00e6", "æ",  "U+00e7", "ç",
    "U+00e8", "è",  "U+00e9", "é",  "U+00ea", "ê",  "U+00eb", "ë",
    "U+00ec", "ì",  "U+00ed", "í",  "U+00ee", "î",  "U+00ef", "ï",
    "U+0175", "ŵ",  "U+00f1", "ñ",  "U+00f2", "ò",  "U+00f3", "ó",
    "U+00f4", "ô",  "U+00f5", "õ",  "U+00f6", "ö",  "U+1e6b", "ṫ",
    "U+00f8", "ø",  "U+00f9", "ù",  "U+00fa", "ú",  "U+00fb", "û",
    "U+00fc", "ü",  "U+00fd", "ý",  "U+0177", "ŷ",  "U+00ff", "ÿ",
);

our %iso_8859_15 = (
    "U+00a0", " ",  "U+00a1", "¡", "U+00a2", "¢", "U+00a3", "£",
    "U+20ac", "€", "U+00a5", "¥", "U+0160", "Š", "U+00a7", "§",
    "U+0161", "š",  "U+00a9", "©", "U+00aa", "ª", "U+00ab", "«",
    "U+00ac", "¬",  "U+00ad", "­", "U+00ae", "®", "U+00af", "¯",
    "U+00b0", "°",  "U+00b1", "±", "U+00b2", "²", "U+00b3", "³",
    "U+017d", "Ž",  "U+00b5", "µ", "U+00b6", "¶", "U+00b7", "·",
    "U+017e", "ž",  "U+00b9", "¹", "U+00ba", "º", "U+00bb", "»",
    "U+0152", "Œ",  "U+0153", "œ", "U+0178", "Ÿ", "U+00bf", "¿",
    "U+00c0", "À",  "U+00c1", "Á", "U+00c2", "Â", "U+00c3", "Ã",
    "U+00c4", "Ä",  "U+00c5", "Å", "U+00c6", "Æ", "U+00c7", "Ç",
    "U+00c8", "È",  "U+00c9", "É", "U+00ca", "Ê", "U+00cb", "Ë",
    "U+00cc", "Ì",  "U+00cd", "Í", "U+00ce", "Î", "U+00cf", "Ï",
    "U+00d0", "Ð",  "U+00d1", "Ñ", "U+00d2", "Ò", "U+00d3", "Ó",
    "U+00d4", "Ô",  "U+00d5", "Õ", "U+00d6", "Ö", "U+00d7", "×",
    "U+00d8", "Ø",  "U+00d9", "Ù", "U+00da", "Ú", "U+00db", "Û",
    "U+00dc", "Ü",  "U+00dd", "Ý", "U+00de", "Þ", "U+00df", "ß",
    "U+00e0", "à",  "U+00e1", "á", "U+00e2", "â", "U+00e3", "ã",
    "U+00e4", "ä",  "U+00e5", "å", "U+00e6", "æ", "U+00e7", "ç",
    "U+00e8", "è",  "U+00e9", "é", "U+00ea", "ê", "U+00eb", "ë",
    "U+00ec", "ì",  "U+00ed", "í", "U+00ee", "î", "U+00ef", "ï",
    "U+00f0", "ð",  "U+00f1", "ñ", "U+00f2", "ò", "U+00f3", "ó",
    "U+00f4", "ô",  "U+00f5", "õ", "U+00f6", "ö", "U+00f7", "÷",
    "U+00f8", "ø",  "U+00f9", "ù", "U+00fa", "ú", "U+00fb", "û",
    "U+00fc", "ü",  "U+00fd", "ý", "U+00fe", "þ", "U+00ff", "ÿ",
);

our %iso_8859_16 = (
    "U+00a0", " ",  "U+0104", "Ą",  "U+0105", "ą", "U+0141", "Ł",
    "U+20ac", "€", "U+201e", "„", "U+0160", "Š", "U+00a7", "§",
    "U+0161", "š",  "U+00a9", "©",  "U+0218", "Ș", "U+00ab", "«",
    "U+0179", "Ź",  "U+00ad", "­",  "U+017a", "ź", "U+017b", "Ż",
    "U+00b0", "°",  "U+00b1", "±",  "U+010c", "Č", "U+0142", "ł",
    "U+017d", "Ž",  "U+201d", "”", "U+00b6", "¶", "U+00b7", "·",
    "U+017e", "ž",  "U+010d", "č",  "U+0219", "ș", "U+00bb", "»",
    "U+0152", "Œ",  "U+0153", "œ",  "U+0178", "Ÿ", "U+017c", "ż",
    "U+00c0", "À",  "U+00c1", "Á",  "U+00c2", "Â", "U+0102", "Ă",
    "U+00c4", "Ä",  "U+0106", "Ć",  "U+00c6", "Æ", "U+00c7", "Ç",
    "U+00c8", "È",  "U+00c9", "É",  "U+00ca", "Ê", "U+00cb", "Ë",
    "U+00cc", "Ì",  "U+00cd", "Í",  "U+00ce", "Î", "U+00cf", "Ï",
    "U+0110", "Đ",  "U+0143", "Ń",  "U+00d2", "Ò", "U+00d3", "Ó",
    "U+00d4", "Ô",  "U+0150", "Ő",  "U+00d6", "Ö", "U+015a", "Ś",
    "U+0170", "Ű",  "U+00d9", "Ù",  "U+00da", "Ú", "U+00db", "Û",
    "U+00dc", "Ü",  "U+0118", "Ę",  "U+021a", "Ț", "U+00df", "ß",
    "U+00e0", "à",  "U+00e1", "á",  "U+00e2", "â", "U+0103", "ă",
    "U+00e4", "ä",  "U+0107", "ć",  "U+00e6", "æ", "U+00e7", "ç",
    "U+00e8", "è",  "U+00e9", "é",  "U+00ea", "ê", "U+00eb", "ë",
    "U+00ec", "ì",  "U+00ed", "í",  "U+00ee", "î", "U+00ef", "ï",
    "U+0111", "đ",  "U+0144", "ń",  "U+00f2", "ò", "U+00f3", "ó",
    "U+00f4", "ô",  "U+0151", "ő",  "U+00f6", "ö", "U+015b", "ś",
    "U+0171", "ű",  "U+00f9", "ù",  "U+00fa", "ú", "U+00fb", "û",
    "U+00fc", "ü",  "U+0119", "ę",  "U+021b", "ț", "U+00ff", "ÿ",
);

# NOTE(review): %codeset_list is filled in and used outside this part of the
# file; its layout cannot be determined from here.
our %codeset_list;

# Codeset tables, indexed by a small non-negative "iso" index:
# $iso_8859_hash[$iso] is a reference to a hash keyed by "U+xxxx" (see
# is_iso8859), and $iso_8859_name[$iso] is the matching name (see long_name).
# any_iso8859 treats index 0 as US-ASCII.
our @iso_8859_hash;
our @iso_8859_name;

# Negative pseudo-indices, which long_name/short_name report as
# "totals"/"*" and "line-drawing"/"L" respectively.
our $inx_TOTAL = -2;
our $inx_LINES = -1;

# Format the code of a character as "U+xxxx": lowercase hexadecimal, padded
# with zeroes to at least four digits.  Only the first character of the
# parameter is used; an empty string gives "U+0000".
sub encode_hex($) {
    my ($char) = @_;
    my $codepoint = ord $char;
    return sprintf( "U+%04x", $codepoint );
}

# Convert a "U+xxxx" string (the prefix is matched ignoring case) to the
# character with that code.  The prefix is rewritten to "0x" so that hex()
# accepts the text; text without the prefix is passed to hex() unchanged.
sub decode_hex($) {
    my ($text) = @_;
    ( my $digits = $text ) =~ s/^U\+/0x/i;
    return chr( hex $digits );
}

# Turn a string of UTF-8 encoded bytes (such as the literal glyphs in the
# tables above) into a string of Perl wide-characters, using
# Encode::decode_utf8.
sub decode_utf($) {
    my ($octets) = @_;
    my $chars = Encode::decode_utf8($octets);
    return $chars;
}

# Turn a string of Perl characters into the corresponding string of UTF-8
# encoded bytes, using Encode::encode_utf8 (the inverse of decode_utf).
sub encode_utf($) {
    my ($chars) = @_;
    my $octets = Encode::encode_utf8($chars);
    return $octets;
}

# Check whether the parameter is a usable index into @iso_8859_hash.
#
# Returns 1 if $iso lies in the range 0 .. $#iso_8859_hash, and 0 otherwise;
# in particular the negative pseudo-indices $inx_TOTAL and $inx_LINES are
# not valid.
sub valid_iso8859($) {
    my $iso = shift;

    # Return the result explicitly, rather than relying on the value of a
    # trailing assignment to an otherwise unused variable.
    return ( $iso >= 0 and $iso <= $#iso_8859_hash ) ? 1 : 0;
}

# Classify a "U+xxxx" code with respect to the codeset whose index is $iso.
#
# Returns:
#    0  the code is U+fffd (replacement character)
#    1  the code is in %us_ascii (whatever $iso is)
#    3  $iso is a valid index, and the code is in that codeset's table
#    2  $iso is not a valid index, and the code is in %line_drawing
#   -1  none of the above
#
# The line-drawing table is consulted only when $iso is not a valid index
# (e.g., one of the negative pseudo-indices); since some graphics overlap with
# ISO-8859-x, a valid codeset takes precedence.  For a valid $iso, a code
# which is absent from that codeset gives -1 even if it is a line-drawing
# character.
sub is_iso8859($$) {
    my ( $code, $iso ) = @_;

    return 0 if $code eq "U+fffd";
    return 1 if defined $us_ascii{$code};

    if ( valid_iso8859($iso) ) {
        return $iso_8859_hash[$iso]{$code} ? 3 : -1;
    }

    return $line_drawing{$code} ? 2 : -1;
}

# Give the full name used to label a codeset index in reports.
#
# For the negative pseudo-indices, $inx_TOTAL gives "totals" (total number of
# glyphs in font), $inx_LINES gives "line-drawing", and any other negative
# value is returned as is.  Otherwise the result is the entry of
# @iso_8859_name for that index, or "?" if the index is past the end of
# that array.
sub long_name($) {
    my ($index) = @_;

    if ( $index >= 0 ) {
        return ( $index <= $#iso_8859_name ) ? $iso_8859_name[$index] : "?";
    }

    return "totals"       if $index == $inx_TOTAL;
    return "line-drawing" if $index == $inx_LINES;
    return $index;
}

# Give the abbreviated label for a codeset index.
#
# For the negative pseudo-indices, $inx_TOTAL gives "*" (total number of
# glyphs in font), $inx_LINES gives "L" (line-drawing), and any other
# negative value is returned as is.  Otherwise the label is derived from the
# entry of @iso_8859_name for that index:  "C" if the name contains "ascii"
# (POSIX character set C is US-ASCII), else the part of the name after its
# last "-".  An index past the end of @iso_8859_name gives "?".
sub short_name($) {
    my ($index) = @_;

    if ( $index < 0 ) {
        return "*" if $index == $inx_TOTAL;
        return "L" if $index == $inx_LINES;
        return $index;
    }
    return "?" if $index > $#iso_8859_name;

    my $name = $iso_8859_name[$index];
    return "C" if $name =~ /ascii/;

    $name =~ s/^.*-//;
    return $name;
}

# Count the codes defined by the codeset whose index is $iso.
#
# Returns the number of keys in the hash referenced by $iso_8859_hash[$iso],
# or 0 if $iso is not a valid index or that slot is undefined.
sub size_iso8859($) {
    my $iso = shift;

    return 0 unless &valid_iso8859($iso);

    # Count the keys in place; there is no need to copy the whole table and
    # build a list of its keys just to find out how many there are.
    my $table = $iso_8859_hash[$iso];
    return 0 unless defined $table;
    return scalar keys %{$table};
}

# Find the first codeset which contains the given "U+xxxx" code.
#
# The indices of @iso_8859_name are tried in ascending order.  Index 0 stands
# for US-ASCII, and matches when is_iso8859 reports 1; any later index matches
# when is_iso8859 reports 3 (found in that ISO-8859-x table).
#
# Returns the index of the first match, or -1 if there is none.
sub any_iso8859($) {
    my ($code) = @_;

    foreach my $iso ( 0 .. $#iso_8859_name ) {
        my $wanted = ( $iso == 0 ) ? 1 : 3;    # ASCII versus ISO-8859-x
        return $iso if is_iso8859( $code, $iso ) == $wanted;
    }
    return -1;
}

# List the codes which share ("fold" onto) a single glyph.
#
# The parameter is a reference to a hash keyed by "U+xxxx" code.  The result
# is the list of those keys in sorted order.  If $opt_F is set, the first key
# is always kept, and each later key is dropped when %same_glyph maps it to
# that first key, i.e., when it is a known duplicate of the first code.
sub filter_folds($) {
    my ($folds) = @_;
    my @folded = sort keys %{$folds};
    my @result;
    for my $p ( 0 .. $#folded ) {
        if ( $opt_F and $p > 0 ) {

            # Most codes have no entry in %same_glyph; check for that before
            # comparing, to avoid "uninitialized value" warnings under -w.
            my $base = $same_glyph{ $folded[$p] };
            next if ( defined $base and $base eq $folded[0] );
        }
        push @result, $folded[$p];
    }
    return @result;
}

# Read a font, returning a hash whose keys are the Unicode values ("U+xxxx")
# listed in the font.  Each value is simply 1, i.e., it marks presence rather
# than counting uses.
#
# Side effects:  diagnostics are printed to the standard output (more with the
# -d, -f and -v options), and a hash reference holding TOTALS (count of
# characters per encoding index) and MISSING (per encoding index, the
# concatenated table values for codes absent from the font) is stored in
# $all_files{$psffile}.
sub read_font($) {
    my $psffile = $_[0];
    printf "** %s\n", $psffile if ($opt_v);
    my %result;

    # The command is run by the shell; single-quote the filename so that
    # blanks and shell metacharacters in it are passed through literally.
    ( my $quoted = $psffile ) =~ s/'/'\\''/g;
    $quoted = "'" . $quoted . "'";
    my $open = (
        ( $psffile =~ /\.gz$/ )
        ? "zcat $quoted|psfgettable -"
        : "psfgettable $quoted"
    );
    if ( open my $fh, "$open 2>/dev/null |" ) {
        my @lines = <$fh>;
        close $fh;

        # The output can include comments (ignore those).  The first column is
        # the glyph number in the PSF file, which would be the ASCII or Latin1
        # value for those fonts.  The other columns are what we are interested
        # in:  the list of Unicode values which the font supplies.  Often there
        # is more than one (the same appearance for quotes, dashes, etc).
        # Those are (supposed to be) just 16-bit values, so warn/ignore any
        # which are not.
        my $mapping = 0;    # number of extra (folded) values seen
        my $in_font = 0;    # number of glyph lines parsed
        for my $n ( 0 .. $#lines ) {
            my $line = $lines[$n];
            chomp $line;
            $line =~ s/^\s+//;
            printf "%s\n", $line if ($opt_d);
            next if ( $line =~ /^#/ );
            if ( $line !~ /^0x[[:xdigit:]]+\s/ ) {
                printf "?? expected hex value: %s\n", $line;
                next;
            }

            # Strip the glyph number, and normalize the whitespace so that the
            # remainder splits into one Unicode value per element.
            $line =~ s/^0x[[:xdigit:]]+\s+//;
            $line =~ s/\s+$//;
            $line =~ s/\s+/ /g;
            my @values = split /\s/, $line;
            my %folded;
            $mapping += $#values if ( $#values > 0 );
            $in_font++;

            for my $v ( 0 .. $#values ) {

                if ( $values[$v] !~ /U\+[[:xdigit:]]{4}/ ) {
                    printf "OOPS %s\n", $values[$v];
                    next;
                }

                # When a glyph has more than one value, remember which
                # encoding each value belongs to, for the -f report.  Values
                # which are in no known encoding are skipped entirely.
                if ( $#values > 0 ) {
                    my $iso;
                    if ( $line_drawing{ $values[$v] } ) {
                        $iso = $inx_LINES;
                    }
                    else {
                        $iso = &any_iso8859( $values[$v] );
                        next unless ( $iso >= 0 );
                    }
                    $folded{ $values[$v] } = $iso;
                }
                $result{ $values[$v] } = 1;
            }
            if ($opt_f) {
                my @folded = &filter_folds( \%folded );
                if ( $#folded > 0 ) {
                    printf "...folded";
                    for my $e ( 0 .. $#folded ) {
                        printf " %d:%s '%s'", $folded{ $folded[$e] },
                          $folded[$e], &decode_hex( $folded[$e] );
                    }
                    printf "\n";
                }
            }
        }

        # Check for fonts without the VT100 line-drawing characters
        if ($opt_v) {
            my $vt100_minimal = 0;
            my $vt100_missing = 0;
            for my $code ( keys %vt100_minimal ) {
                $vt100_minimal++;
                $vt100_missing++ unless ( $result{$code} );
            }
            printf "?? missing %d/%d vt100 line-drawing\n", $vt100_missing,
              $vt100_minimal
              if ($vt100_missing);
        }

        # Now, count the number of characters which are part of the given
        # ISO-8859-x encodings.
        my %totals;
        for my $code ( sort keys %result ) {
            printf "%s", $code if ($opt_d);
            for my $iso ( 0 .. $#iso_8859_name ) {
                my $ok = &is_iso8859( $code, $iso );
                printf " %d", $ok if ($opt_d);
                $totals{$iso} += 1
                  if ( ( $ok == 3 and $iso > 0 )
                    or ( $ok == 1 and $iso == 0 ) );
            }

            # A result of 2 from is_iso8859() is counted as line-drawing.
            $totals{$inx_LINES} += 1 if ( &is_iso8859( $code, 0 ) == 2 );
            printf "\n" if ($opt_d);
        }
        $totals{$inx_TOTAL} = $in_font;

        # For each encoding, collect the table values of the codes which the
        # font does not supply.
        my @missing;
        for my $iso ( 0 .. $#iso_8859_name ) {
            my %hash = %{ $iso_8859_hash[$iso] };
            for my $code ( keys %hash ) {
                next if ( $result{$code} );
                $missing[$iso] .= $hash{$code};
            }
        }

        # Replace the placeholder in %all_files with the per-file results.
        my %object;
        $object{TOTALS}      = \%totals;
        $object{MISSING}     = \@missing;
        $all_files{$psffile} = \%object;

        if ($opt_v) {
            for my $t ( sort { $a <=> $b } keys %totals ) {
                printf " %s:%02d", &short_name($t), $totals{$t};

                # Mark encodings which the font covers completely.
                printf "#"
                  if ( $totals{$t} and $totals{$t} == &size_iso8859($t) );
            }
            printf "\n";
        }

        # Pass the filename as an argument rather than as part of the format,
        # since it may contain '%'.
        printf "?? no mapping found in %s\n", $psffile
          unless ( $mapping > 0 );
    }
    else {
        printf "?? cannot read %s\n", $psffile;
    }
    return %result;
}

# Read the given font file and add its Unicode values to a copy of the hash
# referenced by the second parameter, which counts how many fonts supply each
# value.  Returns the updated hash.
sub merge_fonts($$) {
    my ( $psffile, $counts_ref ) = @_;

    my %alldata = %{$counts_ref};
    my %psfdata = &read_font($psffile);
    foreach my $code ( keys %psfdata ) {
        $alldata{$code}++;
    }
    return %alldata;
}

# Merge the results for one font file into the list of code sets.
#
# Parameters:
#   $_[0]  the font's filename
#   $_[1]  reference to the per-file hash (TOTALS and MISSING entries)
#   $_[2]  reference to the hash of code sets, keyed by code-set name
#
# The code-set name is the file's basename without suffixes and without its
# trailing size (e.g., "16" or "8x16"); that size is recorded in the code
# set's SIZES hash.  The first file seen for a code set supplies its TOTALS
# and MISSING; with -v, later files are checked against those totals.
#
# Returns the updated hash of code sets.
sub merge_codesets($$$) {
    my $psffile = $_[0];
    my %object  = %{ $_[1] };
    my %alldata = %{ $_[2] };

    my %totals;
    if ( $object{TOTALS} ) {
        %totals = %{ $object{TOTALS} };
    }

    my @missing;
    if ( $object{MISSING} ) {
        @missing = @{ $object{MISSING} };
    }

    # Strip the directory, and everything from the first "." onward.
    my $code = $psffile;
    $code =~ s/^.*\///;
    $code =~ s/\..*$//;

    # Split the trailing size from the name, unless the whole name is just a
    # codepage or ISO number (e.g., "cp850"), where the digits are not a size.
    my $size = $code;
    $code =~ s/([_-])?(\d+x)?\d+$// unless ( $code =~ /^(cp|iso)([_-])?\d+$/ );
    $size = substr $size, ( length $code );
    $size =~ s/^([_-])//;
    $size = "?" if ( $size eq "" );

    if ( $alldata{$code} ) {
        my %obj   = %{ $alldata{$code} };
        my %sizes = %{ $obj{SIZES} };
        $sizes{$size} = $size;
        $obj{SIZES} = \%sizes;
        if ($opt_v) {
            my %check = %{ $obj{TOTALS} };
            my @check = sort keys %check;
            my @total = sort keys %totals;
            if ( scalar(@check) == scalar(@total) ) {
                for my $key (@check) {

                    # The two sets of keys may differ even when their sizes
                    # agree; treat an absent total as zero rather than
                    # comparing against an undefined value.
                    my $other = exists $totals{$key} ? $totals{$key} : 0;
                    printf "?? mismatch for %s\n", &long_name($key)
                      if ( $check{$key} != $other );
                }
            }
            else {

                # Report the number of entries (not the last array index).
                printf "?? mismatch totals %d vs %d\n", scalar(@check),
                  scalar(@total);
            }
        }
        $alldata{$code} = \%obj;
    }
    else {
        my %obj;
        my %sizes;
        $sizes{$size}   = $size;
        $obj{SIZES}     = \%sizes;
        $obj{TOTALS}    = \%totals;
        $obj{MISSING}   = \@missing;
        $alldata{$code} = \%obj;
    }

    return %alldata;
}

# Print, for each code set in the referenced hash, the encodings which its
# fonts cover.  An encoding is listed by name when the font's count equals the
# size of the encoding table, and with a trailing "*" when the table is larger
# than $opt_m and at most $opt_m of its codes are absent.
sub report_codeset($) {
    my %alldata = %{ $_[0] };
    printf "Code Sets:\n";
    for my $code ( sort keys %alldata ) {
        my %obj    = %{ $alldata{$code} };
        my %totals = %{ $obj{TOTALS} };
        printf "\t%s:", $code;

        # The keys are numeric indices; sort them numerically (as the verbose
        # listing in read_font does) so that, e.g., index 2 precedes index 10.
        for my $t ( sort { $a <=> $b } keys %totals ) {
            my $total  = $totals{$t};
            my $expect = &size_iso8859($t);
            if ( $total == $expect ) {
                printf " %s", &long_name($t);
            }
            elsif ( $expect > $opt_m and $total >= ( $expect - $opt_m ) ) {
                printf " %s*", &long_name($t);
            }
        }
        printf "\n";
    }
}

# Print, for each code set in the referenced hash, the characters which are
# absent for each encoding.  The characters themselves are shown unless there
# are more than $opt_m of them, in which case only their number is shown.
sub report_missing($) {
    my %codesets = %{ $_[0] };

    printf "Missing codes:\n";
    foreach my $code ( sort keys %codesets ) {
        my %entry   = %{ $codesets{$code} };
        my @missing = @{ $entry{MISSING} };

        printf "\t%s:", $code;
        foreach my $iso ( 0 .. $#missing ) {

            # Encodings with nothing missing have no (or an empty) entry.
            next unless ( $missing[$iso] );

            my $text = &decode_utf( $missing[$iso] );
            my $name = &short_name($iso);
            if ( length($text) > $opt_m ) {
                printf " %s(%s)", $name, length $text;
            }
            else {
                printf " %s:%s", $name, $text;
            }
        }
        printf "\n";
    }
}

# Print, for each code set in the referenced hash, the sorted list of font
# sizes which were recorded for it.
sub report_sizes($) {
    my %codesets = %{ $_[0] };

    printf "Code Sizes:\n";
    foreach my $code ( sort keys %codesets ) {
        my %entry = %{ $codesets{$code} };
        my %sizes = %{ $entry{SIZES} };

        # Build the whole line first, then print it in one step.
        my $line = sprintf "\t%s:", $code;
        foreach my $size ( sort keys %sizes ) {
            $line .= sprintf " %s", $size;
        }
        printf "%s\n", $line;
    }
}

# Report which codes of one encoding are supplied by at least one font, and
# which are supplied by none.
#
# Parameters:
#   $_[0]  reference to the hash of font files (not used here)
#   $_[1]  reference to the hash of all Unicode values found in the fonts
#   $_[2]  index of the encoding table, or $inx_LINES for line-drawing
#
# The prototype matches the three parameters which the callers pass.
sub report_coverage($$$) {
    my %alldata = %{ $_[1] };
    my $iso     = $_[2];
    my %isodata =
      ( $iso == $inx_LINES ) ? %line_drawing : %{ $iso_8859_hash[$iso] };
    my @isodata = sort keys %isodata;

    printf "%s:\n", &long_name($iso);

    # Partition the encoding's codes by whether any font supplies them.
    my @support = grep { $alldata{$_} } @isodata;
    my @omitted = grep { !$alldata{$_} } @isodata;

    # Print a heading with the count, then the codes four to a line.
    my $show = sub {
        my ( $label, @codes ) = @_;
        printf "\t%d/%d %s\n", scalar(@codes), scalar(@isodata), $label;
        my $tab = 0;
        for my $code (@codes) {
            my $utf = &decode_utf( $isodata{$code} );
            printf "\t%s '%s'", &encode_hex($utf), $utf;
            printf "\n" if ( ( ++$tab % 4 ) == 0 );
        }

        # Finish a partly-filled last line.
        printf "\n" unless ( ( $tab % 4 ) == 0 );
    };

    $show->( "supported", @support );
    $show->( "omitted",   @omitted );
}

# Print the coverage report for line-drawing, followed by the report for each
# of the encoding tables.  Nothing is printed if no Unicode values were found.
sub report_details($$) {
    my %allfile = %{ $_[0] };
    my %alldata = %{ $_[1] };

    return unless (%alldata);

    foreach my $iso ( $inx_LINES, 0 .. $#iso_8859_hash ) {
        &report_coverage( \%allfile, \%alldata, $iso );
    }
}

# Print the number of distinct Unicode values and of font files, and (if there
# are any values) how many values belong to none of the encoding tables.
sub report_summary($$) {
    my %allfile = %{ $_[0] };
    my %alldata = %{ $_[1] };
    my @codes   = sort keys %alldata;
    my $ncodes  = scalar(@codes);
    my $nfiles  = scalar( keys %allfile );

    printf "%d Unicode values are mapped in %d files\n", $ncodes, $nfiles;

    # Avoid dividing by zero when there is nothing to summarize.
    return unless ( $ncodes > 0 );

    my $other = 0;
    foreach my $code (@codes) {
        $other++ if ( &any_iso8859($code) < 0 );
    }
    printf "%d Unicode values are for \"other\" scripts (%.1f%%)\n", $other,
      ( 100 * $other ) / $ncodes;
}

# Self-test for one encoding table.
#
# Parameters:
#   $_[0]  reference to a hash keyed by "U+xxxx" strings
#   $_[1]  name of the table; unless it is "line-drawing", the name is also
#          passed to Encode as the name of an encoding
#
# All findings are printed to the standard output; nothing useful is returned.
sub test_encoding($$) {
    my %hash  = %{ $_[0] };
    my $name  = $_[1];
    my @codes = keys %hash;
    printf "table %s (%d codes)\n", $name, $#codes + 1;

    # Show the entries whose value (via decode_utf) disagrees with what
    # decode_hex produces from the key.
    for my $code ( sort keys %hash ) {
        my $hex = &decode_hex($code);
        my $utf = &decode_utf( $hash{$code} );
        printf "\t%s %s %s\n", $code, $utf, $hex if ( $utf ne $hex );
    }

    # check if Perl's encoding matches the hash in this script.
    if ( $name ne "line-drawing" ) {
        my %mapped;    # "U+xxxx" => the octet (0..255) which decodes to it
        my @mapped;    # filled below, but not read within this sub
        my @encode;    # octet => the character which Encode decodes it to
        for my $code ( 0 .. 255 ) {
            my $octets = chr($code);
            my $encode = Encode::encode( $name, $octets );
            my $decode = Encode::decode( $name, $octets );
            my $mapped = &decode_utf($decode);
            my $result = sprintf "U+%04x", ord($mapped);
            printf "test U+%04x %s %s %s\n", $code, $encode, $decode, $result
              if ($opt_d);

            # 0xfffd is U+FFFD, the Unicode replacement character; presumably
            # Encode yields it for octets with no mapping -- TODO confirm.
            if ( ord($mapped) != 0xfffd ) {
                $mapped{$result} = $code;
                $mapped[$code]   = &encode_utf($mapped);
                $encode[$code]   = $decode;
            }

            # If the table has the decoded value, its character must agree;
            # otherwise report the octet as missing, subject to the range and
            # name checks below.
            if ( $hash{$result} ) {
                printf "oops %s vs %s\n", $decode, &decode_utf( $hash{$result} )
                  unless ( $decode eq &decode_utf( $hash{$result} ) );
            }
            elsif ( $code >= 128 and $code < 160 ) {
                printf "miss %d\n", $code
                  if ( ( $name !~ /ascii/ ) and ( $name !~ /^iso-8859/i ) );
            }
            elsif ( $code >= 160 ) {
                printf "%s: miss2 %d\n", $name, ord($mapped)
                  if ( $name ne "ascii" and ord($mapped) != 0xfffd );
            }
        }

        # Second pass, over the values which Encode did map:  compare those
        # found in the table, and report the rest unless the octet is in a
        # range which is skipped for this table name.
        for my $code ( sort keys %mapped ) {
            if ( $hash{$code} ) {
                printf "oops1 %s: %s vs %s\n", $code,
                  &decode_utf( $hash{$code} ), $encode[ $mapped{$code} ]
                  if (
                    &decode_utf( $hash{$code} ) ne $encode[ $mapped{$code} ] );
            }
            else {
                my $ord = $mapped{$code};

                # Skip octets below 32 and 127; for tables other than "ascii",
                # also skip octets below 128; for "ascii" and "iso-8859-*",
                # skip everything below 160.
                next if ( $ord < 32 or $ord == 127 );
                next if ( $name ne "ascii" and $ord < 128 );
                next
                  if ( ( $name eq "ascii" or $name =~ /^iso-8859/i )
                    and $ord < 160 );
                printf "miss3 %s %d\n", $code, $ord;
            }
        }
    }
}

# Run test_encoding() on the line-drawing table and then on every registered
# encoding table, to check that each table's U+xxxx keys decode to the
# character stored for them.  The script exits when the tests are done.
sub self_test() {
    print "Self-test\n";

    &test_encoding( \%line_drawing, "line-drawing" );

    # The tables and their names are kept in parallel arrays.
    foreach my $index ( 0 .. $#iso_8859_hash ) {
        my $table = $iso_8859_hash[$index];
        my $name  = $iso_8859_name[$index];
        &test_encoding( $table, $name );
    }
    exit;
}

# Register an encoding:  append a copy of the referenced hash to
# @iso_8859_hash, and store the name at the same index of @iso_8859_name.
sub init_encoding($$) {
    my ( $table, $name ) = @_;

    my %copy = %{$table};
    push @iso_8859_hash, \%copy;

    # The name goes at the index of the table which was just appended.
    $iso_8859_name[$#iso_8859_hash] = $name;
}

# Print the usage message to the standard error, and exit.  This is called
# when getopts() fails.
sub main::HELP_MESSAGE() {

    # Use print rather than printf:  the text interpolates $0, which must not
    # be interpreted as part of a format.
    print STDERR <<EOF;
Usage: $0 [options] [psffile1 [psffile2 ...]]

Options:

-d         debug, shows parsed values
-f         report characters folded, usually onto ASCII
-F         like -f, but omitting equivalent glyphs
-m MAX     set maximum number of missing glyphs (default: 3)
-s         summary
-t         self-test
-v         verbose, shows files opened
EOF
    exit;
}

# Fedora kbd-misc package
$font_dir = "/lib/kbd/consolefonts" if ( -d "/lib/kbd/consolefonts" );

binmode( STDOUT, ":utf8" );

&getopts('dfFm:stv') || &main::HELP_MESSAGE;

# Register the encoding tables; the order determines each table's index.
&init_encoding( \%us_ascii,    "ascii" );
&init_encoding( \%iso_8859_1,  "iso-8859-1" );
&init_encoding( \%iso_8859_2,  "iso-8859-2" );
&init_encoding( \%iso_8859_3,  "iso-8859-3" );
&init_encoding( \%iso_8859_4,  "iso-8859-4" );
&init_encoding( \%iso_8859_5,  "iso-8859-5" );
&init_encoding( \%iso_8859_6,  "iso-8859-6" );
&init_encoding( \%iso_8859_7,  "iso-8859-7" );
&init_encoding( \%iso_8859_8,  "iso-8859-8" );
&init_encoding( \%iso_8859_9,  "iso-8859-9" );
&init_encoding( \%iso_8859_10, "iso-8859-10" );
&init_encoding( \%iso_8859_11, "iso-8859-11" );
&init_encoding( \%iso_8859_13, "iso-8859-13" );
&init_encoding( \%iso_8859_14, "iso-8859-14" );
&init_encoding( \%iso_8859_15, "iso-8859-15" );
&init_encoding( \%iso_8859_16, "iso-8859-16" );

&self_test if ($opt_t);

# -F implies -f, and both -d and -f imply -v.
$opt_f = 1 if ($opt_F);
$opt_v = 1 if ($opt_d);
$opt_v = 1 if ($opt_f);

# Apply the default only when -m was not given, so that "-m 0" is honored.
$opt_m = 3 unless ( defined $opt_m );

# Use the files named on the command line, or else every PSF font found in
# the font directory.  The value 1 is a placeholder, which read_font replaces
# with a reference to that file's results.
if ( $#ARGV >= 0 ) {
    while ( $#ARGV >= 0 ) {
        $all_files{ shift @ARGV } = (1);
    }
}
else {
    opendir( my $dh, $font_dir ) || die "can't opendir $font_dir: $!";
    my @files = readdir($dh);
    closedir $dh;
    for my $n ( 0 .. $#files ) {
        my $file = sprintf "%s/%s", $font_dir, $files[$n];
        next unless ( -f $file );
        next unless ( $file =~ /\.psf(u)?(\.gz)?$/ );
        $all_files{$file} = (1);
    }
}

foreach my $psffile ( sort keys %all_files ) {
    %all_fonts = &merge_fonts( $psffile, \%all_fonts );
    %codeset_list =
      &merge_codesets( $psffile, $all_files{$psffile}, \%codeset_list );
}

&report_codeset( \%codeset_list ) unless ($opt_s);
&report_missing( \%codeset_list ) unless ($opt_s);
&report_sizes( \%codeset_list )   unless ($opt_s);
&report_details( \%all_files, \%all_fonts ) unless ($opt_s);
&report_summary( \%all_files, \%all_fonts );

1;

# vile:file-encoding=utf-8
