view findUnicode.pl @ 9:b86def18da67

closed-in-concat→ done
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sat, 09 Nov 2019 16:23:01 +0900
parents 4ed010b4a017
children a8642503caae
line wrap: on
line source

#!/usr/bin/perl

# Report non-ASCII characters used in the input that are missing from BDF fonts.
use strict;
use utf8;
use open qw(:std :utf8); # input/output default encoding will be UTF-8, it looks like default

# Code point (as a number) => how many times it occurred in the input.
my %used;

use Getopt::Std;
our ($opt_f);

# -f FILE : check only this font instead of the globbed default set.
getopts('f:');

# Default set: every *.bdf file in the current directory whose name
# starts with a digit.
my @fonts = $opt_f ? ($opt_f) : glob('[0-9]*.bdf');

# Read every input line (files named on the command line, or STDIN) and
# count each character outside the ASCII range by its code point.
while (my $line = <>) {
    # To ignore CJKV as well, additionally skip code points >= 12288.
    $used{ ord($1) }++ while $line =~ /([^\x00-\x7f])/g;
}

# For each BDF font, print every code point recorded in %used for which
# the font has no "ENCODING <n>" line.
for my $bdf ( @fonts ) {
   my $f;
   if (! open($f, "<", $bdf)) {
       # An unreadable font would otherwise yield an empty glyph table and
       # every used character would be falsely reported as missing.
       warn "cannot open $bdf: $!\n";
       next;
   }

   # Code points that this font provides a glyph for.
   my %has;
   while (my $line = <$f>) {
       # "+ 0" normalises the key so a zero-padded value still matches
       # the numeric keys stored in %used.
       $has{ $1 + 0 } = 1 if $line =~ /^ENCODING\s+(\d+)/;
   }
   close($f);

   # Used code points the font lacks, in ascending numeric order.
   my @missing = sort { $a <=> $b } grep { ! exists $has{$_} } keys %used;
   for my $ch ( @missing ) {
       my $hex = sprintf("%x",$ch);
       print chr($ch)," $ch 0x$hex is not in $bdf\n";
   }
}