Spaces:
Running
on
L4
Running
on
L4
#!/usr/bin/perl -w | |
# uroman Nov. 12, 2015 - July 25, 2016 | |
# version v0.7 | |
# Author: Ulf Hermjakob | |
# Usage: uroman-quick.pl {-l [tur|uig|ukr|yid]} < STDIN | |
# currently only for Arabic script languages, incl. Uyghur | |
$|=1; | |
use FindBin; | |
use Cwd "abs_path"; | |
use File::Basename qw(dirname); | |
use File::Spec; | |
my $bin_dir = abs_path(dirname($0)); | |
my $root_dir = File::Spec->catfile($bin_dir, File::Spec->updir()); | |
my $data_dir = File::Spec->catfile($root_dir, "data"); | |
my $lib_dir = File::Spec->catfile($root_dir, "lib"); | |
use lib "$FindBin::Bin/../lib"; | |
use NLP::Romanizer; | |
use NLP::UTF8; | |
$romanizer = NLP::Romanizer; | |
%ht = (); | |
$lang_code = ""; | |
while (@ARGV) { | |
$arg = shift @ARGV; | |
if ($arg =~ /^-+(l|lc|lang-code)$/) { | |
$lang_code = lc (shift @ARGV || "") | |
} else { | |
print STDERR "Ignoring unrecognized arg $arg\n"; | |
} | |
} | |
$romanization_table_arabic_block_filename = File::Spec->catfile($data_dir, "romanization-table-arabic-block.txt"); | |
$romanization_table_filename = File::Spec->catfile($data_dir, "romanization-table.txt"); | |
$romanizer->load_romanization_table(*ht, $romanization_table_arabic_block_filename); | |
$romanizer->load_romanization_table(*ht, $romanization_table_filename); | |
$line_number = 0; | |
while (<>) { | |
$line_number++; | |
my $line = $_; | |
print $romanizer->quick_romanize($line, $lang_code, *ht) . "\n"; | |
if ($line_number =~ /0000$/) { | |
print STDERR $line_number; | |
} elsif ($line_number =~ /000$/) { | |
print STDERR "."; | |
} | |
} | |
print STDERR "\n"; | |
exit 0; | |