The OpenNET Project / Index page

[ /++ | | wiki | ]

Ispell (rus ispell)


<< src >>
 : rus, ispell,  (  )
From: <kiril@j2.ru> Subject: Ispell #!/usr/bin/perl # Ispell # (c) <kiril@j2.ru> http://ispell.narod.ru/ #$affix_filename='1.aff'; #$dic_file="full.win"; #$fin_dic_file="win.full_dic"; $affix_filename='english.aff'; $dic_file="english.full"; $fin_dic_file="english.all"; $garbage_output=0; #------------------------------------------------------------------------ sub russian_lc { my ($src_)=@_; $src_=~tr/ABCDEFGHIJKLMNOPQRSTUVWXYZ/ѣabcdefghijklmnopqrstuvwxyz/; return $src_; } sub russian_uc { my ($src_)=@_; $src_=~tr/ѣabcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/; return $src_; } #------------------------------------------------------------------------ sub is1stuc_rus { my ($src_)=@_; # get first char my($src_1st)=(split//,$src_)[0]; my($res)=($src_1st=~tr/ABCDEFGHIJKLMNOPQRSTUVWXYZ/ѣabcdefghijklmnopqrstuvwxyz/); return $res; } #------------------------------------------------------------------------ sub make1stuc_rus { my ($src_)=@_; my(@str_)=split//,$src_; $str_[0]=~tr/ѣabcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/; join "",@str_; } #------------------------------------------------------------------------ sub gen_word { my ($w_,$f_)=@_; my ($is_capital)=is1stuc_rus($w_); $w_=russian_lc($w_); my(@new_words); # useless print "Wanna generate from word '$w_', group '$f_'\n" if($full_debug==1); foreach my $aff_ (@{$affixes{$f_}}) { my($re_,$actions_)=split/\t/,$aff_; my($sub_,$app_); if ($actions_=~/,/) # both parts - substract and append { ($sub_,$app_)=split/,/,$actions_; $mode_=0; } else { $app_=$actions_; $mode_=1; } # copy primary word $nw_=$w_; $sub_=~s/-//; if ($app_=~/^-$/) { $app_=''; } print "\tCut string - '$sub_', append - '$app_'\n" if($full_debug==1); print "\tPattern - '$re_'\n" if($full_debug==1); if ($nw_=~/$re_$/) # patter match { if ($mode_ == 0) # cut 'n' paste { # cut $nw_=~s/^(.*)?$sub_$/$1/; print "\tAfter cut - '$nw_'\n" if($full_debug==1); } # paste $nw_.=$app_; print "\tAfter paste - '$nw_'\n" if($full_debug==1); # save push @new_words,$nw_; } } # return first letter if ($is_capital == 1) { @new_words=map {make1stuc_rus($_)} @new_words; } return @new_words; } #------------------------------------------------------------------------ sub append_prefix { my ($w_,$f_)=@_; my ($is_capital)=is1stuc_rus($w_); $w_=russian_lc($w_); my(@new_words); foreach my $aff_ (@{$affixes{$f_}}) { my($re_,$actions_)=split/\t/,$aff_; # save push @new_words,"$actions_$w_"; } # return first letter if ($is_capital == 1) { @new_words=map {make1stuc_rus($_)} @new_words; } return @new_words; } #------------------------------------------------------------------------ # MAIN ROUTINE #------------------------------------------------------------------------ print "Read affix file..."; open AFF,"$affix_filename"; while (<AFF>) { chomp; next if (/^#/); if (/^prefixes/) { $change_type=0; next; } if (/^suffixes/) { $change_type=1; next; } # begin aa: if (/flag\s+\*(\w):/) { $affix=$1; while (<AFF>) { chomp; if (/^prefixes/) { $change_type=0; goto aa; } if (/^suffixes/) { $change_type=1; goto aa; } if (/flag\s+\*(\w):/) { goto aa; } next if (/^#/ or $_ eq ''); s/^\s+(.*)?\s+>\s+(.*)?\s+?#.*$/$1\t$2/; s/\t\t/\t/g; s/ //g; $_=russian_lc($_); # save to hash push @{$affixes{$affix}},"$_"; # ... if ($change_type == 0) # prefixes { $prefix{$affix}=1; } } } } print "\rRead done. \n"; $words_total=0; open DIC,"$dic_file"; open NEWDIC,">$fin_dic_file"; while (<DIC>) { chomp; my($word,$flag)=split /\//; $prefix_exists=0; my(@flags)=split//,$flag; my(@preffss); foreach $fls_ (@flags) { if ($prefix{$fls_}==1) { push @preffss,$fls_; # $flag=~s/$fls_//g; $prefix_exists=1; } } # , my(@flags)=split//,$flag; # print @preffss; # main word print NEWDIC $word; if ($garbage_output ==0) { print NEWDIC "\n"; } else { print NEWDIC " "; } # if ($prefix_exists==1) { foreach $pre_ (@preffss) { print NEWDIC append_prefix($word,$pre_); if ($garbage_output ==0) { print NEWDIC "\n"; } else { print NEWDIC " "; } } } my(@noword_); foreach $fl (@flags) { foreach $res_ (gen_word($word,$fl)) { if ($prefix_exists==1) { push @noword_,$res_; } print NEWDIC "$res_"; if ($garbage_output ==0) { print NEWDIC "\n"; } else { print NEWDIC " "; } } } # - ... if ($prefix_exists==1) { foreach $pre_ (@preffss) { foreach $res_ (@noword_) { print NEWDIC append_prefix($res_,$pre_); if ($garbage_output ==0) { print NEWDIC "\n"; } else { print NEWDIC " "; } } } } $words_total++; print "\rGenerated - $words_total"; } close DIC; close NEWDIC;

<< src >>

[ RSS ]
 
  • 1, , 23:36, 17/03/2012 [] [ ]
  • +/
    ,
    1) , :
    /flag\s+\*(\w):/
    /flag\s+\*?(\w):/ , flag *M: flag T: "" .(T ģ, M -)

    2. print NEWDIC print NEWDIC $word,"\t",-- , .

     

    :         
    E-Mail:      
    :
    :





        
        
    Created 1996-2017 by Maxim Chirkov  
      
    Hosting by Ihor