Perl関数フックの実現

  1. EB::Hooksetのmagic_extにhookset_obj構造体を入れる。
  2. Perl関数でフックするときにフック関数 eb_text_hookを呼び出し、その中でPerl関数を実行して、返り値を書き込む。


EB.XSの一部

typedef struct hookset_obj_struct { 
  EB_Hookset *hookset; 
  AV *functions;  // perl関数(のリファレンス)を保存
} hookset_obj;

EB_Error_Code
eb_text_hook(EB_Book *book, EB_Appendix *appendix,
                            void *container, EB_Hook_Code hook_code, int argc,
                            const unsigned int *argv)
{
  if(!SvOK((SV *)container) || !SvROK((SV *)container))
    return EB_SUCCESS;

  SV *sv = SvRV((SV *)container);
  hookset_obj *obj = (hookset_obj *)mg_find(sv, PERL_MAGIC_ext)->mg_obj;
  SV *func = *av_fetch(obj->functions, (int)hook_code, 0);

  int n = 0;
  dSP;
  int count;
  ENTER;
  SAVETMPS;

  PUSHMARK(SP);
  for(n = 0; n < argc; n++)
    XPUSHs(sv_2mortal(newSViv(argv[n])));
  PUTBACK;

  count = perl_call_sv(func, G_SCALAR);

  if (count > 0){
    eb_write_text_string(book, POPp);
  }

  FREETMPS;
  LEAVE;

  return EB_SUCCESS;
}

MODULE = EB    PACKAGE = EB::Hookset   PREFIX=eb_

void
eb_set_sub(obj, code, func)
     hookset_obj* obj;
     EB_Hook_Code code;
     CV* func;
PREINIT:
     EB_Hook *hook;
CODE:
     av_store(obj->functions, code, newRV_noinc((SV *)func)); // functions内にperlの関数のリファレンスを登録

     Newx(hook, sizeof(hook), EB_Hook);
     hook->code = code;
     hook->function = eb_text_hook;
     eb_set_hook(obj->hookset, hook);

perlの関数はリファレンス(RV*)で渡さないとうまくいかない。

EBLibraryのPerlバインディングを作った

言語処理の研究で辞書を使う機会も多いので、
XSの練習も兼ねてEB LibraryのPerlバインディングを作ってみた。
Appendix は未実装。とりあえずEPWINGを使いたかったので。

まだ、とりあえず動くレベル。



EBLibraryのRubyバインディング http://rubyeb.sourceforge.net/index-ja.html
id: dayflowerさんのEB Libraryの例題 http://d.hatena.ne.jp/dayflower/20081210/1228882560
を参考にした。



基本的な使い方

use EB;
my $eb = EB->new();
$eb->bind("./EIDAI"); 
$eb->subbooks(); # = [[code, subtitle], ...];
$eb->set_subbook(0); 
print $eb->subbook_title(), "\n"; # 研究社新英和大辞典第6版

my $res = $eb->search($ARGV[0])){
if(ref($res) eq "ARRAY"){
  foreach my $r(@{$res}){
      print $r->[0],"\n",$r->[1],"\n\n";  # 見出し, 本文
  }
}


キーワード検索、クロス検索の場合、キーワード配列のリファレンスを渡す

my $res = $eb->cross_search($words_ref);


元のAPIに基づいた検索処理も可

    $self->search_word($word);
    my $list_ref = $self->hit_list($max);
    return if(ref($list_ref) ne "ARRAY");
    foreach my $r(@{$list_ref}){
        $self->seek_text($r->{heading});
        my $heading = $self->read_heading();
        my $text = $self->read_text();
     print $heading,"\n",$text,"\n\n";  # 見出し, 本文
    }

フックセット

フックにPerlの関数を利用する。

my $hookset = EB::Hookset->new();
$hookset->set_sub($hookset->HOOK_WIDE_FONT, sub {return qq(<img src="font_img/16wide_$_[0].png" alt="<$_[0]>" />); });  # フックを登録
$hookset->set_sub($hookset->HOOK_NEWLINE, sub {return "<br>"; });
$eb->set_hookset($hookset);  # フックセットを登録
$eb->search($keyword); # 前方一致検索

フォント関係

$eb->set_font($eb->FONT_16); 
my $start = $eb->wide_font_start();
print $eb->wide_font_character_png($start); # フォントデータをPNG出力

ターミナルで”Google”をカラー表示

まず、ANSIColor.pmを作る。

package ANSIColor;

use strict;
use warnings;
use base 'Exporter';

use constant {
    # foreground color
    BLACK  => "\033[30m",
    RED    => "\033[31m",
    GREEN  => "\033[32m",
    YELLOW => "\033[33m",
    BLUE   => "\033[34m",
    PURPLE => "\033[35m",
    CYAN   => "\033[36m",
    WHITE  => "\033[37m",

    # background color
    BLACKB  => "\033[40m",
    REDB    => "\033[41m",
    GREENB  => "\033[42m",
    YELLOWB => "\033[43m",
    BLUEB   => "\033[44m",
    PURPLEB => "\033[45m",
    CYANB   => "\033[46m",
    WHITEB  => "\033[47m",

    # bold
    B => "\033[1m",
    BOFF => "\033[22m",

   # italics
    I => "\033[3m",
    IOFF => "\033[23m",

    # underline
    U => "\033[4m",
    UOFF => "\033[24m",

    # invert
    R => "\033[7m",
    ROFF => "\033[27m",

    # reset
    RESET  => "\033[0m",
};

our %EXPORT_TAGS = ('foreground' => [qw(BLACK RED GREEN YELLOW BLUE PURPLE CYAN WHITE RESET)],
                    'background' => [qw(BLACKB REDB GREENB YELLOWB BLUEB PURPLEB CYANB WHITEB RESET)],
                    'italic' => [qw(I ITON ITALIC ITALICON IOFF ITALICOFF RESET)],
                    'bold' => [qw(B BOLD BOLDON BOLDOFF BOFF RESET)],
                    'invert' => [qw(R INVON ROFF INVOFF)],
                    'underline' => [qw(U UL ULON UNDERLINE UNDERLINEON UOFF UNDERLINEOFF ULOFF)],
                );
$EXPORT_TAGS{'all'} = [map {@{$_}} values %EXPORT_TAGS];

our @EXPORT_OK = @{$EXPORT_TAGS{'all'}};

1;

あとは、以下を実行

>perl -e 'use ANSIColor qw(:foreground); print BLUE,G,RED,o,YELLOW,o,BLUE,g,GREEN,l,RED,e,RESET,"\n"'
Google

ターミナルがカラフルだと楽しくなります。


正規表現で使いたい場合は、eオプションを付けて、

use ANSIColor qw(:foreground);

# Wikipediaのgoogle項目冒頭 (http://en.wikipedia.org/wiki/Google) 
my $str =<<_EOS_;
Google Inc. is an American public corporation, earning revenue from 
advertising related to its Internet search, e-mail, online mapping, 
office productivity, social networking, and video sharing services 
as well as selling advertising-free versions of the same technologies. 
The Google headquarters, the Googleplex, is located in Mountain View, 
California. As of March 31, 2009, the company has 20,164 full-time employees.
_EOS_

$str =~ s/Google/BLUE.G.RED.o.YELLOW.o.BLUE.g.GREEN.l.RED.e.RESET/eg;
print $str;

結果は、

Google Inc. is an American public corporation, earning revenue from 
advertising related to its Internet search, e-mail, online mapping, 
office productivity, social networking, and video sharing services 
as well as selling advertising-free versions of the same technologies. 
The Google headquarters, the Googleplex, is located in Mountain View, 
California. As of March 31, 2009, the company has 20,164 full-time employees.

まとめて記述

まとめて記述するために抑えておくべき基本技

qwを使う

my @array = qw(a b c d e f);    # ('a','b','c','d','e','f')
my %hash = qw(a 1 b 2 c 3 d 4); # (a => 1, b => 2, c => 3, d => 4)

「n..m」を使う

my @array = 1..100; # (1, 2, ..., 100)
for my $i(1..100){
    $array[$i];
}

「-n」を使う(n:後ろからの番号)

my @array = 1..100;
print $array[-1]; # $array[-1] == $array[$#array] == $array[@array - 1]
for my $i(-20..-1){ # 最後の20個
    print $array[$i];
}

まとめて記述

出力

配列の場合

my @array = qw(a b c d e f g);
my ($a, $b, undef, $c, undef) = @array[2..5]; # ('c','d','e', 'f')  $ではなく@を使う!
my ($a, $b) = @array[2,5]\n"; # ('c','f')

ハッシュの値をまとめて出力

my %hash = qw(a 1 b 2 c 3 d 4 e 5);
my ($v1, $v2) = @hash{qw(c d)} # %, $でなく@を使う!

リファレンスの場合

my $hash_ref = {qw(a 1 b 2 c 3 d 4 e 5)};
my ($v1, $v2) = @$hash{qw(c d)};
my ($v1, $v2) = @{$hash}{qw(c d)};
代入

配列の場合

@array2[-3,-1] = @array1[1,3];

ハッシュの場合

@hash2{qw(foo1 foo2)} = @hash1{qw(hoge1 hoge2)};

リファレンスの場合

@$hash2{qw(foo1 foo2)} = @$hash1{qw(hoge1 hoge2)};
@{$hash2}{qw(foo1 foo2)} = @{$hash1}{qw(hoge1 hoge2)};