Perl6の内部表現

Takahiro Shimizu

このセッションの内容

内容

Perl6とは

現在のPerl6

[参考]Perl5のソースコード

use strict;
use warnings;

# A scalar holds a single value and is written with the `$` sigil.
my $scalar_value = "hello!";
print "$scalar_value\n";

# An array is declared with `@`, but one element is read with `$`.
my @array = (1..10);
print "$array[0]\n";

# A hash is declared with `%`; one value is read with `$` and braces.
my %hash = ( this_is_key => "this_is_value");
print "$hash{this_is_key}\n";

# A reference to the hash is dereferenced with the arrow operator.
my $hash_ref = \%hash;
print "$hash_ref->{this_is_key}\n";

Perl6のソースコード概要

my $str_value = 'hello world!';
$str_value.say; # hello world!
my $sample_value = 'hello world!';
$sample_value.say; # hello world!

# The same untyped variable is reassigned to a different string.
$sample_value = '31';
$sample_value.say; # 31

# The Str '31' is numified by the multiplication.
say($sample_value * 3); # 93

Perl6の言語的な特徴

my Int $int_value  = 31;
$int_value = "hello"; # Compile error!
$ perl6 type_invalid.p6
Type check failed in assignment to $int_value; expected Int but got Str ("hello")
  in block <unit> at type_invalid.p6 line 4

Perl6の言語的な特徴

# Subset types that classify an Int by divisibility (`%%` is "divisible by").
my subset Fizz of Int where * %% 3;
my subset Buzz of Int where * %% 5;
# Divisible by both 3 and 5.
my subset FizzBuzz of Int where Fizz&Buzz;
# Divisible by neither 3 nor 5.
my subset Number of Int where none Fizz|Buzz;

# Multi-dispatch on the subset types above: the candidate whose constraint
# matches the argument is chosen, so no explicit if/else chain is needed.
proto sub fizzbuzz ($) { * }
multi sub fizzbuzz (FizzBuzz) { "FizzBuzz" }
multi sub fizzbuzz (Fizz) { "Fizz" }
multi sub fizzbuzz (Buzz) { "Buzz" }
multi sub fizzbuzz (Number $number) { $number }

fizzbuzz($_).say for 1..15;

スクリプト言語

スクリプト言語処理系

Perl6以外のスクリプト言語

Perl6の処理系の構成

Rakudoの構成図

(http://brrt-to-the-future.blogspot.com/2015/03/advancing-jit-compiler.html)

Perl6とNQP

my $value := "hello!";
say($value);

NQPスクリプト

#! nqp
# Naive recursive Fibonacci: returns $n itself for $n < 2, otherwise the sum
# of the two preceding Fibonacci numbers.
sub fib($n) {
    $n < 2 ?? $n !! fib($n-1) + fib($n - 2);
}

# NQP binds values with `:=`.
my $N := 29;

# NOTE(review): $z is not used below; fib($N) is called a second time when
# the output string is built.
my $z  := fib($N);

nqp::say("fib($N) = " ~ fib($N));

NQPスクリプト(nまでの整数の和)

# Accumulates $n, $n-1, ... into $sum while counting $n down.
# NOTE(review): the loop stops once $n reaches 1, so 1 itself is never added;
# the result is the sum of 2..$n. Left unchanged here so the source stays in
# line with the `$n > 1` condition as written.
sub add_test($n){
    my $sum := 0;
    while ( $n > 1) {
        $sum := $sum + $n;
        --$n;
    }
    return $sum;
}

say(add_test(10000));

NQP

# Same countdown sum as the plain version, but written with a native `int`
# parameter and explicit nqp:: integer ops (isgt_i / add_i / sub_i) instead of
# the generic `>`, `+` and `--` operators.
sub add_test(int $n){
    my $sum := 0;
    while nqp::isgt_i($n,1) {
        $sum := nqp::add_i($sum,$n);
        $n   := nqp::sub_i($n,1);
    }
    return $sum;
}

NQPとMoarVM

Perl6のVM

MoarVM

バイトコード

$ nqp --target=mbc --output=fib.moarvm fib.nqp

バイトコード

バイトコードとMoarVM

     annotation: add_test.nqp:1
00003      const_i64_16       loc_2_int, 0
00004      hllboxtype_i       loc_3_obj
00005      box_i              loc_3_obj, loc_2_int, loc_3_obj
00006      set                loc_1_obj, loc_3_obj
     label_1:
00007      decont             loc_3_obj, loc_0_obj
00008      smrt_numify        loc_4_num, loc_3_obj
00009      const_i64_16       loc_2_int, 1
00010      coerce_in          loc_5_num, loc_2_int
00011      gt_n               loc_2_int, loc_4_num, loc_5_num
00012      unless_i           loc_2_int, label_2(00031)
00013      osrpoint
     annotation: add_test.nqp:3
00014      decont             loc_3_obj, loc_1_obj
00015      smrt_numify        loc_5_num, loc_3_obj
00016      decont             loc_3_obj, loc_0_obj
00017      smrt_numify        loc_4_num, loc_3_obj
00018      add_n              loc_4_num, loc_5_num, loc_4_num
00019      hllboxtype_n       loc_3_obj
00020      box_n              loc_3_obj, loc_4_num, loc_3_obj
00021      set                loc_1_obj, loc_3_obj
00022      decont             loc_3_obj, loc_0_obj
00023      smrt_numify        loc_4_num, loc_3_obj
00024      coerce_ni          loc_6_int, loc_4_num
00025      const_i64_16       loc_7_int, 1
00026      sub_i              loc_7_int, loc_6_int, loc_7_int
00027      hllboxtype_i       loc_3_obj
00028      box_i              loc_3_obj, loc_7_int, loc_3_obj
00029      set                loc_0_obj, loc_3_obj
00030      goto               label_1(00007)

NQPとバイトコードの対応

say(add_test(10000));
     annotation: add_test.nqp:1
     label_1:
00020      getlex_no          loc_7_obj, '&say'
00021      decont             loc_7_obj, loc_7_obj
00022      const_s            loc_3_str, '&add_test'
00023      getlexstatic_o     loc_8_obj, loc_3_str
00024      decont             loc_8_obj, loc_8_obj
00025      const_i64_16       loc_5_int, 10000
00026      prepargs           Callsite_1
00027      arg_i              0, loc_5_int
00028      invoke_o           loc_8_obj, loc_8_obj
00029      prepargs           Callsite_0
00030      arg_o              0, loc_8_obj
00031      invoke_v           loc_7_obj
00032      null               loc_7_obj
00033      return_o           loc_7_obj

NQPとバイトコードの対応

my $sum := 0;
     annotation: add_test.nqp:1
00003      const_i64_16       loc_2_int, 0
00004      hllboxtype_i       loc_3_obj
00005      box_i              loc_3_obj, loc_2_int, loc_3_obj
00006      set                loc_1_obj, loc_3_obj

NQPとバイトコードの対応

    while ( $n > 1) {
     label_1:
00007      decont             loc_3_obj, loc_0_obj
00008      smrt_numify        loc_4_num, loc_3_obj
00009      const_i64_16       loc_2_int, 1
00010      coerce_in          loc_5_num, loc_2_int
00011      gt_n               loc_2_int, loc_4_num, loc_5_num
00012      unless_i           loc_2_int, label_2(00031)
00013      osrpoint

decont命令

    while ( $n > 1) {
00007      decont             loc_3_obj, loc_0_obj

smrt_numify

    while ( $n > 1) {
00008      smrt_numify        loc_4_num, loc_3_obj

const_i64_16とcoerce_in

    while ( $n > 1) {
00009      const_i64_16       loc_2_int, 1
00010      coerce_in          loc_5_num, loc_2_int

比較とif文の判定

    while ( $n > 1) {
00011      gt_n               loc_2_int, loc_4_num, loc_5_num
00012      unless_i           loc_2_int, label_2(00031)

C言語での実装へ

MoarVMのバイトコードインタプリタ部分

MoarVMなどの言語処理系のバイトコードインタプリタは次のことを繰り返している

  1. 入力されたバイトコード列から命令に対応する部分を読み取る
  2. 読み込んだ数値から、 対応する命令を取得する
  3. 命令部分を実行する
  4. バイトコード列を次に進め、繰り返す

巨大なswitch文を使うケース

    /* Dispatch loop: keep going while there is an instruction to execute. */
    while( pc != NULL) {
        /* Branch on the opcode pc points at; a switch needs an integer
         * value, not the pointer itself. */
        switch(*pc){
            case ADD_INSTRUCTION:
                // instruction....
                break;
            case SUB_INSTRUCTION:
                // instruction....
                break;
        }
    }

Cコンパイラのラベルgotoを使うケース

    static const void *CODES[] = {&&ADD_INSTRUCTION, &&SUB_INSTRCUTION};

    goto *CODES[pc];

ADD_INSTRUCTION:
    // instruction...
    pc++;
    goto *CODES[pc];

SUB_INSTRUCTION:
    // instruction...
    pc++;
    goto *CODES[pc];

MoarVMでは

MoarVMのC言語での実装

/* This is the interpreter run loop. We have one of these per thread. */
void MVM_interp_run(MVMThreadContext *tc, void (*initial_invoke)(MVMThreadContext *, void *), void *invoke_data) {
#if MVM_CGOTO
#include "oplabels.h"
#endif

    /* Points to the place in the bytecode right after the current opcode. */
    /* See the NEXT_OP macro for making sense of this */
    MVMuint8 *cur_op = NULL;

    /* The current frame's bytecode start. */
    MVMuint8 *bytecode_start = NULL;

    /* Points to the base of the current register set for the frame we
     * are presently in. */
    MVMRegister *reg_base = NULL;

    /* Points to the current compilation unit. */
    MVMCompUnit *cu = NULL;

    /* The current call site we're constructing. */
    MVMCallsite *cur_callsite = NULL;

    /* Stash addresses of current op, register base and SC deref base
     * in the TC; this will be used by anything that needs to switch
     * the current place we're interpreting. */
    tc->interp_cur_op         = &cur_op;
    tc->interp_bytecode_start = &bytecode_start;
    tc->interp_reg_base       = &reg_base;
    tc->interp_cu             = &cu;

    /* With everything set up, do the initial invocation (exactly what this does
     * varies depending on if this is starting a new thread or is the top-level
     * program entry point). */
    initial_invoke(tc, invoke_data);

MoarVMのレジスタ構成

/* Different views of a register. */
/* Because this is a union, every member overlays the same storage: a register
 * is read through exactly one of these views at a time -- an object pointer
 * (o), a string pointer (s), or one of the sized integer / num members.
 * NOTE(review): MVMnum32 / MVMnum64 are presumably floating-point types;
 * confirm against the MoarVM type definitions. */
union MVMRegister {
    MVMObject         *o;
    MVMString *s;
    MVMint8            i8;
    MVMuint8           u8;
    MVMint16           i16;
    MVMuint16          u16;
    MVMint32           i32;
    MVMuint32          u32;
    MVMint64           i64;
    MVMuint64          u64;
    MVMnum32           n32;
    MVMnum64           n64;
};

MVM_interp_runの登場人物

    /* Points to the place in the bytecode right after the current opcode. */
    /* See the NEXT_OP macro for making sense of this */
    MVMuint8 *cur_op = NULL;

    /* The current frame's bytecode start. */
    MVMuint8 *bytecode_start = NULL;

    /* Points to the base of the current register set for the frame we
     * are presently in. */
    MVMRegister *reg_base = NULL;

MVM_interp_runメインループ

    /* Enter runloop. */
    runloop: {
        MVMuint16 op;

#if MVM_TRACING
        if (tracing_enabled) {
            char *trace_line;
            trace_line = MVM_exception_backtrace_line(tc, tc->cur_frame, 0, cur_op);
            fprintf(stderr, "Op %d%s\n", (int)*((MVMuint16 *)cur_op), trace_line);
            /* slow tracing is slow. Feel free to speed it. */
            MVM_free(trace_line);
        }
#endif

        /* The ops should be in the same order here as in the oplist file, so
         * the compiler can can optimise the switch properly. To check if they
         * are in the same order as the oplist use the
         * tools/compare-oplist-interp-order.sh helper script. */
        DISPATCH(NEXT_OP) {
            OP(no_op):
                goto NEXT;
            OP(const_i8):
            OP(const_i16):
            OP(const_i32):
                MVM_exception_throw_adhoc(tc, "const_iX NYI");
            OP(const_i64):
                GET_REG(cur_op, 0).i64 = MVM_BC_get_I64(cur_op, 2);
                cur_op += 10;
                goto NEXT;

MVM_interp_runメインループ

/* Two interchangeable dispatch strategies, selected by MVM_CGOTO. */
#if MVM_CGOTO
/* Label-based dispatch: DISPATCH() expands to nothing, OP(name) pastes to the
 * identifier OP_name, and NEXT expands to *LABELS[NEXT_OP], so `goto NEXT`
 * becomes an indirect goto through the LABELS table. */
#define DISPATCH(op)
#define OP(name) OP_ ## name
#define NEXT *LABELS[NEXT_OP]
#else
/* Switch-based dispatch: DISPATCH() opens a switch, OP(name) pastes to
 * `case MVM_OP_name`, and NEXT is the runloop label, so `goto NEXT` jumps
 * back to it. */
#define DISPATCH(op) switch (op)
#define OP(name) case MVM_OP_ ## name
#define NEXT runloop
#endif

MVM_interp_runメインループ

/* Two interchangeable dispatch strategies, selected by MVM_CGOTO. */
#if MVM_CGOTO
/* Label-based dispatch: DISPATCH() expands to nothing, OP(name) pastes to the
 * identifier OP_name, and NEXT expands to *LABELS[NEXT_OP], so `goto NEXT`
 * becomes an indirect goto through the LABELS table. */
#define DISPATCH(op)
#define OP(name) OP_ ## name
#define NEXT *LABELS[NEXT_OP]
#else
/* Switch-based dispatch: DISPATCH() opens a switch, OP(name) pastes to
 * `case MVM_OP_name`, and NEXT is the runloop label, so `goto NEXT` jumps
 * back to it. */
#define DISPATCH(op) switch (op)
#define OP(name) case MVM_OP_ ## name
#define NEXT runloop
#endif

MVM_interp_runメインループ

/* Two interchangeable dispatch strategies, selected by MVM_CGOTO. */
#if MVM_CGOTO
/* Label-based dispatch: DISPATCH() expands to nothing, OP(name) pastes to the
 * identifier OP_name, and NEXT expands to *LABELS[NEXT_OP], so `goto NEXT`
 * becomes an indirect goto through the LABELS table. */
#define DISPATCH(op)
#define OP(name) OP_ ## name
#define NEXT *LABELS[NEXT_OP]
#else
/* Switch-based dispatch: DISPATCH() opens a switch, OP(name) pastes to
 * `case MVM_OP_name`, and NEXT is the runloop label, so `goto NEXT` jumps
 * back to it. */
#define DISPATCH(op) switch (op)
#define OP(name) case MVM_OP_ ## name
#define NEXT runloop
#endif

それぞれの命令の実装

    /* 64-bit integer arithmetic handlers. Each one combines the .i64 views of
     * the registers selected by the operands at offsets 2 and 4, stores the
     * result in the register selected by the operand at offset 0, advances
     * cur_op by 6 and dispatches the next op.
     * NOTE(review): GET_REG presumably resolves a register index stored at
     * cur_op + offset, which would make 6 the size of three 2-byte operands;
     * confirm against the macro definition. */
    OP(add_i):
        GET_REG(cur_op, 0).i64 = GET_REG(cur_op, 2).i64 + GET_REG(cur_op, 4).i64;
        cur_op += 6;
        goto NEXT;
    OP(sub_i):
        GET_REG(cur_op, 0).i64 = GET_REG(cur_op, 2).i64 - GET_REG(cur_op, 4).i64;
        cur_op += 6;
        goto NEXT;
    OP(mul_i):
        GET_REG(cur_op, 0).i64 = GET_REG(cur_op, 2).i64 * GET_REG(cur_op, 4).i64;
        cur_op += 6;
        goto NEXT;

本日の展示について

まとめ