プログラムを書く人とかはキレイにコードを書きたいと常日頃思っていると思うんですけど,こういうのは見た目の問題,そもそも見えなければ問題はない.
Whitespace って言語があって,これは基本的にスペースとタブと改行だけでプログラムが書ける.
例えば Hello world プログラムとかはこんな感じ.最初の "Say hello" とかは単なるコメントです.
なんてスバらしい言語,これで同僚とかにソースの汚なさをバカにされないで済むと,みなさんビックリされたことでしょう.
Whitespace
Whitespace の Web サイトはどうも ここ っぽいんですが,言語としての仕様はない.ソース読めって感じのことが書いてありますが,公式の Whitespace 実装は Haskell で書いてあって,ぼくは Haskell が読めません.あーもうテストプログラム全部動けばいいんだろってことで,Tutorial を見ながら Whitespace の仮想マシンみたいなやつを Perl で実装することにしたのでした.
Whitespace の中身
Whitespace に用意されているのは各種数値演算やロード・ストア命令,スタック操作とかですね.ジャンプ命令とか,関数呼び出しも可能です.当然再帰呼び出しもできるよ!
また,Whitespace はスタックマシンとして仕様が定まっていて,数値演算とかはスタックのトップとその次の要素が演算対象ですし,ヒープにデータを格納するときも,格納領域のアドレスがスタックのトップに詰まれていることが前提になっている.
注意しないといけないのは,コールスタックは演算とかに使うスタックとは別にしないといけないっぽいこと.同じにしてるとサンプルとして用意されている hello world のプログラムが動かない形になっています (この Hello world のプログラムは関数呼び出しを使っている).
使ってみた
使い方としてはこんな感じ.出力は無理矢理出力バッファに格納されているってことにした.
use strict; use warnings; use Language::Whitespace; my $ws = Language::Whitespace->new_from_file( 'hworld.ws' ); print $ws->compile()->run()->output(); # Hello, world of spaces!
デバッグのときに Whitespace でプログラムを書く必要があったんですけど,正しく書けているかを確認するのが Whitespace だと大変なので,ニーモニックは表示されるようにしてる.
$ws->compile()->mnemonic(); # 0000 PUSH 0 # 0001 PUSH 72 # 0002 STORE # 0003 PUSH 1 # 0004 PUSH 101 # 0005 STORE # 0006 PUSH 2 # 0007 PUSH 108 # 0008 STORE # 0009 PUSH 3 # 0010 PUSH 108 # 続く
出力バッファがあるんだったら入力バッファもなきゃいかんということで,入力は基本的には入力バッファから読み取ることにしてる.例えば入力された数字 n について n! を計算する fact.ws だとこんな感じ.改行まで入れてやらないといけないのはちょっとダメなかんじです.
my $ws = Language::Whitespace->new_from_file( 't/fact.ws' )->input( split //, "5\n" ); print $ws->compile()->run()->output(); # Enter a number: 5! = 120
いちお,directIO メソッドを呼び出すと直接ターミナルから読み取れる.
# fact.pl use strict; use warnings; use Language::Whitespace; my $ws = Language::Whitespace->new_from_file( 't/fact.ws' ); $ws->directIO(); $ws->compile()->run();
$ perl -Ilib fact.pl Enter a number: 6 6! = 720
以下全ソース.あんましキレイなソースにできませんでした.
package Language::Whitespace; use warnings; use strict; use Carp; our $VERSION = '0.0.1'; my %OP = ( " " => { # stack manipulation " " => { op => 'PUSH', arg_call => 'parse_int', }, "\n " => { op => 'DUP', arg_call => 'no_arg' }, "\t " => { op => 'COPY', arg_call => 'parse_int' }, "\n\t" => { op => 'SWAP', arg_call => 'no_arg' }, "\n\n" => { op => 'DISC', arg_call => 'no_arg' }, "\t\n" => { op => 'SLIDE', arg_call => 'parse_int' }, }, "\t " => { # arithmetic " " => { op => 'ADD', arg_call => 'no_arg' }, " \t" => { op => 'SUB', arg_call => 'no_arg' }, " \n" => { op => 'MUL', arg_call => 'no_arg' }, "\t " => { op => 'DIV', arg_call => 'no_arg' }, "\t\t" => { op => 'MOD', arg_call => 'no_arg' }, }, "\t\t" => { # heap " " => { op => 'STORE', arg_call => 'no_arg' }, "\t" => { op => 'LOAD', arg_call => 'no_arg' }, }, "\n" => { # flow control " " => { op => 'MARK', arg_call => 'parse_label' }, " \t" => { op => 'CALL', arg_call => 'parse_label' }, " \n" => { op => 'JMP', arg_call => 'parse_label' }, "\t " => { op => 'JPZ', arg_call => 'parse_label' }, "\t\t" => { op => 'JPM', arg_call => 'parse_label' }, "\t\n" => { op => 'RET', arg_call => 'no_arg' }, "\n\n" => { op => 'EXIT', arg_call => 'no_arg' }, }, "\t\n" => { # I/O " " => { op => 'PUTC', arg_call => 'no_arg' }, " \t" => { op => 'PUTN', arg_call => 'no_arg' }, "\t " => { op => 'GETC', arg_call => 'no_arg' }, "\t\t" => { op => 'GETN', arg_call => 'no_arg' }, }, ); sub new { my $class = ref shift || __PACKAGE__; my $ws = bless { pc => 0, # program counter src => [], stack => [], # stack heap => {}, # heap space output => [], # output buffer input => [], # input buffer mark => {}, # label-address mapping callstack => [], # return address stack directIO => 0, next_label => 'LABEL00', # label }, $class; $ws->_implementation(); # implements wspace code $ws->lex( shift ) if @_; # strip comments in source code $ws; } sub _implementation { my $ws = shift; $ws->{impl} = { # stack operations PUSH => sub { $ws->push_stack( shift ) }, DUP => sub { $ws->push_stack( $ws->at_stack( -1 ) ) }, SWAP => sub { @{ $ws->{stack} }[-2, -1] = @{ $ws->{stack} }[-1, -2] }, COPY => sub { $ws->push_stack( $ws->at_stack( -1 * (shift() + 1) ) ); }, DISC => sub { $ws->pop_stack() }, SLIDE => sub { my $num = shift; $ws->splice_stack( -$num-1, $num ); }, # arithmetic operations ADD => sub { my $a = $ws->pop_stack(); my $b = $ws->pop_stack(); $ws->push_stack( $a + $b ) }, SUB => sub { my $a = $ws->pop_stack(); my $b = $ws->pop_stack(); $ws->push_stack( $b - $a ) }, MUL => sub { my $a = $ws->pop_stack(); my $b = $ws->pop_stack(); $ws->push_stack( $b * $a ) }, DIV => sub { my $a = $ws->pop_stack(); my $b = $ws->pop_stack(); $ws->push_stack( int( $b / $a ) ) }, MOD => sub { my $a = $ws->pop_stack(); my $b = $ws->pop_stack(); $ws->push_stack( $b % $a ) }, # heap operations STORE => sub { my $val = $ws->pop_stack(); my $addr = $ws->pop_stack(); $ws->heap( $addr, $val ); }, LOAD => sub { my $addr = $ws->pop_stack(); $ws->push_stack( $ws->heap( $addr ) ); }, # flow control CALL => sub { $ws->push_callstack( $ws->{pc} ); # push return address $ws->{pc} = $ws->{mark}->{ shift() }; }, RET => sub { $ws->{pc} = $ws->pop_callstack(); }, JMP => sub { $ws->{pc} = $ws->{mark}->{ shift() } }, JPZ => sub { if ($ws->pop_stack() == 0) { $ws->{pc} = $ws->{mark}->{ shift() } } }, JMI => sub { $ws->{pc} = $ws->{mark}->{ shift() } if $ws->pop_stack() < 0; }, EXIT => sub { $ws->{pc} = -1; }, # I/O operations PUTC => sub { $ws->output_buffer( chr $ws->pop_stack() ); }, PUTN => sub { $ws->output_buffer( $ws->pop_stack() ); }, GETC => sub { my $adrs = $ws->pop_stack(); $ws->heap( $adrs, ord $ws->getchar() );}, GETN => sub { my $adrs = $ws->pop_stack(); $ws->heap( $adrs, $ws->getnum() ) }, }; } sub directIO { shift->{directIO} = 1; } sub new_from_file { my $ws = shift->new; my $wsfile = shift or die __PACKAGE__, "->new_from_file(filename)"; open my $fh, '<', $wsfile or die "$wsfile: $!"; my $src = do { local $/; <$fh> }; close $fh; $ws->lex( $src ); $ws; } sub lex { my ( $ws, $code ) = @_; $code =~ tr/ \t\n//cd; $ws->{src} = [ split //, $code ]; $ws; } sub compile { my $ws = shift; while ( @{ $ws->{src} } ) { for my $token ( keys %OP ) { my $len = length $token; my $s = $ws->read_src( $len ); if ( defined $s and $s eq $token ) { $ws->consume( $len ); $ws->parse_cmd( $OP{$token} ); last; } } } $ws; } sub consume { my ($ws, $num) = @_; splice @{ $ws->{src} }, 0, $num; $ws; } sub read_src { my ($ws, $len) = @_; return if $len > @{ $ws->{src} }; join '', @{ $ws->{src} }[ 0 .. $len - 1]; } sub parse_cmd { my ( $ws, $href ) = @_; for my $token ( keys %$href ) { my $len = length $token; my $s = $ws->read_src( $len ); if ( defined $s and $s eq $token ) { $ws->consume( $len ); $ws->compile_line( $href->{$token} ); last; } } } sub mnemonic { my ($ws) = @_; for my $href (@{ $ws->{mnemonic} }) { my ($addr, $code, $arg) = ($href->{addr}, $href->{code}, (defined $href->{arg})? $href->{arg} : ''); if ( $href->{code} eq 'MARK' ) { printf " %6s %s\n", $code, $arg; } else { printf "%04d %6s %s\n", $addr, $code, $arg; } } $ws; } sub push_stack { my ($ws, @elems) = @_; push @{ $ws->{stack} }, @elems; } sub pop_stack { my ($ws) = @_; pop @{ $ws->{stack} }; } sub splice_stack { my ($ws, $idx, $num) = @_; $num ||= 1; $idx ||= 0; splice @{ $ws->{stack} }, $idx, $num; } sub at_stack { my ($ws, $idx) = @_; $ws->{stack}->[$idx]; } sub heap { my ($ws, $addr, $val ) = @_; if ( defined $val ) { $ws->{heap}->{$addr} = $val; } $ws->{heap}->{$addr}; } sub load { my ($ws, $adrs) = @_; $ws->{heap}->{$adrs}; } sub push_mnemonic { my ( $ws, $addr, $code, $arg ) = @_; push @{ $ws->{mnemonic} }, { addr => $addr, code => $code, arg => $arg }; } sub compile_line { my ( $ws, $href ) = @_; my $arg = do { no strict 'refs'; my $call = $href->{arg_call}; $ws->$call(); }; $ws->push_mnemonic( $ws->{pc}, $href->{op}, $arg ); # for debug # resolves label if ( $href->{op} eq 'MARK' ) { $ws->{mark}->{ $arg } = $ws->{pc}; return; } $ws->{code}->[ $ws->{pc}++ ] = { OP => $href->{op}, ARG => $arg, CODE => $ws->{impl}->{ $href->{op} } }; } sub push_callstack { my ($ws, $addr) = @_; push @{ $ws->{callstack} }, $addr; } sub pop_callstack { my ($ws, $addr) = @_; pop @{ $ws->{callstack} }; } sub getnum { my $ws = shift; if ( not $ws->{directIO} ) { my $content = join '', @{ $ws->{input} }; my $idx = index $content, "\n"; my $num = join '', splice @{ $ws->{input} }, 0, $idx; shift @{ $ws->{input} }; # discard a newline character return $num; } while( (my $c = getc()) ne "\n" ) { return $c; } } sub getchar { my $ws = shift; if ( not $ws->{directIO} ) { return shift @{ $ws->{input} } or die "empty input buffer"; } while( (my $c = getc()) ne "\n" ) { return $c; } } sub input { my $ws = shift; push @{ $ws->{input} }, @_; $ws; } sub output_buffer { my ($ws, $c) = @_; if ( $ws->{directIO} ) { print $c; } else { push @{ $ws->{output} }, $c; } } sub output { my ($ws, $delim) = @_; $delim ||= ''; join $delim, @{ $ws->{output} } } sub run { my $ws = shift; $ws->{pc} = 0; # initializes program counter while( $ws->{pc} >= 0 && $ws->{code}->[ $ws->{pc} ] ) { my $href = $ws->{code}->[ $ws->{pc}++ ]; $href->{CODE}->( $href->{ARG} ); } $ws; } sub parse_label { my ( $ws ) = @_; my $src = $ws->{src}; my $binary = ''; while ( ( my $c = shift @$src ) ne "\n" ) { $binary .= ($c eq " ")? '0' : ($c eq "\t")? '1' : die "not a number"; } unless ( $ws->{label}->{$binary} ) { $ws->{label}->{$binary} = $ws->{next_label}++; } $ws->{label}->{$binary}; } sub parse_int { my ( $ws ) = @_; my $src = $ws->{src}; my $sign = shift @$src; $sign = ($sign eq ' ')? 1 : ($sign eq "\t")? -1 : die "not a number"; my $binary = ''; while ( ( my $c = shift @$src ) ne "\n" ) { $binary .= ($c eq " ")? '0' : ($c eq "\t")? '1' : die "not a number"; } my $dec = eval "0b$binary"; return $sign * $dec; } sub no_arg { undef } 1;