wesnoth/utils/wml.pm
2018-03-11 14:59:37 +11:00

502 lines
11 KiB
Perl

package wml;
use strict;
use Carp qw(confess);
$wml::open_element = 0, $wml::close_element = 1, $wml::schema_item = 2,
$wml::literal_word = 3, $wml::first_word = 4, $wml::end_words = 256;
$wml::max_schema_size = $wml::end_words - $wml::first_word;
$wml::max_schema_item_length = 20;
sub read_literal_word
{
my $word = '';
my ($chars) = @_;
while(@$chars) {
my $char = shift @$chars;
if((ord $char) == 0) {
return $word;
}
$word .= $char;
}
confess "Unexpected end of input";
}
sub read_word
{
my ($schema,$chars) = @_;
@$chars or confess "Unexpected end of input";
my $char = shift @$chars;
my $code = ord $char;
if($code == $wml::literal_word) {
return &read_literal_word($chars);
} elsif($code == $wml::schema_item) {
my $word = &read_literal_word($chars);
push @$schema, $word;
return &read_word($schema,$chars);
} elsif($code < $wml::first_word) {
confess "Unexpected character '$code' when word was expected";
} else {
my $index = $code - $wml::first_word;
if($index >= @$schema) {
confess "Word '$index' ($code) not found in schema";
}
return $schema->[$index];
}
}
sub add_child
{
my ($doc,$child_name) = @_;
my $new_element = {'name' => $child_name, 'children' => [], 'attr' => {}};
push(@{$doc->{'children'}}, $new_element);
return $new_element;
}
sub read_binary
{
my ($schema,$input) = @_;
my $doc = {'name' => '', 'children' => [], 'attr' => {}};
my @stack = ($doc);
my $cur = $doc;
my $len = length $input;
my @chars;
for (my $i = 0; $i < $len; $i++) {
push @chars, substr($input, $i, 1);
}
#to see all the characters input for debugging
#print STDERR "INPUT: " . (join ', ', (map {ord} @chars)) . "\n";
while(@chars) {
my $char = shift @chars;
my $code = ord $char;
my $remaining = @chars;
if($code == $wml::open_element) {
my $word = &read_word($schema,\@chars);
$cur = &add_child($cur,$word);
push @stack, $cur;
} elsif($code == $wml::close_element) {
pop @stack or confess "Illegal close element found";
$cur = $stack[$#stack] or confess "Illegal close element found";
} elsif($code == $wml::schema_item) {
my $word = &read_literal_word(\@chars);
push @$schema, $word;
} else {
unshift @chars, $char;
my $name = &read_word($schema,\@chars);
my $value = &read_literal_word(\@chars);
$cur->{'attr'}->{$name} = $value;
}
}
if($#stack != 0) {
my $stack = $#stack;
confess "Unexpected end of input: $stack items still unclosed";
}
return $doc;
}
sub read_text
{
my ($text) = @_;
my @chars = split //, $text;
my $doc = {'name' => '', 'children' => [], 'attr' => {}};
my @stack = ($doc);
my $cur = $doc;
my $token = '';
my $state = 'SEEK';
my $name = '';
my $macro=0;
foreach my $char (@chars) {
if($state eq 'SEEK') {
next if $char =~ /\s/;
if($char eq '[') {
$state = 'ELEMENT';
$token = '';
} elsif($char eq '{') {
$state = 'MACRO';
$token = '';
$macro++;
} else {
$state = 'NAME';
$token .= $char;
}
} elsif($state eq 'MACRO') {
if($char eq '{'){
$macro++;
}elsif($char eq '}'){
$macro--;
}
if(0 == $macro){
$state = 'SEEK';
$token = '';
}
} elsif($state eq 'NAME') {
if($char eq '=') {
$name = $token;
$state = 'VALUE';
$token = '';
} else {
$token .= $char unless $char =~ /\s/;
}
} elsif($state eq 'VALUE') {
if($char eq "\n" or $char eq "\r") {
my @names = ($name);
my @values = ($token);
if($name =~ /,/) {
@names = split /,/, $name;
@values = split /,/, $token;
}
while(@names and @values) {
my $name = shift @names;
my $value = shift @values;
$cur->{'attr'}->{$name} = $value;
}
$state = 'SEEK';
$token = '';
} elsif($char eq '"') {
$state = 'VALUE_QUOTE';
} else {
$token .= $char;
}
} elsif($state eq 'VALUE_QUOTE') {
if($char eq "\\") {
$state = 'VALUE_BACKSLASH';
} elsif($char eq '"') {
$state = 'VALUE';
} else {
$token .= $char;
}
} elsif($state eq 'VALUE_BACKSLASH') {
$token .= $char;
$state = 'VALUE_QUOTE';
} elsif($state eq 'ELEMENT') {
if($char eq '/' and $token eq '') {
$state = 'CLOSE_ELEMENT';
} elsif($char eq ']') {
$cur = &add_child($cur,$token);
push @stack, $cur;
$state = 'SEEK';
$token = '';
} else {
$token .= $char;
}
} elsif($state eq 'CLOSE_ELEMENT') {
if($char eq ']') {
my $expected = $cur->{'name'};
$expected eq $token or confess "close element '$token' doesn't match current open element '$expected'";
pop @stack or confess "illegal close element '$token'";
$cur = $stack[$#stack];
$token = '';
$state = 'SEEK';
} else {
$token .= $char;
}
}
}
if($state eq 'VALUE') {
my @names = ($name);
my @values = ($token);
if($name =~ /,/) {
@names = split /,/, $name;
@values = split /,/, $token;
}
while(@names and @values) {
my $name = shift @names;
my $value = shift @values;
$cur->{'attr'}->{$name} = $value;
}
} elsif($state ne 'SEEK' or $#stack != 0) {
confess "unexpected end of WML document: state is '$state', stack size is $#stack";
}
return $doc;
}
sub write_word_literal($)
{
my ($word) = @_;
return $word . "\0";
}
sub write_word
{
my $res = '';
my ($schema,$schema_lookup,$word) = @_;
if(exists $schema_lookup->{$word}) {
return chr($wml::first_word + $schema_lookup->{$word});
}
my $pos = scalar(@$schema);
if($pos < $wml::max_schema_size and length($word) < $wml::max_schema_item_length) {
push @$schema, $word;
$schema_lookup->{$word} = $pos;
return chr($wml::schema_item) . &write_word_literal($word) . chr($wml::first_word + $pos);
} else {
return chr($wml::literal_word) . &write_word_literal($word);
}
}
sub write_binary_internal
{
my $res = '';
my ($schema,$schema_lookup,$doc) = @_;
foreach my $name (keys %{$doc->{'attr'}}) {
my $value = $doc->{'attr'}->{$name};
$res .= &write_word($schema,$schema_lookup,$name) . &write_word_literal($value);
}
foreach my $child (@{$doc->{'children'}}) {
my $name = $child->{'name'};
$res .= chr($wml::open_element) .
&write_word($schema,$schema_lookup,$name) .
&write_binary_internal($schema,$schema_lookup,$child) .
chr($wml::close_element);
}
return $res;
}
sub write_binary
{
my ($schema,$doc) = @_;
my %schema_lookup = ();
my $i = 0;
foreach my $word (@$schema) {
$schema_lookup{$word} = $i;
++$i;
}
return &write_binary_internal($schema,\%schema_lookup,$doc);
}
sub write_text
{
my $doc = shift @_;
my $indent = shift @_ or 0;
my $res = '';
foreach my $name (keys %{$doc->{'attr'}}) {
my $value = $doc->{'attr'}->{$name};
$res .= "\t" x $indent;
$res .= "$name=\"$value\"\n";
}
foreach my $child (@{$doc->{'children'}}) {
my $name = $child->{'name'};
$res .= "\t" x $indent;
$res .= "[$name]\n";
$res .= &write_text($child,$indent+1);
$res .= "\t" x $indent;
$res .= "[/$name]\n";
}
return $res;
}
sub output_document
{
my $doc = shift @_;
my $indent = 0;
$indent = shift @_ if @_;
foreach my $name (keys %{$doc->{'attr'}}) {
my $value = $doc->{'attr'}->{$name};
print ' ' x $indent;
print "$name=\"$value\"\n";
}
foreach my $child (@{$doc->{'children'}}) {
my $name = $child->{'name'};
print ' ' x $indent;
print "[$name]\n";
&output_document($child,$indent+1);
print ' ' x $indent;
print "[/$name]\n";
}
}
sub has_child
{
my ($doc,$name) = @_;
foreach my $child (@{$doc->{'children'}}) {
if($child->{'name'} eq $name) {
return $child;
}
}
return 0;
}
sub get_children
{
my ($doc,$name) = @_;
my @res = ();
foreach my $child (@{$doc->{'children'}}) {
if($child->{'name'} eq $name) {
push @res, $child;
}
}
return @res;
}
sub deep_copy
{
my ($doc) = @_;
my $res = {'name' => $doc->{'name'}, 'children' => [], 'attr' => {}};
my $attrsrc = $doc->{'attr'};
my $attrdst = $res->{'attr'};
while(my ($key,$val) = each %$attrsrc) {
$attrdst->{$key} = $val;
}
my $childsrc = $doc->{'children'};
my $childdst = $res->{'children'};
foreach my $child (@$childsrc) {
push @$childdst, &deep_copy($child);
}
return $res;
}
sub docs_equal
{
my ($doc1, $doc2) = @_;
return 0 if $doc1->{'name'} ne $doc2->{'name'};
my $attr1 = $doc1->{'attr'};
my $attr2 = $doc2->{'attr'};
while(my ($key, $value) = each %$attr1) {
return 0 if $attr2->{$key} ne $value;
}
while(my ($key, $value) = each %$attr2) {
return 0 if $attr1->{$key} ne $value;
}
my $children1 = $doc1->{'children'};
my $children2 = $doc2->{'children'};
if(scalar(@$children1) != scalar(@$children2)) {
return 0;
}
for(my $n = 0; $n < @$children1; ++$n) {
return 0 if not &docs_equal($children1->[$n], $children2->[$n]);
}
return 1;
}
sub get_diff
{
my ($doc1, $doc2, $diff_name) = @_;
my $diffs = {'name' => $diff_name, 'children' => [], 'attr' => {}};
my $attr1 = $doc1->{'attr'};
my $attr2 = $doc2->{'attr'};
my $d = 0;
my @keys1 = keys %$attr1;
my @keys2 = keys %$attr2;
foreach my $key (@keys1) {
if($attr1->{$key} ne $attr2->{$key}) {
my $child = {'name' => 'insert', 'children' => [], 'attr' => {$key => $attr1->{$key}}};
push @{$diffs->{'children'}}, $child;
++$d;
}
}
foreach my $key (@keys2) {
if(undef $attr1->{$key}) {
my $child = {'name' => 'delete', 'children' => [], 'attr' => {$key => 'x'}};
push @{$diffs->{'children'}}, $child;
++$d;
}
}
my $children1 = $doc1->{'children'};
my $children2 = $doc2->{'children'};
my %childmap1 = {};
my %childmap2 = {};
my %entities = {};
foreach my $child (@$children1) {
my $a = $childmap1{$child->{'name'}};
if(not $a) {
$a = [$child];
$childmap1{$child->{'name'}} = $a;
$entities{$child->{'name'}} = 1;
} else {
push @$a, $child;
}
}
foreach my $child (@$children2) {
my $a = $childmap2{$child->{'name'}};
if(not $a) {
$a = [$child];
$childmap2{$child->{'name'}} = $a;
$entities{$child->{'name'}} = 1;
} else {
push @$a, $child;
}
}
foreach my $key (keys %entities) {
my $ndeletes = 0;
my $a1 = ($childmap1{$key} or []);
my $a2 = ($childmap2{$key} or []);
for(my $n = 0; $n < @$a1 or $n < @$a2; ++$n) {
if($n < @$a1 and $n < @$a2) {
if(not &docs_equal($a1->[$n], $a2->[$n])) {
my $diff = &get_diff($a1->[$n], $a2->[$n], $key);
my $change = {'name' => 'change_child', 'children' => $diff->{'children'}, 'attr' => {'index' => $n}};
push @{$diffs->{'children'}}, $change;
++$d;
}
} elsif($n < @$a1) {
my $insert = {'name' => 'insert_child', 'children' => [$a1->[$n]], 'attr' => {'index' => $n}};
push @{$diffs->{'children'}}, $insert;
++$d;
} else {
my $attr = $a2->[$n];
my $delete = {'name' => 'delete_child', 'children' => [{'name' => $attr->{'name'}, 'children' => [], 'attr' => {}}], 'attr' => {'index' => $n - $ndeletes}};
++$ndeletes;
push @{$diffs->{'children'}}, $delete;
++$d;
}
}
}
return undef unless $d;
return {'name' => '', 'children' => [$diffs], 'attr' => {}};
}
sub single_child_doc
{
my ($name) = @_;
my $res = {'name' => '', 'children' => [{'name' => $name, 'children' => [], 'attr' => {}}], 'attr' => {}};
return $res;
}
'Wesnoth Markup Language parser';