Parse EBNF

This parses the EBNF rule set using a Raku grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods. It is implemented and exercised using the flavor of EBNF and test cases specified on the test page.

# A Raku grammar to parse EBNF
grammar EBNF {
    rule         TOP { ^ <title>? '{' [ <production> ]+ '}' <comment>? $ }
    rule  production { <name> '=' <expression> <[.;]> }
    rule  expression { <term> +% "|" }
    rule        term { <factor>+ }
    rule      factor { <group> | <repeat> | <optional> | <identifier> | <literal> }
    rule       group { '(' <expression> ')' }
    rule      repeat { '{' <expression> '}' }
    rule    optional { '[' <expression> ']' }
    token identifier { <-[\|\(\)\{\}\[\]\.\;\"\'\s]>+ } #"
    token    literal { ["'" <-[']>+ "'" | '"' <-["]>+ '"'] } #"
    token      title { <literal> }
    token    comment { <literal> }
    token       name { <identifier>  <?before \h* '='> }
}

class EBNF::Actions {
    method        TOP($/) { 
                            say "Syntax Tree:\n", $/; # Dump the syntax tree to STDOUT
                            make 'grammar ' ~
                              ($<title> ?? $<title>.subst(/\W/, '', :g) !! 'unnamed') ~
                              " \{\n rule TOP \{^[<" ~ $/<production>[0]<name> ~
                              ">]+\$\}\n " ~ $<production>>>.ast ~ "\}"
                          }
   method production($/) { 
                            make 'token ' ~ $<name> ~ ' {' ~
                              $<expression>.ast ~ "}\n"
                          }
   method expression($/) { make join '|', $<term>>>.ast }
   method       term($/) { make join '\h*', $<factor>>>.ast }
   method     factor($/) { 
                            make $<literal>  ?? $<literal> !!
                              $<group>    ?? '[' ~ $<group>.ast    ~ ']'  !!
                              $<repeat>   ?? '[' ~ $<repeat>.ast   ~ '\\s*]*' !!
                              $<optional> ?? '[' ~ $<optional>.ast ~ ']?' !!
                              '<' ~ $<identifier> ~ '>'
                          }
   method     repeat($/) { make $<expression>.ast }
   method   optional($/) { make $<expression>.ast }
   method      group($/) { make $<expression>.ast }
}

# An array of test cases
my @tests = (
    {
        ebnf => 
            q<"a" {
                a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
            } "z">
        ,
        teststrings => [
            'a1a3a4a4a5a6',
            'a1 a2a6',
            'a1 a3 a4 a6',
            'a1 a4 a5 a6',
            'a1 a2 a4 a4 a5 a6',
            'a1 a2 a4 a5 a5 a6',
            'a1 a2 a4 a5 a6 a7',
            'your ad here' 
        ]
    },
    {
        ebnf =>
            q<{
                expr = term { plus term } .
                term = factor { times factor } .
                factor = number | '(' expr ')' .
 
                plus = "+" | "-" .
                times = "*" | "/" .
 
                number = digit { digit } .
                digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
            }>
        ,
        teststrings => [
            '2',
            '2*3 + 4/23 - 7',
            '(3 + 4) * 6-2+(4*(4))',
            '-2',
            '3 +',
            '(4 + 3'
        ]
    },
    {
        ebnf => q<a = "1";>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ a = "1" ;>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ hello world = "1"; }>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ foo = bar . }>,
        teststrings => ['foobar']
    }
);

# Test the parser.
my $i = 1;
for @tests -> $test {
    unless EBNF.parse($test<ebnf>) {
         say "Parsing EBNF grammar:\n";
         say "{$test<ebnf>.subst(/^^\h*/,'',:g)}\n";
         say "Invalid syntax. Can not be parsed.\n";
         say '*' x 79;
         next;
    }
    my $p = EBNF.parse($test<ebnf>, :actions(EBNF::Actions));
    my $grammar = $p.ast;
    $grammar ~~ m/^'grammar '(\w+)/;
    my $title = $0;
    my $fn = 'EBNFtest'~$i++;
    my $fh = open($fn, :w) orelse .die;
    $fh.say( "\{\n", $grammar );
    $fh.say( qq|say "Parsing EBNF grammar '$title':\\n";| );
    $fh.say( qq|say q<{$test<ebnf>.subst(/^^\h*/,'',:g)}>;| );
    $fh.say(  q|say "\nValid syntax.\n\nTesting:\n";| );
    $fh.say(  q|CATCH { default { say " - $_" } };| );
    my $len = max $test<teststrings>.flat>>.chars;
    for $test<teststrings>.flat -> $s {
        $fh.say( qq|printf "%{$len}s", '{$s}';| ~
                 qq|printf " - %s\\n", {$title}.parse('{$s}')| ~
                 qq| ?? 'valid.' !! 'NOT valid.';|
               );
    }
    $fh.say( qq| "\\n"} |);
    $fh.close;
    say qqx/raku $fn/;
    say '*' x 79, "\n";
    unlink $fn;
}

Output:

Last updated

Was this helpful?