Parse EBNF

This parses the EBNF rule set using a Raku grammar, then if it parses as valid EBNF, constructs a grammar and parses the test strings with that. EBNF rule sets that are naively syntactically correct but missing rules will parse as valid but will give a runtime failure warning about missing methods. It is implemented and exercised using the flavor of EBNF and test cases specified on the test page.

# A Raku grammar to parse EBNF
grammar EBNF {
    rule         TOP { ^ <title>? '{' [ <production> ]+ '}' <comment>? $ }
    rule  production { <name> '=' <expression> <[.;]> }
    rule  expression { <term> +% "|" }
    rule        term { <factor>+ }
    rule      factor { <group> | <repeat> | <optional> | <identifier> | <literal> }
    rule       group { '(' <expression> ')' }
    rule      repeat { '{' <expression> '}' }
    rule    optional { '[' <expression> ']' }
    token identifier { <-[\|\(\)\{\}\[\]\.\;\"\'\s]>+ } #"
    token    literal { ["'" <-[']>+ "'" | '"' <-["]>+ '"'] } #"
    token      title { <literal> }
    token    comment { <literal> }
    token       name { <identifier>  <?before \h* '='> }
}

class EBNF::Actions {
    method        TOP($/) { 
                            say "Syntax Tree:\n", $/; # Dump the syntax tree to STDOUT
                            make 'grammar ' ~
                              ($<title> ?? $<title>.subst(/\W/, '', :g) !! 'unnamed') ~
                              " \{\n rule TOP \{^[<" ~ $/<production>[0]<name> ~
                              ">]+\$\}\n " ~ $<production>>>.ast ~ "\}"
                          }
   method production($/) { 
                            make 'token ' ~ $<name> ~ ' {' ~
                              $<expression>.ast ~ "}\n"
                          }
   method expression($/) { make join '|', $<term>>>.ast }
   method       term($/) { make join '\h*', $<factor>>>.ast }
   method     factor($/) { 
                            make $<literal>  ?? $<literal> !!
                              $<group>    ?? '[' ~ $<group>.ast    ~ ']'  !!
                              $<repeat>   ?? '[' ~ $<repeat>.ast   ~ '\\s*]*' !!
                              $<optional> ?? '[' ~ $<optional>.ast ~ ']?' !!
                              '<' ~ $<identifier> ~ '>'
                          }
   method     repeat($/) { make $<expression>.ast }
   method   optional($/) { make $<expression>.ast }
   method      group($/) { make $<expression>.ast }
}

# An array of test cases
my @tests = (
    {
        ebnf => 
            q<"a" {
                a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
            } "z">
        ,
        teststrings => [
            'a1a3a4a4a5a6',
            'a1 a2a6',
            'a1 a3 a4 a6',
            'a1 a4 a5 a6',
            'a1 a2 a4 a4 a5 a6',
            'a1 a2 a4 a5 a5 a6',
            'a1 a2 a4 a5 a6 a7',
            'your ad here' 
        ]
    },
    {
        ebnf =>
            q<{
                expr = term { plus term } .
                term = factor { times factor } .
                factor = number | '(' expr ')' .
 
                plus = "+" | "-" .
                times = "*" | "/" .
 
                number = digit { digit } .
                digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
            }>
        ,
        teststrings => [
            '2',
            '2*3 + 4/23 - 7',
            '(3 + 4) * 6-2+(4*(4))',
            '-2',
            '3 +',
            '(4 + 3'
        ]
    },
    {
        ebnf => q<a = "1";>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ a = "1" ;>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ hello world = "1"; }>,
        teststrings => ['foobar']
    },
    {
        ebnf => q<{ foo = bar . }>,
        teststrings => ['foobar']
    }
);

# Test the parser.
my $i = 1;
for @tests -> $test {
    unless EBNF.parse($test<ebnf>) {
         say "Parsing EBNF grammar:\n";
         say "{$test<ebnf>.subst(/^^\h*/,'',:g)}\n";
         say "Invalid syntax. Can not be parsed.\n";
         say '*' x 79;
         next;
    }
    my $p = EBNF.parse($test<ebnf>, :actions(EBNF::Actions));
    my $grammar = $p.ast;
    $grammar ~~ m/^'grammar '(\w+)/;
    my $title = $0;
    my $fn = 'EBNFtest'~$i++;
    my $fh = open($fn, :w) orelse .die;
    $fh.say( "\{\n", $grammar );
    $fh.say( qq|say "Parsing EBNF grammar '$title':\\n";| );
    $fh.say( qq|say q<{$test<ebnf>.subst(/^^\h*/,'',:g)}>;| );
    $fh.say(  q|say "\nValid syntax.\n\nTesting:\n";| );
    $fh.say(  q|CATCH { default { say " - $_" } };| );
    my $len = max $test<teststrings>.flat>>.chars;
    for $test<teststrings>.flat -> $s {
        $fh.say( qq|printf "%{$len}s", '{$s}';| ~
                 qq|printf " - %s\\n", {$title}.parse('{$s}')| ~
                 qq| ?? 'valid.' !! 'NOT valid.';|
               );
    }
    $fh.say( qq| "\\n"} |);
    $fh.close;
    say qqx/raku $fn/;
    say '*' x 79, "\n";
    unlink $fn;
}

Output:

Syntax Tree:
「"a" {
                a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
            } "z"」
 title => 「"a"」
  literal => 「"a"」
 production => 「a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;

  name => 「a」
   identifier => 「a」
  expression => 「"a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" 」
   term => 「"a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" 」
    factor => 「"a1" 」
     literal => 「"a1"」
    factor => 「( "a2" | "a3" ) 」
     group => 「( "a2" | "a3" ) 」
      expression => 「"a2" | "a3" 」
       term => 「"a2" 」
        factor => 「"a2" 」
         literal => 「"a2"」
       term => 「 "a3" 」
        factor => 「"a3" 」
         literal => 「"a3"」
    factor => 「{ "a4" } 」
     repeat => 「{ "a4" } 」
      expression => 「"a4" 」
       term => 「"a4" 」
        factor => 「"a4" 」
         literal => 「"a4"」
    factor => 「[ "a5" ] 」
     optional => 「[ "a5" ] 」
      expression => 「"a5" 」
       term => 「"a5" 」
        factor => 「"a5" 」
         literal => 「"a5"」
    factor => 「"a6" 」
     literal => 「"a6"」
 comment => 「"z"」
  literal => 「"z"」

Parsing EBNF grammar 'a':

"a" {
a = "a1" ( "a2" | "a3" ) { "a4" } [ "a5" ] "a6" ;
} "z"

Valid syntax.

Testing:

     a1a3a4a4a5a6 - valid.
          a1 a2a6 - valid.
      a1 a3 a4 a6 - valid.
      a1 a4 a5 a6 - NOT valid.
a1 a2 a4 a4 a5 a6 - valid.
a1 a2 a4 a5 a5 a6 - NOT valid.
a1 a2 a4 a5 a6 a7 - NOT valid.
     your ad here - NOT valid.

*******************************************************************************

Syntax Tree:
「{
                expr = term { plus term } .
                term = factor { times factor } .
                factor = number | '(' expr ')' .

                plus = "+" | "-" .
                times = "*" | "/" .

                number = digit { digit } .
                digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
            }」
 production => 「expr = term { plus term } .

  name => 「expr」
   identifier => 「expr」
  expression => 「term { plus term } 」
   term => 「term { plus term } 」
    factor => 「term 」
     identifier => 「term」
    factor => 「{ plus term } 」
     repeat => 「{ plus term } 」
      expression => 「plus term 」
       term => 「plus term 」
        factor => 「plus 」
         identifier => 「plus」
        factor => 「term 」
         identifier => 「term」
 production => 「term = factor { times factor } .

  name => 「term」
   identifier => 「term」
  expression => 「factor { times factor } 」
   term => 「factor { times factor } 」
    factor => 「factor 」
     identifier => 「factor」
    factor => 「{ times factor } 」
     repeat => 「{ times factor } 」
      expression => 「times factor 」
       term => 「times factor 」
        factor => 「times 」
         identifier => 「times」
        factor => 「factor 」
         identifier => 「factor」
 production => 「factor = number | '(' expr ')' .


  name => 「factor」
   identifier => 「factor」
  expression => 「number | '(' expr ')' 」
   term => 「number 」
    factor => 「number 」
     identifier => 「number」
   term => 「 '(' expr ')' 」
    factor => 「'(' 」
     literal => 「'('」
    factor => 「expr 」
     identifier => 「expr」
    factor => 「')' 」
     literal => 「')'」
 production => 「plus = "+" | "-" .

  name => 「plus」
   identifier => 「plus」
  expression => 「"+" | "-" 」
   term => 「"+" 」
    factor => 「"+" 」
     literal => 「"+"」
   term => 「 "-" 」
    factor => 「"-" 」
     literal => 「"-"」
 production => 「times = "*" | "/" .


  name => 「times」
   identifier => 「times」
  expression => 「"*" | "/" 」
   term => 「"*" 」
    factor => 「"*" 」
     literal => 「"*"」
   term => 「 "/" 」
    factor => 「"/" 」
     literal => 「"/"」
 production => 「number = digit { digit } .

  name => 「number」
   identifier => 「number」
  expression => 「digit { digit } 」
   term => 「digit { digit } 」
    factor => 「digit 」
     identifier => 「digit」
    factor => 「{ digit } 」
     repeat => 「{ digit } 」
      expression => 「digit 」
       term => 「digit 」
        factor => 「digit 」
         identifier => 「digit」
 production => 「digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .

  name => 「digit」
   identifier => 「digit」
  expression => 「"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 」
   term => 「"0" 」
    factor => 「"0" 」
     literal => 「"0"」
   term => 「 "1" 」
    factor => 「"1" 」
     literal => 「"1"」
   term => 「 "2" 」
    factor => 「"2" 」
     literal => 「"2"」
   term => 「 "3" 」
    factor => 「"3" 」
     literal => 「"3"」
   term => 「 "4" 」
    factor => 「"4" 」
     literal => 「"4"」
   term => 「 "5" 」
    factor => 「"5" 」
     literal => 「"5"」
   term => 「 "6" 」
    factor => 「"6" 」
     literal => 「"6"」
   term => 「 "7" 」
    factor => 「"7" 」
     literal => 「"7"」
   term => 「 "8" 」
    factor => 「"8" 」
     literal => 「"8"」
   term => 「 "9" 」
    factor => 「"9" 」
     literal => 「"9"」

Parsing EBNF grammar 'unnamed':

{
expr = term { plus term } .
term = factor { times factor } .
factor = number | '(' expr ')' .

plus = "+" | "-" .
times = "*" | "/" .

number = digit { digit } .
digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" .
}

Valid syntax.

Testing:

                    2 - valid.
       2*3 + 4/23 - 7 - valid.
(3 + 4) * 6-2+(4*(4)) - valid.
                   -2 - NOT valid.
                  3 + - NOT valid.
               (4 + 3 - NOT valid.

*******************************************************************************

Parsing EBNF grammar:

a = "1";

Invalid syntax. Can not be parsed.

*******************************************************************************
Parsing EBNF grammar:

{ a = "1" ;

Invalid syntax. Can not be parsed.

*******************************************************************************
Parsing EBNF grammar:

{ hello world = "1"; }

Invalid syntax. Can not be parsed.

*******************************************************************************
Syntax Tree:
「{ foo = bar . }」
 production => 「foo = bar . 」
  name => 「foo」
   identifier => 「foo」
  expression => 「bar 」
   term => 「bar 」
    factor => 「bar 」
     identifier => 「bar」

Parsing EBNF grammar 'unnamed':

{ foo = bar . }

Valid syntax.

Testing:

foobar - No such method 'bar' for invocant of type 'unnamed'

*******************************************************************************

Last updated