Revision 29473

Date:
2010/01/06 06:35:49
Author:
diakopter
Revision Log:
[sprixel] improved correctness and complexity/efficiency drastically. memoization of failed offsets when backtracking. implemented in plus. unclear whether it should be implemented in either. should go quadratic only on the same classes of patterns Perl 5 does, now...
Files:

Legend:

 
Added
 
Removed
 
Modified
  • src/perl6/sprixel/jsemit.js

     
    107 107 function Expr_val(v) { this.v = v }
    108 108 Expr_val.prototype.emit = function() { return this.v };
    109 109 function val(v) { return new Expr_val(v) }
    110 function dval(v) { return new Expr_val(dbg ? v : '') }
    110 111
    111 112 var Expr_func = make_binary_ctor();
    112 113 Expr_func.prototype.emit = function() {
     
    716 717 var retry = new gtr(), check = new gtr(); // label for retry spot
    717 718 c.r.push(
    718 719 casel(this.init),d,
    719 val("var z8=[]"), // create a container for the match offsets
    720 val("t.z=[]"), // create a container for the match offsets
    720 721 casel(retry)
    721 722 );
    722 723 this.l.emit(c);
     
    724 725 cond(val("(t.s==o)"),[[ // child is a zero-length assertion
    725 726 gotol(this.done)
    726 727 ]]),
    727 val("z8.push(o)"),
    728 val("t.z.push(o)"),
    728 729 gotol(retry), // try another one
    729 730
    730 731 casel(this.l.fail), // left hit its base case
    731 cond(val("(z8.length==0)"),[[
    732 cond(val("(t.z.length==0)"),[[
    732 733 a,
    733 734 gotol(this.fail)
    734 735 ]]),
    735 736 casel(check),
    736 cond(val("(z8.length==1)"),[[
    737 cond(val("(t.z.length==1)"),[[
    738 val("o=t.z.pop()"),
    737 739 gotol(this.done)
    738 740 ]]),
    739 val("t.z=z8"), // stash the saved offsets in self.
    740 741 gotol(this.notd),
    741 742
    742 743 casel(this.bt),
     
    760 761 c.r.push(
    761 762 casel(this.init),d,
    762 763 val("t.z8=[];t.ret={}"), // create containers for the child objects
    763 casel(retry)
    764 casel(retry),
    765 dval("print('retry '+o)")
    764 766 );
    765 767 this.l.emit(c);
    766 768 c.r.push(
    767 769 casel(this.l.done), // left succeeded
    770 dval("print('l.done '+o)"),
    768 771 //cond(val("(t.s==o)"),[[ // child is a zero-length assertion
    769 772 // gotol(this.done)
    770 773 //]]),
    774 cond(val("(t.i.ret[o])"),[[
    775 dval("print('4already tried '+o)"),
    776 val("t.i.z8.push(t);t.nd=false;t=t.i"),
    777 gotol(this.bt)
    778 ]]),
    771 779 val("t.i.z8.push(t);t.nd=false;t=t.i"),
    772 cond(val("(t.ret[o])"),[[
    773 gotol(this.l.fail)
    774 ]]),
    775 val("t.ret[o]=true"),
    776 780 gotol(retry), // try another one
    777 781
    778 782 casel(this.l.notd), // left succeeded
    783 dval("print('l.notd '+o)"),
    784 cond(val("(t.i.ret[o])"),[[
    785 dval("print('already tried '+o)"),
    786 val("t.i.z8.push(t);t.nd=true;t=t.i"),
    787 gotol(this.bt)
    788 ]]),
    779 789 val("t.i.z8.push(t);t.nd=true"),
    780 790 a,
    781 791 gotol(retry), // try another one
    782 792
    783 793 casel(this.l.fail), // left hit its base case
    794 dval("print('l.fail '+o)"),
    784 795 cond(val("(t.z8.length==0)"),[[
    785 796 a,
    786 797 gotol(this.fail)
    787 798 ]]),
    788 799 casel(check),
    789 800 cond(val("(t.z8.length==1&&t.z8[0].nd==false)"),[[
    801 dval("print('done '+o)"),
    790 802 gotol(this.done)
    791 803 ]]),
    804 //cond(val("(t.ret[o])"),[[
    805 // dval("print('2already tried '+o)"),
    806 // gotol(this.bt)
    807 //]]),
    792 808 val("t.ret[o]=true"),
    809 dval("print('notd '+o)"),
    793 810 gotol(this.notd),
    794 811
    795 812 casel(this.bt),
    813 dval("print('bt '+o)"),
    796 814 val("u=t.z8.pop()"),
    797 815 cond(val("(u.nd)"),[[
    798 816 val("o=(t=u).s"),
    817 cond(val("(t.i.ret[o])"),[[
    818 dval("print('3already tried '+o)"),
    819 val("t=t.i"),
    820 gotol(this.bt)
    821 ]]),
    822 dval("print('l.bt '+o)"),
    799 823 gotol(this.l.bt)
    800 824 ]]),
    801 825 cond(val("(t.z8.length==0)"),[[
     
    804 828 ]]),
    805 829 val("o=u.s"),
    806 830 cond(val("(t.z8.length==1&&t.z8[0].nd==false)"),[[
    831 dval("print('done '+o)"),
    807 832 gotol(this.done)
    808 833 ]]),
    809 834 cond(val("(t.ret[o])"),[[
    835 dval("print('3already tried '+o)"),
    836 //val("t.z8.push(u)"),
    810 837 gotol(this.bt)
    811 838 ]]),
    812 839 val("t.ret[o]=true"),
    840 dval("print('notd '+o)"),
    813 841 gotol(this.notd)
    814 842 );
    815 843 };
     
    826 854 }
    827 855
    828 856 function utf32str_substr(offset, length) {
    829 //print('checking offset at '+offset);
    857 if (dbg)
    858 print('checking offset at '+offset);
    830 859 return offset <= 0
    831 860 ? offset + length >= this.l
    832 861 ? this.str.substring(0)
     
    853 882 return arr; // array of ints representing the string's unicode codepoints
    854 883 }
    855 884
    885 var dbg = 0;
    856 886
    857 //var routine = func(["i"],[val("var gl=0,o=0,t={},c;last:for(;;){next:print(gl+' at '+o+' '+(typeof t=='undefined'));switch(gl){case 0:")]); // empty parser routine
    858 var routine = func(["i"],[val("var gl=0,o=0,t={},c;last:for(;;){next:/*print(gl);*/switch(gl){case 0:")]); // empty parser routine
    887 var iter = 300;
    888 var routine = func(["i"],[val("var gl=0,o=0,t={},c;last:for(;;){next:switch(gl){case 0:")]); // empty parser routine
    859 889
    860 890 var grammar;// = both(lit("hi"),end()); // a grammar expression definition
    861 891 //grammar = either(either(either(lit("hi"),lit("ha")),lit("hi")),lit("ho"));
     
    867 897 //grammar = plus(lit("a"));
    868 898 //grammar = both(plus(lit("a")),lit("aa"));
    869 899 //grammar = both(both(plus(either(lit("aa"),lit("a"))),lit('aaa')),end());
    870 grammar = both(both(plus(either(lit("aa"),lit("a"))),lit(Array(5000).join('a'))),end());
    900 grammar = both(both(plus(either(lit("aa"),lit("a"))),lit(Array(iter).join('a'))),end());
    901 //grammar = both(both(plus(plus(lit("a"))),lit(Array(iter).join('a'))),end());
    871 902
    872 903 grammar.fail = {id:1};
    873 904 grammar.done = grammar.notd = {id:-1};
     
    889 920
    890 921 var parserf = routine.emit();
    891 922
    892 //print(parserf);
    923 if (dbg)
    924 print(parserf);
    893 925
    894 926 var parser = eval(parserf); // compile the javascript function to machine code
    895 927
    896 var input = utf32str(Array(5001).join('a'));
    928 var input = utf32str(Array(iter+1).join('a'));
    929 //var input = utf32str('aaaa');
    897 930
    898 931 parser(input);
    899 932