# usage:
#   ruby rx.rb                      # run the unit test
#   ruby -rrx -e 'p matchstr(...)'  # use rx as library. don't run the unit test.
#
# A small backtracking matcher. Patterns are nested arrays (an AST):
#
#   [:empseq]            empty sequence
#   [:lit, sym]          one symbol equal to sym
#   [:cat, e1, e2]       e1 followed by e2
#   [:alt, e1, e2]       e1 or e2 (e1 is tried first)
#   [:rep, e]            zero or more e, longest first
#   [:rep_lazy, e]       zero or more e, shortest first
#   [:opt, e]            e or nothing (e first)
#   [:opt_lazy, e]       nothing or e (nothing first)
#   [:plus, e]           one or more e, longest first
#   [:plus_lazy, e]      one or more e, shortest first
#   [:anysym]            any single symbol
#   [:string_start]      position 0
#   [:string_end]        position seq.length
#   [:line_start]        position 0, or just after a "\n" (but not at the very end)
#   [:line_end]          position seq.length, or just before a "\n"
#
# Matching runs over an array of symbols (strings are split into characters).
# Every try_* method yields each end position at which the pattern can match
# when started at +pos+, in order of preference.

# Splits +str+ on matches of +pat+, following the rules of
# String#split(regexp) without a limit:
#
# * an empty match never produces a leading empty field,
# * consecutive empty matches split the text into single characters,
# * trailing empty fields are removed,
# * an empty +str+ yields [].
#
# Returns an array of strings.
def splitstr(str, pat)
  chars = str.split(//)
  len = chars.length
  pieces = []
  field_start = 0      # index where the field currently being collected begins
  search_from = 0      # index from which the next match is searched
  after_empty = false  # true when we just stepped over an empty match

  while (found = find_match(chars, pat, search_from))
    s, e = found
    if s == search_from && s == e
      # Empty match exactly where the search started.
      if len.zero?
        pieces << ''
        break
      elsif after_empty
        # Second empty match in a row: emit the single character in between.
        pieces << chars[field_start]
        field_start = search_from
      else
        # Step over one character and retry, so that a non-empty match
        # starting right here still gets a chance.
        search_from += 1
        after_empty = true
        next
      end
    else
      pieces << chars[field_start...s].join
      field_start = search_from = e
    end
    after_empty = false
  end

  pieces << chars[field_start..-1].join if len > field_start
  pieces.pop while pieces.last == ''
  pieces
end

# Finds the first match of +pat+ in +ary+ starting at index +beg+ or later.
# Returns [start, end] (end is exclusive) for the preferred match at the
# leftmost matching start, or nil when there is no match.
def find_match(ary, pat, beg=0)
  beg.upto(ary.length) {|s|
    try(pat, ary, s) {|e|
      return [s, e]
    }
  }
  nil
end

# Replaces the first match of +pat+ in +str+ with the value of the block,
# which receives the matched text. Returns +str+ itself when nothing matches.
def subst(str, pat)
  ary = str.split(//)
  r = find_match(ary, pat)
  return str unless r
  s, e = r
  ary[0...s].join + yield(ary[s...e].join) + ary[e..-1].join
end

# Replaces every match of +pat+ in +str+ with the value of the block, which
# receives the matched text. Returns the resulting string.
def gsubst(str, pat)
  ary = str.split(//)
  i = 0
  result = []
  while i <= ary.length && (r = find_match(ary, pat, i))
    s, e = r
    result.concat ary[i...s]
    result << yield(ary[s...e].join)
    if s == e
      # Empty match: copy one symbol through and advance past it so the loop
      # makes progress. ary[e] is nil at the end of the input, which join
      # renders as an empty string.
      result << ary[e]
      e += 1
    end
    i = e
  end
  result.concat ary[i..-1] if i < ary.length
  result.join
end

# Returns every end position at which +exp+ matches +str+ when anchored at
# position 0, in the order the matcher produces them.
def matchstr(exp, str)
  result = []
  try(exp, str.split(//), 0) {|pos| result << pos }
  result
end

# Returns 0 when +exp+ matches at the beginning of +str+, nil otherwise.
def startwithmatch(exp, str)
  ary = str.split(//)
  try(exp, ary, 0) { return 0 }
  nil
end

# Returns the leftmost index at which +exp+ matches in +str+, or nil.
def hasmatch(exp, str)
  ary = str.split(//)
  0.upto(ary.length) {|i|
    try(exp, ary, i) { return i }
  }
  nil
end

# Returns how many times +try+ is invoked while enumerating all matches of
# +exp+ against +str+. Resets the global counter $try_count first.
def count_try(exp, str)
  $try_count = 0
  matchstr(exp, str)
  $try_count
end

# Number of +try+ invocations since the last reset (see count_try).
$try_count = 0

# Dispatches on the AST node type of +exp+ and yields every position at which
# +exp+ can end when matched against +seq+ starting at +pos+.
# Raises RuntimeError for an unknown node type.
def try(exp, seq, pos, &block)
  #p [pos, exp]
  $try_count += 1
  case exp[0]
  when :empseq
    try_empseq(seq, pos, &block)
  when :lit
    _, sym = exp
    try_lit(sym, seq, pos, &block)
  when :cat
    _, e1, e2 = exp
    try_cat(e1, e2, seq, pos, &block)
  when :alt
    _, e1, e2 = exp
    try_alt(e1, e2, seq, pos, &block)
  when :rep
    _, e = exp
    try_rep(e, seq, pos, &block)
  when :anysym
    try_anysym(seq, pos, &block)
  when :string_start
    try_string_start(seq, pos, &block)
  when :string_end
    try_string_end(seq, pos, &block)
  when :line_start
    try_line_start(seq, pos, &block)
  when :line_end
    try_line_end(seq, pos, &block)
  when :opt
    _, e = exp
    try_opt(e, seq, pos, &block)
  when :plus
    _, e = exp
    try_plus(e, seq, pos, &block)
  when :rep_lazy
    _, e = exp
    try_rep_lazy(e, seq, pos, &block)
  when :opt_lazy
    _, e = exp
    try_opt_lazy(e, seq, pos, &block)
  when :plus_lazy
    _, e = exp
    try_plus_lazy(e, seq, pos, &block)
  else
    raise "unexpected AST: #{exp.inspect}"
  end
end

# Optional +e+: yields the matches of +e+ first, then the empty match.
def try_opt(e, seq, pos, &block)
  try(e, seq, pos, &block)
  yield pos
end

# Lazy optional +e+: yields the empty match first, then the matches of +e+.
def try_opt_lazy(e, seq, pos, &block)
  yield pos
  try(e, seq, pos, &block)
end

# Zero-width assertion: succeeds only at the end of +seq+.
def try_string_end(seq, pos)
  yield pos if pos == seq.length
end

# Zero-width assertion: succeeds only at position 0.
def try_string_start(seq, pos)
  yield pos if pos == 0
end

# Zero-width assertion: succeeds at position 0, or right after a "\n" as long
# as +pos+ is not the end of +seq+.
def try_line_start(seq, pos)
  if pos == 0 || (pos < seq.length && seq[pos-1] == "\n")
    yield pos
  end
end

# Zero-width assertion: succeeds at the end of +seq+ or right before a "\n".
def try_line_end(seq, pos)
  if pos == seq.length || seq[pos] == "\n"
    yield pos
  end
end

# Consumes any single symbol (including "\n").
def try_anysym(seq, pos, &block)
  if pos < seq.length
    yield pos+1
  end
end

# The empty sequence matches everywhere without consuming anything.
def try_empseq(seq, pos)
  yield pos
end

# Consumes one symbol when it equals +sym+.
def try_lit(sym, seq, pos)
  #p [:try, sym, seq, pos]
  if pos < seq.length && seq[pos] == sym
    yield pos + 1
  end
end

# Concatenation: for every end position of +e1+, tries +e2+ from there.
def try_cat(e1, e2, seq, pos, &block)
  try(e1, seq, pos) {|pos2|
    try(e2, seq, pos2, &block)
  }
end

# Alternation: yields the matches of +e1+, then those of +e2+.
def try_alt(e1, e2, seq, pos, &block)
  try(e1, seq, pos, &block)
  try(e2, seq, pos, &block)
end

# Greedy repetition: longer matches are yielded before shorter ones.
# The "pos < pos2" guard stops recursion when +exp+ matched without consuming
# anything, which would otherwise loop forever.
def try_rep(exp, seq, pos, &block)
  try(exp, seq, pos) {|pos2|
    try_rep(exp, seq, pos2, &block) if pos < pos2
  }
  yield pos
end

# Lazy repetition: shorter matches are yielded before longer ones.
# Uses the same "pos < pos2" progress guard as try_rep.
def try_rep_lazy(e, seq, pos, &block)
  yield pos
  try(e, seq, pos) {|pos2|
    try_rep_lazy(e, seq, pos2, &block) if pos < pos2
  }
end

# Lazy one-or-more: one mandatory +e+ followed by a lazy repetition.
def try_plus_lazy(e, seq, pos, &block)
  try(e, seq, pos) {|pos2|
    try_rep_lazy(e, seq, pos2, &block)
  }
end

# Greedy one-or-more: one mandatory +e+ followed by a greedy repetition.
def try_plus(e, seq, pos, &block)
  try(e, seq, pos) {|pos2|
    try_rep(e, seq, pos2, &block)
  }
end

if $0 == __FILE__ # The trick to run the unit test only for non-library execution.
  require 'test/unit'

  class TestRX < Test::Unit::TestCase
    def test_empseq
      assert_equal([0], matchstr([:empseq], ""))
    end

    def test_lit
      assert_equal([], matchstr([:lit, "a"], ""))
      assert_equal([1], matchstr([:lit, "a"], "a"))
      assert_equal([1], matchstr([:lit, "a"], "aa"))
      assert_equal([], matchstr([:lit, "a"], "b"))
    end

    def test_cat
      assert_equal([], matchstr([:cat, [:lit, "a"], [:lit, "b"]], ""))
      assert_equal([], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "a"))
      assert_equal([2], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "ab"))
      assert_equal([2], matchstr([:cat, [:lit, "a"], [:lit, "b"]], "abc"))
    end

    def test_alt
      assert_equal([], matchstr([:alt, [:lit, "a"], [:lit, "b"]], ""))
      assert_equal([1], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "a"))
      assert_equal([1], matchstr([:alt, [:lit, "a"], [:lit, "b"]], "b"))
    end

    def test_rep
      assert_equal([0], matchstr([:rep, [:lit, "a"]], ""))
      assert_equal([5,4,3,2,1,0], matchstr([:rep, [:lit, "a"]], "aaaaa"))
    end

    def test_anysym
      assert_equal([7,6,5,4,3,2,1,0], matchstr([:rep, [:anysym]], "abc\ndef"))
    end

    def test_string_start
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], ""))
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], "a"))
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_start]], "aaa"))
    end

    def test_string_end
      assert_equal([0], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], ""))
      assert_equal([1], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "a"))
      assert_equal([3], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "aaa"))
      assert_equal([4], matchstr([:cat, [:rep, [:lit, "a"]], [:string_end]], "aaaa"))
      assert_equal([1], matchstr([:cat, [:lit, "a"], [:string_end]], "a"))
      assert_equal([], matchstr([:cat, [:string_end], [:lit, "a"]], "a"))
    end

    def test_line_start
      assert_equal([0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], ""))
      assert_equal([7,4,0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], "aaa\naa\na\n"))
      assert_equal([9,7,4,0], matchstr([:cat, [:rep, [:anysym]], [:line_start]], "aaa\naa\na\na"))
    end

    def test_line_end
      assert_equal([8,7,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "abc\ndef\n"))
      assert_equal([9,8,4,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "abc\n\ndef\n"))
      assert_equal([8,5,2], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "ab\ncd\nef"))
      assert_equal([0], matchstr([:cat, [:rep, [:anysym]], [:line_end]], ""))
      assert_equal([9,8,6,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "aaa\naa\na\n"))
      assert_equal([10,8,6,3], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "aaa\naa\na\na"))
      assert_equal([10,8,6,3,0], matchstr([:cat, [:rep, [:anysym]], [:line_end]], "\naa\naa\na\na"))
    end

    # def test_notnewline
    #   assert_equal([1], matchstr([:notnewline], "a"))
    #   assert_equal([1], matchstr([:notnewline], "b"))
    #   assert_equal([1], matchstr([:notnewline], "c"))
    #   assert_equal([], matchstr([:notnewline], "\n"))
    # end
    #

    def test_opt
      assert_equal([1,0], matchstr([:opt, [:lit, "a"]], "a"))
      assert_equal([0], matchstr([:opt, [:lit, "a"]], "b"))
      assert_equal([0], matchstr([:opt, [:lit, "a"]], ""))
      assert_equal([2], matchstr([:cat, [:opt, [:lit, "a"]], [:lit, "b"]], "ab"))
      assert_equal([], matchstr([:cat, [:opt, [:lit, "a"]], [:lit, "b"]], "ac"))
    end

    def test_opt_lazy
      assert_equal([0,1], matchstr([:opt_lazy, [:lit, "a"]], "a"))
      assert_equal([0], matchstr([:opt_lazy, [:lit, "a"]], "b"))
      assert_equal([0], matchstr([:opt_lazy, [:lit, "a"]], ""))
      assert_equal([2], matchstr([:cat, [:opt_lazy, [:lit, "a"]], [:lit, "b"]], "ab"))
      assert_equal([], matchstr([:cat, [:opt_lazy, [:lit, "a"]], [:lit, "b"]], "ac"))
    end

    def test_plus
      assert_equal([], matchstr([:plus, [:lit, "a"]], ""))
      assert_equal([1], matchstr([:plus, [:lit, "a"]], "a"))
      assert_equal([5,4,3,2,1], matchstr([:plus, [:lit, "a"]], "aaaaa"))
    end

    def test_rep_lazy
      assert_equal([0], matchstr([:rep_lazy, [:lit, "a"]], ""))
      assert_equal([0,1,2,3,4,5], matchstr([:rep_lazy, [:lit, "a"]], "aaaaa"))
    end

    def test_find_match
      assert_equal([1,3], find_match(["a","b","b","c","c","c"], [:plus, [:lit, "b"]]))
    end

    def test_subst
      assert_equal("aZccc", subst("abbccc", [:plus, [:lit, "b"]]) { "Z" })
      assert_equal("aZcccbb", subst("abbcccbb", [:plus, [:lit, "b"]]) { "Z" })
    end

    def test_gsubst
      assert_equal("aZcccZd", gsubst("abbcccbbbbd", [:plus, [:lit, "b"]]) { "Z" })
    end

    def test_splitstr
      assert_equal(["abc"], splitstr("abc", [:lit, "z"]))
      assert_equal("abc".split(/z/), splitstr("abc", [:lit, "z"]))
      assert_equal(["a", "c"], splitstr("abc", [:lit, "b"]))
      assert_equal("abc".split(/b/), splitstr("abc", [:lit, "b"]))
      assert_equal(["a", "c", "e", "g"], splitstr("abcdefg", [:alt, [:lit, "b"], [:alt, [:lit, "d"], [:lit, "f"]]]))
      assert_equal("abcdefg".split(/[bdf]/), splitstr("abcdefg", [:alt, [:lit, "b"], [:alt, [:lit, "d"], [:lit, "f"]]]))
      assert_equal(["a", "c", "e"], splitstr("abcdef", [:alt, [:lit, "b"], [:alt, [:lit, "d"], [:lit, "f"]]]))
      assert_equal("abcdef".split(/[bdf]/), splitstr("abcdef", [:alt, [:lit, "b"], [:alt, [:lit, "d"], [:lit, "f"]]]))
      assert_equal(["aa", "c", "d", "e", "fff"], splitstr("aabbbcbbdbebbbfff", [:plus, [:lit, "b"]]))
      assert_equal("aabbbcbbdbebbbfff".split(/b+/), splitstr("aabbbcbbdbebbbfff", [:plus, [:lit, "b"]]))
      assert_equal(["a", "b", "c"], splitstr("abc", [:empseq]))
      assert_equal("abc".split(//), splitstr("abc", [:empseq]))
      assert_equal(["", "", "", "def"], splitstr("abcdef", [:alt, [:lit, "a"], [:alt, [:lit, "b"], [:lit, "c"]]]))
      assert_equal("abcdef".split(/[abc]/), splitstr("abcdef", [:alt, [:lit, "a"], [:alt, [:lit, "b"], [:lit, "c"]]]))
      assert_equal(["", "c", "e"], splitstr("bcdef", [:alt, [:lit, "b"], [:alt, [:lit, "d"], [:lit, "f"]]]))
      assert_equal("bcdef".split(/[bdf]/), splitstr("bcdef", [:alt, [:lit, "b"], [:alt, [:lit, "d"], [:lit, "f"]]]))
      assert_equal([], splitstr("", [:lit, "z"]))
      assert_equal("".split(/z/), splitstr("", [:lit, "z"]))
      assert_equal(["abc"], splitstr("abc", [:string_start]))
      assert_equal("abc".split(/\A/), splitstr("abc", [:string_start]))
      assert_equal(["abc"], splitstr("abc", [:string_end]))
      assert_equal("abc".split(/\z/), splitstr("abc", [:string_end]))
      assert_equal(["a", "c"], splitstr("abc", [:rep, [:lit, "b"]]))
      assert_equal("abc".split(/b*/), splitstr("abc", [:rep, [:lit, "b"]]))
      assert_equal(["a", "c", "d", "c", "d"], splitstr("abcdbbcbd", [:rep, [:lit, "b"]]))
      assert_equal("abcdbbcbd".split(/b*/), splitstr("abcdbbcbd", [:rep, [:lit, "b"]]))
      assert_equal(["a", "b", "c", "d", "b", "b", "c", "b", "d"], splitstr("abcdbbcbd", [:rep_lazy, [:lit, "b"]]))
      assert_equal("abcdbbcbd".split(/b*?/), splitstr("abcdbbcbd", [:rep_lazy, [:lit, "b"]]))
      assert_equal(["", "c", "d", "c", "d"], splitstr("bcdbbcbd", [:rep, [:lit, "b"]]))
      assert_equal("bcdbbcbd".split(/b*/), splitstr("bcdbbcbd", [:rep, [:lit, "b"]]))
      assert_equal(["b", "c", "d", "b", "b", "c", "b", "d"], splitstr("bcdbbcbd", [:rep_lazy, [:lit, "b"]]))
      assert_equal("bcdbbcbd".split(/b*?/), splitstr("bcdbbcbd", [:rep_lazy, [:lit, "b"]]))
    end
  end
end