こどもてるびぃ

 | 

2006-05-29

[]Ruby Quiz - Regexp.build() (#4)

やっぱり 3D はめんどくさいからやめた。第4問は、整数Rangeマッチする正規表現を作ろう、みたいな話。これもすごい大変だったよ。

require 'test/unit'

class RegexpBuildTest < Test::Unit::TestCase
  def test_unfold_integers
    regexp = Regexp.build_unfold_integers(3, 7)
    assert regexp.match('3')
    assert regexp.match('7')
    assert !regexp.match('13')
    assert !regexp.match('31')
    assert !regexp.match('-3')
  end

  def test_unfold
    regexp = Regexp.build_unfold(98, 99, 2000..2005)
    assert !regexp.match('04')
    assert regexp.match('2004')
    assert regexp.match('99')
  end

# too heavy
#   def test_unfold_million
#     regexp = Regexp.build_unfold(0 .. 1000000)
#     assert !regexp.match('-1')
#   end

  def test_digit_list
    assert_equal [1, 2, 3], DigitList.new(123, 3)
    assert_equal [0, 0, 1], DigitList.new(1, 3)
    assert_equal [0, 0, 0, 0], DigitList.zero(4)
    assert_equal [9, 9, 9, 9], DigitList.max(4)
  end

  def test_digit_list_range
    range = (3 .. 34).to_digit_list_range
    assert_equal [3], range.first
    assert_equal [3, 4], range.last

    range = (3 ... 34).to_digit_list_range
    assert_equal [3, 3], range.last
  end

  def test_range_regexp_one
    regexp = (1 .. 5).to_regexp.exact
    assert regexp.match('1')
    assert !regexp.match('11')
  end

  def test_range_regexp_two
    regexp = (12 .. 15).to_regexp.exact
    assert regexp.match('13')
    assert !regexp.match('16')

    regexp = (12 .. 23).to_regexp.exact
    assert regexp.match('13')
    assert regexp.match('21')
    assert !regexp.match('123')

    regexp = (12 .. 89).to_regexp.exact
    assert regexp.match('13')
    assert regexp.match('56')
    assert !regexp.match('93')
  end

  def test_range_regexp_three
    regexp = (205 .. 590).to_regexp.exact
    assert regexp.match('205')
    assert regexp.match('509')
    assert regexp.match('509')
    assert !regexp.match('591')
  end

  def test_range_regexp_other_order
    regexp = (3 .. 590).to_regexp.exact
    assert regexp.match('3')
    assert !regexp.match('03')
    assert regexp.match('102')
    assert regexp.match('590')
    assert !regexp.match('5901')
  end

  def test_sign
    assert_equal 0, DigitList.new(0).sign
    assert_equal 1, DigitList.new(12).sign
    assert_equal -1, DigitList.new(-100).sign

    ranges = (-12 .. 3).to_digit_list_range.split_by_sign
    assert_equal -1, ranges[0].first.sign
    assert_equal [1, 2], ranges[0].first
    assert_equal [0], ranges[0].last
    assert_equal [0], ranges[1].first
    assert_equal 1, ranges[1].last.sign
    assert_equal [3], ranges[1].last
  end

  def test_negate
    range = (-12 .. 2).to_digit_list_range
    negative = range.negate

    assert_equal -1, range.first.sign
    assert_equal [1, 2], range.first

    assert_equal 1, negative.last.sign
    assert_equal [1, 2], negative.last
    assert_equal -1, negative.first.sign
    assert_equal [2], negative.first
  end

  def test_range_regexp_negative
    regexp = (-104 .. -3).to_regexp.exact
    assert regexp.match('-5')
    assert !regexp.match('5')
    assert !regexp.match('0')
    assert regexp.match('-100')
    assert !regexp.match('-105')
    assert !regexp.match('-0100')
  end

  def test_range_regexp_nega_pos
    regexp = (-104 .. 12).to_regexp.exact
    assert regexp.match('-5')
    assert regexp.match('5')
    assert regexp.match('0')
    assert regexp.match('-15')
    assert !regexp.match('15')
    assert regexp.match('-100')
    assert !regexp.match('-105')
  end

  def test_build
    regexp = Regexp.build(98, 99, 2000..2005)
    assert regexp.match('99')
    assert regexp.match('2004')
    assert !regexp.match('04')
  end

  def test_build_million
    regexp = Regexp.build(0 .. 1000000)
    assert !regexp.match('-1')
  end
end

class Regexp
  def self.build_unfold_integers(*integers)
    self.union(* integers.map {|i| i.to_regexp }).exact
  end
  def self.build_unfold(*args)
    integers = args.inject([]) do |array, arg|
      if arg.kind_of?(Enumerable)
        array + arg.to_a
      else
        array << arg
      end
    end
    self.build_unfold_integers(*integers)
  end

  def self.build(*args)
    self.union(* args.map {|a| a.to_regexp }).exact
  end

  def exact
    /\A#{self}\z/
  end
end

class Integer
  def to_regexp
    /#{self}/
  end
end

class Array
  def drop(n)
    self[n ..  -1]
  end
end

class DigitList < Array
  attr_accessor :sign
  def initialize(integer, n = nil)
    @sign = integer > 0 ? 1 : integer < 0 ? -1 : 0
    abs = integer.abs
    super(("%0#{n if n}d" % abs).split('').map {|i| i.to_i })
  end

  def self.zero(n)
    return self.new(0, n)
  end
  def self.min(n)
    return self.new(10 ** (n - 1))
  end
  def self.max(n)
    return self.new(10 ** n - 1, n)
  end
end


class Range
  def to_digit_list_range
    ifirst = first.to_i
    ilast = last.to_i
    ilast = ilast - 1 if exclude_end? && ifirst < ilast
    DigitListRange.new(DigitList.new(ifirst), DigitList.new(ilast))
  end
  def to_regexp
    to_digit_list_range.to_regexp
  end
end

class DigitListRange < Range
  def initialize(first, last)
    @order = [first.size, last.size].min
    super(first, last)
  end

  def to_regexp
    if first.sign * last.sign < 0
      Regexp.union(* split_by_sign.map {|range| range.to_regexp})
    elsif first.sign < 0
      /\-#{negate.to_unsigned_regexp}/
    else
      to_unsigned_regexp
    end
  end

  def to_unsigned_regexp
    if first.size < last.size
      Regexp.union(*[
        self.class.new(first, DigitList.max(last.size - 1)).to_unsigned_regexp,
        self.class.new(DigitList.min(last.size), last).to_unsigned_regexp
      ]);
    elsif @order == 1
      /[#{first[0]}-#{last[0]}]/
    elsif first[0] == last[0]
      /#{first[0]}#{self.class.new(first.drop(1), last.drop(1)).to_unsigned_regexp}/
    else
      regs = [
        /#{first[0]}#{self.class.new(first.drop(1), DigitList.max(@order - 1)).to_unsigned_regexp}/,
        /#{last[0]}#{self.class.new(DigitList.zero(@order - 1), last.drop(1)).to_unsigned_regexp}/
      ];
      if first[0] + 1 < last[0]
        regs << /[#{first[0] + 1}-#{last[0] - 1}]\d{#{@order - 1}}/
      end
      Regexp.union(*regs)
    end
  end

  def negate
    new_first = last.clone
    new_last = first.clone
    new_first.sign *= -1
    new_last.sign *= -1
    self.class.new(new_first, new_last)
  end

  def split_by_sign
    if first.sign * last.sign >= 0
      [self]
    else
      [self.class.new(first, DigitList.new(0)), self.class.new(DigitList.new(0), last)]
    end
  end
end
 |