#
# $Header: /usr/local/cvsroot/pythondoc/regex_util.py,v 1.2 1999/05/01 01:08:35 daniel Exp $
#
# Copyright (C) Daniel Larsson
# All Rights Reserved.
#
# See copyright notice in the file 'LICENSE.TXT', which should have accompanied
# this distribution.
#
import re

class RegexIter:
    """Iterate over consecutive matches of a regular expression.

    Given a regular expression and a string, this class lets you iterate
    over the matches of the expression in the string using the pattern's
    ``match`` method: every match has to begin exactly where the previous
    one ended, and iteration stops at the first position where the
    expression does not match.

    Iteration relies on the sequence protocol: ``__getitem__`` raises
    ``IndexError`` when there are no more matches.  The index passed in
    is ignored; every call advances to the next match, so an instance
    can be traversed only once.
    """

    # Becomes true on an instance once a zero-length match was returned.
    _done = False

    def __init__(self, text, rexp):
        """Set up iteration over *text*.

        text -- the string to scan.
        rexp -- a pattern string (compiled here with ``re.compile``) or
                an already compiled pattern object, which is used as is.
        """
        # The three literals name the text and byte string types of the
        # running interpreter (str/unicode on Python 2, str/bytes on
        # Python 3).  isinstance() also accepts subclasses of them.
        if isinstance(rexp, (type(''), type(u''), type(b''))):
            rexp = re.compile(rexp)
        self.rexp = rexp
        self.text = text
        self.pos = 0

    def __getitem__(self, index):
        """Return the next match object.

        Raises IndexError when the expression no longer matches at the
        current position; that is what ends a ``for`` loop.
        """
        if self._done:
            raise IndexError # Stop iteration
        match = self.rexp.match(self.text, self.pos)
        if not match:
            raise IndexError # Stop iteration
        if match.end() == match.start():
            # A zero-length match leaves the position where it was, so
            # the next anchored attempt would return the very same match
            # forever.  Hand it out once and stop afterwards.
            self._done = True
        self.pos = match.end()
        return match

class RegexSearchIter(RegexIter):
    """Iterate over all matches of a regular expression in a string.

    Given a regular expression and a string, this class lets you iterate
    over the matches of the expression in the string using the pattern's
    ``search`` method: text between two matches is skipped, and every
    search starts where the previous match ended.
    """

    def __getitem__(self, index):
        """Return the next match object.

        The index is ignored.  Raises IndexError when no further match
        is found, which is what ends a ``for`` loop.
        """
        # search() treats a start position beyond the end of the text as
        # the end of the text, so the end has to be detected here.
        if self.pos > len(self.text):
            raise IndexError # Stop iteration
        match = self.rexp.search(self.text, self.pos)
        if not match:
            raise IndexError # Stop iteration
        next_pos = match.end()
        if next_pos == match.start():
            # A zero-length match would be found again at the same spot;
            # step over one character so the scan keeps moving.
            next_pos = next_pos + 1
        self.pos = next_pos
        return match

def _compile_regex(rexp):
    if type(rexp) == type(''):
	rexp = re.compile(rexp)
    return rexp

def _nearest_match(m1, m2):
    if not m1 or (m2 and m2.start() < m1.start()):
	return m2
    return m1

class MultiRegexSearchIter:
    """Iterate over a string using several regular expressions at once.

    At every step each expression is searched for (with the pattern's
    ``search`` method) from the current position.  The match that starts
    earliest is returned and scanning continues where it ended.  When
    several expressions match at the same start position, the one given
    first wins.

    Iteration relies on the sequence protocol: ``__getitem__`` raises
    ``IndexError`` when no expression matches any more.  The index
    passed in is ignored, so an instance can be traversed only once.
    """

    def __init__(self, text, rexps=(), *args):
        """Set up iteration over *text*.

        text  -- the string to scan.
        rexps -- a sequence of patterns, or a single pattern.
        args  -- further patterns.

        Every pattern may be a pattern string (compiled here with
        ``re.compile``) or an already compiled pattern object.
        """
        # The three literals name the text and byte string types of the
        # running interpreter (str/unicode on Python 2, str/bytes on
        # Python 3).
        string_types = (type(''), type(u''), type(b''))
        # A lone pattern passed as `rexps` must not be unpacked: tuple()
        # would split a pattern string into one-character patterns.
        if isinstance(rexps, string_types) or hasattr(rexps, 'search'):
            rexps = (rexps,)
        compiled = []
        for rexp in tuple(rexps) + args:
            if isinstance(rexp, string_types):
                rexp = re.compile(rexp)
            compiled.append(rexp)
        # A real list, because it is walked again on every step.
        self.rexps = compiled
        self.text = text
        self.pos = 0

    def __getitem__(self, index):
        """Return the earliest match of any expression.

        Raises IndexError when none of the expressions matches any more
        (or when no expressions were given); that ends a ``for`` loop.
        """
        # search() treats a start position beyond the end of the text as
        # the end of the text, so the end has to be detected here.
        if self.pos > len(self.text):
            raise IndexError # Stop iteration
        first_match = None
        for rexp in self.rexps:
            match = rexp.search(self.text, self.pos)
            if not match:
                continue
            # Strictly earlier only, so ties go to the earlier pattern.
            if first_match is None or match.start() < first_match.start():
                first_match = match
        if first_match is None:
            raise IndexError # Stop iteration
        next_pos = first_match.end()
        if next_pos == first_match.start():
            # A zero-length match would be found again at the same spot;
            # step over one character so the scan keeps moving.
            next_pos = next_pos + 1
        self.pos = next_pos
        return first_match


def test():
    """Run a small self-test of the iterator classes in this module.

    Prints a line before each check and raises AssertionError when an
    iterator does not produce the expected words.
    """
    s1 = "*one* *two* *three*"
    s2 = "Hello, *one* how *two* is *three* the weather?"
    s3 = "Hello, *one* how %two% is -three- the weather?"
    # Raw strings: the backslashes belong to the regular expressions.
    rexp = r'\*(?P<word>[a-zA-Z]*)\*[ ]?'
    rexp2 = r'\%(?P<word>[a-zA-Z]*)\%[ ]?'
    rexp3 = r'\-(?P<word>[a-zA-Z]*)\-[ ]?'

    def words(matches):
        # Collect the 'word' group of every match the iterator yields.
        result = []
        for match in matches:
            result.append(match.group('word'))
        return result

    print("Matching the string '%s' using RegexIter:" % s1)
    assert words(RegexIter(s1, rexp)) == ['one', 'two', 'three']

    print("Matching the string '%s' using RegexSearchIter:" % s1)
    assert words(RegexSearchIter(s1, rexp)) == ['one', 'two', 'three']

    # s2 does not start with a match, so anchored matching finds nothing.
    print("Matching the string '%s' using RegexIter:" % s2)
    assert words(RegexIter(s2, rexp)) == []

    print("Matching the string '%s' using RegexSearchIter:" % s2)
    assert words(RegexSearchIter(s2, rexp)) == ['one', 'two', 'three']

    # The first pattern argument is a sequence of patterns; additional
    # patterns follow as extra positional arguments.
    print("Matching the string '%s' using MultiRegexSearchIter:" % s3)
    assert words(MultiRegexSearchIter(s3, (rexp, rexp2), rexp3)) == \
           ['one', 'two', 'three']

# Run the self-test when this file is executed as a script.
if __name__ == "__main__":
    test()

#
# $Log: regex_util.py,v $
# Revision 1.2  1999/05/01 01:08:35  daniel
# Removed Windows style line endings.
#
# 
# *****************  Version 2  *****************
# User: Daniel       Date: 98-08-06   Time: 17:24
# Updated in $/Pythondoc
# Added header and footer.
# 
