4 """Utilities for determining name variants.
6 Copyright (c) Andrew Flegg <andrew@bleb.org> 2009.
7 Released under the Artistic Licence."""
# Matches each run of one or more characters that are not ASCII letters
# (A-Z, a-z); canonical() substitutes '' for these runs when stripping.
9 __non_alpha__ = re.compile("[^A-Za-z]+")
# Each row groups forms of a given name that are treated as alternatives for
# one another (e.g. 'andrew', 'andy', 'andi', 'drew').
12 ['andrew', 'andy', 'andi', 'drew'],
13 ['benjamin', 'ben', 'benny'],
14 ['christian', 'chris'],
15 ['christopher', 'chris'],
17 ['daniel', 'dan', 'danny'],
18 ['joseph', 'joey', 'joe'],
21 ['matthew', 'matt', 'mat', 'matty'],
23 ['michael', 'mike', 'mic', 'mik', 'micky'],
25 ['robert', 'rob', 'bob', 'bobby', 'robbie'],
26 ['thomas', 'tom', 'tommy']
# Build __map__: a name that is not yet a key is mapped to the set of names
# in its row. The union assignment presumably runs in the other branch, so a
# name listed in several rows (e.g. 'chris') accumulates all of those rows
# instead of keeping only one of them.
# NOTE(review): the enclosing loops over rows/names and the branch keyword
# between the two assignments are not visible in this excerpt - confirm.
32 if (not name in __map__):
33 __map__[name] = set(row)
35 __map__[name] = __map__[name].union(row)
38 # -----------------------------------------------------------------------
39 def canonical(name, strip = True):
40 """Return a transliterated, lower-case version of name; optionally
41 stripping all non-alphabetic characters from the result."""
# `unicode` is a Python 2 builtin, so this function targets Python 2.
# NOTE(review): 'trans' is not a standard-library codec; presumably it is
# registered by a module imported outside this excerpt - confirm.
44 result = unicode(name).encode('trans').lower()
# Remove every run of characters outside A-Z/a-z (see __non_alpha__).
# NOTE(review): presumably guarded by `strip`; the condition itself is not
# visible in this excerpt.
46 return __non_alpha__.sub('', result)
# Fallback taken when the conversion above raises UnicodeDecodeError.
# NOTE(review): how `result` is produced on this path is not visible in this
# excerpt - confirm before relying on it.
49 except UnicodeDecodeError:
52 return __non_alpha__.sub('', result)
56 # -----------------------------------------------------------------------
58 """Return a set of names which should be checked for given the input
59 name. Any word which has a replacement will be replaced, and an
60 iterable list of all variants will be returned."""
# Canonicalise without stripping so that spaces survive for the split below.
66 name = canonical(name, strip = False)
68 bits = name.split(' ')
# Add one variant of the full name per alternative listed for this word.
# NOTE(review): str.replace substitutes every occurrence of `bit` as a
# substring of the whole name, not only the matching word (e.g. 'rob' inside
# 'robson' would also be rewritten) - confirm this is intended.
# NOTE(review): the loop over `bits` and any membership check on __map__ are
# not visible in this excerpt.
71 for replacement in __map__[bit]:
72 result.add(name.replace(bit, replacement))