4 """Utilities for determining name variants.
6 Copyright (c) Andrew Flegg <andrew@bleb.org> 2009.
7 Released under the Artistic Licence."""
9 __non_alpha__ = re.compile("[^A-Za-z]+")
12 ['andrew', 'andy', 'andi', 'drew'],
13 ['benjamin', 'ben', 'benny'],
14 ['christian', 'chris'],
15 ['christopher', 'chris'],
17 ['daniel', 'dan', 'danny'],
18 ['matthew', 'matt', 'mat', 'matty'],
20 ['michael', 'mike', 'mic', 'mik', 'micky'],
22 ['robert', 'rob', 'bob', 'bobby', 'robbie'],
23 ['thomas', 'tom', 'tommy']
29 if (not name in __map__):
30 __map__[name] = set(row)
32 __map__[name] = __map__[name].union(row)
35 # -----------------------------------------------------------------------
36 def canonical(name, strip = True):
37 """Return a transliterated, lower-case version of name; optionally
38 stripping all non-alphabetic characters from the result."""
41 result = unicode(name).encode('trans').lower()
43 return __non_alpha__.sub('', result)
46 except UnicodeDecodeError:
49 return __non_alpha__.sub('', result)
53 # -----------------------------------------------------------------------
55 """Return a set of names which should be checked for given the input
56 name. Any word which has a replacement will be replaced, and an
57 iterable list of all variants will be returned."""
63 name = canonical(name, strip = False)
65 bits = name.split(' ')
68 for replacement in __map__[bit]:
69 result.add(name.replace(bit, replacement))