# Composite codes we are interested in (HIM cannot use any other).
# Each entry is the code point of a combining mark; the order of this list
# is the order in which the per-mark tables are emitted further down.
interested = [
    0x0300,  # COMBINING GRAVE ACCENT
    0x0301,  # COMBINING ACUTE ACCENT
    0x0302,  # COMBINING CIRCUMFLEX ACCENT
    0x0303,  # COMBINING TILDE
    0x0304,  # COMBINING MACRON
    # NOTE(review): 0x032e is COMBINING BREVE BELOW; confirm that 0x0306
    # (COMBINING BREVE) was not the intended value here.
    0x032e,  # COMBINING BREVE BELOW
    0x0307,  # COMBINING DOT ABOVE
    0x0308,  # COMBINING DIAERESIS
    0x030a,  # COMBINING RING ABOVE
    0x030b,  # COMBINING DOUBLE ACUTE ACCENT
    0x030c,  # COMBINING CARON
    0x0327,  # COMBINING CEDILLA
    0x0328,  # COMBINING OGONEK
    0x0323,  # COMBINING DOT BELOW
    0x0309,  # COMBINING HOOK ABOVE
    0x031b,  # COMBINING HORN
]
# Auxiliary function for parsing the decomposition segment in UnicodeData
hex_p = re.compile(r"\s+")
def parse_decomp(str):
    """Return the code points listed in a decomposition segment.

    The segment is split on runs of whitespace.  Tokens that start with
    '<' (a tag such as "<compat>") are dropped, and every remaining token
    is parsed as a hexadecimal integer.

    Parameters:
        str: the raw decomposition field text.  The name shadows the
            builtin but is kept so existing callers are unaffected.

    Returns:
        A list of ints, one per hexadecimal token, in input order.  An
        empty or whitespace-only segment yields an empty list.

    Raises:
        ValueError: if a non-tag token is not valid hexadecimal.
    """
    tokens = hex_p.split(str)
    # Splitting on whitespace produces empty strings for an empty segment
    # or for leading/trailing whitespace; skip them so that inspecting the
    # first character can never fail with IndexError.
    return [int(tok, 16) for tok in tokens
            if tok and not tok.startswith('<')]
16 # First read all composition exclusions
17 p = re.compile(r"(?P<value>[0-9A-F]+)")
18 f = open('CompositionExclusions.txt', 'r')
26 value = int(m.group('value'), 16)
32 # Now read decomposition data
33 p = re.compile(r"(?P<value>[0-9A-F]+);(?P<name>[^;]*);[^;]*;(?P<canonical_class>\d+);[^;]*;(?P<decomp>[^;]*);")
34 f = open('UnicodeData.txt', 'r')
40 print "#warning invalid line:", line
43 value = int(m.group('value'), 16)
44 name = m.group('name')
45 segment = m.group('decomp')
47 compat = segment[0] == '<'
48 if not compat and value not in exclusions:
49 decomp = parse_decomp(segment)
56 if second not in composite:
57 composite[second] = {first: value}
59 composite[second][first] = value
61 # Nothing left to be collected. Let there be light!
63 /* This file is autogenerated by builder.py from UnicodeData.txt */
64 /* Do not edit; instead edit builder.py and regenerate. */
68 unsigned short result;
72 unsigned short second;
73 unsigned short data_size;
74 const struct item * data;
79 for second in interested:
80 if second not in composite:
81 print "#warning data for composite 0x%04x not found" % second
84 print "static const struct item values_for_%04x[] = {" % second
86 items = composite[second].items()
88 for first, value in items:
89 print "\t{0x%x, 0x%x}," % (first, value)
96 print "static const struct table composite_table[] = {"
97 for second in interested:
98 if second not in composite:
101 data_size = len(composite[second])
102 print "\t{0x%x, %u, values_for_%04x}, " % (second, data_size, second)