Cleaning up the makefile
[gonvert] / gonvert / util / io.py
1 #!/usr/bin/env python
2
3
4 from __future__ import with_statement
5
6 import os
7 import pickle
8 import contextlib
9 import itertools
10 import codecs
11 from xml.sax import saxutils
12 import csv
13 try:
14         import cStringIO as StringIO
15 except ImportError:
16         import StringIO
17
18
@contextlib.contextmanager
def change_directory(directory):
        """
        Run the with-block with `directory` as the working directory.

        Yields a (previous, current) pair holding what os.getcwd() reported
        before and after the switch. The previous working directory is
        restored when the block exits, whether or not it raised.
        """
        cameFrom = os.getcwd()
        os.chdir(directory)
        wentTo = os.getcwd()

        try:
                yield cameFrom, wentTo
        finally:
                # Always go back, even if the body blew up
                os.chdir(cameFrom)
29
30
@contextlib.contextmanager
def pickled(filename):
        """
        Expose a pickle-backed dictionary for the duration of a with-block.

        If `filename` exists, the dictionary pickled in it is loaded;
        otherwise an empty dictionary is used. The with-block receives a
        callable getter(item, factory) that returns the value stored under
        `item`, first creating it with factory() when it is missing.

        When the block finishes normally the dictionary is pickled back to
        `filename`. If the block raises, nothing is written.

        Here is an example usage:
        with pickled("foo.db") as p:
                p("users", list).append(["srid", "passwd", 23])
        """
        # NOTE: unpickling can execute arbitrary code; only use this on files
        # that come from a trusted source.
        if os.path.isfile(filename):
                # Binary mode keeps the pickle stream intact on every platform,
                # and the with-statement closes the handle promptly.
                with open(filename, "rb") as source:
                        data = pickle.load(source)
        else:
                data = {}

        def getter(item, factory):
                # factory is only invoked when the item is actually missing
                if item not in data:
                        data[item] = factory()
                return data[item]

        yield getter

        with open(filename, "wb") as sink:
                pickle.dump(data, sink)
54
55
@contextlib.contextmanager
def redirect(object_, attr, value):
        """
        Temporarily rebind attribute `attr` of `object_` to `value`.

        The attribute's prior value is read up front and put back when the
        with-block exits, including when the block raises.

        >>> import sys
        >>> with redirect(sys, 'stdout', open('stdout', 'w')):
        ...     print "hello"
        ...
        >>> print "we're back"
        we're back
        """
        previous = getattr(object_, attr)
        setattr(object_, attr, value)
        try:
                yield
        finally:
                setattr(object_, attr, previous)
72
73
def pathsplit(path):
        """
        Break `path` into its components by splitting on os.path.sep.

        >>> pathsplit("/a/b/c")
        ['', 'a', 'b', 'c']
        >>> pathsplit("./plugins/builtins.ini")
        ['.', 'plugins', 'builtins.ini']
        """
        return path.split(os.path.sep)
83
84
def commonpath(l1, l2, common=None):
        """
        Split two component lists into their shared prefix and the remainders.

        Returns a 3-tuple (shared, rest_of_l1, rest_of_l2) where `shared` is
        the longest run of leading components on which `l1` and `l2` agree.
        Either list may be empty.

        `common` is not used by the computation; it is accepted only so that
        existing callers passing it keep working.

        >>> commonpath(pathsplit('/a/b/c/d'), pathsplit('/a/b/c1/d1'))
        (['', 'a', 'b'], ['c', 'd'], ['c1', 'd1'])
        >>> commonpath(pathsplit("./plugins/"), pathsplit("./plugins/builtins.ini"))
        (['.', 'plugins'], [''], ['builtins.ini'])
        >>> commonpath(pathsplit("./plugins/builtins"), pathsplit("./plugins"))
        (['.', 'plugins'], ['builtins'], [])
        >>> commonpath([], ['a'])
        ([], [], ['a'])
        """
        if l1 == l2:
                return l1, [], []

        # Count the leading components that match; zip() stops at the shorter
        # list, so an empty list simply yields a shared length of zero.
        shared = 0
        for leftDir, rightDir in zip(l1, l2):
                if leftDir != rightDir:
                        break
                shared += 1

        return l1[:shared], l1[shared:], l2[shared:]
107
108
def relpath(p1, p2):
        """
        Build the relative path that leads from `p1` to `p2`.

        Both paths are normalised with os.path.normpath and split into
        components. The result climbs one '..' for every component that only
        `p1` has, then descends through the components that only `p2` has.
        Identical paths give '.' followed by the separator.

        >>> relpath('/', '/')
        './'
        >>> relpath('/a/b/c/d', '/')
        '../../../../'
        >>> relpath('/a/b/c/d', '/a/b/c1/d1')
        '../../c1/d1'
        >>> relpath('/a/b/c/d', '/a/b/c1/d1/')
        '../../c1/d1'
        >>> relpath("./plugins/builtins", "./plugins")
        '../'
        >>> relpath("./plugins/", "./plugins/builtins.ini")
        'builtins.ini'
        """
        sourceParts = pathsplit(os.path.normpath(p1))
        destParts = pathsplit(os.path.normpath(p2))
        (shared, climbOut, descendInto) = commonpath(sourceParts, destParts)

        if not climbOut and not descendInto:
                # Same location on both sides
                return "."+os.sep

        upwards = ('..' + os.sep) * len(climbOut)
        return os.path.join(upwards, *descendInto)
136
137
class UTF8Recoder(object):
        """
        Iterator over a stream in some source encoding that hands back each
        line re-encoded as UTF-8.
        """
        def __init__(self, f, encoding):
                # Wrap the raw stream in a decoding reader for `encoding`
                makeReader = codecs.getreader(encoding)
                self.reader = makeReader(f)

        def __iter__(self):
                return self

        def next(self):
                decodedLine = self.reader.next()
                return decodedLine.encode("utf-8")
150
151
class UnicodeReader(object):
        """
        CSV reader that iterates over the rows of file "f", which is encoded
        in the given encoding, and returns every cell as a unicode string.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
                # csv only sees UTF-8 lines; the recoder handles the real encoding
                utf8Lines = UTF8Recoder(f, encoding)
                self.reader = csv.reader(utf8Lines, dialect=dialect, **kwds)

        def __iter__(self):
                return self

        def next(self):
                utf8Row = self.reader.next()
                return [unicode(cell, "utf-8") for cell in utf8Row]
168
class UnicodeWriter(object):
        """
        CSV writer that writes rows to file "f", encoding the output in the
        given encoding.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
                # csv output is collected in an in-memory queue first
                self.queue = StringIO.StringIO()
                self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
                self.stream = f
                makeEncoder = codecs.getincrementalencoder(encoding)
                self.encoder = makeEncoder()

        def writerow(self, row):
                utf8Cells = [cell.encode("utf-8") for cell in row]
                self.writer.writerow(utf8Cells)
                # Pull the UTF-8 text for this row back out of the queue ...
                utf8Line = self.queue.getvalue()
                line = utf8Line.decode("utf-8")
                # ... re-encode it for the target and pass it on
                self.stream.write(self.encoder.encode(line))
                # leave the queue empty for the next row
                self.queue.truncate(0)

        def writerows(self, rows):
                for eachRow in rows:
                        self.writerow(eachRow)
197
198
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
        """
        Generate the rows of `unicode_csv_data` parsed as CSV, with every cell
        returned as a unicode string.
        """
        # csv.py doesn't do Unicode, so it is fed UTF-8 encoded lines instead
        utf8Lines = utf_8_encoder(unicode_csv_data)
        for utf8Row in csv.reader(utf8Lines, dialect=dialect, **kwargs):
                # turn each UTF-8 cell back into Unicode
                yield [unicode(field, 'utf-8') for field in utf8Row]
206
207
def utf_8_encoder(unicode_csv_data):
        """
        Lazily yield each item of `unicode_csv_data` encoded as UTF-8.
        """
        for text in unicode_csv_data:
                encoded = text.encode('utf-8')
                yield encoded
211
212
# Extra entity -> plain-character replacements handed to saxutils.unescape()
# by unescape() below.
# NOTE(review): the keys are written as the conventional named entities;
# confirm against upstream that these are the exact spellings intended
# (e.g. "&apos;" rather than "&#39;").
_UNESCAPE_ENTITIES = {
        "&quot;": '"',
        "&nbsp;": " ",
        "&apos;": "'",
}


# Inverse table (plain character -> entity) handed to saxutils.escape().
_ESCAPE_ENTITIES = dict(
        (plain, entity) for (entity, plain) in _UNESCAPE_ENTITIES.items()
)
# Ordinary spaces must pass through escape() untouched, so the
# " " -> "&nbsp;" entry is removed from the inverse table.
del _ESCAPE_ENTITIES[" "]
222
223
def unescape(text):
        """
        Return `text` run through saxutils.unescape with the additional
        replacements listed in _UNESCAPE_ENTITIES.
        """
        return saxutils.unescape(text, _UNESCAPE_ENTITIES)
227
228
def escape(text):
        """
        Return `text` run through saxutils.escape with the additional
        replacements listed in _ESCAPE_ENTITIES.
        """
        return saxutils.escape(text, _ESCAPE_ENTITIES)