aac896d15ea0dccd5f16147dc97b6a6b8a637359
[gonvert] / src / util / io.py
1 #!/usr/bin/env python
2
3
4 from __future__ import with_statement
5
6 import os
7 import pickle
8 import contextlib
9 import itertools
10 import codecs
11 import csv
12 try:
13         import cStringIO as StringIO
14 except ImportError:
15         import StringIO
16
17
@contextlib.contextmanager
def change_directory(directory):
        """
        Temporarily make `directory` the working directory of the process.

        Yields a (previous, current) tuple holding the working directory as
        reported by os.getcwd() before and after the switch. The previous
        directory is restored when the with-block exits, whether it finished
        normally or raised.
        """
        origin = os.getcwd()
        os.chdir(directory)
        cwdPair = (origin, os.getcwd())

        try:
                yield cwdPair
        finally:
                # Go back to where we started, even if the block failed
                os.chdir(origin)
28
29
@contextlib.contextmanager
def pickled(filename):
        """
        Expose a dict persisted with pickle in `filename`.

        Here is an example usage:
        with pickled("foo.db") as p:
                p("users", list).append(["srid", "passwd", 23])

        If `filename` exists it is unpickled, otherwise an empty dict is used.
        The with-block receives getter(item, factory): it returns the value
        stored under `item`, first creating it with factory() when the key is
        missing. The dict is written back to `filename` only when the
        with-block finishes without raising.

        NOTE: unpickling can execute arbitrary code; only point this at files
        from a trusted source.
        """

        if os.path.isfile(filename):
                # Pickle streams are binary data; close the handle once loaded
                with open(filename, "rb") as source:
                        data = pickle.load(source)
        else:
                data = {}

        def getter(item, factory):
                if item not in data:
                        data[item] = factory()
                return data[item]

        yield getter

        # Only reached on a clean exit from the with-block
        with open(filename, "wb") as sink:
                pickle.dump(data, sink)
53
54
@contextlib.contextmanager
def redirect(object_, attr, value):
        """
        Temporarily replace attribute `attr` of `object_` with `value`.

        The attribute's prior value is read before the swap and put back when
        the with-block exits, whether it finished normally or raised. Nothing
        is yielded to the block.

        Example, sending prints to a file for the duration of the block:
                import sys
                with redirect(sys, 'stdout', open('stdout', 'w')):
                        print "hello"
                print "we're back"
        """
        saved = getattr(object_, attr)
        setattr(object_, attr, value)
        try:
                yield
        finally:
                # Put the original value back no matter how the block ended
                setattr(object_, attr, saved)
71
72
def pathsplit(path):
        """
        Break `path` into its components at every os.path.sep.

        A leading separator produces an empty first component.

        >>> pathsplit("/a/b/c")
        ['', 'a', 'b', 'c']
        >>> pathsplit("./plugins/builtins.ini")
        ['.', 'plugins', 'builtins.ini']
        """
        return path.split(os.path.sep)
82
83
def commonpath(l1, l2, common=None):
        """
        Split two component lists into their shared prefix and the remainders.

        Returns a (common, l1Only, l2Only) tuple where `common` is the longest
        leading run of components present in both lists and the other two are
        what is left of `l1` and `l2` after that prefix. When the lists are
        equal, `l1` itself is returned as the common part.

        `common` is not used; it is accepted only so existing callers that
        pass it keep working.

        >>> commonpath(pathsplit('/a/b/c/d'), pathsplit('/a/b/c1/d1'))
        (['', 'a', 'b'], ['c', 'd'], ['c1', 'd1'])
        >>> commonpath(pathsplit("./plugins/"), pathsplit("./plugins/builtins.ini"))
        (['.', 'plugins'], [''], ['builtins.ini'])
        >>> commonpath(pathsplit("./plugins/builtins"), pathsplit("./plugins"))
        (['.', 'plugins'], ['builtins'], [])
        >>> commonpath([], ['a'])
        ([], [], ['a'])
        """
        if l1 == l2:
                return l1, [], []

        # Count how many leading components match. Starting from zero keeps
        # this well defined when either list is empty and zip() yields nothing.
        shared = 0
        for leftDir, rightDir in zip(l1, l2):
                if leftDir != rightDir:
                        break
                shared += 1

        return l1[:shared], l1[shared:], l2[shared:]
106
107
def relpath(p1, p2):
        """
        Build a relative path that leads from `p1` to `p2`.

        Both paths are normalised with os.path.normpath first. The result
        climbs one '..' for every component that only `p1` has, then descends
        through the components that only `p2` has. Identical paths give
        '.' followed by the separator.

        >>> relpath('/', '/')
        './'
        >>> relpath('/a/b/c/d', '/')
        '../../../../'
        >>> relpath('/a/b/c/d', '/a/b/c1/d1')
        '../../c1/d1'
        >>> relpath('/a/b/c/d', '/a/b/c1/d1/')
        '../../c1/d1'
        >>> relpath("./plugins/builtins", "./plugins")
        '../'
        >>> relpath("./plugins/", "./plugins/builtins.ini")
        'builtins.ini'
        """
        fromParts = pathsplit(os.path.normpath(p1))
        toParts = pathsplit(os.path.normpath(p2))

        _, fromTail, toTail = commonpath(fromParts, toParts)
        if not fromTail and not toTail:
                # Nothing differs: both paths name the same location
                return "."+os.sep

        # One "../" per component that has to be left behind
        climb = ('..' + os.sep) * len(fromTail)
        return os.path.join(climb, *toTail)
135
136
class UTF8Recoder(object):
        """
        Iterator adapter that decodes a stream written in some encoding and
        hands each item back re-encoded as UTF-8.
        """
        def __init__(self, f, encoding):
                # codecs.getreader gives the stream-reader factory for `encoding`
                makeReader = codecs.getreader(encoding)
                self.reader = makeReader(f)

        def __iter__(self):
                return self

        def next(self):
                decoded = self.reader.next()
                return decoded.encode("utf-8")
149
150
class UnicodeReader(object):
        """
        CSV reader for a file "f" stored in the given encoding; iterating it
        produces each row as a list of unicode strings.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
                # csv.reader is fed UTF-8 byte strings, whatever the source encoding
                utf8Lines = UTF8Recoder(f, encoding)
                self.reader = csv.reader(utf8Lines, dialect=dialect, **kwds)

        def __iter__(self):
                return self

        def next(self):
                utf8Row = self.reader.next()
                # Cells come back UTF-8 encoded; decode them one by one
                return [unicode(cell, "utf-8") for cell in utf8Row]

167
class UnicodeWriter(object):
        """
        CSV writer that takes rows of unicode strings and writes them to the
        file "f" in the given encoding.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
                # csv.writer writes into an in-memory buffer, not straight to "f"
                self.queue = StringIO.StringIO()
                self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
                self.stream = f
                self.encoder = codecs.getincrementalencoder(encoding)()

        def writerow(self, row):
                utf8Cells = [cell.encode("utf-8") for cell in row]
                self.writer.writerow(utf8Cells)
                # The buffer now holds the formatted record as UTF-8; decode it ...
                text = self.queue.getvalue().decode("utf-8")
                # ... then emit it in the target encoding
                self.stream.write(self.encoder.encode(text))
                # Clear the buffer ready for the next record
                self.queue.truncate(0)

        def writerows(self, rows):
                for record in rows:
                        self.writerow(record)
196
197
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
        """
        Parse an iterable of unicode lines as CSV, yielding each row as a list
        of unicode cells. `dialect` and any extra keyword arguments are passed
        on to csv.reader.
        """
        # csv.py doesn't do Unicode, so it is fed UTF-8 encoded lines instead
        encodedLines = utf_8_encoder(unicode_csv_data)
        for encodedRow in csv.reader(encodedLines, dialect=dialect, **kwargs):
                # Turn every UTF-8 cell back into Unicode
                yield [unicode(field, 'utf-8') for field in encodedRow]
205
206
def utf_8_encoder(unicode_csv_data):
        """
        Lazily yield each item of `unicode_csv_data` encoded as UTF-8.
        """
        for text in unicode_csv_data:
                encoded = text.encode('utf-8')
                yield encoded