2 ldif - generate and parse LDIF data (see RFC 2849)
4 See http://www.python-ldap.org/ for details.
6 $Id: ldif.py,v 1.52 2009/12/03 22:11:26 stroeder Exp $
8 Python compatibility note:
9 Tested with Python 2.0+, but should work with Python 1.5.2+.
12 __version__ = '2.3.11'
18 'AttrTypeandValueLDIF','CreateLDIF','ParseLDIF',
26 import urlparse,urllib,base64,re,types
29 from cStringIO import StringIO
31 from StringIO import StringIO
33 attrtype_pattern = r'[\w;.]+(;[\w_-]+)*'
34 attrvalue_pattern = r'(([^,]|\\,)+|".*?")'
35 rdn_pattern = attrtype_pattern + r'[ ]*=[ ]*' + attrvalue_pattern
36 dn_pattern = rdn_pattern + r'([ ]*,[ ]*' + rdn_pattern + r')*[ ]*'
37 dn_regex = re.compile('^%s$' % dn_pattern)
39 ldif_pattern = '^((dn(:|::) %(dn_pattern)s)|(%(attrtype_pattern)s(:|::) .*)$)+' % vars()
42 'add':0,'delete':1,'replace':2
46 0:'add',1:'delete',2:'replace'
# Change types accepted as value of a "changetype:" line
CHANGE_TYPES = ['add','delete','modify','modrdn']
# The same names as dictionary keys, used for membership tests
valid_changetype_dict = {}
for c in CHANGE_TYPES:
    valid_changetype_dict[c] = None
def is_dn(s):
    """
    Return a true value if s is the string representation of an LDAP DN.

    The empty string is accepted explicitly (the zero-length DN is a
    valid DN) because dn_regex requires at least one RDN and would
    reject it. Every other string has to be matched by dn_regex as a whole.
    """
    if s == '':
        return 1
    rm = dn_regex.match(s)
    return rm is not None and rm.group(0) == s
65 SAFE_STRING_PATTERN = '(^(\000|\n|\r| |:|<)|[\000\n\r\200-\377]+|[ ]+$)'
66 safe_string_re = re.compile(SAFE_STRING_PATTERN)
def list_dict(l):
    """
    Return a dictionary with all items of l being the keys of the
    dictionary. Every value is None.
    """
    return dict([(i, None) for i in l])
77 Write LDIF entry or change records to file object
78 Copy LDIF input to a file output object containing all data retrieved
def __init__(self,output_file,base64_attrs=None,cols=76,line_sep='\n'):
    """
    output_file
        file object for output
    base64_attrs
        list of attribute types to be base64-encoded in any case
    cols
        Specifies how many columns a line may have before it's
        folded into many lines.
    line_sep
        String used as line separator
    """
    self._output_file = output_file
    # Attribute type names are stored lower-cased as dictionary keys
    self._base64_attrs = list_dict([a.lower() for a in (base64_attrs or [])])
    # Read by the line-folding writer method as maximum line width
    self._cols = cols
    self._line_sep = line_sep
    # Counter of records written so far
    self.records_written = 0
def _unfoldLDIFLine(self,line):
    """
    Write string line as one or more folded lines

    The first output line holds at most self._cols characters. Every
    continuation line starts with a single space followed by the next
    chunk of line. Each output line is terminated by self._line_sep.
    """
    write = self._output_file.write
    line_sep = self._line_sep
    # Check maximum line length
    line_len = len(line)
    if line_len <= self._cols:
        write(line)
        write(line_sep)
    else:
        # Fold line
        pos = self._cols
        write(line[0:min(line_len, self._cols)])
        write(line_sep)
        # One column of each continuation line is taken by the leading
        # space. Advance by at least one character so that the loop
        # also terminates for self._cols<=1.
        chunk_len = max(self._cols - 1, 1)
        while pos < line_len:
            write(' ')
            write(line[pos:min(line_len, pos + chunk_len)])
            write(line_sep)
            pos = pos + chunk_len
    return # _unfoldLDIFLine()
def _needs_base64_encoding(self,attr_type,attr_value):
    """
    Return a true value if attr_value has to be base-64 encoded because
    of special chars or because attr_type is in self._base64_attrs

    attr_type is compared lower-cased against the keys of
    self._base64_attrs. The special chars are those matched by
    safe_string_re.
    """
    return attr_type.lower() in self._base64_attrs or \
        safe_string_re.search(attr_value) is not None
def _unparseAttrTypeandValue(self,attr_type,attr_value):
    """
    Write a single attribute type/value pair

    attr_type
        attribute type
    attr_value
        attribute value
    """
    if self._needs_base64_encoding(attr_type,attr_value):
        # Encode with base64; the encoder's own line breaks are removed
        # because folding is done by self._unfoldLDIFLine()
        encoded_value = base64.encodestring(attr_value).replace('\n','')
        self._unfoldLDIFLine(':: '.join([attr_type,encoded_value]))
    else:
        self._unfoldLDIFLine(': '.join([attr_type,attr_value]))
    return # _unparseAttrTypeandValue()
def _unparseEntryRecord(self,entry):
    """
    Write all attribute values of an entry record

    entry
        dictionary holding an entry; each key is an attribute type and
        each value is a sequence of attribute values
    """
    # Work on a sorted copy of the keys so that the attribute types are
    # written in a stable order
    attr_types = list(entry.keys())
    attr_types.sort()
    for attr_type in attr_types:
        for attr_value in entry[attr_type]:
            self._unparseAttrTypeandValue(attr_type,attr_value)
def _unparseChangeRecord(self,modlist):
    """
    Write a change record

    modlist
        list of additions (2-tuple) or modifications (3-tuple)

    The length of the first item decides whether the record is written
    with changetype add or modify. Raises ValueError if the first item
    is neither a 2-tuple nor a 3-tuple or if a subsequent item has a
    different length than the first one.
    """
    mod_len = len(modlist[0])
    if mod_len == 2:
        changetype = 'add'
    elif mod_len == 3:
        changetype = 'modify'
    else:
        raise ValueError("modlist item of wrong length")
    self._unparseAttrTypeandValue('changetype',changetype)
    for mod in modlist:
        # Check every item, not only the first one
        if len(mod) != mod_len:
            raise ValueError("Subsequent modlist item of wrong length")
        if mod_len == 2:
            mod_type,mod_vals = mod
        else:
            mod_op,mod_type,mod_vals = mod
            # Line naming the modify operation and the attribute type
            self._unparseAttrTypeandValue(MOD_OP_STR[mod_op],mod_type)
        # mod_vals may be empty or None, then no value lines are written
        if mod_vals:
            for mod_val in mod_vals:
                self._unparseAttrTypeandValue(mod_type,mod_val)
        if mod_len == 3:
            # Each modification is terminated by a line holding a dash
            self._output_file.write('-'+self._line_sep)
def unparse(self,dn,record):
    """
    Write a single record

    dn
        string-representation of distinguished name
    record
        Either a dictionary holding the LDAP entry {attrtype:record}
        or a list with a modify list like for LDAPObject.modify().

    Raises ValueError if record is neither a dictionary nor a list.
    """
    if not record:
        # Simply ignore empty records
        return
    # Start with line containing the distinguished name
    self._unparseAttrTypeandValue('dn',dn)
    # Dispatch to record type specific writers
    if isinstance(record,dict):
        self._unparseEntryRecord(record)
    elif isinstance(record,list):
        self._unparseChangeRecord(record)
    else:
        raise ValueError("Argument record must be dictionary or list")
    # Write empty line separating the records
    self._output_file.write(self._line_sep)
    # Count records written
    self.records_written = self.records_written+1
    return # unparse()
def CreateLDIF(dn,record,base64_attrs=None,cols=76):
    """
    Create LDIF single formatted record including trailing empty line.
    This is a compatibility function. Use is deprecated!

    dn
        string-representation of distinguished name
    record
        Either a dictionary holding the LDAP entry {attrtype:record}
        or a list with a modify list like for LDAPObject.modify().
    base64_attrs
        list of attribute types to be base64-encoded in any case
    cols
        Specifies how many columns a line may have before it's
        folded into many lines.

    Returns the LDIF text as a string.
    """
    # Let the writer fill an in-memory buffer
    f = StringIO()
    ldif_writer = LDIFWriter(f,base64_attrs,cols,'\n')
    ldif_writer.unparse(dn,record)
    s = f.getvalue()
    f.close()
    return s
236 Base class for an LDIF parser. Applications should sub-class this
237 class and override method handle() to implement something meaningful.
239 Public class attributes:
241 Counter for records processed so far
def _stripLineSep(self,s):
    """
    Strip trailing line separators from s, but no other whitespaces

    A single trailing '\\r\\n' or '\\n' is removed. Everything else,
    including trailing spaces, is left untouched.
    """
    if s[-2:] == '\r\n':
        return s[:-2]
    elif s[-1:] == '\n':
        return s[:-1]
    else:
        return s
def __init__(
    self,
    input_file,
    ignored_attr_types=None,
    max_entries=0,
    process_url_schemes=None,
    line_sep='\n'
):
    """
    input_file
        File-object to read the LDIF input from
    ignored_attr_types
        Attributes with these attribute type names will be ignored.
    max_entries
        If non-zero specifies the maximum number of entries to be
        read from input_file.
    process_url_schemes
        List containing strings with URLs schemes to process with urllib.
        An empty list turns off all URL processing and the attribute
        is ignored completely.
    line_sep
        String used as line separator
    """
    self._input_file = input_file
    self._max_entries = max_entries
    # URL schemes and attribute type names are stored lower-cased as
    # dictionary keys
    self._process_url_schemes = list_dict([s.lower() for s in (process_url_schemes or [])])
    self._ignored_attr_types = list_dict([a.lower() for a in (ignored_attr_types or [])])
    self._line_sep = line_sep
    # Counter for records processed so far
    self.records_read = 0
def handle(self,dn,entry):
    """
    Process a single content LDIF record. This method should be
    implemented by applications using LDIFParser.

    This default implementation does nothing.
    """
def _unfoldLDIFLine(self):
    """
    Unfold several folded lines with leading space into one line

    Starts with the line currently held in self._line and appends all
    directly following lines which begin with a space, without that
    space and without line separators. Afterwards self._line holds
    the first line not consumed (empty string at end of input).
    """
    unfolded_lines = [ self._stripLineSep(self._line) ]
    self._line = self._input_file.readline()
    while self._line and self._line[0] == ' ':
        unfolded_lines.append(self._stripLineSep(self._line[1:]))
        self._line = self._input_file.readline()
    return ''.join(unfolded_lines)
def _parseAttrTypeandValue(self):
    """
    Parse a single attribute type and value pair from one or
    more lines of LDIF data

    Returns a tuple (attr_type,attr_value). (None,None) is returned
    for an empty line, at the end of the input and for a line without
    a colon. attr_value is None if the value is referenced by URL
    and the URL was not fetched.
    """
    # Reading new attribute line
    unfolded_line = self._unfoldLDIFLine()
    # Ignore comments which can also be folded
    while unfolded_line and unfolded_line[0] == '#':
        unfolded_line = self._unfoldLDIFLine()
    if not unfolded_line or unfolded_line == '\n' or unfolded_line == '\r\n':
        return None,None
    colon_pos = unfolded_line.find(':')
    if colon_pos < 0:
        # Treat malformed lines without colon as non-existent
        return None,None
    attr_type = unfolded_line[0:colon_pos]
    # The colon and the character following it tell how the value
    # is represented
    value_spec = unfolded_line[colon_pos:colon_pos+2]
    if value_spec == '::':
        # attribute value needs base64-decoding
        attr_value = base64.decodestring(unfolded_line[colon_pos+2:])
    elif value_spec == ':<':
        # fetch attribute value from URL
        url = unfolded_line[colon_pos+2:].strip()
        attr_value = None
        if self._process_url_schemes:
            u = urlparse.urlparse(url)
            if u[0] in self._process_url_schemes:
                # NOTE(review): the URL comes from the LDIF input; only
                # schemes enabled by the caller are fetched
                attr_value = urllib.urlopen(url).read()
    else:
        # Plain value: everything after the colon without the leading
        # whitespace. Slicing starts right after the colon so that
        # a value not preceded by a space keeps its first character.
        attr_value = unfolded_line[colon_pos+1:].lstrip()
    return attr_type,attr_value
def parse(self):
    """
    Continuously read and parse LDIF records

    For every record which contains at least one attribute that is
    not ignored self.handle(dn,entry) is called and self.records_read
    is incremented. Reading stops at the end of the input or as soon
    as self._max_entries records were read (if non-zero).

    Raises ValueError for a repeated or invalid dn: line and for
    a misplaced, repeated or invalid changetype: line.
    """
    self._line = self._input_file.readline()

    while self._line and \
          (not self._max_entries or self.records_read < self._max_entries):

        # Reset record
        dn = None; changetype = None; entry = {}

        attr_type,attr_value = self._parseAttrTypeandValue()

        # NOTE(review): a value of None also ends the record here,
        # not only the pair (None,None) - confirm this is intended
        while attr_type is not None and attr_value is not None:
            if attr_type == 'dn':
                # attr type and value pair was DN of LDIF record
                if dn is not None:
                    raise ValueError('Two lines starting with dn: in one record.')
                if not is_dn(attr_value):
                    raise ValueError('No valid string-representation of distinguished name %s.' % (repr(attr_value)))
                dn = attr_value
            elif attr_type == 'version' and dn is None:
                # version: line in front of the DN is not part of the entry
                pass
            elif attr_type == 'changetype':
                # attr type and value pair was changetype of LDIF record
                if dn is None:
                    raise ValueError('Read changetype: before getting valid dn: line.')
                if changetype is not None:
                    raise ValueError('Two lines starting with changetype: in one record.')
                if attr_value not in valid_changetype_dict:
                    raise ValueError('changetype value %s is invalid.' % (repr(attr_value)))
                changetype = attr_value
            elif attr_type.lower() not in self._ignored_attr_types:
                # Add the attribute to the entry if not ignored attribute
                if attr_type in entry:
                    entry[attr_type].append(attr_value)
                else:
                    entry[attr_type] = [attr_value]

            # Read the next line within an entry
            attr_type,attr_value = self._parseAttrTypeandValue()

        if entry:
            # pass the record to the handler and count it
            self.handle(dn,entry)
            self.records_read = self.records_read+1

    return # parse()
class LDIFRecordList(LDIFParser):
    """
    Collect all records of LDIF input into a single list
    of 2-tuples (dn,entry). It can be a memory hog!
    """

    def __init__(
        self,
        input_file,
        ignored_attr_types=None,max_entries=0,process_url_schemes=None
    ):
        """
        See LDIFParser.__init__()

        Additional attribute:
        all_records
            List instance for storing parsed records
        """
        LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
        self.all_records = []

    def handle(self,dn,entry):
        """
        Append single record to list of all records.
        """
        self.all_records.append((dn,entry))
class LDIFCopy(LDIFParser):
    """
    Copy LDIF input to LDIF output containing all data retrieved
    via URLs
    """

    def __init__(
        self,
        input_file,output_file,
        ignored_attr_types=None,max_entries=0,process_url_schemes=None,
        base64_attrs=None,cols=76,line_sep='\n'
    ):
        """
        See LDIFParser.__init__() and LDIFWriter.__init__()
        """
        LDIFParser.__init__(self,input_file,ignored_attr_types,max_entries,process_url_schemes)
        # Writer instance receiving every record passed to handle()
        self._output_ldif = LDIFWriter(output_file,base64_attrs,cols,line_sep)

    def handle(self,dn,entry):
        """
        Write single LDIF record to output file.
        """
        self._output_ldif.unparse(dn,entry)
def ParseLDIF(f,ignore_attrs=None,maxentries=0):
    """
    Parse LDIF records read from file.
    This is a compatibility function. Use is deprecated!

    f
        File-object to read the LDIF input from
    ignore_attrs
        Attributes with these attribute type names will be ignored.
    maxentries
        If non-zero specifies the maximum number of entries to be read.

    Returns the list of 2-tuples (dn,entry) collected by LDIFRecordList.
    """
    # process_url_schemes=0 turns off all URL processing
    ldif_parser = LDIFRecordList(
        f,ignored_attr_types=ignore_attrs,max_entries=maxentries,process_url_schemes=0
    )
    ldif_parser.parse()
    return ldif_parser.all_records