8 # Try to load our external libraries, but fail gracefully.
10 require Frontier::Client;
15 This script requires Ken MacLeod\'s Frontier::RPC2 module. You can get this
16 from CPAN or from his website at http://bitsko.slc.ut.us/~ken/xml-rpc/ .
18 For installation instructions, see the XML-RPC HOWTO at:
19 http://www.linuxdoc.org/HOWTO/XML-RPC-HOWTO/index.html
21 This script also requires MIME::Base64. You can get this from CPAN.
26 # Parse our command-line arguments.
28 print STDERR "Usage: binmode-rpc2xml-rpc < data.binmode > data.xml\n";
32 # Perform our I/O in binary mode (hence the name of the protocol).
33 binmode STDIN; # Because we're reading raw binary data.
34 binmode STDOUT; # Because we want our XML left unmolested.
36 # Just suck all our input into one string and glom it together.
37 my $binmode_data = join('', <STDIN>);
39 # Check for the mandatory header.
40 unless ($binmode_data =~ /^binmode-rpc:/) {
41 die "$0: No 'binmode-rpc:' header present, stopping";
44 # Set our decoding-position counter to point just past the header, and
45 # our end pointer to just beyond the end of the entire message.
46 my $position = length('binmode-rpc:');
47 my $end = length($binmode_data);
49 # Set our starting output indentation to zero (for the pretty-printer).
52 # Build an empty codebook of strings.
55 # Begin the hard work.
56 decode_call_or_response();
58 # Print a warning if there's leftover data.
59 if ($position != $end) {
60 printf STDERR "binmode-rpc2xml-rpc: warning: Trailing data ignored\n";
67 #--------------------------------------------------------------------------
69 #--------------------------------------------------------------------------
71 sub escape_string ($) {
73 $string =~ s/&/&/g;
74 $string =~ s/</</g;
75 $string =~ s/\"/"/g;
79 sub push_indentation_level () {
83 sub pop_indentation_level () {
87 sub print_xml_line ($) {
89 print STDOUT (' ' x $indentation) . $xml . $crlf;
93 #--------------------------------------------------------------------------
95 #--------------------------------------------------------------------------
96 # These routines read raw input from our string, advance the current
97 # position, and return something in a Perl-friendly format.
99 # This is all icky binary I/O using Perl's built-in unpack function.
102 die "Unexpected end of input" unless ($position + 1 <= $end);
103 my $byte = unpack('C', substr($binmode_data, $position, 1));
105 die "Weird error decoding byte" unless (defined $byte);
109 sub peek_character () {
110 die "Unexpected end of input" unless ($position + 1 <= $end);
111 my $byte = chr(unpack('c', substr($binmode_data, $position, 1)));
112 die "Weird error decoding character" unless (defined $byte);
116 sub read_character () {
117 my $byte = peek_character();
122 sub read_unsigned_lsb () {
123 die "Unexpected end of input" unless ($position + 4 <= $end);
124 my $integer = unpack('V', substr($binmode_data, $position, 4));
126 die "Weird error decoding integer" unless (defined $integer);
127 unless ($integer >= 0) {
128 die "Perl can't handle 32-bit unsigned integers portably, stopping";
133 sub read_signed_lsb () {
134 die "Unexpected end of input" unless ($position + 4 <= $end);
135 my $integer = unpack('V', substr($binmode_data, $position, 4));
137 die "Weird error decoding integer" unless (defined $integer);
143 die "Unexpected end of input" unless ($position + $length <= $end);
144 my $data = unpack("a$length", substr($binmode_data, $position, $length));
145 $position += $length;
146 die "Weird error decoding data" unless (defined $data);
147 die "Wrong data length" unless (length($data) == $length);
151 sub read_data_w_byte_length () {
152 my $length = read_byte();
153 return read_data($length);
156 sub read_data_w_unsigned_lsb_length () {
157 my $length = read_unsigned_lsb();
158 return read_data($length)
161 sub read_string_data () {
162 my $string = read_data_w_unsigned_lsb_length();
163 validate_utf8($string);
168 #--------------------------------------------------------------------------
169 # High-level input routines
170 #--------------------------------------------------------------------------
171 # These use the low-level input routines to read data from the buffer,
172 # and then convert it into Frontier::RPC2 objects.
175 my $type = read_character();
176 #print STDERR "DEBUG: Reading from '$type'\n";
178 return _read_int_value();
179 } elsif ($type eq 't') {
180 return Frontier::RPC2::Boolean->new(1);
181 } elsif ($type eq 'f') {
182 return Frontier::RPC2::Boolean->new(0);
183 } elsif ($type eq 'D') {
184 return _read_double_value();
185 } elsif ($type eq '8') {
186 return _read_dateTime_value();
187 } elsif ($type eq 'B') {
188 return _read_base64_value();
189 } elsif ($type eq 'A') {
190 return _read_array_value();
191 } elsif ($type eq 'S') {
192 return _read_struct_value();
193 } elsif ($type eq 'U') {
194 return _read_regular_string_value();
195 } elsif ($type eq '>') {
196 return _read_recorded_string_value();
197 } elsif ($type eq '<') {
198 return _read_recalled_string_value();
199 } elsif ($type eq 'O') {
200 die "Type 'O' Binmode RPC data not supported";
202 die "Type '$type' Binmode RPC data does not exist";
206 sub read_value_and_typecheck ($) {
207 my ($wanted_type) = @_;
208 my $value = read_value();
209 my $value_type = ref($value);
210 die "$0: Expected $wanted_type, got $value_type, stopping"
211 unless ($wanted_type eq $value_type);
215 sub _read_int_value () {
216 return Frontier::RPC2::Integer->new(read_signed_lsb);
219 sub _read_double_value () {
220 return Frontier::RPC2::Double->new(read_data_w_byte_length);
223 sub _read_dateTime_value () {
224 return Frontier::RPC2::DateTime::ISO8601->new(read_data_w_byte_length);
227 sub _read_base64_value () {
228 my $binary = read_data_w_unsigned_lsb_length;
229 my $encoded = MIME::Base64::encode_base64($binary, $crlf);
230 return Frontier::RPC2::Base64->new($encoded);
233 sub _read_array_value () {
234 my $size = read_unsigned_lsb;
236 for (my $i = 0; $i < $size; $i++) {
237 push @values, read_value;
242 sub _read_struct_value () {
243 my $size = read_unsigned_lsb;
245 for (my $i = 0; $i < $size; $i++) {
246 my $key = read_value_and_typecheck('Frontier::RPC2::String');
247 $struct{$key->value} = read_value;
252 sub _read_regular_string_value () {
253 return Frontier::RPC2::String->new(read_string_data);
256 sub _read_recorded_string_value () {
257 my $codebook_entry = read_byte;
258 my $string = Frontier::RPC2::String->new(read_string_data);
259 $codebook[$codebook_entry] = $string;
263 sub _read_recalled_string_value () {
264 my $codebook_entry = read_byte;
265 my $string = $codebook[$codebook_entry];
266 unless (defined $string) {
267 die "$0: Attempted to use undefined codebook position $codebook_entry";
273 #--------------------------------------------------------------------------
274 # High-level output routines
275 #--------------------------------------------------------------------------
276 # We don't use Frontier::RPC2's output routines, because we're looking
277 # for maximum readability. This is a debugging tool, after all.
279 sub print_xml_header () {
280 print_xml_line '<?xml version="1.0" encoding="UTF-8"?>';
283 sub get_escaped_string ($) {
285 return escape_string($value->value);
288 sub print_simple_value ($$) {
289 my ($tag, $value) = @_;
290 my $string = get_escaped_string($value);
291 print_xml_line "<value><$tag>$string</$tag></value>";
294 sub print_value ($) {
296 my $type = ref($value);
297 if ($type eq 'Frontier::RPC2::Integer') {
298 print_simple_value("int", $value);
299 } elsif ($type eq 'Frontier::RPC2::Double') {
300 print_simple_value("double", $value);
301 } elsif ($type eq 'Frontier::RPC2::Boolean') {
302 print_simple_value("boolean", $value);
303 } elsif ($type eq 'Frontier::RPC2::String') {
304 print_simple_value("string", $value);
305 } elsif ($type eq 'Frontier::RPC2::DateTime::ISO8601') {
306 print_simple_value("dateTime.iso8601", $value);
307 } elsif ($type eq 'Frontier::RPC2::Base64') {
308 print_base64_data($value);
309 } elsif ($type eq 'ARRAY') {
310 print_array_value($value);
311 } elsif ($type eq 'HASH') {
312 print_struct_value($value);
314 die "Unxpected type '$type', stopping";
318 sub print_params ($) {
321 die "Wanted array" unless (ref($params) eq 'ARRAY');
323 print_xml_line '<params>';
324 push_indentation_level;
326 foreach my $item (@$params) {
327 print_xml_line '<param>';
328 push_indentation_level;
330 pop_indentation_level;
331 print_xml_line '</param>';
334 pop_indentation_level;
335 print_xml_line '</params>';
338 sub print_base64_data ($) {
340 print_xml_line '<value>';
341 push_indentation_level;
342 print_xml_line '<base64>';
344 print_xml_line '</base64>';
345 pop_indentation_level;
346 print_xml_line '</value>';
349 sub print_array_value ($) {
352 print_xml_line '<value>';
353 push_indentation_level;
354 print_xml_line '<array>';
355 push_indentation_level;
356 print_xml_line '<data>';
357 push_indentation_level;
359 foreach my $item (@$array) {
363 pop_indentation_level;
364 print_xml_line '</data>';
365 pop_indentation_level;
366 print_xml_line '</array>';
367 pop_indentation_level;
368 print_xml_line '</value>';
371 sub print_struct_value ($) {
374 print_xml_line '<value>';
375 push_indentation_level;
376 print_xml_line '<struct>';
377 push_indentation_level;
379 for my $key (keys %$struct) {
380 print_xml_line '<member>';
381 push_indentation_level;
383 my $name = escape_string($key);
384 print_xml_line "<name>$name</name>";
385 print_value($struct->{$key});
387 pop_indentation_level;
388 print_xml_line '</member>';
391 pop_indentation_level;
392 print_xml_line '</struct>';
393 pop_indentation_level;
394 print_xml_line '</value>';
398 #--------------------------------------------------------------------------
399 # High-level decoder routines
400 #--------------------------------------------------------------------------
401 # These routines convert Binmode RPC data into the corresponding XML-RPC
404 sub decode_call_or_response () {
405 my $type = read_character();
408 } elsif ($type eq 'R') {
411 die "$0: Unknown binmode-rpc request type '$type', stopping";
416 my $namevalue = read_value_and_typecheck('Frontier::RPC2::String');
417 my $params = read_value_and_typecheck('ARRAY');
420 print_xml_line '<methodCall>';
421 push_indentation_level;
423 my $name = get_escaped_string($namevalue);
424 print_xml_line "<methodName>$name</methodName>";
426 print_params($params);
428 pop_indentation_level;
429 print_xml_line '</methodCall>';
432 sub decode_response () {
433 my $maybe_fault = peek_character;
434 if ($maybe_fault eq 'F') {
436 my $fault = read_value_and_typecheck('HASH');
439 print_xml_line '<methodResponse>';
440 push_indentation_level;
441 print_xml_line '<fault>';
442 push_indentation_level;
446 pop_indentation_level;
447 print_xml_line '</fault>';
448 pop_indentation_level;
449 print_xml_line '</methodResponse>';
451 my $value = read_value;
453 print_xml_line '<methodResponse>';
454 push_indentation_level;
455 print_params [$value];
456 pop_indentation_level;
457 print_xml_line '</methodResponse>';
462 #--------------------------------------------------------------------------
464 #--------------------------------------------------------------------------
465 # This is based on the UTF-8 section of the Secure Programs HOWTO.
466 # http://new.linuxnow.com/docs/content/HOWTO/Secure-Programs-HOWTO/
467 # This code *hasn't* been stress-tested for correctness yet; please see:
468 # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
469 # This is not yet good enough to be used as part of a UTF-8 decoder or
470 # security validator, but it's OK to make sure nobody is sending Latin-1.
474 use vars qw{@illegal_initial_bytes @sequence_length_info};
476 # Bytes are represented as data/mask pairs.
477 @illegal_initial_bytes =
478 (# 10xxxxxx illegal as initial byte of char (80..BF)
480 # 1100000x illegal, overlong (C0..C1 80..BF)
482 # 11100000 100xxxxx illegal, overlong (E0 80..9F)
483 [0xE0, 0xFF, 0x80, 0xE0],
484 # 11110000 1000xxxx illegal, overlong (F0 80..8F)
485 [0xF0, 0xFF, 0x80, 0xF0],
486 # 11111000 10000xxx illegal, overlong (F8 80..87)
487 [0xF8, 0xFF, 0x80, 0xF8],
488 # 11111100 100000xx illegal, overlong (FC 80..83)
489 [0xFC, 0xFF, 0x80, 0xFC],
490 # 1111111x illegal; prohibited by spec
493 # Items are byte, mask, sequence length.
494 @sequence_length_info =
497 # 1110xxxx 10xxxxxx 10xxxxxx
499 # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
501 # 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
503 # 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
507 sub validate_utf8 ($) {
509 my $end = length($string);
513 my $byte = ord(substr($string, $i, 1));
514 #print STDERR "Checking byte $byte\n";
516 # Check for illegal bytes at the start of this sequence.
518 foreach my $illegal_byte_info (@illegal_initial_bytes) {
520 for (my $j = 0; $j < @$illegal_byte_info; $j += 2) {
521 my $pattern = $illegal_byte_info->[$j];
522 my $mask = $illegal_byte_info->[$j+1];
523 my $data = ord(substr($string, $i+$offset, 1));
524 #print STDERR " B: $byte P: $pattern M: $mask D: $data\n";
525 next NEXT_CANDIDATE unless ($data & $mask) == $pattern;
528 die "Illegal UTF-8 sequence (" . substr($string, $i, 2) . ")";
531 # Find the length of the sequence, and make sure we have enough data.
533 foreach my $length_info (@sequence_length_info) {
534 my ($pattern, $mask, $length_candidate) = @$length_info;
535 if (($byte & $mask) == $pattern) {
536 $length = $length_candidate;
540 die "$0: Unexpected end of UTF-8 sequence, stopping"
541 unless $i + $length <= $end;
543 # Verify the sequence is well-formed.
545 while ($length > 0) {
546 die "$0: Malformed UTF-8 sequence, stopping"
547 unless (ord(substr($string, $i, 1)) & 0xC0) == 0x80;
551 #printf STDERR "DEBUG: Verified $i bytes\n";