4 use vars qw($VERSION $AUTOLOAD);
10 my $CRLF = "\015\012"; # "\r\n" is not portable
11 $HTTP::URI_CLASS ||= $ENV{PERL_HTTP_URI_CLASS} || "URI";
12 eval "require $HTTP::URI_CLASS"; die $@ if $@;
14 *_utf8_downgrade = defined(&utf8::downgrade) ?
16 utf8::downgrade($_[0], 1) or
17 Carp::croak("HTTP::Message content must be bytes")
25 my($class, $header, $content) = @_;
26 if (defined $header) {
27 Carp::croak("Bad header argument") unless ref $header;
28 if (ref($header) eq "ARRAY") {
29 $header = HTTP::Headers->new(@$header);
32 $header = $header->clone;
36 $header = HTTP::Headers->new;
38 if (defined $content) {
39 _utf8_downgrade($content);
46 '_headers' => $header,
47 '_content' => $content,
54 my($class, $str) = @_;
58 if ($str =~ s/^([^\s:]+)[ \t]*: ?(.*)\n?//) {
62 elsif (@hdr && $str =~ s/^([ \t].*)\n?//) {
72 new($class, \@hdr, $str);
79 my $clone = HTTP::Message->new($self->headers,
81 $clone->protocol($self->protocol);
88 $self->{_headers}->clear;
90 delete $self->{_parts};
95 sub protocol { shift->_elem('_protocol', @_); }
100 if (defined(wantarray)) {
101 $self->_content unless exists $self->{_content};
102 my $old = $self->{_content};
103 $old = $$old if ref($old) eq "SCALAR";
104 &_set_content if @_ > 1;
112 Carp::carp("Useless content call in void context") if $^W;
118 _utf8_downgrade($_[1]);
119 if (!ref($_[1]) && ref($self->{_content}) eq "SCALAR") {
120 ${$self->{_content}} = $_[1];
123 die "Can't set content to be a scalar reference" if ref($_[1]) eq "SCALAR";
124 $self->{_content} = $_[1];
125 delete $self->{_content_ref};
127 delete $self->{_parts} unless $_[2];
134 $self->_content unless exists $self->{_content};
135 my $chunkref = \$_[0];
136 $chunkref = $$chunkref if ref($$chunkref); # legacy
138 _utf8_downgrade($$chunkref);
140 my $ref = ref($self->{_content});
142 $self->{_content} .= $$chunkref;
144 elsif ($ref eq "SCALAR") {
145 ${$self->{_content}} .= $$chunkref;
148 Carp::croak("Can't append to $ref content");
150 delete $self->{_parts};
153 sub add_content_utf8 {
154 my($self, $buf) = @_;
157 $self->add_content($buf);
163 $self->_content unless exists $self->{_content};
164 delete $self->{_parts};
165 my $old = \$self->{_content};
166 my $old_cref = $self->{_content_ref};
169 Carp::croak("Setting content_ref to a non-ref") unless ref($new);
170 delete $self->{_content}; # avoid modifying $$old
171 $self->{_content} = $new;
172 $self->{_content_ref}++;
174 $old = $$old if $old_cref;
181 my($self, %opt) = @_;
183 my $content_ref_iscopy;
187 require HTTP::Headers::Util;
189 if (my @ct = HTTP::Headers::Util::split_header_words($self->header("Content-Type"))) {
190 ($ct, undef, %ct_param) = @{$ct[-1]};
193 die "Can't decode multipart content" if $ct =~ m,^multipart/,;
196 $content_ref = $self->content_ref;
197 die "Can't decode ref content" if ref($content_ref) ne "SCALAR";
199 if (my $h = $self->header("Content-Encoding")) {
202 for my $ce (reverse split(/\s*,\s*/, lc($h))) {
203 next unless $ce || $ce eq "identity";
204 if ($ce eq "gzip" || $ce eq "x-gzip") {
205 require Compress::Zlib;
206 unless ($content_ref_iscopy) {
207 # memGunzip is documented to destroy its buffer argument
208 my $copy = $$content_ref;
209 $content_ref = \$copy;
210 $content_ref_iscopy++;
212 $content_ref = \Compress::Zlib::memGunzip($$content_ref);
213 die "Can't gunzip content" unless defined $$content_ref;
215 elsif ($ce eq "x-bzip2") {
216 require Compress::Bzip2;
217 $content_ref = Compress::Bzip2::decompress($$content_ref);
218 die "Can't bunzip content" unless defined $$content_ref;
219 $content_ref_iscopy++;
221 elsif ($ce eq "deflate") {
222 require Compress::Zlib;
223 my $out = Compress::Zlib::uncompress($$content_ref);
224 unless (defined $out) {
225 # "Content-Encoding: deflate" is supposed to mean the "zlib"
226 # format of RFC 1950, but Microsoft got that wrong, so some
227 # servers sends the raw compressed "deflate" data. This
228 # tries to inflate this format.
229 unless ($content_ref_iscopy) {
230 # the $i->inflate method is documented to destroy its
232 my $copy = $$content_ref;
233 $content_ref = \$copy;
234 $content_ref_iscopy++;
237 my($i, $status) = Compress::Zlib::inflateInit(
238 WindowBits => -Compress::Zlib::MAX_WBITS(),
240 my $OK = Compress::Zlib::Z_OK();
241 die "Can't init inflate object" unless $i && $status == $OK;
242 ($out, $status) = $i->inflate($content_ref);
243 if ($status != Compress::Zlib::Z_STREAM_END()) {
244 if ($status == $OK) {
245 $self->push_header("Client-Warning" =>
246 "Content might be truncated; incomplete deflate stream");
249 # something went bad, can't trust $out any more
254 die "Can't inflate content" unless defined $out;
255 $content_ref = \$out;
256 $content_ref_iscopy++;
258 elsif ($ce eq "compress" || $ce eq "x-compress") {
259 die "Can't uncompress content";
261 elsif ($ce eq "base64") { # not really C-T-E, but should be harmless
262 require MIME::Base64;
263 $content_ref = \MIME::Base64::decode($$content_ref);
264 $content_ref_iscopy++;
266 elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
267 require MIME::QuotedPrint;
268 $content_ref = \MIME::QuotedPrint::decode($$content_ref);
269 $content_ref_iscopy++;
272 die "Don't know how to decode Content-Encoding '$ce'";
277 if ($ct && $ct =~ m,^text/,,) {
278 my $charset = $opt{charset} || $ct_param{charset} || $opt{default_charset} || "ISO-8859-1";
279 $charset = lc($charset);
280 if ($charset ne "none") {
282 if (do{my $v = $Encode::VERSION; $v =~ s/_//g; $v} < 2.0901 &&
283 !$content_ref_iscopy)
285 # LEAVE_SRC did not work before Encode-2.0901
286 my $copy = $$content_ref;
287 $content_ref = \$copy;
288 $content_ref_iscopy++;
290 $content_ref = \Encode::decode($charset, $$content_ref,
291 ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC());
296 Carp::croak($@) if $opt{raise_error};
300 return $opt{ref} ? $content_ref : $$content_ref;
306 my($self, $eol) = @_;
307 $eol = "\n" unless defined $eol;
309 # The calculation of content might update the headers
310 # so we need to do that first.
311 my $content = $self->content;
313 return join("", $self->{'_headers'}->as_string($eol),
316 (@_ == 1 && length($content) &&
317 $content !~ /\n\z/) ? "\n" : "",
322 sub headers { shift->{'_headers'}; }
323 sub headers_as_string { shift->{'_headers'}->as_string(@_); }
328 if (defined(wantarray) && (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR")) {
331 my $old = $self->{_parts};
333 my @parts = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_;
334 my $ct = $self->content_type || "";
335 if ($ct =~ m,^message/,) {
336 Carp::croak("Only one part allowed for $ct content")
339 elsif ($ct !~ m,^multipart/,) {
340 $self->remove_content_headers;
341 $self->content_type("multipart/mixed");
343 $self->{_parts} = \@parts;
344 _stale_content($self);
346 return @$old if wantarray;
352 if (($self->content_type || "") !~ m,^multipart/,) {
353 my $p = HTTP::Message->new($self->remove_content_headers,
355 $self->content_type("multipart/mixed");
356 $self->{_parts} = [$p];
358 elsif (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR") {
362 push(@{$self->{_parts}}, @_);
363 _stale_content($self);
369 if (ref($self->{_content}) eq "SCALAR") {
370 # must recalculate now
374 # just invalidate cache
375 delete $self->{_content};
376 delete $self->{_content_ref};
381 # delegate all other method calls the the _headers object.
384 my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2);
385 return if $method eq "DESTROY";
387 # We create the function here so that it will not need to be
388 # autoloaded the next time.
390 *$method = eval "sub { shift->{'_headers'}->$method(\@_) }";
395 # Private method to access members in %$self
400 my $old = $self->{$elem};
401 $self->{$elem} = $_[0] if @_;
406 # Create private _parts attribute from current _content
409 my $ct = $self->content_type;
410 if ($ct =~ m,^multipart/,) {
411 require HTTP::Headers::Util;
412 my @h = HTTP::Headers::Util::split_header_words($self->header("Content-Type"));
413 die "Assert" unless @h;
415 if (defined(my $b = $h{boundary})) {
416 my $str = $self->content;
417 $str =~ s/\r?\n--\Q$b\E--\r?\n.*//s;
418 if ($str =~ s/(^|.*?\r?\n)--\Q$b\E\r?\n//s) {
419 $self->{_parts} = [map HTTP::Message->parse($_),
420 split(/\r?\n--\Q$b\E\r?\n/, $str)]
424 elsif ($ct eq "message/http") {
425 require HTTP::Request;
426 require HTTP::Response;
427 my $content = $self->content;
428 my $class = ($content =~ m,^(HTTP/.*)\n,) ?
429 "HTTP::Response" : "HTTP::Request";
430 $self->{_parts} = [$class->parse($content)];
432 elsif ($ct =~ m,^message/,) {
433 $self->{_parts} = [ HTTP::Message->parse($self->content) ];
436 $self->{_parts} ||= [];
440 # Create private _content attribute from current _parts
443 my $ct = $self->header("Content-Type") || "multipart/mixed";
444 if ($ct =~ m,^\s*message/,i) {
445 _set_content($self, $self->{_parts}[0]->as_string($CRLF), 1);
449 require HTTP::Headers::Util;
450 my @v = HTTP::Headers::Util::split_header_words($ct);
451 Carp::carp("Multiple Content-Type headers") if @v > 1;
456 for (my @tmp = @v; @tmp;) {
457 my($k, $v) = splice(@tmp, 0, 2);
458 if (lc($k) eq "boundary") {
460 $boundary_index = @v - @tmp - 1;
465 my @parts = map $_->as_string($CRLF), @{$self->{_parts}};
468 $boundary = _boundary() unless defined $boundary;
472 if (index($_, $boundary) >= 0) {
473 # must have a better boundary
474 $boundary = _boundary(++$bno);
480 if ($boundary_index) {
481 $v[$boundary_index] = $boundary;
484 push(@v, boundary => $boundary);
487 $ct = HTTP::Headers::Util::join_header_words(@v);
488 $self->header("Content-Type", $ct);
490 _set_content($self, "--$boundary$CRLF" .
491 join("$CRLF--$boundary$CRLF", @parts) .
492 "$CRLF--$boundary--$CRLF",
499 my $size = shift || return "xYzZY";
500 require MIME::Base64;
501 my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
502 $b =~ s/[\W]/X/g; # ensure alnum only
514 HTTP::Message - HTTP style message (base class)
518 use base 'HTTP::Message';
522 An C<HTTP::Message> object contains some headers and a content body.
523 The following methods are available:
527 =item $mess = HTTP::Message->new
529 =item $mess = HTTP::Message->new( $headers )
531 =item $mess = HTTP::Message->new( $headers, $content )
533 This constructs a new message object. Normally you would want
534 construct C<HTTP::Request> or C<HTTP::Response> objects instead.
536 The optional $header argument should be a reference to an
537 C<HTTP::Headers> object or a plain array reference of key/value pairs.
538 If an C<HTTP::Headers> object is provided then a copy of it will be
539 embedded into the constructed message, i.e. it will not be owned and
540 can be modified afterwards without affecting the message.
542 The optional $content argument should be a string of bytes.
544 =item $mess = HTTP::Message->parse( $str )
546 This constructs a new message object by parsing the given string.
550 Returns the embedded C<HTTP::Headers> object.
552 =item $mess->headers_as_string
554 =item $mess->headers_as_string( $eol )
556 Call the as_string() method for the headers in the
557 message. This will be the same as
559 $mess->headers->as_string
561 but it will make your program a whole character shorter :-)
565 =item $mess->content( $bytes )
567 The content() method sets the raw content if an argument is given. If no
568 argument is given the content is not touched. In either case the
569 original raw content is returned.
571 Note that the content should be a string of bytes. Strings in perl
572 can contain characters outside the range of a byte. The C<Encode>
573 module can be used to turn such strings into a string of bytes.
575 =item $mess->add_content( $bytes )
577 The add_content() methods appends more data bytes to the end of the
578 current content buffer.
580 =item $mess->add_content_utf8( $string )
582 The add_content_utf8() method appends the UTF-8 bytes representing the
583 string to the end of the current content buffer.
585 =item $mess->content_ref
587 =item $mess->content_ref( \$bytes )
589 The content_ref() method will return a reference to content buffer string.
590 It can be more efficient to access the content this way if the content
591 is huge, and it can even be used for direct manipulation of the content,
594 ${$res->content_ref} =~ s/\bfoo\b/bar/g;
596 This example would modify the content buffer in-place.
598 If an argument is passed it will setup the content to reference some
599 external source. The content() and add_content() methods
600 will automatically dereference scalar references passed this way. For
601 other references content() will return the reference itself and
602 add_content() will refuse to do anything.
604 =item $mess->decoded_content( %options )
606 Returns the content with any C<Content-Encoding> undone and the raw
607 content encoded to perl's Unicode strings. If the C<Content-Encoding>
608 or C<charset> of the message is unknown this method will fail by
611 The following options can be specified.
617 This override the charset parameter for text content. The value
618 C<none> can used to suppress decoding of the charset.
620 =item C<default_charset>
622 This override the default charset of "ISO-8859-1".
624 =item C<charset_strict>
626 Abort decoding if malformed characters is found in the content. By
627 default you get the substitution character ("\x{FFFD}") in place of
628 malformed characters.
632 If TRUE then raise an exception if not able to decode content. Reason
633 might be that the specified C<Content-Encoding> or C<charset> is not
634 supported. If this option is FALSE, then decoded_content() will return
635 C<undef> on errors, but will still set $@.
639 If TRUE then a reference to decoded content is returned. This might
640 be more efficient in cases where the decoded content is identical to
641 the raw content as no data copying is required in this case.
647 =item $mess->parts( @parts )
649 =item $mess->parts( \@parts )
651 Messages can be composite, i.e. contain other messages. The composite
652 messages have a content type of C<multipart/*> or C<message/*>. This
653 method give access to the contained messages.
655 The argumentless form will return a list of C<HTTP::Message> objects.
656 If the content type of $msg is not C<multipart/*> or C<message/*> then
657 this will return the empty list. In scalar context only the first
658 object is returned. The returned message parts should be regarded as
659 are read only (future versions of this library might make it possible
660 to modify the parent by modifying the parts).
662 If the content type of $msg is C<message/*> then there will only be
665 If the content type is C<message/http>, then the return value will be
666 either an C<HTTP::Request> or an C<HTTP::Response> object.
668 If an @parts argument is given, then the content of the message will be
669 modified. The array reference form is provided so that an empty list
670 can be provided. The @parts array should contain C<HTTP::Message>
671 objects. The @parts objects are owned by $mess after this call and
672 should not be modified or made part of other messages.
674 When updating the message with this method and the old content type of
675 $mess is not C<multipart/*> or C<message/*>, then the content type is
676 set to C<multipart/mixed> and all other content headers are cleared.
678 This method will croak if the content type is C<message/*> and more
679 than one part is provided.
681 =item $mess->add_part( $part )
683 This will add a part to a message. The $part argument should be
684 another C<HTTP::Message> object. If the previous content type of
685 $mess is not C<multipart/*> then the old content (together with all
686 content headers) will be made part #1 and the content type made
687 C<multipart/mixed> before the new part is added. The $part object is
688 owned by $mess after this call and should not be modified or made part
691 There is no return value.
695 Will clear the headers and set the content to the empty string. There
698 =item $mess->protocol
700 =item $mess->protocol( $proto )
702 Sets the HTTP protocol used for the message. The protocol() is a string
703 like C<HTTP/1.0> or C<HTTP/1.1>.
707 Returns a copy of the message object.
709 =item $mess->as_string
711 =item $mess->as_string( $eol )
713 Returns the message formatted as a single string.
715 The optional $eol parameter specifies the line ending sequence to use.
716 The default is "\n". If no $eol is given then as_string will ensure
717 that the returned string is newline terminated (even when the message
718 content is not). No extra newline is appended if an explicit $eol is
723 All methods unknown to C<HTTP::Message> itself are delegated to the
724 C<HTTP::Headers> object that is part of every message. This allows
725 convenient access to these methods. Refer to L<HTTP::Headers> for
726 details of these methods:
728 $mess->header( $field => $val )
729 $mess->push_header( $field => $val )
730 $mess->init_header( $field => $val )
731 $mess->remove_header( $field )
732 $mess->remove_content_headers
733 $mess->header_field_names
734 $mess->scan( \&doit )
738 $mess->if_modified_since
739 $mess->if_unmodified_since
742 $mess->content_encoding
743 $mess->content_length
744 $mess->content_language
750 $mess->www_authenticate
752 $mess->proxy_authorization
753 $mess->authorization_basic
754 $mess->proxy_authorization_basic
758 Copyright 1995-2004 Gisle Aas.
760 This library is free software; you can redistribute it and/or
761 modify it under the same terms as Perl itself.