# $Id: Daemon.pm,v 1.25 2001/08/07 19:32:40 gisle Exp $ # use strict; package HTTP::Daemon; =head1 NAME HTTP::Daemon - a simple http server class =head1 SYNOPSIS use HTTP::Daemon; use HTTP::Status; my $d = HTTP::Daemon->new || die; print "Please contact me at: url, ">\n"; while (my $c = $d->accept) { while (my $r = $c->get_request) { if ($r->method eq 'GET' and $r->url->path eq "/xyzzy") { # remember, this is *not* recommened practice :-) $c->send_file_response("/etc/passwd"); } else { $c->send_error(RC_FORBIDDEN) } } $c->close; undef($c); } =head1 DESCRIPTION Instances of the I class are HTTP/1.1 servers that listen on a socket for incoming requests. The I is a sub-class of I, so you can perform socket operations directly on it too. The accept() method will return when a connection from a client is available. In a scalar context the returned value will be a reference to a object of the I class which is another I subclass. In a list context a two-element array is returned containing the new I reference and the peer address; the list will be empty upon failure. Calling the get_request() method on the I object will read data from the client and return an I object reference. This HTTP daemon does not fork(2) for you. Your application, i.e. the user of the I is reponsible for forking if that is desirable. Also note that the user is responsible for generating responses that conform to the HTTP/1.1 protocol. The I class provides some methods that make this easier. =head1 METHODS The following is a list of methods that are new (or enhanced) relative to the I base class. =over 4 =cut use vars qw($VERSION @ISA $PROTO $DEBUG); $VERSION = sprintf("%d.%02d", q$Revision: 1.25 $ =~ /(\d+)\.(\d+)/); use IO::Socket qw(AF_INET INADDR_ANY inet_ntoa); @ISA=qw(IO::Socket::INET); $PROTO = "HTTP/1.1"; =item $d = new HTTP::Daemon The constructor takes the same parameters as the I constructor. It can also be called without specifying any parameters. The daemon will then set up a listen queue of 5 connections and allocate some random port number. A server that wants to bind to some specific address on the standard HTTP port will be constructed like this: $d = new HTTP::Daemon LocalAddr => 'www.someplace.com', LocalPort => 80; =cut sub new { my($class, %args) = @_; $args{Listen} ||= 5; $args{Proto} ||= 'tcp'; return $class->SUPER::new(%args); } =item $c = $d->accept([$pkg]) This method is the same as I but returns an I reference by default. It returns undef if you specify a timeout and no connection is made within that time. In a scalar context the returned value will be a reference to a object of the I class which is another I subclass. In a list context a two-element array is returned containing the new I reference and the peer address; the list will be empty upon failure. =cut sub accept { my $self = shift; my $pkg = shift || "HTTP::Daemon::ClientConn"; my ($sock, $peer) = $self->SUPER::accept($pkg); if ($sock) { ${*$sock}{'httpd_daemon'} = $self; return wantarray ? ($sock, $peer) : $sock; } else { return; } } =item $d->url Returns a URL string that can be used to access the server root. =cut sub url { my $self = shift; my $url = "http://"; my $addr = $self->sockaddr; if ($addr eq INADDR_ANY) { require Sys::Hostname; $url .= lc Sys::Hostname::hostname(); } else { $url .= gethostbyaddr($addr, AF_INET) || inet_ntoa($addr); } my $port = $self->sockport; $url .= ":$port" if $port != 80; $url .= "/"; $url; } =item $d->product_tokens Returns the name that this server will use to identify itself. This is the string that is sent with the I response header. The main reason to have this method is that subclasses can override it if they want to use another product name. =cut sub product_tokens { "libwww-perl-daemon/$HTTP::Daemon::VERSION"; } package HTTP::Daemon::ClientConn; use vars qw(@ISA $DEBUG); use IO::Socket (); @ISA=qw(IO::Socket::INET); *DEBUG = \$HTTP::Daemon::DEBUG; use HTTP::Request (); use HTTP::Response (); use HTTP::Status; use HTTP::Date qw(time2str); use LWP::MediaTypes qw(guess_media_type); use Carp (); my $CRLF = "\015\012"; # "\r\n" is not portable my $HTTP_1_0 = _http_version("HTTP/1.0"); my $HTTP_1_1 = _http_version("HTTP/1.1"); =back The I is also a I subclass. Instances of this class are returned by the accept() method of I. The following additional methods are provided: =over 4 =item $c->get_request([$headers_only]) Read data from the client and turn it into an I object which is then returned. It returns C if reading of the request fails. If it fails, then the I object ($c) should be discarded, and you should not call this method again. The $c->reason method might give you some information about why $c->get_request returned C. The $c->get_request method supports HTTP/1.1 request content bodies, including I transfer encoding with footer and self delimiting I content types. The $c->get_request method will normally not return until the whole request has been received from the client. This might not be what you want if the request is an upload of a multi-mega-byte file (and with chunked transfer encoding HTTP can even support infinite request messages - uploading live audio for instance). If you pass a TRUE value as the $headers_only argument, then $c->get_request will return immediately after parsing the request headers and you are responsible for reading the rest of the request content. If you are going to call $c->get_request again on the same connection you better read the correct number of bytes. =cut sub get_request { my($self, $only_headers) = @_; if (${*$self}{'httpd_nomore'}) { $self->reason("No more requests from this connection"); return; } $self->reason(""); my $buf = ${*$self}{'httpd_rbuf'}; $buf = "" unless defined $buf; my $timeout = $ {*$self}{'io_socket_timeout'}; my $fdset = ""; vec($fdset, $self->fileno, 1) = 1; local($_); READ_HEADER: while (1) { # loop until we have the whole header in $buf $buf =~ s/^(?:\015?\012)+//; # ignore leading blank lines if ($buf =~ /\012/) { # potential, has at least one line if ($buf =~ /^\w+[^\012]+HTTP\/\d+\.\d+\015?\012/) { if ($buf =~ /\015?\012\015?\012/) { last READ_HEADER; # we have it } elsif (length($buf) > 16*1024) { $self->send_error(413); # REQUEST_ENTITY_TOO_LARGE $self->reason("Very long header"); return; } } else { last READ_HEADER; # HTTP/0.9 client } } elsif (length($buf) > 16*1024) { $self->send_error(414); # REQUEST_URI_TOO_LARGE $self->reason("Very long first line"); return; } print STDERR "Need more data for complete header\n" if $DEBUG; return unless $self->_need_more($buf, $timeout, $fdset); } if ($buf !~ s/^(\S+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?[^\012]*\012//) { ${*$self}{'httpd_client_proto'} = _http_version("HTTP/1.0"); $self->send_error(400); # BAD_REQUEST $self->reason("Bad request line: $buf"); return; } my $method = $1; my $uri = $2; my $proto = $3 || "HTTP/0.9"; $uri = "http://$uri" if $method eq "CONNECT"; $uri = $HTTP::URI_CLASS->new($uri, $self->daemon->url); my $r = HTTP::Request->new($method, $uri); $r->protocol($proto); ${*$self}{'httpd_client_proto'} = $proto = _http_version($proto); if ($proto >= $HTTP_1_0) { # we expect to find some headers my($key, $val); HEADER: while ($buf =~ s/^([^\012]*)\012//) { $_ = $1; s/\015$//; if (/^([\w\-]+)\s*:\s*(.*)/) { $r->push_header($key, $val) if $key; ($key, $val) = ($1, $2); } elsif (/^\s+(.*)/) { $val .= " $1"; } else { last HEADER; } } $r->push_header($key, $val) if $key; } my $conn = $r->header('Connection'); if ($proto >= $HTTP_1_1) { ${*$self}{'httpd_nomore'}++ if $conn && lc($conn) =~ /\bclose\b/; } else { ${*$self}{'httpd_nomore'}++ unless $conn && lc($conn) =~ /\bkeep-alive\b/; } if ($only_headers) { ${*$self}{'httpd_rbuf'} = $buf; return $r; } # Find out how much content to read my $te = $r->header('Transfer-Encoding'); my $ct = $r->header('Content-Type'); my $len = $r->header('Content-Length'); if ($te && lc($te) eq 'chunked') { # Handle chunked transfer encoding my $body = ""; CHUNK: while (1) { print STDERR "Chunked\n" if $DEBUG; if ($buf =~ s/^([^\012]*)\012//) { my $chunk_head = $1; unless ($chunk_head =~ /^([0-9A-Fa-f]+)/) { $self->send_error(400); $self->reason("Bad chunk header $chunk_head"); return; } my $size = hex($1); last CHUNK if $size == 0; my $missing = $size - length($buf) + 2; # 2=CRLF at chunk end # must read until we have a complete chunk while ($missing > 0) { print STDERR "Need $missing more bytes\n" if $DEBUG; my $n = $self->_need_more($buf, $timeout, $fdset); return unless $n; $missing -= $n; } $body .= substr($buf, 0, $size); substr($buf, 0, $size+2) = ''; } else { # need more data in order to have a complete chunk header return unless $self->_need_more($buf, $timeout, $fdset); } } $r->content($body); # pretend it was a normal entity body $r->remove_header('Transfer-Encoding'); $r->header('Content-Length', length($body)); my($key, $val); FOOTER: while (1) { if ($buf !~ /\012/) { # need at least one line to look at return unless $self->_need_more($buf, $timeout, $fdset); } else { $buf =~ s/^([^\012]*)\012//; $_ = $1; s/\015$//; if (/^([\w\-]+)\s*:\s*(.*)/) { $r->push_header($key, $val) if $key; ($key, $val) = ($1, $2); } elsif (/^\s+(.*)/) { $val .= " $1"; } elsif (!length) { last FOOTER; } else { $self->reason("Bad footer syntax"); return; } } } $r->push_header($key, $val) if $key; } elsif ($te) { $self->send_error(501); # Unknown transfer encoding $self->reason("Unknown transfer encoding '$te'"); return; } elsif ($ct && lc($ct) =~ m/^multipart\/\w+\s*;.*boundary\s*=\s*(\w+)/) { # Handle multipart content type my $boundary = "$CRLF--$1--$CRLF"; my $index; while (1) { $index = index($buf, $boundary); last if $index >= 0; # end marker not yet found return unless $self->_need_more($buf, $timeout, $fdset); } $index += length($boundary); $r->content(substr($buf, 0, $index)); substr($buf, 0, $index) = ''; } elsif ($len) { # Plain body specified by "Content-Length" my $missing = $len - length($buf); while ($missing > 0) { print "Need $missing more bytes of content\n" if $DEBUG; my $n = $self->_need_more($buf, $timeout, $fdset); return unless $n; $missing -= $n; } if (length($buf) > $len) { $r->content(substr($buf,0,$len)); substr($buf, 0, $len) = ''; } else { $r->content($buf); $buf=''; } } ${*$self}{'httpd_rbuf'} = $buf; $r; } sub _need_more { my $self = shift; #my($buf,$timeout,$fdset) = @_; if ($_[1]) { my($timeout, $fdset) = @_[1,2]; print STDERR "select(,,,$timeout)\n" if $DEBUG; my $n = select($fdset,undef,undef,$timeout); unless ($n) { $self->reason(defined($n) ? "Timeout" : "select: $!"); return; } } print STDERR "sysread()\n" if $DEBUG; my $n = sysread($self, $_[0], 2048, length($_[0])); $self->reason(defined($n) ? "Client closed" : "sysread: $!") unless $n; $n; } =item $c->read_buffer([$new_value]) Bytes read by $c->get_request, but not used are placed in the I. The next time $c->get_request is called it will consume the bytes in this buffer before reading more data from the network connection itself. The read buffer is invalid after $c->get_request has returned an undefined value. If you handle the reading of the request content yourself you need to empty this buffer before you read more and you need to place unconsumed bytes here. You also need this buffer if you implement services like I<101 Switching Protocols>. This method always return the old buffer content and can optionally replace the buffer content if you pass it an argument. =cut sub read_buffer { my $self = shift; my $old = ${*$self}{'httpd_rbuf'}; if (@_) { ${*$self}{'httpd_rbuf'} = shift; } $old; } =item $c->reason When $c->get_request returns C you can obtain a short string describing why it happened by calling $c->reason. =cut sub reason { my $self = shift; my $old = ${*$self}{'httpd_reason'}; if (@_) { ${*$self}{'httpd_reason'} = shift; } $old; } =item $c->proto_ge($proto) Return TRUE if the client announced a protocol with version number greater or equal to the given argument. The $proto argument can be a string like "HTTP/1.1" or just "1.1". =cut sub proto_ge { my $self = shift; ${*$self}{'httpd_client_proto'} >= _http_version(shift); } sub _http_version { local($_) = shift; return 0 unless m,^(?:HTTP/)?(\d+)\.(\d+)$,i; $1 * 1000 + $2; } =item $c->antique_client Return TRUE if the client speaks the HTTP/0.9 protocol. No status code and no headers should be returned to such a client. This should be the same as !$c->proto_ge("HTTP/1.0"). =cut sub antique_client { my $self = shift; ${*$self}{'httpd_client_proto'} < $HTTP_1_0; } =item $c->force_last_request Make sure that $c->get_request will not try to read more requests off this connection. If you generate a response that is not self delimiting, then you should signal this fact by calling this method. This attribute is turned on automatically if the client announces protocol HTTP/1.0 or worse and does not include a "Connection: Keep-Alive" header. It is also turned on automatically when HTTP/1.1 or better clients send the "Connection: close" request header. =cut sub force_last_request { my $self = shift; ${*$self}{'httpd_nomore'}++; } =item $c->send_status_line( [$code, [$mess, [$proto]]] ) Send the status line back to the client. If $code is omitted 200 is assumed. If $mess is omitted, then a message corresponding to $code is inserted. If $proto is missing the content of the $HTTP::Daemon::PROTO variable is used. =cut sub send_status_line { my($self, $status, $message, $proto) = @_; return if $self->antique_client; $status ||= RC_OK; $message ||= status_message($status) || ""; $proto ||= $HTTP::Daemon::PROTO || "HTTP/1.1"; print $self "$proto $status $message$CRLF"; } =item $c->send_crlf Send the CRLF sequence to the client. =cut sub send_crlf { my $self = shift; print $self $CRLF; } =item $c->send_basic_header( [$code, [$mess, [$proto]]] ) Send the status line and the "Date:" and "Server:" headers back to the client. This header is assumed to be continued and does not end with an empty CRLF line. =cut sub send_basic_header { my $self = shift; return if $self->antique_client; $self->send_status_line(@_); print $self "Date: ", time2str(time), $CRLF; my $product = $self->daemon->product_tokens; print $self "Server: $product$CRLF" if $product; } =item $c->send_response( [$res] ) Write a I object to the client as a response. We try hard to make sure that the response is self delimiting so that the connection can stay persistent for further request/response exchanges. The content attribute of the I object can be a normal string or a subroutine reference. If it is a subroutine, then whatever this callback routine returns is written back to the client as the response content. The routine will be called until it return an undefined or empty value. If the client is HTTP/1.1 aware then we will use chunked transfer encoding for the response. =cut sub send_response { my $self = shift; my $res = shift; if (!ref $res) { $res ||= RC_OK; $res = HTTP::Response->new($res, @_); } my $content = $res->content; my $chunked; unless ($self->antique_client) { my $code = $res->code; $self->send_basic_header($code, $res->message, $res->protocol); if ($code =~ /^(1\d\d|[23]04)$/) { # make sure content is empty $res->remove_header("Content-Length"); $content = ""; } elsif ($res->request && $res->request->method eq "HEAD") { # probably OK } elsif (ref($content) eq "CODE") { if ($self->proto_ge("HTTP/1.1")) { $res->push_header("Transfer-Encoding" => "chunked"); $chunked++; } else { $self->force_last_request; } } elsif (length($content)) { $res->header("Content-Length" => length($content)); } else { $self->force_last_request; } print $self $res->headers_as_string($CRLF); print $self $CRLF; # separates headers and content } if (ref($content) eq "CODE") { while (1) { my $chunk = &$content(); last unless defined($chunk) && length($chunk); if ($chunked) { printf $self "%x%s%s%s", length($chunk), $CRLF, $chunk, $CRLF; } else { print $self $chunk; } } print $self "0$CRLF$CRLF" if $chunked; # no trailers either } elsif (length $content) { print $self $content; } } =item $c->send_redirect( $loc, [$code, [$entity_body]] ) Send a redirect response back to the client. The location ($loc) can be an absolute or relative URL. The $code must be one the redirect status codes, and defaults to "301 Moved Permanently" =cut sub send_redirect { my($self, $loc, $status, $content) = @_; $status ||= RC_MOVED_PERMANENTLY; Carp::croak("Status '$status' is not redirect") unless is_redirect($status); $self->send_basic_header($status); my $base = $self->daemon->url; $loc = $HTTP::URI_CLASS->new($loc, $base) unless ref($loc); $loc = $loc->abs($base); print $self "Location: $loc$CRLF"; if ($content) { my $ct = $content =~ /^\s*force_last_request; # no use keeping the connection open } =item $c->send_error( [$code, [$error_message]] ) Send an error response back to the client. If the $code is missing a "Bad Request" error is reported. The $error_message is a string that is incorporated in the body of the HTML entity body. =cut sub send_error { my($self, $status, $error) = @_; $status ||= RC_BAD_REQUEST; Carp::croak("Status '$status' is not an error") unless is_error($status); my $mess = status_message($status); $error ||= ""; $mess = <$status $mess

$status $mess

$error EOT unless ($self->antique_client) { $self->send_basic_header($status); print $self "Content-Type: text/html$CRLF"; print $self "Content-Length: " . length($mess) . $CRLF; print $self $CRLF; } print $self $mess; $status; } =item $c->send_file_response($filename) Send back a response with the specified $filename as content. If the file is a directory we try to generate an HTML index of it. =cut sub send_file_response { my($self, $file) = @_; if (-d $file) { $self->send_dir($file); } elsif (-f _) { # plain file local(*F); sysopen(F, $file, 0) or return $self->send_error(RC_FORBIDDEN); binmode(F); my($ct,$ce) = guess_media_type($file); my($size,$mtime) = (stat _)[7,9]; unless ($self->antique_client) { $self->send_basic_header; print $self "Content-Type: $ct$CRLF"; print $self "Content-Encoding: $ce$CRLF" if $ce; print $self "Content-Length: $size$CRLF" if $size; print $self "Last-Modified: ", time2str($mtime), "$CRLF" if $mtime; print $self $CRLF; } $self->send_file(\*F); return RC_OK; } else { $self->send_error(RC_NOT_FOUND); } } sub send_dir { my($self, $dir) = @_; $self->send_error(RC_NOT_FOUND) unless -d $dir; $self->send_error(RC_NOT_IMPLEMENTED); } =item $c->send_file($fd); Copy the file to the client. The file can be a string (which will be interpreted as a filename) or a reference to an I or glob. =cut sub send_file { my($self, $file) = @_; my $opened = 0; if (!ref($file)) { local(*F); open(F, $file) || return undef; binmode(F); $file = \*F; $opened++; } my $cnt = 0; my $buf = ""; my $n; while ($n = sysread($file, $buf, 8*1024)) { last if !$n; $cnt += $n; print $self $buf; } close($file) if $opened; $cnt; } =item $c->daemon Return a reference to the corresponding I object. =cut sub daemon { my $self = shift; ${*$self}{'httpd_daemon'}; } =back =head1 SEE ALSO RFC 2068 L, L =head1 COPYRIGHT Copyright 1996-2001, Gisle Aas This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1;