(* $Id: nethttpd_kernel.mli 1410 2010-02-14 19:44:28Z gerd $ * *) (* * Copyright 2005 Baretta s.r.l. and Gerd Stolpmann * * This file is part of Nethttpd. * * Nethttpd is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * Nethttpd is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Nethttpd; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *) (** {1 The protocol kernel of the HTTP daemon} * * This module mainly defines the [http_protocol] class which implements the * exchange of messages with a HTTP client. The request messages are represented * as sequence of [req_token] values. The response is encapsulated in a separate * [http_response] class. The contents of the response are represented as sequence * of [resp_token] values. *) (* HTTP protocol kernel for daemon *) open Nethttp open Nethttpd_types type fatal_error = [ `Broken_pipe | `Broken_pipe_ignore | `Message_too_long | `Timeout | `Unix_error of Unix.error | `Server_error ] (** These are the serious protocol violations after that the daemon stops * any further processing. * * Note that [`Timeout] refers to a timeout in the middle of a request. * [`Broken_pipe_ignore] is the "harmless" version of [`Broken_pipe] * (see [config_suppress_broken_pipe]). * * Long messages are fatal because it is suspected that they are denial * of service attacks. The kernel generates [`Message_too_long] only for * long headers, not for long bodies. * * Fatal server errors can happen when exceptions are not properly handled. * As last resort the HTTP daemon closes the connection without notifying * the client. *) val string_of_fatal_error : fatal_error -> string (** Convert error to a string, for logging *) type bad_request_error = [ `Bad_header_field of string | `Bad_header | `Bad_trailer | `Bad_request_line | `Request_line_too_long | `Protocol_not_supported | `Unexpected_eof | `Format_error of string ] (** A bad request is a violation where the current request cannot be * decoded, and it is not possible to accept further requests over the * current connection. *) val string_of_bad_request_error : bad_request_error -> string (** Convert error to a string, for logging *) val status_of_bad_request_error : bad_request_error -> http_status (** Returns the best response code for the error *) type data_chunk = string * int * int (** A [data_chunk] is a substring of a string. The substring is described by * the triple [(s, pos, len)] where [s] is the container, [pos] is the * position where the substring begins, and [len] its length. *) type status_line = int * string (** = (code, phrase) *) type transfer_coding = [ `Identity | `Chunked ] type resp_token = [ `Resp_info_line of (status_line * http_header) | `Resp_status_line of status_line | `Resp_header of http_header | `Resp_body of data_chunk | `Resp_trailer of http_trailer | `Resp_end | `Resp_action of (unit -> unit) ] (** The [resp_token] represents a textual part of the response to send: * - [`Resp_info_line] is an informational status line (code=100..199). There can * be several informational lines, and they can be accompanied with their own * headers. Such lines are only sent to HTTP/1.1 clients. * - [`Resp_status_line] is the final status line to send (code >= 200) * - [`Resp_header] is the whole response header to send * - [`Resp_body] is the next part of the response body to send. * - [`Resp_trailer] is the whole response trailer to send (currently ignored) * - [`Resp_action] is special because it does not directly represent a token * to send. The argument is a function which is called when the token is * the next token on the active event queue. The function is also called when * the event queue is dropped because of an error (the state of the * response object indicates this). The function must not raise exceptions * except [Unix_error], and it must not block. *) val resp_100_continue : resp_token (** The predefined token for the "100 Continue" response *) type resp_state = [ `Inhibited | `Queued | `Active | `Processed | `Error | `Dropped ] (** The response state: * - [`Inhibited] = it is not yet allowed to start the response * - [`Queued] = the response waits on the queue for activation * - [`Active] = the response is currently being transmitted * - [`Processed] = the response has been completely sent * - [`Error] = an error occurred during the transmission of this response * - [`Dropped] = an earlier response forced to close the connection, and * this response is dequeued *) type front_token = [ `Resp_wire_data of data_chunk | `Resp_end ] (** Tokens generated by [http_response]: * - [`Resp_wire_data] are data tokens. * - [`Resp_end] indicates the end of the response. *) exception Send_queue_empty type announcement = [`Ignore | `Ocamlnet | `Ocamlnet_and of string | `As of string ] (** See config *) (** Encapsultation of the HTTP response for a single request *) class type http_response = object (** Represents the action of sending the response * * This class has an internal * queue of response tokens that are not yet processed. One can easily add * new tokens to the end of the queue ([send]). * * The class is responsible for determining the transfer encoding: * - When the HTTP version is 1.0, the encoding is always "identity", and the * connection will be closed after the response. * - When the HTTP version is 1.1, and there is a Content-length header, * the encoding will be selected as "identity". It is checked whether the * body has really this length. If too short, it is suggested to close * the connection. If too long, the extra part of the message is silently * dropped. * - When the HTTP version is 1.1, and there is no Content-length header, * the encoding will be selected as "chunked". * * Currently, the [TE] request header is not taken into account. The trailer * is always empty. * * The following headers are set (or removed) by this class: * - [Transfer-Encoding] * - [Trailer] * - [Date] * - [Connection] * - [Upgrade] * - [Server] (it is appended to this field) * * Responses for HEAD requests have the special behaviour that the body is silently * dropped. The calculation of header fields is not affected by this. This means * that HEAD can be easily implemented by doing the same as for GET. * * Responses for other requests that must not include a body must set * [Content-Length] to 0. *) (** These methods can be called by the content provider: *) method state : resp_state (** Reports the state. The initial state is [`Inhibited] *) method bidirectional_phase : bool (** The bidrectional phase starts after "100 Continue" has been sent to the * client, and stops when the response body begins. The bidirectional phase * is special for the calculation of timeout values (input determines the timeout * although the response has started). *) method send : resp_token -> unit (** Add token to the end of the send queue *) method send_queue_empty : bool (** Return whether the send queue is empty. When the state is [`Inhibited], this * method fakes an empty queue. *) method protocol : protocol (** The HTTP version of the response. This is currently always HTTP/1.1, but maybe * we need to fake lower versions for buggy clients. Let's see what comes. *) method close_connection : bool (** Returns whether the connection should be closed after this response. * This flag should be evaluated when the [`Resp_end] front token has been * reached. *) method transfer_encoding : transfer_coding (** Returns the selected transfer encoding. This is valid after the header * has been passed to this object with [send]. *) method front_token : front_token (** The first token of the queue, represented as [data_chunk]. Raises * [Send_queue_empty] when there is currently no front token, or the state * is [`Inhibited]. * If there is a front token, it will never have length 0. * * Note that [Unix_error] exceptions can be raised when [`Resp_action] * tokens are processed. *) method set_callback : (unit -> unit) -> unit (** The function will be called when either [set_state] changes the state, * or when the send queue becomes empty. Note that the callback must never * fail, it is called in situations that make it hard to recover from errors. *) method body_size : int64 (** Accumulated size of the response body *) (** These methods must only be called by the HTTP protocol processor: *) method set_state : resp_state -> unit (** Sets the state *) method advance : int -> unit (** Tell this object that [n] bytes of the front token could be really * sent using [Unix.write]. If this means that the whole front token * has been sent, the next token is pulled from the queue and is made * the new front token. Otherwise, the data chunk representing the * front token is modified such that the position is advanced by * [n], and the length is reduced by [n]. *) end class http_response_impl : ?close:bool -> ?suppress_body:bool -> int64 -> protocol -> announcement -> http_response (** Exported for debugging and testing only *) val send_static_response : http_response -> http_status -> http_header option -> string -> unit (** Sends the string argument as response body, together with the given status and * the header (optional). Response header fields are set as follows: * - The [Content-Length] is set to the length of the string. * - The [Content-Type] is set to "text/html" unless given by the header. * If the header object is passed in, these modifications are done * directly in this object as side effect. *) val send_file_response : http_response -> http_status -> http_header option -> Unix.file_descr -> int64 -> unit (** Sends the contents of a file as response body, together with the given status and * the header (optional). The descriptor must be a file descriptor (that cannot * block). The int64 number is the length * of the body. Response header fields are set as follows: * - The [Content-Length] is set to the length of the string. * - The [Content-Type] is set to "text/html" unless given by the header. * * Note that [Content-Range] is not set automatically, even if the file is only * partially transferred. * * If the header object is passed in, these modifications are done * directly in this object as side effect. * * The function does not send the file immediately, but rather sets the [http_response] * object up that the next chunk of the file is added when the send queue becomes * empty. This file will be closed when the transfer is done. *) type request_line = http_method * protocol (** The method (including the URI), and the HTTP version *) type req_token = [ `Req_header of request_line * http_header * http_response | `Req_expect_100_continue | `Req_body of data_chunk | `Req_trailer of http_trailer | `Req_end | `Eof | `Fatal_error of fatal_error | `Bad_request_error of bad_request_error * http_response | `Timeout ] (** A [req_token] represents a textual part of the received request: * - [`Req_header] is the full received header. Together with the header, * the corresponding [http_response] object is returned which must * be used to transmit the response. * - [`Req_expect_100_continue] is generated when the client expects that the * server sends a "100 Continue" response (or a final status code) now. * One should add [`Resp_info_line resp_100_continue] to the send queue * if the header is acceptable, or otherwise generate an error response. In any * case, the rest of the request must be read until [`Req_end]. * - [`Req_body] is a part of the request body. The transfer-coding, if any, * is already decoded. * - [`Req_trailer] is the received trailer * - [`Req_end] indicates the end of the request (the next request may begin * immediately). * - [`Eof] indicates the end of the stream * - [`Bad_request_error] indicates that the request violated the HTTP protocol * in a serious way and cannot be decoded. It is required to send a * "400 Bad Request" response. The following token will be [`Eof]. * - [`Fatal_error] indicates that the connection crashed. * The following token will be [`Eof]. * - [`Timeout] means that nothing has been received for a certain amount * of time, and the protocol is in a state that the next request can begin. * The following token will be [`Eof]. * * Note that it is always allowed to [send] tokens to the client. The protocol * implementation takes care that the response is transmitted at the right point * in time. *) val string_of_req_token : req_token -> string (** For debugging *) exception Recv_queue_empty (** Configuration values for the HTTP kernel *) class type http_protocol_config = object method config_max_reqline_length : int (** Maximum size of the request line. Longer lines are immediately replied with * a "Request URI too long" response. Suggestion: 32768. *) method config_max_header_length : int (** Maximum size of the header, {b including} the request line. Longer headers * are treated as attack, and cause the fatal error [`Message_too_long]. * Suggestion: 65536. *) method config_max_trailer_length : int (** Maximum size of the trailer *) method config_limit_pipeline_length : int (** Limits the length of the pipeline (= unreplied requests). A value of 0 * disables pipelining. A value of n allows that another request is * received although there are already n unreplied requests. *) method config_limit_pipeline_size : int (** Limits the size of the pipeline in bytes. If the buffered bytes in the * input queue exceed this value, the receiver temporarily stops reading * more data. The value 0 has the effect that even the read-ahead of * data of the current request is disabled. The value (-1) disables the * receiver completely (not recommended). *) method config_announce_server : announcement (** Whether to set the [Server] header: * - [`Ignore]: The kernel does not touch the [Server] header. * - [`Ocamlnet]: Announce this web server as "Ocamlnet/<version>" * - [`Ocamlnet_and s]: Announce this web server as [s] and append * the Ocamlnet string. * - [`As s]: Announce this web server as [s] *) method config_suppress_broken_pipe : bool (** Whether to suppress [`Broken_pipe] errors. Instead * [`Broken_pipe_ignore] is reported. *) end val default_http_protocol_config : http_protocol_config (** Default config: - [config_max_reqline_length = 32768] - [config_max_header_length = 65536] - [config_max_trailer_length = 32768] - [config_limit_pipeline_length = 5] - [config_limit_pipeline_size = 65536] - [config_announce_server = `Ocamlnet] - [config_suppress_broken_pipe = false] *) class modify_http_protocol_config : ?config_max_reqline_length:int -> ?config_max_header_length:int -> ?config_max_trailer_length:int -> ?config_limit_pipeline_length:int -> ?config_limit_pipeline_size:int -> ?config_announce_server:announcement -> ?config_suppress_broken_pipe:bool -> http_protocol_config -> http_protocol_config (** Modifies the passed config object as specified by the optional arguments *) (** The core event loop of the HTTP daemon *) class http_protocol : #http_protocol_config -> Unix.file_descr -> object (** Exchange of HTTP messages * * In [fd] one must pass the already connected socket. It must be in non- * blocking mode. * * How to use this class: Basically, one invokes [cycle] until the whole * message exchange on [fd] is processed. [cycle] receives data from the * socket and sends data to the socket. There are two internal queues: * * The receive queue stores parts of received requests as [req_token]. * One can take values from the front of this queue by calling [receive]. * * The response queue stores [http_response] objects. Each of the objects * corresponds to a request that was received before. This queue is handled * fully automatically, but one can watch its length to see whether all responses * are actually transmitted over the wire. * * The basic algorithm to process messages is: * * {[ * let rec next_token () = * if proto # recv_queue_len = 0 then ( * proto # cycle (); * next_token() * ) * else * proto # receive() * * let cur_token = ref (next_token()) in * while !cur_token <> `Eof do * (* Process first token of next request: *) * match !cur_token with * | `Req_header(req_line, header, resp) -> * (* Depending on [req_line], read further tokens until [`Req_end] *) * ... * (* Switch to the first token of the next message: *) * cur_token := next_token() * | `Timeout -> ... * | `Bad_request_error(e,resp) -> * (* Generate 400 error, send it to [resp] *) * ... * (* Switch to the first token of the next message: *) * cur_token := next_token() * | `Fatal_error e -> failwith "Crash" * | _ -> assert false * done; * while proto # resp_queue_len > 0 do * proto # cycle (); * done; * proto # shutdown() * ]} * * See the file [tests/easy_daemon.ml] for a complete implementation of this. * * As one can see, it is essential to watch the lengths of the queues in order * to figure out what has happened during [cycle]. * * When the body of the request is empty, [`Req_body] tokens are omitted. * Note that for requests like [GET] that always have an empty body, it is * still possible that an errorneous client sends a body, and that [`Req_body] * tokens arrive. One must accept and ignore these tokens. * * Error handling: For serious errors, the connection is immediately aborted. * In this case, [receive] returns a [`Fatal_error] token. Note that the * queued responses cannot be sent! An example of this is [`Broken_pipe]. * * There is a large class of non-serious errors, esp. format errors * in the header and body. It is typical of these errors that one cannot determine * the end of the request properly. For this reason, the daemon stops reading * further data from the request, but the response queue is still delivered. * For these errors, [receive] returns a [`Bad_request_error] token. * This token contains a [http_response] object that must be filled with a * 400 error response. *) method cycle : ?block:float -> unit -> unit (** Looks at the file descriptor. If there is data to read from the descriptor, * and there is free space in the input buffer, additional data is read into * the buffer. It is also tried to interpret the new data as [req_token]s, * and if possible, new [req_token]s are appended to the receive queue. * * If the response queue has objects, and there is really data one can send, * and if the socket allows one to send data, it is tried to send as much * data as possible. * * The option [block] (default: 0) can be set to wait until data * can be exchanged with the socket. This avoids busy waiting. The number * is the duration in seconds to wait until the connection times out * (0 means not to wait at all, -1 means to wait infinitely). When a timeout * happens, and there is nothing to send, and the last request was fully * processed, [receive] will simply return [`Timeout] (i.e. when * [waiting_for_next_message] is [true]). Otherwise, the * fatal error [`Timeout] is generated. *) method receive : unit -> req_token (** Returns the first [req_token] from the receive queue. Raises * [Recv_queue_empty] when the queue is empty (= has no new data) *) method peek_recv : unit -> req_token (** Peeks the first token, but leaves it in the queue. * Raises [Recv_queue_empty] when the queue is empty. *) method recv_queue_len : int (** Returns the length of the receive queue (number of tokens) *) method resp_queue_len : int (** Returns the length of the internal response queue (number of [http_response] * objects that have not yet fully processed) *) method pipeline_len : int (** Returns the number of unanswered requests = Number of received [`Req_end] tokens * minus number of responses in state [`Processed]. Note that [pipeline_len] * can become [-1] when bad requests are responded. *) method recv_queue_byte_size : int (** Returns the (estimated) size of the input queue in bytes *) method waiting_for_next_message : bool (** Whether the kernel is currently waiting for the beginning of a new * arriving HTTP request. This is [false] while the request is being * received. *) method input_timeout_class : [ `Normal | `Next_message | `None ] (** Suggests the calculation of a timeout value for input: * - [`Normal]: The normal timeout value applies * - [`Next_message]: The timeout value applies while waiting for the next message * - [`None]: The connection is output-driven, no input timeout value *) method shutdown : unit -> unit (** Shuts the socket down. Note: the descriptor is not closed. *) method timeout : unit -> unit (** Process a timeout condition as [cycle] does *) method abort : fatal_error -> unit (** Stops the transmission of data. The receive queue is cleared and filled * with the two tokens [`Fatal_error] and [`Eof]. * The response queue is cleared. The [cycle] * method will return immediately without doing anything. *) method fd : Unix.file_descr (** Just returns the socket *) method do_input : bool (** Returns [true] iff the protocol engine is interested in new data from the * socket. Returns [false] after EOF and after errors. *) method do_output : bool (** Returns [true] iff the protocol engine has data to output to the socket *) method need_linger : bool (** Returns [true] when a lingering close operation is needed to reliably shut * down the socket. In many cases, this expensive operation is not necessary. * See the class [lingering_close] below. *) method config : http_protocol_config (** Just returns the configuration *) method test_coverage : string list (** For testing: returns a list of tokens indicating into which cases the program * ran. *) end (** Closes a file descriptor using the "lingering close" algorithm. The optional [preclose] function is called just before [Unix.close]. *) class lingering_close : ?preclose:(unit->unit) -> Unix.file_descr -> object (** Closes a file descriptor using the "lingering close" algorithm * * Usage: * {[ while lc # lingering do lc # cycle ~block:true () done ]} *) method cycle : ?block:bool -> unit -> unit (** Reads data from the file descriptor until EOF or until a fixed timeout * is over. Finally, the descriptor is closed. If [block] is set, the method * blocks until data is available. (Default: [false]) *) method lingering : bool (** Whether the socket is still lingering *) method fd : Unix.file_descr (** The file descriptor *) end (** {1 Debugging} *) module Debug : sig val enable : bool ref (** Enables {!Netlog}-style debugging of this module *) end