Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: nethttpd_kernel.mli 2195 2015-01-01 12:23:39Z gerd $
 *
 *)

(*
 * Copyright 2005 Baretta s.r.l. and Gerd Stolpmann
 *
 * This file is part of Nethttpd.
 *
 * Nethttpd is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Nethttpd is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Nethttpd; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *)

(** {1 The protocol kernel of the HTTP daemon}
  *
  * This module mainly defines the [http_protocol] class which implements the
  * exchange of messages with a HTTP client. The request messages are represented
  * as sequence of [req_token] values. The response is encapsulated in a separate
  * [http_response] class. The contents of the response are represented as sequence
  * of [resp_token] values.
 *)

(* HTTP protocol kernel for daemon *)

open Nethttp
open Nethttpd_types

type fatal_error =
    [ `Broken_pipe
    | `Broken_pipe_ignore
    | `Message_too_long
    | `Timeout
    | `Unix_error of Unix.error
    | `TLS_error of string * string
    | `Server_error
    ]
    (** These are the serious protocol violations after that the daemon stops
      * any further processing.
      *
      * Note that [`Timeout] refers to a timeout in the middle of a request.
      * [`Broken_pipe_ignore] is the "harmless" version of [`Broken_pipe]
      * (see [config_suppress_broken_pipe]).
      *
      * Long messages are fatal because it is suspected that they are denial
      * of service attacks. The kernel generates [`Message_too_long] only for
      * long headers, not for long bodies.
      *
      * Fatal server errors can happen when exceptions are not properly handled.
      * As last resort the HTTP daemon closes the connection without notifying
      * the client.
      *
      * [`TLS_error] means any error on the TLS level. The two strings identify
      * the problem as for {!Netsys_tls.Error}.
     *)

val string_of_fatal_error : fatal_error -> string
  (** Convert error to a string, for logging *)

type bad_request_error =
    [ `Bad_header_field of string
    | `Bad_header
    | `Bad_trailer
    | `Bad_request_line
    | `Request_line_too_long
    | `Protocol_not_supported
    | `Unexpected_eof
    | `Format_error of string
    ]
    (** A bad request is a violation where the current request cannot be
      * decoded, and it is not possible to accept further requests over the
      * current connection.
     *)

val string_of_bad_request_error : bad_request_error -> string
  (** Convert error to a string, for logging *)

val status_of_bad_request_error : bad_request_error -> http_status
  (** Returns the best response code for the error *)


type data_chunk = string * int * int
    (** A [data_chunk] is a substring of a string. The substring is described by
      * the triple [(s, pos, len)] where [s] is the container, [pos] is the
      * position where the substring begins, and [len] its length.
     *)

type status_line = int * string
    (** = (code, phrase) *)

type transfer_coding =
    [ `Identity
    | `Chunked
    ]

type resp_token =
    [ `Resp_info_line of (status_line * http_header)
    | `Resp_status_line of status_line
    | `Resp_header of http_header
    | `Resp_body of data_chunk
    | `Resp_trailer of http_trailer
    | `Resp_end
    | `Resp_action of (unit -> unit)
    ]
    (** The [resp_token] represents a textual part of the response to send:
      * - [`Resp_info_line] is an informational status line (code=100..199). There can
      *   be several informational lines, and they can be accompanied with their own
      *   headers. Such lines are only sent to HTTP/1.1 clients.
      * - [`Resp_status_line] is the final status line to send (code >= 200)
      * - [`Resp_header] is the whole response header to send
      * - [`Resp_body] is the next part of the response body to send.
      * - [`Resp_trailer] is the whole response trailer to send (currently ignored)
      * - [`Resp_action] is special because it does not directly represent a token
      *   to send. The argument is a function which is called when the token is
      *   the next token on the active event queue. The function is also called when
      *   the event queue is dropped because of an error (the state of the
      *   response object indicates this). The function must not raise exceptions
      *   except [Unix_error], and it must not block.
     *)

val resp_100_continue : resp_token
  (** The predefined token for the "100 Continue" response *)

type resp_state =
    [ `Inhibited | `Queued | `Active | `Processed | `Error | `Dropped ]
    (** The response state:
      * - [`Inhibited] = it is not yet allowed to start the response
      * - [`Queued] = the response waits on the queue for activation
      * - [`Active] = the response is currently being transmitted
      * - [`Processed] = the response has been completely sent
      * - [`Error] = an error occurred during the transmission of this response
      * - [`Dropped] = an earlier response forced to close the connection, and
      *   this response is dequeued
     *)

type front_token = 
   [ `Resp_wire_data of data_chunk
   | `Resp_end 
   ]
   (** Tokens generated by [http_response]:
     * - [`Resp_wire_data] are data tokens.
     * - [`Resp_end] indicates the end of the response.
    *)

exception Send_queue_empty


type announcement =
    [`Ignore | `Ocamlnet | `Ocamlnet_and of string | `As of string ]
  (** See config *)


(** Encapsultation of the HTTP response for a single request *)
class type http_response  =
object
  (** Represents the action of sending the response
    *
    * This class has an internal
    * queue of response tokens that are not yet processed. One can easily add
    * new tokens to the end of the queue ([send]). 
    *
    * The class is responsible for determining the transfer encoding:
    * - When the HTTP version is 1.0, the encoding is always "identity", and the
    *   connection will be closed after the response.
    * - When the HTTP version is 1.1, and there is a Content-length header,
    *   the encoding will be selected as "identity". It is checked whether the
    *   body has really this length. If too short, it is suggested to close
    *   the connection. If too long, the extra part of the message is silently
    *   dropped.
    * - When the HTTP version is 1.1, and there is no Content-length header,
    *   the encoding will be selected as "chunked".
    *
    * Currently, the [TE] request header is not taken into account. The trailer
    * is always empty.
    *
    * The following headers are set (or removed) by this class:
    * - [Transfer-Encoding]
    * - [Trailer]
    * - [Date]
    * - [Connection]
    * - [Upgrade]
    * - [Server] (it is appended to this field)
    *
    * Responses for HEAD requests have the special behaviour that the body is silently
    * dropped. The calculation of header fields is not affected by this. This means
    * that HEAD can be easily implemented by doing the same as for GET.
    *
    * Responses for other requests that must not include a body must set
    * [Content-Length] to 0.
   *)

  (** These methods can be called by the content provider: *)

  method state : resp_state
    (** Reports the state. The initial state is [`Inhibited] *)

  method bidirectional_phase : bool
    (** The bidrectional phase starts after "100 Continue" has been sent to the
      * client, and stops when the response body begins. The bidirectional phase
      * is special for the calculation of timeout values (input determines the timeout
      * although the response has started).
     *)

  method send : resp_token -> unit
    (** Add token to the end of the send queue *)

  method send_queue_empty : bool
    (** Return whether the send queue is empty. When the state is [`Inhibited], this
     * method fakes an empty queue.
     *)

  method protocol : protocol
    (** The HTTP version of the response. This is currently always HTTP/1.1, but maybe
      * we need to fake lower versions for buggy clients. Let's see what comes.
     *)

  method close_connection : bool
    (** Returns whether the connection should be closed after this response.
      * This flag should be evaluated when the [`Resp_end] front token has been
      * reached.
     *)

  method transfer_encoding : transfer_coding
    (** Returns the selected transfer encoding. This is valid after the header
      * has been passed to this object with [send].
     *)

  method front_token : front_token
    (** The first token of the queue, represented as [data_chunk]. Raises 
      * [Send_queue_empty] when there is currently no front token, or the state
      * is [`Inhibited].
      * If there is a front token, it will never have length 0.
      *
      * Note that [Unix_error] exceptions can be raised when [`Resp_action]
      * tokens are processed.
     *)

  method set_callback : (unit -> unit) -> unit
    (** The function will be called when either [set_state] changes the state,
      * or when the send queue becomes empty. Note that the callback must never
      * fail, it is called in situations that make it hard to recover from errors.
     *)

  method body_size : int64
    (** Accumulated size of the response body *)

  (** These methods must only be called by the HTTP protocol processor: *)

  method set_state : resp_state -> unit
    (** Sets the state *)

  method advance : int -> unit
    (** Tell this object that [n] bytes of the front token could be really
      * sent using [Unix.write]. If this means that the whole front token
      * has been sent, the next token is pulled from the queue and is made
      * the new front token. Otherwise, the data chunk representing the
      * front token is modified such that the position is advanced by
      * [n], and the length is reduced by [n].
     *)

end

class http_response_impl : ?close:bool -> ?suppress_body:bool -> int64 -> protocol -> announcement -> http_response
  (** Exported for debugging and testing only *)

val send_static_response : 
      http_response -> 
      http_status -> http_header option -> string -> unit
  (** Sends the string argument as response body, together with the given status and
    * the header (optional). Response header fields are set as follows:
    * - The [Content-Length] is set to the length of the string.
    * - The [Content-Type] is set to "text/html" unless given by the header.
    * If the header object is passed in, these modifications are done 
    * directly in this object as side effect.
   *)

val send_file_response : http_response -> 
                         http_status -> http_header option ->
                            Unix.file_descr -> int64 -> unit
  (** Sends the contents of a file as response body, together with the given status and
    * the header (optional). The descriptor must be a file descriptor (that cannot
    * block). The int64 number is the length
    * of the body.  Response header fields are set as follows:
    * - The [Content-Length] is set to the length of the string.
    * - The [Content-Type] is set to "text/html" unless given by the header.
    *
    * Note that [Content-Range] is not set automatically, even if the file is only
    * partially transferred.
    *
    * If the header object is passed in, these modifications are done 
    * directly in this object as side effect.
    *
    * The function does not send the file immediately, but rather sets the [http_response]
    * object up that the next chunk of the file is added when the send queue becomes
    * empty. This file will be closed when the transfer is done.
   *)

type request_line = http_method * protocol
  (** The method (including the URI), and the HTTP version *)

type req_token =
    [ `Req_header of request_line * http_header * http_response
    | `Req_expect_100_continue
    | `Req_body of data_chunk
    | `Req_trailer of http_trailer
    | `Req_end
    | `Eof
    | `Fatal_error of fatal_error
    | `Bad_request_error of bad_request_error * http_response
    | `Timeout
    ]
    (** A [req_token] represents a textual part of the received request:
      * - [`Req_header] is the full received header. Together with the header,
      *   the corresponding [http_response] object is returned which must
      *   be used to transmit the response.
      * - [`Req_expect_100_continue] is generated when the client expects that the
      *   server sends a "100 Continue" response (or a final status code) now.
      *   One should add [`Resp_info_line resp_100_continue] to the send queue
      *   if the header is acceptable, or otherwise generate an error response. In any
      *   case, the rest of the request must be read until [`Req_end].
      * - [`Req_body] is a part of the request body. The transfer-coding, if any,
      *   is already decoded.
      * - [`Req_trailer] is the received trailer
      * - [`Req_end] indicates the end of the request (the next request may begin
      *   immediately).
      * - [`Eof] indicates the end of the stream
      * - [`Bad_request_error] indicates that the request violated the HTTP protocol
      *   in a serious way and cannot be decoded. It is required to send a 
      *   "400 Bad Request" response. The following token will be [`Eof].
      * - [`Fatal_error] indicates that the connection crashed. 
      *   The following token will be [`Eof].
      * - [`Timeout] means that nothing has been received for a certain amount
      *   of time, and the protocol is in a state that the next request can begin.
      *   The following token will be [`Eof].
      *
      * Note that it is always allowed to [send] tokens to the client. The protocol
      * implementation takes care that the response is transmitted at the right point
      * in time.
     *)

val string_of_req_token : req_token -> string
  (** For debugging *)

exception Recv_queue_empty

(** Configuration values for the HTTP kernel *)
class type http_protocol_config =
object
  method config_max_reqline_length : int
    (** Maximum size of the request line. Longer lines are immediately replied with
      * a "Request URI too long" response. Suggestion: 32768.
     *)

  method config_max_header_length : int
    (** Maximum size of the header, {b including} the request line. Longer headers
      * are treated as attack, and cause the fatal error [`Message_too_long].
      * Suggestion: 65536.
     *)

  method config_max_trailer_length : int
    (** Maximum size of the trailer *)

  method config_limit_pipeline_length : int
    (** Limits the length of the pipeline (= unreplied requests). A value of 0
      * disables pipelining. A value of n allows that another request is
      * received although there are already n unreplied requests.
     *)

  method config_limit_pipeline_size : int
    (** Limits the size of the pipeline in bytes. If the buffered bytes in the
      * input queue exceed this value, the receiver temporarily stops reading
      * more data. The value 0 has the effect that even the read-ahead of
      * data of the current request is disabled. The value (-1) disables the
      * receiver completely (not recommended).
     *)

  method config_announce_server : announcement
    (** Whether to set the [Server] header:
     * - [`Ignore]: The kernel does not touch the [Server] header.
     * - [`Ocamlnet]: Announce this web server as "Ocamlnet/<version>"
     * - [`Ocamlnet_and s]: Announce this web server as [s] and append
     *   the Ocamlnet string.
     * - [`As s]: Announce this web server as [s]
     *)

  method config_suppress_broken_pipe : bool
    (** Whether to suppress [`Broken_pipe] errors. Instead 
      * [`Broken_pipe_ignore] is reported.
     *)

  method config_tls : Netsys_crypto_types.tls_config option
    (** If set, the server enables TLS with the arugment config *)

end


val default_http_protocol_config : http_protocol_config 
  (** Default config:
      - [config_max_reqline_length = 32768]
      - [config_max_header_length = 65536]
      - [config_max_trailer_length = 32768]
      - [config_limit_pipeline_length = 5]
      - [config_limit_pipeline_size = 65536]
      - [config_announce_server = `Ocamlnet]
      - [config_suppress_broken_pipe = false]
      - [config_tls = None]
   *)

class modify_http_protocol_config : 
        ?config_max_reqline_length:int ->
        ?config_max_header_length:int ->
        ?config_max_trailer_length:int ->
        ?config_limit_pipeline_length:int ->
        ?config_limit_pipeline_size:int ->
        ?config_announce_server:announcement ->
        ?config_suppress_broken_pipe:bool ->
        ?config_tls:Netsys_crypto_types.tls_config option ->
        http_protocol_config -> http_protocol_config 
  (** Modifies the passed config object as specified by the optional
      arguments
   *)

(** Allows it to set hooks for {!Nethttpd_kernel.http_protocol} *)
class type http_protocol_hooks =
object
  method tls_set_cache : store:(string -> string -> unit) ->
                         remove:(string -> unit) ->
                         retrieve:(string -> string) ->
                            unit
    (** Sets the functions for accessing the session cache *)
end


(** The core event loop of the HTTP daemon *)
class http_protocol : #http_protocol_config -> Unix.file_descr ->
object
  (** Exchange of HTTP messages
    *
    * In [fd] one must pass the already connected socket. It must be in non-
    * blocking mode.
    *
    * How to use this class: Basically, one invokes [cycle] until the whole
    * message exchange on [fd] is processed. [cycle] receives data from the
    * socket and sends data to the socket. There are two internal queues:
    *
    * The receive queue stores parts of received requests as [req_token].
    * One can take values from the front of this queue by calling [receive].
    *
    * The response queue stores [http_response] objects. Each of the objects
    * corresponds to a request that was received before. This queue is handled
    * fully automatically, but one can watch its length to see whether all responses
    * are actually transmitted over the wire.
    *
    * The basic algorithm to process messages is:
    *
    * {[
    * let rec next_token () =
    *   if proto # recv_queue_len = 0 then (
    *     proto # cycle ();
    *     next_token()
    *   )
    *   else
    *     proto # receive()
    *
    * let cur_token = ref (next_token()) in
    * while !cur_token <> `Eof do
    *   (* Process first token of next request: *)
    *   match !cur_token with
    *    | `Req_header(req_line, header, resp) ->
    *         (* Depending on [req_line], read further tokens until [`Req_end] *)
    *         ...
    *         (* Switch to the first token of the next message: *)
    *         cur_token := next_token()
    *    | `Timeout -> ...
    *    | `Bad_request_error(e,resp) -> 
    *          (* Generate 400 error, send it to [resp] *)
    *          ...
    *          (* Switch to the first token of the next message: *)
    *          cur_token := next_token()
    *    | `Fatal_error e -> failwith "Crash"
    *    | _ -> assert false
    * done;
    * while proto # resp_queue_len > 0 do
    *   proto # cycle ();
    * done;
    * proto # shutdown()
    * ]}
    *
    * See the file [tests/easy_daemon.ml] for a complete implementation of this.
    *
    * As one can see, it is essential to watch the lengths of the queues in order
    * to figure out what has happened during [cycle].
    *
    * When the body of the request is empty, [`Req_body] tokens are omitted.
    * Note that for requests like [GET] that always have an empty body, it is
    * still possible that an errorneous client sends a body, and that [`Req_body]
    * tokens arrive. One must accept and ignore these tokens.
    *
    * Error handling: For serious errors, the connection is immediately aborted.
    * In this case, [receive] returns a [`Fatal_error] token. Note that the
    * queued responses cannot be sent! An example of this is [`Broken_pipe].
    *
    * There is a large class of non-serious errors, esp. format errors
    * in the header and body. It is typical of these errors that one cannot determine
    * the end of the request properly. For this reason, the daemon stops reading
    * further data from the request, but the response queue is still delivered.
    * For these errors, [receive] returns a [`Bad_request_error] token.
    * This token contains a [http_response] object that must be filled with a
    * 400 error response.
   *)

  method hooks : http_protocol_hooks
    (** Set hooks *)

  method cycle : ?block:float -> unit -> unit
    (** Looks at the file descriptor. If there is data to read from the descriptor,
      * and there is free space in the input buffer, additional data is read into
      * the buffer. It is also tried to interpret the new data as [req_token]s,
      * and if possible, new [req_token]s are appended to the receive queue.
      *
      * If the response queue has objects, and there is really data one can send,
      * and if the socket allows one to send data, it is tried to send as much
      * data as possible.
      *
      * The option [block] (default: 0) can be set to wait until data
      * can be exchanged with the socket. This avoids busy waiting. The number
      * is the duration in seconds to wait until the connection times out
      * (0 means not to wait at all, -1 means to wait infinitely). When a timeout
      * happens, and there is nothing to send, and the last request was fully
      * processed, [receive] will simply return [`Timeout] (i.e. when 
      * [waiting_for_next_message] is [true]). Otherwise, the
      * fatal error [`Timeout] is generated.
     *)

  method receive : unit -> req_token
    (** Returns the first [req_token] from the receive queue. Raises
      * [Recv_queue_empty] when the queue is empty (= has no new data)
     *)

  method peek_recv : unit -> req_token
    (** Peeks the first token, but leaves it in the queue.
      * Raises [Recv_queue_empty] when the queue is empty.
     *)

  method recv_queue_len : int
    (** Returns the length of the receive queue (number of tokens) *)

  method resp_queue_len : int
    (** Returns the length of the internal response queue (number of [http_response]
      * objects that have not yet fully processed)
     *)

  method pipeline_len : int
    (** Returns the number of unanswered requests = Number of received [`Req_end] tokens
      * minus number of responses in state [`Processed]. Note that [pipeline_len]
      * can become [-1] when bad requests are responded.
     *)

  method recv_queue_byte_size : int
    (** Returns the (estimated) size of the input queue in bytes *)

  method waiting_for_next_message : bool
    (** Whether the kernel is currently waiting for the beginning of a new
      * arriving HTTP request. This is [false] while the request is being
      * received.
     *)

  method input_timeout_class : [ `Normal | `Next_message | `None ]
    (** Suggests the calculation of a timeout value for input:
      * - [`Normal]: The normal timeout value applies
      * - [`Next_message]: The timeout value applies while waiting for the next message
      * - [`None]: The connection is output-driven, no input timeout value
     *)

  method shutdown : unit -> unit
    (** Shuts the socket down. Note: the descriptor is not closed.

        TLS: You need to further [cycle] until XXX.
     *)

  method timeout : unit -> unit
    (** Process a timeout condition as [cycle] does *)

  method abort : fatal_error -> unit
    (** Stops the transmission of data. The receive queue is cleared and filled
      * with the two tokens [`Fatal_error] and [`Eof]. 
      * The response queue is cleared. The [cycle]
      * method will return immediately without doing anything.
     *)

  method fd : Unix.file_descr
    (** Just returns the socket *)

  method do_input : bool
    (** Returns [true] iff the protocol engine is interested in new data from the
      * socket. Returns [false] after EOF and after errors.
     *)

  method do_output : bool
    (** Returns [true] iff the protocol engine has data to output to the socket *)

  method resp_queue_filled : bool
    (** Whether there is data to send in the internal output queue. If
        TLS is enabled, this is not always the same as [do_output].
     *)

  method need_linger : bool
    (** Returns [true] when a lingering close operation is needed to reliably shut
      * down the socket. In many cases, this expensive operation is not necessary.
      * See the class [lingering_close] below.
     *)

  method tls_session_props : Nettls_support.tls_session_props option
    (** If TLS is enabled, this returns the session properties. These
        are first available after the TLS handshake.
     *)
   
  method config : http_protocol_config
    (** Just returns the configuration *)

  method test_coverage : string list
    (** For testing: returns a list of tokens indicating into which cases the program
      * ran.
     *)

end


(** Closes a file descriptor using the "lingering close" algorithm.
    The optional [preclose] function is called just before [Unix.close].
 *)
class lingering_close : ?preclose:(unit->unit) -> Unix.file_descr ->
object
  (** Closes a file descriptor using the "lingering close" algorithm
    * 
    * Usage:
    * {[ while lc # lingering do lc # cycle ~block:true () done ]}
   *)

  method cycle : ?block:bool -> unit -> unit
    (** Reads data from the file descriptor until EOF or until a fixed timeout
      * is over. Finally, the descriptor is closed. If [block] is set, the method
      * blocks until data is available. (Default: [false])
     *)

  method lingering : bool
    (** Whether the socket is still lingering *)

  method fd : Unix.file_descr
    (** The file descriptor *)
end


(** {1 Debugging} *)

module Debug : sig
  val enable : bool ref
    (** Enables {!Netlog}-style debugging of this module
     *)
end

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml