Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: nethttpd_engine.mli 2195 2015-01-01 12:23:39Z gerd $
 *
 *)

(*
 * Copyright 2005 Baretta s.r.l. and Gerd Stolpmann
 *
 * This file is part of Nethttpd.
 *
 * Nethttpd is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Nethttpd is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Nethttpd; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *)

(** {1 The event-based encapsulation of the HTTP daemon}
  * 
  * This is a more user-friendly encapsulation of the HTTP daemon. It uses
  * the engines defined in the module [Uq_engines].
 *)

(* Integration into event-based server design *)

open Nethttpd_types

(** The processing state of a request, as reported by the [req_state]
  * methods of the notification objects:
  * - [`Received_header]: the state at the moment when the
  *   [http_request_header_notification] is delivered. Only the request
  *   header is accessible; the input and output channels are locked.
  * - [`Receiving_body]: the input channel is unlocked but not yet filled;
  *   the output channel remains locked.
  * - [`Received_request]: the state at the moment when the
  *   [http_request_notification] is delivered. Both channels are unlocked.
  * - [`Finishing]: not described in this interface; presumably the state
  *   entered after [schedule_finish] has been called (TODO: confirm against
  *   the implementation).
 *)
type engine_req_state =
    [ `Received_header
    | `Receiving_body
    | `Received_request 
    | `Finishing
    ]

(** The configuration of an HTTP engine. It inherits all options of
  * [Nethttpd_reactor.http_processor_config] and adds two switches that
  * control the flow of data on the input and the output side.
 *)
class type http_engine_config =
object
  inherit Nethttpd_reactor.http_processor_config

  method config_input_flow_control : bool
    (** If [true], the engine stops reading input tokens from the HTTP kernel
      * while there is data in the input channel of the engine that has not yet
      * been read. If [false], all available input tokens are fetched from the
      * kernel and buffered up in the input channel.
      *
      * In general, this {b should} be set to [true]. However, this is only
      * possible when the user of the engine is prepared for flow control. In
      * particular, all data contained in the input channel must be read
      * immediately, or else the engine blocks. By calling
      * [input_ch_async # request_notification], the user can be notified when
      * there is data to read.
      *
      * When set to [false], the engine never blocks, but the price is that the
      * input channel may become as large as needed to store the whole request.
      *
      * The option [config_limit_pipeline_size] does not have any effect for
      * engines.
     *)

  method config_output_flow_control : bool
    (** If [true], the engine signals the user when there is already enough data
      * to output, and no more output should be generated. The user can query
      * this state by calling [output_ch_async # can_output], and react
      * accordingly. The user can also ignore this signal; the output channel
      * then buffers all data.
      *
      * If [false], the mentioned method [can_output] always returns [true].
      * This turns off flow control in the case that it is implemented by the
      * user of the engine, but is actually not wanted.
      *
      * The internal processing of data is not affected by this configuration
      * option. If in doubt, set it to [true].
     *)

end


val default_http_engine_config : http_engine_config
  (** The default engine configuration. The two engine-specific options
      have these values:
       - [config_input_flow_control=false]
       - [config_output_flow_control=true]
   *)

class modify_http_engine_config :
        ?modify_http_protocol_config:
           (Nethttpd_kernel.http_protocol_config -> 
              Nethttpd_kernel.http_protocol_config) ->
        ?modify_http_processor_config:
           (Nethttpd_reactor.http_processor_config -> 
              Nethttpd_reactor.http_processor_config) ->
        ?config_input_flow_control:bool ->
        ?config_output_flow_control:bool ->
         http_engine_config ->  http_engine_config
  (** Modifies the passed config object as specified by the optional
      arguments. The result is a new [http_engine_config].

      [modify_http_protocol_config] and [modify_http_processor_config]:
      These functions can be used to modify the
      parts of the config object that are inherited from [http_protocol_config]
      and [http_processor_config], respectively.

      [config_input_flow_control] and [config_output_flow_control]:
      Presumably these replace the values of the methods of the same name
      (this is not spelled out in this interface; confirm against the
      implementation).

      For example:

      {[
        let new_cfg =
          new modify_http_engine_config
            ~modify_http_protocol_config:
               (new Nethttpd_kernel.modify_http_protocol_config
                  ~config_suppress_broken_pipe:true)
            ~modify_http_processor_config:
               (new Nethttpd_reactor.modify_http_processor_config
                  ~config_timeout:15.0)
            old_cfg
      ]}
   *)


class type extended_async_environment =
object
  inherit extended_environment

  (** Environment also providing asynchronous views to I/O *)

  method input_ch_async : Uq_engines.async_in_channel
    (** This is the [input_ch] channel viewed as an asynchronous channel. This
      * type of channel indicates when data is available to read, and also sends
      * notifications. Note that this is only an alternate interface to the
      * [input_ch] object.
      *
      * The method [can_input] returns [true] when there is at least one byte of
      * the body to read, or the EOF has been seen. The channel buffers any
      * arriving data (which can be limited in amount by [config_pipeline_size]).
      *
      * NOTE(review): no option named [config_pipeline_size] is visible in this
      * interface; presumably [config_limit_pipeline_size] is meant. The
      * description of [config_input_flow_control] in [http_engine_config]
      * states that this option has no effect for engines, so the remark about
      * limiting the buffered amount should be confirmed against the
      * implementation.
      *
      * The behaviour of this channel is influenced by the configuration option
      * [config_input_flow_control].
     *)

  method output_ch_async : Uq_engines.async_out_channel
    (** This is the [output_ch] channel viewed as an asynchronous channel. This
      * type of channel indicates when space is available for further output,
      * and also sends notifications. 
      * Note that this is only an alternate interface to the [output_ch] object.
      *
      * The method [can_output] returns [true] only when the internal buffer is
      * empty, i.e. all data have been transmitted to the client. Independently
      * of this, the channel buffers all data written to it.
      *
      * The behaviour of this channel is influenced by the configuration option
      * [config_output_flow_control].
     *)
end


class type http_request_header_notification =
object
  (** Notification that a new request header has arrived
    *
    * This object notifies the user that a new request header has arrived.
    * The header is accessible by the [environment] object. The channels
    * also contained in this object are locked at this moment. The user must
    * now either call [schedule_accept_body] or [schedule_reject_body]. The
    * user will get a second notification (a [http_request_notification], below)
    * when the request body has completely arrived (in case of acceptance), or
    * immediately (in case of rejection). One can also call [schedule_finish]
    * at any time to drop the current request.
   *)
  
  method req_state : engine_req_state
    (** Returns the request processing state, which is [`Received_header] at the
      * moment when this notification is delivered.
     *)

  method environment : extended_async_environment
    (** The request environment. Depending on the request processing state, parts
      * of the environment are already set up or still inaccessible ("locked"):
      * - In the state [`Received_header] only the request header and the 
      *   derived fields are accessible, and the input and output channels are
      *   locked.
      * - In the state [`Receiving_body] the input channel is unlocked,
      *   but it is not yet filled (reading from it may cause the exception 
      *   [Buffer_underrun]). The output channel remains locked.
      * - In the state [`Received_request], the input channel is unlocked and
      *   filled with data, and the output channel is unlocked, too.
      *
      * This environment is not fully CGI-compatible. In particular, the following
      * differences exist:
      * - There is no [cgi_path_info] and no [cgi_path_translated].
      * - The user is always unauthenticated.
      * - The [Status] response header works as in CGI. The [Location] header,
      *   however, must be a full URL when set (only browser redirects).
      * - When the request body is transmitted by chunked encoding, the header
      *   [Content-Length] is not set. In CGI this is interpreted as missing body.
      *   It is unlikely that clients send requests with chunked encoding, as this
      *   may cause interoperability problems anyway.
      *   
     *)

  method schedule_accept_body : on_request:(http_request_notification -> unit) ->
                               ?on_error:(unit -> unit) -> 
                               unit -> unit
    (** Schedules that the request body is accepted. In terms of HTTP, this sends
      * the "100 Continue" response when necessary. After the body has arrived,
      * one can reply with a positive or negative message.
      *
      * This method returns immediately, and sets callbacks for certain events.
      * When the body has completely arrived (or is empty), the function 
      * [on_request] is called back. The argument is the full request notification
      * (a [http_request_notification]).
      *
      * When the request is dropped for some reason, [on_error] is called back
      * instead. This can be used to free resources, for example.
      *
      * The callbacks must not raise exceptions.
     *)

  method schedule_reject_body : on_request:(http_request_notification -> unit) ->
                               ?on_error:(unit -> unit) -> 
                               unit -> unit
    (** Schedules that the request body is rejected. In terms of HTTP, this
      * prevents sending the "100 Continue" response. Any arriving request body
      * is silently discarded. One should immediately reply with an error
      * message.
      *
      * This method returns immediately, and sets callbacks for certain events.
      * When the body has completely arrived (or is empty), the function 
      * [on_request] is called back. The argument is the full request notification
      * (a [http_request_notification]).
      *
      * NOTE(review): the description of this class type says that the second
      * notification is delivered immediately in case of rejection, whereas the
      * paragraph above ties it to the complete arrival of the body. Confirm
      * against the implementation which of the two applies.
      *
      * When the request is dropped for some reason, [on_error] is called back
      * instead. This can be used to free resources, for example.
      *
      * The callbacks must not raise exceptions.
     *)

  method schedule_finish : unit -> unit
    (** Schedules that the request is finished. This method should be called after
      * the regular processing of the request to ensure that the HTTP protocol
      * is fulfilled. If the request body has not been
      * fully read, this is now done, and its data are dropped. If the response
      * is incomplete, it is completed. If an error is not recoverable, a
      * "Server Error" is generated.
     *)

end


and http_request_notification =
object
  (** Notification that the whole request has arrived
    *
    * This object notifies the user that the request has fully arrived (including
    * the body if accepted), and that a response can now be generated. The
    * [environment] is the same as in the request header notification, but the
    * channels are now unlocked.
   *)

  method req_state : engine_req_state
    (** Returns the request processing state, which is [`Received_request] at the
      * moment when this notification is delivered.
     *)

  method environment : extended_async_environment
    (** The request environment. It is documented in detail at the method of the
      * same name in [http_request_header_notification].
     *)

  method schedule_finish : unit -> unit
    (** Schedules that the request is finished. It is documented in detail at
      * the method of the same name in [http_request_header_notification].
     *)

end


class http_engine : 
                 ?config_hooks:(Nethttpd_kernel.http_protocol_hooks -> unit) ->
                 on_request_header:(http_request_header_notification -> unit) ->
                    unit -> 
                    #http_engine_config -> Unix.file_descr -> 
                    Unixqueue.unix_event_system ->
                      [unit] Uq_engines.engine
  (** This engine processes the requests arriving on the file descriptor using
    * the Unix event system. Whenever a new request header arrives, the function
    * [on_request_header] is called back, and must handle the request.
    *
    * The optional function [config_hooks] takes the
    * [Nethttpd_kernel.http_protocol_hooks] object as argument; presumably it is
    * invoked so that the caller can set protocol hooks (not described in this
    * interface; confirm against the implementation).
    *
    * Unless aborted using the [abort] method, this engine is always successful.
    * Errors are logged, but not returned as result.
    *
    * The file descriptor is closed after processing all HTTP requests and
    * responses. It is also closed on error and when the engine is aborted.
    *
    * An aborted engine does not try to clean up resources external to the 
    * engine, e.g. by calling the [on_error] functions. This is up to the user.
   *)


(** The configuration that determines how the asynchronous channels of the
  * engine are turned into the synchronous channels [Netchannels.in_obj_channel]
  * and [Netchannels.out_obj_channel]. It is passed to [process_connection].
 *)
class type http_engine_processing_config =
object
  method config_synch_input : 
           (Netchannels.in_obj_channel -> unit) ->
           Uq_engines.async_in_channel ->
           unit
    (** The "input synchronizer": It is called as [obj # config_synch_input f ch]
      * to create a synchronous input channel from an asynchronous one, [ch].
      * The function [f] must be called back by the synchronizer when
      * synchronisation is established, with the synchronous channel [ch'] as
      * argument. In particular, the task of the synchronizer is to turn blocking
      * reads of [ch'] into non-blocking reads of [ch]. In general there are two
      * ways of implementing this:
      * - Buffer all input from [ch] until the end of the channel is reached,
      *   then call [f] with a wrapper channel [ch'] that just reads from the
      *   buffer.
      * - Run [f] in a different thread that blocks whenever there is nothing to
      *   read from [ch]. 
      *
      * Both implementations are allowed, i.e. {b it is allowed that [f] runs in
      * a different thread}.
      *
      * CHECK (open question): How to handle exceptions raised from [f]? Idea:
      * [f] is obliged to close [ch'] in this case, even if [ch] is not yet at
      * the end. The rest of exception handling is up to the user. - The
      * complement must also be true: When there is an error in the engine,
      * [ch] must be closed to signal the other thread that we have a problem.
     *)

  method config_synch_output : 
           (Netchannels.out_obj_channel -> unit) ->
           Uq_engines.async_out_channel ->
           unit
    (** The "output synchronizer": It is called as
      * [obj # config_synch_output f ch] to create a synchronous output channel
      * from an asynchronous one, [ch].
      * The function [f] must be called back by the synchronizer when
      * synchronisation is established, with the synchronous channel [ch'] as
      * argument. In particular, the task of the synchronizer is to turn blocking
      * writes to [ch'] into non-blocking writes to [ch]. In general there are
      * two ways of implementing this:
      * - Call [f], then buffer all output to [ch'] until the end of the channel
      *   is reached, and finally output the contents of the buffer in an
      *   asynchronous way.
      * - Run [f] in a different thread that blocks whenever there is no space to
      *   write to [ch]. 
      *
      * Both implementations are allowed, i.e. {b it is allowed that [f] runs in
      * a different thread}.
      *
      * CHECK (open question): The handling of exceptions is not yet specified.
     *)
end

class buffering_engine_processing_config : http_engine_processing_config
  (** An [http_engine_processing_config] that implements the synchronisation
    * of the input and output channels by buffering.
   *)

(** The object returned by [process_connection]. It gives access to the
  * engine that was set up for the connection.
 *)
class type http_engine_processing_context =
object

  method engine : unit Uq_engines.engine
    (** The engine doing HTTP *)

end

val process_connection :
      ?config_hooks:(Nethttpd_kernel.http_protocol_hooks -> unit) ->
      #Nethttpd_reactor.http_processor_config ->
      #http_engine_processing_config ->
      Unix.file_descr ->
      Unixqueue.unix_event_system ->
      'a http_service ->
        http_engine_processing_context
  (** Sets up an engine that processes all requests using the service description.
    * This function returns immediately; one needs to [Unixqueue.run] the event
    * system to start the engine. The engine is available as the [engine]
    * method of the returned context object.
    *
    * The passed [http_engine_processing_config] is crucial for good performance.
    *
    * TODO: The original documentation ends with an "XXX" placeholder here;
    * the performance implications of the processing config and the role of
    * the optional [config_hooks] argument still need to be described.
   *)

(** {1 Debugging} *)

module Debug : sig
  val enable : bool ref
    (** Enables {!Netlog}-style debugging of this module. The flag is a
      * reference, so it can be changed at runtime.
     *)
end

This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml