(* $Id: nethttpd_engine.mli 1411 2010-02-14 19:49:46Z gerd $
*
*)
(*
* Copyright 2005 Baretta s.r.l. and Gerd Stolpmann
*
* This file is part of Nethttpd.
*
* Nethttpd is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Nethttpd is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Nethttpd; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
(** {1 The event-based encapsulation of the HTTP daemon}
*
 * This is a more user-friendly encapsulation of the HTTP daemon. It uses
* the engine module defined in [Uq_engines].
*)
(* Integration into event-based server design *)
open Nethttpd_types
type engine_req_state =
[ `Received_header
| `Receiving_body
| `Received_request
| `Finishing
]
class type http_engine_config =
object
inherit Nethttpd_reactor.http_processor_config
method config_input_flow_control : bool
(** If [true], the engine stops reading input tokens from the HTTP kernel when
* there is data in the input channel of the engine not yet read. If [false],
* all available input tokens are fetched from the kernel and buffered up
* in the input channel.
*
* In general, this {b should} be set to [true]. However, this is only possible
* when the user of the engine is prepared for flow control. In particular,
* all data contained in the input channel must be immediately read, or else
* the engine blocks. By calling [input_ch_async # request_notification], the
* user can be notified when there is data to read.
*
* When set to [false], the engine never blocks, but the price is that the
* input channel may become as large as needed to store the whole request.
*
* The option [config_limit_pipeline_size] does not have any effect for engines.
*)
method config_output_flow_control : bool
(** If [true], the engine signals the user when there is already enough data
* to output, and no more output should be generated. The user can query
* this state by calling [output_ch_async # can_output], and react
* accordingly. The user can also ignore this signal, and the output channel
* buffers all data.
*
 * If [false], the mentioned method [can_output] always returns [true]. This
 * effectively disables flow control in the case that it is implemented by the
 * user of the engine but is actually not wanted.
*
* The internal processing of data is not affected by this configuration option.
* In doubt, set it to [true].
*)
end
val default_http_engine_config : http_engine_config
(** The default config:
- [config_input_flow_control=false]
- [config_output_flow_control=true]
*)
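(** For example, a config that additionally enables input flow control can be
    derived from this default (a minimal sketch, using
    [modify_http_engine_config] declared below):
    {[
      let cfg =
        new modify_http_engine_config
          ~config_input_flow_control:true
          default_http_engine_config
    ]}
 *)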
class modify_http_engine_config :
?modify_http_protocol_config:
(Nethttpd_kernel.http_protocol_config ->
Nethttpd_kernel.http_protocol_config) ->
?modify_http_processor_config:
(Nethttpd_reactor.http_processor_config ->
Nethttpd_reactor.http_processor_config) ->
?config_input_flow_control:bool ->
?config_output_flow_control:bool ->
http_engine_config -> http_engine_config
(** Modifies the passed config object as specified by the optional
arguments.
[modify_http_protocol_config] and [modify_http_processor_config]:
These functions can be used to modify the
parts of the config object that are inherited from [http_protocol_config]
    and [http_processor_config], respectively. For example:
{[
let new_cfg =
new modify_http_engine_config
~modify_http_protocol_config:
(new Nethttpd_kernel.modify_http_protocol_config
~config_suppress_broken_pipe:true)
~modify_http_processor_config:
(new Nethttpd_reactor.modify_http_processor_config
~config_timeout:15.0)
old_cfg
]}
*)
class type extended_async_environment =
object
inherit extended_environment
(** Environment also providing asynchronous views to I/O *)
method input_ch_async : Uq_engines.async_in_channel
 (** This is the [input_ch] channel taken as an asynchronous channel. This type of
* channel indicates when data is available to read, and also sends notifications.
* Note that this is only an alternate interface of the [input_ch] object.
*
* The method [can_input] returns true when there is at least one byte of
* the body to read, or the EOF has been seen. The channel buffers any arriving
* data (which can be limited in amount by [config_pipeline_size]).
*
* The behaviour of this channel is influenced by the configuration option
* [config_input_flow_control].
*)
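 (** For illustration, a minimal sketch of driving this channel under input
     flow control. Here [env] is assumed to be the environment of a request,
     and [consume_input] a hypothetical user function that reads the currently
     available data from [env # input_ch]:
     {[
       let rec react() =
         if env # input_ch_async # can_input then
           consume_input env     (* read now, so the engine can go on *)
         else
           env # input_ch_async # request_notification
             (fun () -> react(); false)
             (* returning [false]: the callback is one-shot; [react]
                re-registers it whenever there is nothing to read *)
     ]}
  *)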
method output_ch_async : Uq_engines.async_out_channel
 (** This is the [output_ch] channel taken as an asynchronous channel. This type of
* channel indicates when space is available for further output, and also sends
* notifications.
* Note that this is only an alternate interface of the [output_ch] object.
*
* The method [can_output] returns [true] only when the internal buffer is empty,
* i.e. all data have been transmitted to the client. Independent of this, the
* channel buffers all data written to it.
*
* The behaviour of this channel is influenced by the configuration option
* [config_output_flow_control].
*)
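 (** A sketch of cooperative output generation under output flow control.
     Here [env] is the request environment, and [generate_next_chunk] a
     hypothetical user function writing the next piece of the response to
     [env # output_ch]:
     {[
       let rec produce() =
         if env # output_ch_async # can_output then
           generate_next_chunk env
         else
           env # output_ch_async # request_notification
             (fun () -> produce(); false)
     ]}
     With [config_output_flow_control=false] the [can_output] test always
     succeeds, and the channel simply buffers everything.
  *)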
end
class type http_request_header_notification =
object
(** Notification that a new request header has arrived
*
* This object notifies the user that a new request header has arrived.
* The header is accessible by the [environment] object. The channels
* also contained in this object are locked at this moment. The user must
* now either call [schedule_accept_body] or [schedule_reject_body]. The
* user will get a second notification (a [http_request_notification], below)
* when the request body has completely arrived (in case of acceptance), or
* immediately (in case of rejection). One can also call [schedule_finish]
* at any time to drop the current request.
*)
method req_state : engine_req_state
(** Returns the request processing state which is [`Received_header] at the
* moment when this notification is delivered.
*)
method environment : extended_async_environment
(** The request environment. Depending on the request processing state, parts
 * of the environment are already set up or still inaccessible ("locked").
* In the state [`Received_header] only the request header and the
* derived fields are accessible, and the input and output channels are
* locked. In the state [`Receiving_body] the input channel is unlocked,
* but it is not yet filled (reading from it may cause the exception
* [Buffer_underrun]). The output channel remains locked.
* In the state [`Received_request], the input channel is unlocked and filled
* with data, and the output channel is unlocked, too.
*
* This environment is not fully CGI-compatible. In particular, the following
* differences exist:
* - There is no [cgi_path_info] and no [cgi_path_translated].
* - The user is always unauthenticated.
 * - The [Status] response header works as in CGI. The [Location] header, however,
 *   must be a full URL when set (only browser redirects are supported).
* - When the request body is transmitted by chunked encoding, the header
* [Content-Length] is not set. In CGI this is interpreted as missing body.
* It is unlikely that clients send requests with chunked encoding, as this
* may cause interoperability problems anyway.
*
*)
method schedule_accept_body : on_request:(http_request_notification -> unit) ->
?on_error:(unit -> unit) ->
unit -> unit
(** Schedules that the request body is accepted. In terms of HTTP, this sends the
* "100 Continue" response when necessary. One can reply with a positive or
* negative message.
*
* This method returns immediately, and sets callbacks for certain events.
* When the body has completely arrived (or is empty), the function
* [on_request] is called back. The argument is the full request notification
* (see below).
*
* When the request is dropped for some reason, [on_error] is called back instead.
* This can be used to free resources, for example.
*
 * Neither callback may raise exceptions.
*)
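 (** A typical call, assuming [req] is this notification object and [respond]
     and [release_resources] are hypothetical user functions:
     {[
       req # schedule_accept_body
         ~on_request:(fun full_req -> respond full_req)
         ~on_error:(fun () -> release_resources())
         ()
     ]}
  *)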
method schedule_reject_body : on_request:(http_request_notification -> unit) ->
?on_error:(unit -> unit) ->
unit -> unit
 (** Schedules that the request body is rejected. In terms of HTTP, this prevents
 * sending the "100 Continue" response. Any arriving request body is silently
 * discarded. One should immediately reply with an error message.
*
* This method returns immediately, and sets callbacks for certain events.
* When the body has completely arrived (or is empty), the function
* [on_request] is called back. The argument is the full request notification
* (see below).
*
* When the request is dropped for some reason, [on_error] is called back instead.
* This can be used to free resources, for example.
*
 * Neither callback may raise exceptions.
*)
method schedule_finish : unit -> unit
(** Schedules that the request is finished. This method should be called after
* the regular processing of the request to ensure that the HTTP protocol
* is fulfilled. If the request body has not been
* fully read, this is now done, and its data are dropped. If the response
 * is incomplete, it is completed. If an unrecoverable error occurred, a "Server
 * Error" response is generated.
*)
end
and http_request_notification =
object
(** Notification that the whole request has arrived
*
* This object notifies the user that the request has fully arrived (including
* the body if accepted), and can now be responded. The [environment] is the
* same as in the request header notification, but the channels are now
* unlocked.
*)
method req_state : engine_req_state
(** Returns the request processing state which is [`Received_request] at the
* moment when this notification is delivered.
*)
method environment : extended_async_environment
(** The request environment. See above. *)
method schedule_finish : unit -> unit
(** Schedules that the request is finished. See above. *)
end
class http_engine : on_request_header:(http_request_header_notification -> unit) ->
unit ->
#http_engine_config -> Unix.file_descr ->
Unixqueue.unix_event_system ->
[unit] Uq_engines.engine
(** This engine processes the requests arriving on the file descriptor using
* the Unix event system. Whenever a new request header arrives, the function
* [on_request_header] is called back, and must handle the request.
*
* Unless aborted using the [abort] method, this engine is always successful.
* Errors are logged, but not returned as result.
*
* The file descriptor is closed after processing all HTTP requests and
* responses. It is also closed on error and when the engine is aborted.
*
* An aborted engine does not try to clean up resources external to the
* engine, e.g. by calling the [on_error] functions. This is up to the user.
*)
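(** For illustration, a minimal sketch of a complete setup. [fd] is assumed to
    be an already accepted connection; error handling and flow control are
    omitted:
    {[
      let esys = Unixqueue.create_unix_event_system() in
      let on_request_header (req : http_request_header_notification) =
        req # schedule_accept_body
          ~on_request:(fun full ->
                         let env = full # environment in
                         env # set_output_header_field "Content-Type" "text/plain";
                         env # send_output_header();
                         env # output_ch # output_string "Hello world\n";
                         full # schedule_finish())
          () in
      let _engine =
        new http_engine ~on_request_header () default_http_engine_config fd esys in
      Unixqueue.run esys
    ]}
 *)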
class type http_engine_processing_config =
object
method config_synch_input :
(Netchannels.in_obj_channel -> unit) ->
Uq_engines.async_in_channel ->
unit
(** The "input synchronizer": It is called as [obj # config_synch_input f ch]
* to create a synchronous input channel from an asynchronous one, [ch].
* The function [f] must be called back by the synchronizer when synchronisation
* is established, and with the synchronous channel [ch'] as argument.
* In particular, the task of the synchronizer is to turn blocking reads of
* [ch'] into non-blocking reads of [ch]. In general there are two ways of
* implementation:
* - Buffer all input from [ch] until the end of the channel is reached,
* then call [f] with a wrapper channel [ch'] that just reads from the
* buffer.
* - Run [f] in a different thread that blocks whenever there is nothing to
* read from [ch].
*
* Both implementations are allowed, i.e. {b it is allowed that [f] runs in
* a different thread}.
*
* CHECK: How to handle exceptions raised from [f]? Idea: [f] is obliged to
* close [ch'] in this case, even if [ch] is not yet at the end. The rest of
 * exception handling is up to the user. - The converse must also hold:
* When there is an error in the engine, [ch] must be closed to signal the
* other thread that we have a problem.
*)
method config_synch_output :
(Netchannels.out_obj_channel -> unit) ->
Uq_engines.async_out_channel ->
unit
(** The "output synchronizer": It is called as [obj # config_synch_output f ch]
* to create a synchronous output channel from an asynchronous one, [ch].
* The function [f] must be called back by the synchronizer when synchronisation
* is established, and with the synchronous channel [ch'] as argument.
* In particular, the task of the synchronizer is to turn blocking writes to
* [ch'] into non-blocking writes to [ch]. In general there are two ways of
* implementation:
* - Call [f], then buffer all output to [ch'] until the end of the channel is
* reached, and finally output the contents of the buffer in an asynchronous
* way.
* - Run [f] in a different thread that blocks whenever there is no space to
* write to [ch].
*
* Both implementations are allowed, i.e. {b it is allowed that [f] runs in
* a different thread}.
*
* CHECK: Exceptions.
*)
end
class buffering_engine_processing_config : http_engine_processing_config
(** Implements the synchronisation by buffering *)
class type http_engine_processing_context =
object
method engine : unit Uq_engines.engine
(** The engine doing HTTP *)
end
val process_connection :
#Nethttpd_reactor.http_processor_config ->
#http_engine_processing_config ->
Unix.file_descr ->
Unixqueue.unix_event_system ->
'a http_service ->
http_engine_processing_context
(** Sets up an engine that processes all requests using the service description.
 * This function returns immediately; one needs to [Unixqueue.run] the event
* system to start the engine.
*
* The passed [http_engine_processing_config] is crucial for good performance.
* XXX
*)
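(** For example (a minimal sketch; [cfg] is a [#Nethttpd_reactor.http_processor_config],
    [fd] an accepted connection, and [srv] an ['a http_service], e.g. built with
    {!Nethttpd_services} - all three are assumed to be defined elsewhere):
    {[
      let esys = Unixqueue.create_unix_event_system() in
      let _ctx =
        process_connection
          cfg
          (new buffering_engine_processing_config)
          fd
          esys
          srv in
      Unixqueue.run esys
    ]}
 *)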
(** {1 Debugging} *)
module Debug : sig
val enable : bool ref
(** Enables {!Netlog}-style debugging of this module
*)
end
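(** For example, debug output can be switched on early in the program:
    {[
      Nethttpd_engine.Debug.enable := true
    ]}
 *)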