(*
  Copyright 2010 Gerd Stolpmann

  This file is part of Plasma, a distributed filesystem and a
  map/reduce computation framework. Unless you have a written license
  agreement with the copyright holder (Gerd Stolpmann), the following
  terms apply:

  Plasma is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  Plasma is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with Foobar.  If not, see <http://www.gnu.org/licenses/>.

*)
(* $Id: plasma_filebuf.mli 434 2011-10-07 12:35:20Z gerd $ *)

(** Buffer for [read] and [write] *)

type errno = Plasma_util.errno
(** Error code, as defined in [Plasma_util] *)

type strmem = [`String of string | `Memory of Netsys_mem.memory]
(** The counterpart of a blit operation: either a string or a
    [Netsys_mem.memory] block
 *)

type buf_state =
    [ `Invalid
    | `Clean
    | `Dirty
    | `Reading of int64 option Uq_engines.engine
    | `Writing of int64 option Uq_engines.engine
    | `Written
    | `Dropped
    ]
  (** Buffer states:
      - an [`Invalid] buffer is reserved for a certain block but it is
        not yet filled with any meaningful data
      - a [`Clean] buffer contains valid data of a file which is
        unmodified
      - a [`Dirty] buffer contains modified data of a file (which still
        needs to be written)
      - a [`Reading] buffer is being filled with data. For the user this
        is very much like [`Invalid] (data are unusable), but the
        argument engine terminates when the buffer changes state again.
      - a [`Writing] buffer is being written out. For the user this is
        very much like [`Clean] (data can be read but not modified
        again). The argument engine terminates when the buffer changes
        state again.
      - the [`Written] state is used after [`Writing] as long as it is
        still unclear whether the write is successful or not. On
        success, the buffer can be set to [`Clean] again. On error, it
        will go back to [`Dirty].
      - a [`Dropped] buffer descriptor is no longer valid: its buffer
        memory is reused for a different purpose
   *)

type buffer =
    { buf_inode : int64;
        (* presumably the inode the buffered block belongs to, cf.
           lookup_buffer - TODO confirm *)
      buf_index : int64;
        (* presumably the block index within the inode, cf.
           lookup_buffer - TODO confirm *)
      buf : Netsys_mem.memory;
        (* the buffer memory *)
      buf_ord : int;  (* ordinal number of the buffer *)
      mutable buf_seqno : int64;  (* for cache validation only *)
      mutable buf_state : buf_state;
        (* the current state, see buf_state *)
      mutable buf_dirty : bool;
        (* records a data modification that could not be expressed by
           a state change (e.g. while `Written) *)
      mutable buf_delayed_drop : bool;
        (* set by schedule_drop when the drop has to wait for the end
           of an ongoing read/write *)
      mutable buf_flushing : bool;
        (* set while the block is selected by select_for_flush *)
    }
  (** Buffer descriptor. The user of this API should never modify
      entries of it.

      A buffer descriptor is handed out for the lifetime of a buffer.
      The states are:

      - A freshly designated buffer is in [`Invalid] state
      - While reading the block the buffer is in [`Reading] state
      - If [`Reading] is successful and there is no pressure to reassign
        the buffer immediately, it becomes [`Clean]. A [`Clean] buffer
        can be dropped by the system at any time.
      - When the contents are modified the buffer becomes [`Dirty]
      - Dirty buffers can be written to disk. The buffer enters
        [`Writing] state. Note that it is not allowed to modify the
        buffer while in [`Writing] state - one must wait for the
        completion of the write first
      - A written buffer is set to [`Written]. This is a special state
        meaning that the write is done but not yet committed.
      - The contents of a written buffer can be modified. This is only
        recorded by the [buf_dirty] flag.
      - After the commit of the write, the [`Written] buffer becomes
        [`Clean] or [`Dirty], depending on whether it has been modified
        in the meantime. (NOTE(review): the original text said
        "depending on the argument flag"; [`Written] has no argument,
        so this presumably means [buf_dirty] - confirm against the
        implementation.) If the commit is not successful the buffer is
        set to [`Dirty].

      When a clean buffer is dropped, the state in the descriptor is set
      to [`Dropped]. At the same time, the buffer memory is reused for a
      different descriptor.

      The [`Invalid] state must not be kept for longer than a moment. If
      there are several requests for the same block, and the buffer is
      [`Invalid], the other requests can only use busy waiting to handle
      this case.

      The [buf_dirty] flag can be set in some contexts to indicate that
      there was a data modification and [`Dirty] should be entered next.
   *)

type flush_request =
    { flush_inode : int64;
      mutable flush_min_eof : int64;
      mutable flush_min_mtime : Plasma_rpcapi_aux.time;
      mutable flush_index_list : Plasma_util.I64Set.t;
    }
  (** A flush request for one inode, as returned by [select_for_flush]
      and given back with [release_flush_request].

      NOTE(review): judging from the names only, [flush_index_list] is
      the set of block indexes to write, and [flush_min_eof] and
      [flush_min_mtime] are lower bounds for the EOF position and the
      mtime to set after the flush - confirm against the implementation.
   *)

type buffer_system
  (** The buffer system (abstract) *)

val create_buffer_system : int -> int -> Unixqueue.event_system ->
                             buffer_system
  (** [create_buffer_system n_bufs blocksize esys]

      NOTE(review): presumably creates a system of [n_bufs] buffers of
      [blocksize] bytes each, running on the event system [esys] -
      confirm against the implementation.
   *)

val mem_size : buffer_system -> int
  (** size of buffers in bytes *)

val blit_from_buffer : buffer_system -> buffer -> int -> strmem -> int ->
                         int -> unit
  (** Copies data from a buffer to a string or memory block.

      NOTE(review): the argument order is presumably
      [blit_from_buffer sys b bpos dest dpos len], following the
      convention of [String.blit] - confirm against the implementation.
   *)

val blit_to_buffer : buffer_system -> strmem -> int -> buffer -> int ->
                       int -> unit
  (** Copies data from a string or memory block to a buffer.

      NOTE(review): the argument order is presumably
      [blit_to_buffer sys src spos b bpos len], following the
      convention of [String.blit] - confirm against the implementation.
   *)

val clean_access : buffer_system -> buffer -> unit
  (** Records a read access to a clean buffer. The buffer continues to
      be used as a clean buffer.
   *)

val dirty_access : buffer_system -> buffer -> int64 -> unit
  (** A clean or invalid buffer is set to dirty. The [int64] argument is
      the EOF position that should now be ensured at least.
   *)

val switch_to_reading : buffer_system -> buffer ->
                        int64 option Uq_engines.engine -> int64 option ->
                          unit
  (** [switch_to_reading sys b e eof_opt]:
      The buffer is switched to [`Reading] state (from either
      [`Invalid], [`Clean] or [`Dirty]). The engine [e] must be in a
      non-final state. When the engine terminates, this is taken as
      indication that the read is finished. The result value of the
      engine is [seqno_opt]. If it is [Some seqno] the read has been
      successful and the buffer is filled with data of this sequence
      number of the file. The result value [None] means error.

      This function arranges that the buffer is switched to a follow-up
      state when the engine is finished:
      - if the buffer is scheduled for being dropped, this is done now
      - the buffer is also dropped when the read is non-successful
      - if [eof_opt <> None] the buffer becomes [`Dirty], and the
        EOF value is considered for the flush request
      - if there is pressure for memory the buffer is reassigned
      - otherwise the buffer becomes [`Clean]
   *)

val switch_to_writing : buffer_system -> buffer ->
                        int64 option Uq_engines.engine -> unit
  (** The buffer is switched to [`Writing] state (from [`Dirty]). The
      argument engine must be in a non-final state. When the engine
      terminates, this is taken as indication that the buffer is written
      out. This function arranges that the buffer state is switched
      again when this occurs. The follow-up state is always [`Written].
      The engine returns the new sequence number.

      A [`Written] buffer should be committed, and then one of three
      functions needs to be called:
      - [write_committed]: if the commit has been successful
      - [write_erroneous]: if an error occurred
      - [write_cancelled]: if the block will no longer be written
        because there was an error for another block of the same
        transaction
   *)

val schedule_drop : buffer_system -> buffer -> unit
  (** If the buffer can be immediately invalidated this is done.
      Otherwise the [buf_delayed_drop] flag is set, and at the end of
      the ongoing read/write the buffer will be set to [`Dropped].
   *)

val schedule_drop_inode : buffer_system -> int64 -> unit
  (** Same as [schedule_drop], but for all buffers of a whole inode. It
      is no error if there is no buffer for this inode.
   *)

val lookup_buffer : buffer_system -> int64 -> int64 -> buffer
  (** [lookup_buffer sys inode index]: Looks the buffer up for [inode]
      and [index], or raises [Not_found] if there is none yet, or the
      existing descriptor is in [`Dropped] state.
   *)

val request_buffer_e : buffer_system -> int64 -> int64 ->
                          buffer option Uq_engines.engine
  (** [request_buffer_e sys inode index]: This function is to be used
      when [lookup_buffer] raises [Not_found] to get a new buffer. The
      new buffer is initially in [`Invalid] state. Note that competing
      engine-driven threads can see this state in a certain moment.
      Note that one should immediately set the buffer to a different
      state when the engine is done.

      The function returns [None] via the engine when the buffer already
      exists or when a concurrent request was faster (if there are
      several calls of [request_buffer_e] for the same inode/index pair,
      only one call gets the buffer, and the other calls see [None]).

      The function also returns [None] when an error is recorded for
      this inode (with [write_erroneous]). Because of the latter, it is
      recommended to call [reset_inode_error] just before requesting a
      buffer.
   *)

val select_for_flush : buffer_system -> int64 -> flush_request
  (** [select_for_flush sys inode]: Returns a flush request describing
      the blocks of [inode] that need to be written. Once a block is
      returned by [select_for_flush] it is not again returned in future
      calls of this function unless it is set to [`Dirty] again after
      leaving the [`Written] state.

      The [buf_flushing] flag is used for managing this. If
      [buf_flushing] is set, it is prevented that a call of
      [dirty_access] records the block again for flushing. This flag is
      cleared when entering [`Writing].

      This function may raise [Not_found] if nothing appropriate is
      found.
   *)

val select_inodes : buffer_system -> int64 list
  (** Returns the inodes that can be flushed, in the order of
      precedence.
   *)

val get_flush_min_eof : buffer_system -> int64 -> int64
  (** Returns the minimum EOF value from the flush record of the inode,
      or raises [Not_found]
   *)

val get_flush_min_mtime : buffer_system -> int64 -> Plasma_rpcapi_aux.time
  (** Returns the minimum mtime value from the flush record of the
      inode, or raises [Not_found]
   *)

val write_committed : buffer_system -> buffer -> unit
  (** Records that the write is committed: If the buffer is in
      [`Written] state and has not been modified since the write, it is
      reset to [`Clean] (or reassigned for a different purpose). If it
      has been modified in the meantime it is set to [`Dirty]. It is an
      error to call this function for a different state.

      NOTE(review): the original text distinguished the states
      "Written false" and "Written true". The [`Written] tag carries no
      argument in [buf_state], so the distinction is presumably made by
      the [buf_dirty] flag - confirm against the implementation.
   *)

val write_cancelled : buffer_system -> buffer -> unit
  (** The write is cancelled. The buffer becomes dirty again. *)

val write_erroneous : buffer_system -> int64 -> errno -> unit
  (** Records a write error for an inode. This should be called after
      [write_cancelled] with the error code.

      Any pending [request_buffer_e] for this inode is interrupted, so
      [None] is returned.
   *)

val release_flush_request : buffer_system -> flush_request -> unit
  (** Checks whether the buffers are in the right state after finishing
      or aborting a flush request. In particular, all buffers are set to
      [`Dirty] that are still in [`Written] state. Also, the
      [buf_flushing] flag is cleared.
   *)

val inode_error : buffer_system -> int64 -> errno option
  (** Whether there is a write error for this inode *)

val reset_inode_error : buffer_system -> int64 -> unit
  (** Resets the error *)

val n_waiting : buffer_system -> int
  (** The number of buffer requests that currently cannot be
      satisfied
   *)

val dump_buffers : buffer_system -> unit
  (** Writes the buffer table to log (debug level) *)

val max_time : Plasma_rpcapi_aux.time -> Plasma_rpcapi_aux.time ->
                  Plasma_rpcapi_aux.time
  (** The max of two time structs *)