(*
Copyright 2010 Gerd Stolpmann
This file is part of Plasma, a distributed filesystem and a
map/reduce computation framework. Unless you have a written license
agreement with the copyright holder (Gerd Stolpmann), the following
terms apply:
Plasma is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Plasma is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Plasma. If not, see <http://www.gnu.org/licenses/>.
*)
(* $Id: plasma_filebuf.mli 434 2011-10-07 12:35:20Z gerd $ *)
(** Buffer for [read] and [write] *)
type errno = Plasma_util.errno
(** Error code type; an alias of [Plasma_util.errno]. *)
type strmem = [`String of string | `Memory of Netsys_mem.memory]
(** The user-side data area of a blit operation: either an OCaml string
or a [Netsys_mem.memory] block. *)
type buf_state =
[ `Invalid
| `Clean
| `Dirty
| `Reading of int64 option Uq_engines.engine
| `Writing of int64 option Uq_engines.engine
| `Written
| `Dropped
]
(** Buffer states:
- an [`Invalid] buffer is reserved for a certain block, but it is
not yet filled with any meaningful data
- a [`Clean] buffer contains valid, unmodified data of a file
- a [`Dirty] buffer contains modified data of a file (which still needs
to be written)
- a [`Reading] buffer is being filled with data. For the user this is
very much like [`Invalid] (the data are unusable), but the argument
engine terminates when the buffer changes state again.
- a [`Writing] buffer is being written out. For the user this is
very much like [`Clean] (the data can be read but not modified).
The argument engine terminates when the buffer changes state again.
- the [`Written] state is used after [`Writing] as long as it is
still unclear whether the write is successful or not. On success,
the buffer can be set to [`Clean] again. On error, it will go back
to [`Dirty].
- a [`Dropped] buffer descriptor is no longer backed by its buffer:
the buffer memory is reused for a different purpose
*)
type buffer =
{ buf_inode : int64; (* inode the buffer is reserved for *)
buf_index : int64; (* index of the block within the inode - presumably in units of blocks; TODO confirm *)
buf : Netsys_mem.memory; (* the memory holding the buffer data *)
buf_ord : int; (* ordinal number of the buffer *)
mutable buf_seqno : int64; (* sequence number; for cache validation only *)
mutable buf_state : buf_state; (* current state, see [buf_state] *)
mutable buf_dirty : bool; (* data were modified; [`Dirty] should be entered next *)
mutable buf_delayed_drop : bool; (* drop the buffer at the end of the ongoing read/write *)
mutable buf_flushing : bool; (* the block is already selected for a flush *)
}
(** Buffer descriptor. The user of this API should never modify entries
of it.
A buffer descriptor is handed out for the lifetime of a buffer. The
states are:
- A freshly designated buffer is in [`Invalid] state
- While the block is being read the buffer is in [`Reading] state
- If [`Reading] is successful and there is no pressure to reassign
the buffer immediately, it becomes [`Clean]. A [`Clean] buffer
can be dropped by the system at any time.
- When the contents are modified the buffer becomes [`Dirty]
- Dirty buffers can be written to disk. The buffer enters [`Writing]
state. Note that it is not allowed to modify the buffer while in
[`Writing] state - one must wait for the completion of the write
first
- A written buffer is set to [`Written]. This is a special state
meaning that the write is done but not yet committed.
- The contents of a written buffer can be modified. This is only
recorded by the [buf_dirty] flag.
- After the commit of the write, the [`Written] buffer becomes
[`Clean] or [`Dirty], depending on whether a modification has been
recorded in [buf_dirty] in the meantime. If the commit is not
successful the buffer is set to [`Dirty].

When a clean buffer is dropped, the state in the descriptor is set
to [`Dropped]. At the same time, the buffer memory is reused for a
different descriptor.

The [`Invalid] state must not be kept for longer than a moment.
If there are several requests for the same block, and the buffer
is [`Invalid], the other requests can only use busy waiting to
handle this case.

The [buf_dirty] flag can be set in some contexts to indicate that
there was a data modification and [`Dirty] should be entered next.
*)
type flush_request =
{ flush_inode : int64; (* the inode this request is for *)
mutable flush_min_eof : int64; (* EOF position that should be ensured at least *)
mutable flush_min_mtime : Plasma_rpcapi_aux.time; (* presumably the mtime to ensure at least - TODO confirm *)
mutable flush_index_list : Plasma_util.I64Set.t; (* presumably the indexes of the blocks to write - TODO confirm *)
}
(** A request to flush blocks of one inode, as returned by
[select_for_flush] and given back with [release_flush_request]. *)
type buffer_system
(** The abstract handle of a buffer system, created with
[create_buffer_system] and passed to all other functions of this
module. *)
val create_buffer_system : int -> int -> Unixqueue.event_system -> buffer_system
(** [create_buffer_system n_bufs blocksize esys]: Creates a new buffer
system attached to the event system [esys]. Going by the argument
names, [n_bufs] is the number of buffers and [blocksize] the size of
a block - NOTE(review): confirm the unit of [blocksize] against the
implementation. *)
val mem_size : buffer_system -> int
(** Size of buffers in bytes.
NOTE(review): it is not visible here whether this is the size of a
single buffer or the total of all buffers - confirm. *)
val blit_from_buffer :
buffer_system -> buffer -> int -> strmem -> int -> int -> unit
(** Copies data out of a buffer into a string or memory block.
NOTE(review): the three [int] arguments are presumably the position
in the buffer, the position in the destination, and the length (in
this order, following the usual blit convention) - confirm against
the implementation. *)
val blit_to_buffer :
buffer_system -> strmem -> int -> buffer -> int -> int -> unit
(** Copies data from a string or memory block into a buffer.
NOTE(review): the three [int] arguments are presumably the position
in the source, the position in the buffer, and the length (in this
order, following the usual blit convention) - confirm against the
implementation. *)
val clean_access : buffer_system -> buffer -> unit
(** Records a read access to a clean buffer: the buffer continues to
be used as a clean buffer afterwards.
*)
val dirty_access : buffer_system -> buffer -> int64 -> unit
(** A clean or invalid buffer is set to dirty. The [int64] argument is
the EOF position that should now be ensured at least.
*)
val switch_to_reading :
buffer_system -> buffer -> int64 option Uq_engines.engine ->
int64 option -> unit
(** [switch_to_reading sys b e eof_opt]:
The buffer is switched to [`Reading] state (from either [`Invalid],
[`Clean] or [`Dirty]). The engine [e] must be in a non-final
state. When the engine terminates, this is taken as indication that
the read is finished. The result value of the engine is [seqno_opt]:
if it is [Some seqno], the read has been successful and the buffer is
filled with data of this sequence number of the file. The result value
[None] means error. This function arranges that the buffer is switched
to a follow-up state when the engine is finished:
- if the buffer is scheduled for being dropped, this is done now
- the buffer is also dropped when the read is not successful
- if [eof_opt <> None] the buffer becomes [`Dirty], and the
EOF value is considered for the flush request
- if there is pressure for memory the buffer is reassigned
- otherwise the buffer becomes [`Clean]
*)
val switch_to_writing :
buffer_system -> buffer -> int64 option Uq_engines.engine -> unit
(** The buffer is switched to [`Writing] state (from [`Dirty]).
The argument engine must be in a non-final
state. When the engine terminates, this is taken as indication that
the buffer is written out. This function arranges that the
buffer state is switched again when this occurs. The follow-up
state is always [`Written].
The engine returns the new sequence number.
A [`Written] buffer should be committed, and then one of the
following functions needs to be called:
- [write_committed]: if the commit has been successful
- [write_cancelled]: if the block will no longer be written, e.g.
because there was an error for another block of the same transaction
- [write_erroneous]: if an error occurred. Note that this function
takes the inode and the error code (not the buffer), and that it
should be called after [write_cancelled].
*)
val schedule_drop : buffer_system -> buffer -> unit
(** If the buffer can be immediately invalidated this is done. Otherwise
the [buf_delayed_drop] flag is set, and at the end of the ongoing
read/write the buffer will be set to [`Dropped].
*)
val schedule_drop_inode : buffer_system -> int64 -> unit
(** Same as [schedule_drop], but for all buffers of a whole inode.
It is not an error if there is no buffer for this inode.
*)
val lookup_buffer : buffer_system -> int64 -> int64 -> buffer
(** [lookup_buffer sys inode index]: Looks up the buffer for [inode]
and [index]. Raises [Not_found] if there is none yet, or if
the existing descriptor is in [`Dropped] state.
*)
val request_buffer_e :
buffer_system -> int64 -> int64 -> buffer option Uq_engines.engine
(** [request_buffer_e sys inode index]: This function is to be used
when [lookup_buffer] raises [Not_found], in order to get a new buffer.
The new buffer is initially in [`Invalid] state. Note that
competing engine-driven threads can see this state for a certain
moment.
Note that one should immediately set the buffer to a different state
when the engine is done.
The function returns [None] via the engine when the buffer already
exists or when a concurrent request was faster (if there are
several calls of [request_buffer_e] for the same inode/index pair,
only one call gets the buffer, and the other calls see [None]).
The function also returns [None] when an error is recorded
for this inode (with [write_erroneous]). Because of the latter,
it is recommended to call [reset_inode_error] just before requesting
a buffer.
*)
val select_for_flush : buffer_system -> int64 -> flush_request
(** [select_for_flush sys inode]: Returns a flush request for the blocks
of [inode] that need to be written. Once a block is returned by
[select_for_flush] it is not returned again in future calls of this
function unless it is set to [`Dirty] again after leaving the
[`Written] state.
The [buf_flushing] flag is used for managing this. While [buf_flushing]
is set, a call of [dirty_access] does not record the block
again for flushing. This flag is cleared when entering [`Writing].
This function may raise [Not_found] if nothing appropriate is found.
*)
val select_inodes : buffer_system -> int64 list
(** Returns the inodes that can be flushed, in order of precedence.
*)
val get_flush_min_eof : buffer_system -> int64 -> int64
val get_flush_min_mtime : buffer_system -> int64 -> Plasma_rpcapi_aux.time
(** Return the respective value from the flush record, or raise
[Not_found] (presumably when there is no flush record; the [int64]
argument is presumably the inode - TODO confirm). *)
val write_committed : buffer_system -> buffer -> unit
(** Records that the write is committed. The buffer must be in
[`Written] state: If no modification has been recorded in the
meantime (the [buf_dirty] flag is not set), the buffer is reset to
[`Clean] (or reassigned for a different purpose). If a modification
has been recorded ([buf_dirty] is set), the buffer is set to [`Dirty].
It is an error to call this function for a different state.
*)
val write_cancelled : buffer_system -> buffer -> unit
(** Records that the write is cancelled. The buffer becomes dirty again. *)
val write_erroneous : buffer_system -> int64 -> errno -> unit
(** [write_erroneous sys inode code]: Records a write error for [inode].
This should be called after [write_cancelled], with the error code.
Any pending [request_buffer_e] for this inode is interrupted,
so that it returns [None].
*)
val release_flush_request : buffer_system -> flush_request -> unit
(** Checks whether the buffers are in the right state after finishing
or aborting a flush request. In particular, all buffers that are
still in [`Written] state are set to [`Dirty]. Also, the
[buf_flushing] flag is cleared.
*)
val inode_error : buffer_system -> int64 -> errno option
(** Returns whether there is a write error recorded for this inode:
[Some code] if so, presumably [None] otherwise. *)
val reset_inode_error : buffer_system -> int64 -> unit
(** Resets the write error recorded for this inode *)
val n_waiting : buffer_system -> int
(** The number of buffer requests that cannot currently be satisfied *)
val dump_buffers : buffer_system -> unit
(** Writes the buffer table to the log (at debug level) *)
val max_time :
Plasma_rpcapi_aux.time -> Plasma_rpcapi_aux.time -> Plasma_rpcapi_aux.time
(** Returns the maximum of two time structs *)