module Mapred_tasks: sig
.. end
Representation of tasks
type
file_fragment = string * int64 * int64
A file fragment: filename, start block position, and length in blocks.
A length of 0 means "until the end of the file".
type
file = file_fragment list
type
map_task = {
  map_input : file ;
  map_output_prefix : string ;
  map_id : int ;
  map_best_hosts : Unix.inet_addr list ;
}
type
sort_task = {
  sort_input : file ;
  sort_input_del : bool ;
  sort_output : string ;
  sort_id : int ;
}
type
shuffle_task = {
  shuffle_input : (file * int * int) list ;
  shuffle_input_del : bool ;
  shuffle_output : (string * int * int) list ;
  shuffle_partitions : int * int ;
  shuffle_coverage : int * int ;
  shuffle_reduce : bool ;
}
Shuffling means rearranging the output of the map/sort tasks
into a set of partitions. The map tasks are numbered 0 to n-1
(via map_id), and for each task there is a file that serves here
as input. The partitions are numbered 0 to m-1, and in the end
there is exactly one output file for each partition.
type
task = [ `Cleanup
| `Map of map_task
| `Shuffle of shuffle_task
| `Sort of sort_task ]
type
task_result = [ `Corrupt_input of file list
| `Error of string
| `Ok of file list
| `Retry_later ]
- `Ok files: The task is successful. files is the list of created
  files (or file fragments).
- `Retry_later: The task cannot be started because of a lack of
  resources. No files have been created (or they have already been
  deleted). The task should be tried again when it is suspected that
  resources have been freed.
- `Corrupt_input files: The input files (files) of the task were not
  readable. The files should be created again and the task retried.
  No files have been created (or they have already been deleted).
- `Error msg: Fatal error msg. This is an internal error or an error
  from user code.
module Ord: sig
.. end
val encode_task : task -> string
val decode_task : string -> task
val encode_task_result : task_result -> string
val decode_task_result : string -> task_result
val string_of_task_id : task -> string
val print_task_id : task -> int -> unit
val print_task : task -> int -> unit
val print_file : ?tag:string -> file -> int -> unit