module Mapred_tasks: sig
.. end
Representation of tasks
type
file_fragment = string * int64 * int64
File name, start block position, and length in blocks. A length of 0
means "till end of file".
type
file = file_fragment list
type map_task = {
  map_input : file;
  map_output_prefix : string;
  map_id : int;
  map_best_hosts : Unix.inet_addr list;
}
type sort_task = {
  sort_input : file;
  sort_input_del : bool;
  sort_output : string;
  sort_id : int;
}
type shuffle_task = {
  shuffle_input : (file * int * int) list;
  shuffle_input_del : bool;
  shuffle_output : (string * int * int) list;
  shuffle_partitions : int * int;
  shuffle_coverage : int * int;
  shuffle_reduce : bool;
}
Shuffling means rearranging the output of the map/sort tasks
into a set of partitions. The map tasks are numbered 0 to n-1
(via map_id), and for each task there is a file that serves as
input here. The partitions are numbered 0 to m-1, and for each
partition there is exactly one output file.
type
task = [ `Cleanup
| `Map of map_task
| `Shuffle of shuffle_task
| `Sort of sort_task ]
type
task_result = [ `Error of string | `Ok of file list ]
val encode_task : task -> string
val decode_task : string -> task
val encode_task_result : task_result -> string
val decode_task_result : string -> task_result
val string_of_task_id : task -> string
val print_task_id : task -> int -> unit
val print_task : task -> int -> unit
val print_file : ?tag:string -> file -> int -> unit