module Mapred_sched : sig .. end
Scheduler
type plan_config
val configure_plan : ?keep_temp_files:bool ->
Mapred_def.mapred_job_config ->
Mapred_config.mapred_config -> plan_config
configure_plan jc conf
Parameters:
keep_temp_files: if true, temporary files created during the
map/reduce execution are not immediately deleted.
type plan
val create_plan : Plasma_client.plasma_cluster -> plan_config -> plan
val bigblock_size : plan -> int
Returns the bigblock size given to configure_plan (via jc),
rounded up to the next multiple of blocks.
val add_inputs : plan -> unit
val add_map_output : plan ->
int ->
(Mapred_tasks.file_tag * Mapred_tasks.file) list -> Unix.inet_addr -> unit
The IP addr points to the machine that executed the map or emap task
(which is also the likely storage for the files)
val plan_complete : plan -> bool
val complete_inputs : plan -> unit
val executable_tasks : plan -> Mapred_tasks.task list
val hosts : plan -> (string * Unix.inet_addr) list
val mark_as_finished : plan -> Mapred_tasks.task -> unit
val mark_as_started : plan ->
Mapred_tasks.task -> Unix.inet_addr -> int -> bool -> unit
val remove_marks : plan -> Mapred_tasks.task -> unit
Removes the marks set by mark_as_started or mark_as_finished.
val task_depends_on_list : plan -> Mapred_tasks.task -> Mapred_tasks.task list
val plan_finished : plan -> bool
val n_running : plan -> int
val n_finished : plan -> int
val n_total : plan -> int
val avg_running : plan -> float
val cluster : plan -> Plasma_client.plasma_cluster
val print_plan : plan -> unit
val generate_svg : plan -> string
val task_stats : plan -> Mapred_tasks.task -> int * int
Raises Not_found if the task has never been started.