Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: validate.ml 712 2005-05-04 13:09:58Z gerd $
 * ----------------------------------------------------------------------
 *
 *)


open Pxp_document;;
open Pxp_yacc;;
open Pxp_types;;

let error_happened = ref false;;

let print_error e =
  print_endline (string_of_exn e)
;;

class warner =
  object 
    method warn w =
      print_endline ("WARNING: " ^ w)
  end
;;

let resolve_by_helper scheme program sends_mime_header =
  let url_syntax =
    { Neturl.ip_url_syntax with 
	Neturl.url_accepts_8bits = true
    }
  in
  let get_url id =
    (* Only accept SYSTEM Ids with the right scheme: *)
    match id with
	System sysid ->
	  ( try
	      let sysid_scheme = 
		try Neturl.extract_url_scheme sysid 
		with Neturl.Malformed_URL -> scheme
		     (* If no scheme found: assume our own scheme *)
	      in
	      if sysid_scheme = scheme then
		Neturl.url_of_string url_syntax sysid  (* or Malformed_URL *)
	      else
		raise Pxp_reader.Not_competent
	    with
		(* If the URL is syntactically wrong, do not accept it: *)
		Neturl.Malformed_URL -> raise Pxp_reader.Not_competent
	  )
      | _ ->
	  raise Pxp_reader.Not_competent
  in
  let read_mime_header ch =
    let empty_re = Str.regexp "^[ \t\r\n]*$" in
    let is_empty s = Str.string_match empty_re s 0 in
    let buffer = Buffer.create 1024 in
    let line = ref(input_line ch) in
    if String.length !line >= 6 && String.sub !line 0 5 = "HTTP/" then
      line := input_line ch;
    while not (is_empty !line) do
      Buffer.add_string buffer !line;
      Buffer.add_string buffer "\n";
      line := input_line ch
    done;
    Buffer.add_string buffer "\n";
    Buffer.contents buffer
  in
  let open_channel id url =
    let url_string = Neturl.string_of_url url in
    let command = program ^ " " ^ Filename.quote url_string in
    let ch = Unix.open_process_in command in
    if sends_mime_header then
      let header_string = read_mime_header ch in
      let header_alist,_ = Mimestring.scan_header 
			     header_string 0 (String.length header_string) in
      let content_type = 
	try List.assoc "content-type" header_alist 
	with Not_found -> "application/octet-stream" in
      let mime_type, mime_type_params =
	Mimestring.scan_mime_type content_type [] in
      let encoding =
	try Some(Netconversion.encoding_of_string
		   (List.assoc "charset" mime_type_params))
	with Not_found -> None in
      ch, encoding
    else
      ch, None
  in
  let close_channel ch =
    match Unix.close_process_in ch with
	Unix.WEXITED 0 ->
	  ()
      | Unix.WEXITED n ->
	  failwith("Command terminated with exit code " ^ string_of_int n)
      | Unix.WSIGNALED n ->
	  failwith("Command terminated by signal " ^ string_of_int n)
      | _ -> assert false
  in
  new Pxp_reader.resolve_read_url_channel
    ~close:close_channel
    ~url_of_id: get_url
    ~channel_of_url: open_channel
    ()
;;


let parse debug wf namespaces iso88591 helpers filename =
  try 
    (* Parse the document: *)
    let parse_fn =
      if wf then parse_wfdocument_entity ?transform_dtd:None
      else 
	let index = new hash_index in
	parse_document_entity 
	  ?transform_dtd:None 
	  ~id_index:(index :> 'ext index)
    in
    let mng =
      if namespaces then
	Some (new Pxp_dtd.namespace_manager)
      else
	None
    in
    let resolver =
      let file_resolver =
	new Pxp_reader.resolve_as_file() in
      new Pxp_reader.combine (helpers @ [file_resolver])
    in
    let start_id =
      System filename in
    let spec =
      if namespaces then default_namespace_spec else default_spec in
    let doc =
      parse_fn
	  { default_config with 
	      debugging_mode = debug;
	      encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8;
	      idref_pass = true;
	      enable_namespace_processing = mng;
	      warner = new warner
          }
	  (ExtID(start_id, resolver))
	  spec 
    in
    ()
  with
      e ->
	(* Print error; remember that there was an error *)
	error_happened := true;
	print_error e;
	(* raise e *)
;;


let main() =
  let debug = ref false in
  let wf = ref false in
  let namespaces = ref false in
  let iso88591 = ref false in
  let helpers = ref [] in
  let files = ref [] in

  let eq_split s =
    let eq = 
      try String.index s '='
      with Not_found -> raise(Arg.Bad "Syntax error")
    in
    let before_eq = String.sub s 0 eq in
    let after_eq = String.sub s (eq+1) (String.length s - eq - 1) in
    (before_eq, after_eq)
  in

  let add_helper sends_mime_header s =
    let scheme,cmd = eq_split s in
    let h = resolve_by_helper scheme cmd sends_mime_header in
    helpers := !helpers @ [h]
  in

  let add_pubid s =
    let pubid,filename = eq_split s in
    let h = Pxp_reader.lookup_public_id_as_file [pubid,filename] in
    helpers := !helpers @ [h]
  in

  let add_sysid s =
    let sysid,filename = eq_split s in
    let h = Pxp_reader.lookup_system_id_as_file [sysid,filename] in
    helpers := !helpers @ [h]
  in

  Arg.parse
      [ "-d",   Arg.Set debug, 
           "                     turn debugging mode on";
	"-wf",  Arg.Set wf,    
            "                    check only for well-formedness";
	"-namespaces", Arg.Set namespaces,
	            "            enable namespace support";
        "-iso-8859-1", Arg.Set iso88591, 
                    "            use ISO-8859-1 as internal encoding instead of UTF-8";
	"-helper", Arg.String (add_helper false),
	        "scheme=cmd      add this helper command";
	"-helper-mh", Arg.String (add_helper true),
	           "scheme=cmd   add this helper command (which sends mime headers)";
	"-pubid", Arg.String add_pubid,
	       "id=file          map this PUBLIC id to this file";
	"-sysid", Arg.String add_sysid,
	       "id=file          map this SYSTEM id to this file";
      ]
      (fun x -> files := x :: !files)
      "
usage: pxpvalidate [options] URL ...

- checks the validity of XML documents. See below for list of options.

<title>PXP - The XML parser for Objective Caml</title>

List of options:";
  files := List.rev !files;
  List.iter (parse !debug !wf !namespaces !iso88591 !helpers) !files;
;;


main();
if !error_happened then exit(1);;


This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml