(* $Id: lexpp_file.ml 667 2004-06-02 15:21:19Z gerd $
* ----------------------------------------------------------------------
*
*)
open Printf;;
let section_re =
Netstring_str.regexp "^[(][*][ \t]*\\[\\([A-Za-z0-9_-]+\\)\\][ \t]*[*][)]";;
let read_sections filename =
let f = open_in filename in
printf "[reading %s]\n" filename; flush stdout;
let current_section = ref None in
let current_data = Buffer.create 1000 in
let sections = ref [] in
let save_section() =
match !current_section with
None -> ()
| Some s ->
sections := (s, Buffer.contents current_data) :: !sections;
current_section := None;
in
try
while true do
let line = input_line f in
match Netstring_str.string_match section_re line 0 with
Some mtch ->
let section_name = Netstring_str.matched_group mtch 1 line in
(* save old section: *)
save_section();
(* begin new section: *)
current_section := Some section_name;
Buffer.clear current_data;
| None ->
Buffer.add_string current_data line;
Buffer.add_char current_data '\n';
done;
assert false
with
End_of_file ->
close_in f;
save_section();
List.rev !sections
;;
let parse_char_classes s =
Uni_parser.main Uni_lexer.token (Lexing.from_string s)
;;
(* The following printing functions have originally been written by Claudio
* Sacerdoti Coen.
*)
(* padded_string_of_int i returns the string representing the *)
(* integer i (i < 256) using exactly 3 digits (example: 13 -> "013") *)
let padded_string_of_int i =
if i < 10 then
"00" ^ string_of_int i
else if i < 100 then
"0" ^ string_of_int i
else
string_of_int i
;;
(* Two functions useful to print a definition *)
let rec print_disjunction ?(first = true) out =
function
[] ->
if first then output_string out " ['b'-'a' (*empty*) ] "
| he::tl ->
if not first then output_string out " | " ;
print_re out he ;
print_disjunction ~first:false out tl
and print_re out =
function
Uni_types.Char i -> output_string out ("'\\" ^ padded_string_of_int i ^ "'")
| Uni_types.Interval (l,u) ->
output_string out ("['\\" ^ padded_string_of_int l ^ "'-'\\" ^
padded_string_of_int u ^ "']")
| Uni_types.Identifier i -> output_string out i
| Uni_types.Concat rell ->
let foo rel =
if List.length rel > 1 then
(output_string out "(" ; print_disjunction out rel ;
output_string out ")")
else
print_disjunction out rel
in
List.iter foo rell
;;
(* print_definition prints a definition in the format expected by ocamllex *)
let print_definition out { Uni_types.id = id ; Uni_types.rel = rel } =
output_string out ("let " ^ id ^ " =\n ") ;
print_disjunction out rel ;
output_string out "\n\n"
;;
(**********************************************************************)
(* print a definition in the format expected by ulex: *)
(**********************************************************************)
let rec print_ulex_disjunction ?(first = true) out =
function
[] ->
if first then output_string out " ['b'-'a' (*empty*) ] "
| he::tl ->
if not first then output_string out " | " ;
print_ulex_re out he ;
print_ulex_disjunction ~first:false out tl
and print_ulex_re out =
function
Uni_types.Char i -> output_string out (string_of_int i)
| Uni_types.Interval (l,u) ->
output_string out ("[" ^ string_of_int l ^ "-" ^
string_of_int u ^ "]")
| Uni_types.Identifier i -> output_string out i
| Uni_types.Concat rell ->
let foo rel =
if List.length rel > 1 then
(output_string out "(" ; print_ulex_disjunction out rel ;
output_string out ")")
else
print_ulex_disjunction out rel
in
List.iter foo rell
;;
let print_ulex_definition out { Uni_types.id = id ; Uni_types.rel = rel } =
output_string out ("let regexp " ^ id ^ " =\n ") ;
print_ulex_disjunction out rel ;
output_string out "\n\n"
;;