(* * \begin{doc} * \section{Higher-level IO functions} * * \subsection{Regular expressions} * \index{regular expressions} * * Many of the higher-level functions use regular expressions. * Regular expressions are defined by strings with syntax nearly identical * to \Cmd{awk}{1}. * * Strings may contain the following character constants. * * \begin{itemize} * \item \verb+\\+ : a literal backslash. * \item \verb+\a+ : the alert character \verb+^G+. * \item \verb+\b+ : the backspace character \verb+^H+. * \item \verb+\f+ : the formfeed character \verb+^L+. * \item \verb+\n+ : the newline character \verb+^J+. * \item \verb+\r+ : the carriage return character \verb+^M+. * \item \verb+\t+ : the tab character \verb+^I+. * \item \verb+\v+ : the vertical tab character. * \item \verb+\xhh...+ : the character represented by the string * of hexadecimal digits \verb+h+. All valid hexadecimal digits * following the sequence are considered to be part of the sequence. * \item \verb+\ddd+ : the character represented by 1, 2, or 3 octal * digits. * \end{itemize} * * Regular expressions are defined using the special characters \verb+.\^$[(){}*?++. * * \begin{itemize} * \item \verb+c+ : matches the literal character \verb+c+ if \verb+c+ is not * a special character. * \item \verb+\c+ : matches the literal character \verb+c+, even if \verb+c+ * is a special character. * \item \verb+.+ : matches any character, including newline. * \item \verb+^+ : matches the beginning of a line. * \item \verb+$+ : matches the end of line. * \item \verb+[abc...]+ : matches any of the characters \verb+abc...+ * \item \verb+[^abc...]+ : matches any character except \verb+abc...+ * \item \verb+r1|r2+ : matches either \verb+r1+ or \verb+r2+. * \item \verb+r1r2+ : matches \verb+r1+ and then \verb+r2+. * \item \verb+r++ : matches one or more occurrences of \verb+r+. * \item \verb+r*+ : matches zero or more occurrences of \verb+r+. * \item \verb+r?+ : matches zero or one occurrence of \verb+r+. * \item \verb+(r)+ : parentheses are used for grouping; matches \verb+r+. * \item \verb+\(r\)+ : also defines grouping, but the expression matched * within the parentheses is available to the output processor * through one of the variables \verb+$1+, \verb+$2+, ... * \item \verb+r{n}+ : matches exactly \verb+n+ occurrences of \verb+r+. * \item \verb+r{n,}+ : matches \verb+n+ or more occurrences of \verb+r+. * \item \verb+r{n,m}+ : matches at least \verb+n+ occurrences of \verb+r+, * and no more than \verb+m+ occurrences. * \item \verb+\y+: matches the empty string at either the beginning or * end of a word. * \item \verb+\B+: matches the empty string within a word. * \item \verb+\<+: matches the empty string at the beginning of a word. * \item \verb+\>+: matches the empty string at the end of a word. * \item \verb+\w+: matches any character in a word. * \item \verb+\W+: matches any character that does not occur within a word. * \item \verb+\`+: matches the empty string at the beginning of a file. * \item \verb+\'+: matches the empty string at the end of a file. * \end{itemize} * * Character classes can be used to specify character sequences * abstractly. Some of these sequences can change depending on your LOCALE. * * \begin{itemize} * \item \verb+[[:alnum:]]+ Alphanumeric characters. * \item \verb+[[:alpha:]]+ Alphabetic characters. * \item \verb+[[:lower:]]+ Lowercase alphabetic characters. * \item \verb+[[:upper:]]+ Uppercase alphabetic characters. * \item \verb+[[:cntrl:]]+ Control characters. * \item \verb+[[:digit:]]+ Numeric characters. * \item \verb+[[:xdigit:]]+ Numeric and hexadecimal characters. * \item \verb+[[:graph:]]+ Characters that are printable and visible. * \item \verb+[[:print:]]+ Characters that are printable, whether they are visible or not. * \item \verb+[[:punct:]]+ Punctuation characters. * \item \verb+[[:blank:]]+ Space or tab characters. * \item \verb+[[:space:]]+ Whitespace characters. * \end{itemize} * \end{doc} * *) include Omake_pos.Make (struct let name = "Omake_builtin_io_fun" end) let debug_parsing = Lm_debug.create_debug (**) { debug_name = "parsing"; debug_description = "Debug parsing operations"; debug_value = false } (* * Concatenate files into a string. * * \begin{doc} * \fun{cat} * * \begin{verbatim} * cat(files) : Sequence * files : File or InChannel Sequence * \end{verbatim} * * The \verb+cat+ function concatenates the output from multiple files * and returns it as a string. * \end{doc} *) let cat venv pos loc args = let pos = string_pos "cat" pos in match args with [arg] -> let names = Omake_value.values_of_value venv pos arg in let buf = Buffer.create 1024 in List.iter (fun name -> try let inp, close_flag = Omake_value.in_channel_of_any_value venv pos name in let inx = Omake_env.venv_find_channel venv pos inp in let rec copy () = let c = Lm_channel.input_char inx in Buffer.add_char buf c; copy () in let () = try copy () with End_of_file -> () in if close_flag then Omake_env.venv_close_channel venv pos inp with Sys_error _ -> let print_error buf = Format.fprintf buf "unable to open file: %a" Omake_value_print.pp_print_value name in raise (Omake_value_type.OmakeException (loc_pos loc pos, LazyError print_error))) names; Omake_value_type.ValString (Buffer.contents buf) | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 1, List.length args))) (* * Grep takes some flags. * * \begin{doc} * \fun{grep} * * \begin{verbatim} * grep(pattern) : String # input from stdin, default options * pattern : String * grep(pattern, files) : String # default options * pattern : String * files : File Sequence * grep(options, pattern, files) : String * options : String * pattern : String * files : File Sequence * \end{verbatim} * * The \verb+grep+ function searches for occurrences of a regular * expression \verb+pattern+ in a set of files, and prints lines that match. * This is like a highly-simplified version of \Cmd{grep}{1}. * * The options are: * \begin{description} * \item[q] If specified, the output from \verb+grep+ is not displayed. * \item[h] If specified, output lines will not include the filename (default, when only one input * file is given). * \item[n] If specified, output lines include the filename (default, when more than one input file * is given). * \item[v] If specified, search for lines without a match instead of lines with a match, * \end{description} * * The \verb+pattern+ is a regular expression. * * If successful (\verb+grep+ found a match), the function returns \verb+true+. * Otherwise, it returns \verb+false+. * \end{doc} *) type grep_flag = GrepQuiet | GrepPrint | GrepNoPrint | GrepNoMatch let grep_flags pos loc s = let len = String.length s in let rec collect flags i = if i = len then flags else let flag = match s.[i] with 'q' -> GrepQuiet | 'n' -> GrepPrint | 'v' -> GrepNoMatch | 'h' -> GrepNoPrint | c -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError ("illegal grep option", String.make 1 c))) in collect (flag::flags) (succ i) in collect [] 0 let grep venv pos loc args = let pos = string_pos "grep" pos in let outx = Omake_value.channel_of_var venv pos loc Omake_var.stdout_var in let flags, pattern, files = match args with [pattern] -> Omake_value_type.ValNone, pattern, Omake_value_type.ValNone | [pattern; files] -> ValNone, pattern, files | [flags; pattern; files] -> flags, pattern, files | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityRange (1, 3), List.length args))) in let flags = grep_flags pos loc (Omake_value.string_of_value venv pos flags) in let pattern = Omake_value.string_of_value venv pos pattern in let pattern = try Omake_lexer.lexer_of_string pattern with Failure err -> let msg = Lm_printf.sprintf "Mailformed regular expression '%s'" pattern in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in let files = Omake_value.values_of_value venv pos files in let flags, files = match files with [] -> flags, [Omake_env.venv_find_var venv pos loc Omake_var.stdin_var] | [_] -> flags, files | _::_::_ -> (if List.mem GrepNoPrint flags then flags else GrepPrint :: flags), files in let verbose = not (List.mem GrepQuiet flags) in let print = List.mem GrepPrint flags in let matches = not (List.mem GrepNoMatch flags) in (* Grep against a single line *) let grep_line file found line = let b = (Omake_lexer.lexer_matches pattern line == matches) in if b && verbose then begin if print then begin Lm_channel.output_string outx file; Lm_channel.output_char outx ':' end; Lm_channel.output_string outx line; Lm_channel.output_char outx '\n' end; found || b in (* Open the file *) let grep_file found s = let filename = Omake_value.string_of_value venv pos s in let inp, close_flag = Omake_value.in_channel_of_any_value venv pos s in let inx = Omake_env.venv_find_channel venv pos inp in let rec search found = let text = try Some (Lm_channel.input_line inx) with End_of_file -> None in match text with Some line' -> search (grep_line filename found line') | None -> found in let found = search found in if close_flag then Omake_env.venv_close_channel venv pos inp; found in let b = List.fold_left grep_file false files in Lm_channel.flush outx; Omake_builtin_util.val_of_bool b let builtin_grep venv pos loc args = let pos = string_pos "builtin-grep" pos in let args = match args with [arg] -> Omake_value.values_of_value venv pos arg | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 1, List.length args))) in (* Eat options *) let flags, pattern, files = let rec collect flags args = match args with arg :: args -> (match Omake_value.string_of_value venv pos arg with "-q" -> collect ("q" ^ flags) args | "-n" -> collect ("n" ^ flags) args | "-v" -> collect ("v" ^ flags) args | "-h" -> collect ("h" ^ flags) args | pattern -> flags, pattern, args) | [] -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringError "no pattern specified")) in collect "" args in grep venv pos loc [ValData flags; ValData pattern; ValArray files] (* * \begin{doc} * \fun{scan} * * \begin{verbatim} * scan(input-files) * case string1 * body1 * case string2 * body2 * ... * default * bodyd * \end{verbatim} * * The \verb+scan+ function provides input processing in command-line form. * The function takes file/filename arguments. If called with no * arguments, the input is taken from \verb+stdin+. If arguments are provided, * each specifies an \verb+InChannel+, or the name of a file for input. * Output is always to \verb+stdout+. * * The \verb+scan+ function operates by reading the input one line at a time, * and processing it according to the following algorithm. * * For each line, * the record is first split into fields, and * the fields are bound to the variables \verb+$1, $2, ...+. The variable * \verb+$0+ is defined to be the entire line, and \verb+$*+ is an array * of all the field values. The \verb+$(NF)+ variable is defined to be the number * of fields. * * Next, a case expression is selected. If \verb+string_i+ matches the token \verb+$1+, * then \verb+body_i+ is evaluated. If the body ends in an \verb+export+, the state * is passed to the next clause. Otherwise the value is discarded. * * For example, here is an \verb+scan+ function that acts as a simple command processor. * * \begin{verbatim} * calc() = * i = 0 * scan(script.in) * case print * println($i) * case inc * i = $(add $i, 1) * export * case dec * i = $(sub $i, 1) * export * case addconst * i = $(add $i, $2) * export * default * eprintln($"Unknown command: $1") * \end{verbatim} * * The \verb+scan+ function also supports several options. * * \begin{verbatim} * scan(options, files) * ... * \end{verbatim} * * \begin{description} * \item[A] Parse each line as an argument list, where arguments * may be quoted. For example, the following line has three words, * ``\verb+ls+'', ``\verb+-l+'', ``\verb+Program Files+''. * * \begin{verbatim} * ls -l "Program Files" * \end{verbatim} * \item[O] Parse each line using white space as the separator, using the * usual \OMake{} algorithm for string parsing. This is the default. * \item[x] Once each line is split, reduce each word using the * hex representation. This is the usual hex representation used * in URL specifiers, so the string ``Program Files'' may be * alternately represented in the form Program%20Files or * Program+Files. * \end{description} * * Note, if you want to redirect the output to a file, the easiest way is to * redefine the \verb+stdout+ variable. The \verb+stdout+ variable is scoped the * same way as other variables, so this definition does not affect the meaning of * \verb+stdout+ outside the \verb+calc+ function. * * \begin{verbatim} * calc() = * stdout = $(fopen script.out, w) * scan(script.in) * ... * close($(stdout)) * \end{verbatim} * \end{doc} *) (* * Scanner options. *) type parse_option = ParseArgs | ParseWords type rewrite_option = RewriteHex | RewriteNone let scan_options _ pos loc options s = let len = String.length s in let rec collect ((poption, roption) as options) i = if i = len then options else let options = match s.[i] with 'A' -> ParseArgs, roption | 'O' -> ParseWords, roption | 'x' -> poption, RewriteHex | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError ("illegal option", s))) in collect options (succ i) in collect options 0 let scan_options venv pos loc options = List.fold_left (scan_options venv pos loc) (ParseWords, RewriteNone) (Omake_value.strings_of_value venv pos options) (* * The arguments. *) let scan_args venv pos loc (args : Omake_value_type.t list) = let pos = string_pos "scan_args" pos in let cases, options, files = match args with | [ValCases cases] -> cases, Omake_value_type.ValNone, Omake_env.venv_find_var venv pos loc Omake_var.stdin_var | [ValCases cases; files] -> cases, ValNone, files | [ValCases cases; options; files] -> cases, options, files | (ValBody _) :: _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, SyntaxError("Unexpected body (bad indentation?)"))) | [_] | [_; _] | [_; _; _] -> raise (Omake_value_type.OmakeException (loc_pos loc pos, SyntaxError("No cases"))) | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 1, List.length args))) in let poptions, roptions = scan_options venv pos loc options in cases, poptions, roptions, Omake_value.values_of_value venv pos files (* * Awk the value. *) let scan venv pos loc args _ = let pos = string_pos "scan" pos in let cases, token_mode, rewrite_mode, files = scan_args venv pos loc args in (* Get lexers for all the cases *) let cases, def = List.fold_left (fun (cases, def) (v, test, body, export) -> if Lm_symbol.eq v Omake_symbol.case_sym then let s = Omake_value.string_of_value venv pos test in let cases = Lm_symbol.SymbolTable.filter_add cases (Lm_symbol.add s) (fun b -> match b with Some _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringVarError ("duplicate case", v))) | None -> body, export) in cases, def else if Lm_symbol.eq v Omake_symbol.default_sym then match def with Some _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringError "duplicate default case")) | None -> cases, Some (body, export) else raise (Omake_value_type.OmakeException (loc_pos loc pos, StringVarError ("unknown case", v)))) (Lm_symbol.SymbolTable.empty, None) cases in (* Split a line into words *) let collect_words_argv line = let words = match token_mode with ParseArgs -> Lm_string_util.parse_args line | ParseWords -> Omake_value.strings_of_value venv pos (ValString line) in match rewrite_mode with RewriteHex -> List.map Lm_string_util.decode_hex_name words | RewriteNone -> words in (* Select a case and run it *) let eval_case venv words = let body = match words with command :: _ -> (try Some (Lm_symbol.SymbolTable.find cases (Lm_symbol.add command)) with Not_found -> def) | [] -> def in match body with Some (body, export) -> let venv_new, _ = Omake_eval.eval_sequence_exp venv pos body in Omake_env.add_exports venv venv_new pos export | None -> venv in (* Read the file a line at a time *) let rec line_loop venv inx = let text = try Some (Lm_channel.input_line inx) with End_of_file -> None in match text with Some line -> let words = collect_words_argv line in let venv = Omake_env.venv_add_match venv line words in let venv = eval_case venv words in line_loop venv inx | None -> venv in let rec file_loop venv args = match args with arg :: args -> let inp, close_in = Omake_value.in_channel_of_any_value venv pos arg in let inx = Omake_env.venv_find_channel venv pos inp in let venv = try line_loop venv inx with exn when close_in -> Omake_env.venv_close_channel venv pos inp; raise exn in if close_in then Omake_env.venv_close_channel venv pos inp; file_loop venv args | [] -> venv in file_loop venv files, Omake_value_type.ValNone (* * \begin{doc} * \fun{awk} * * \begin{verbatim} * awk(input-files) * case pattern1: * body1 * case pattern2: * body2 * ... * default: * bodyd *\end{verbatim} * * or * * \begin{verbatim} * awk(options, input-files) * case pattern1: * body1 * case pattern2: * body2 * ... * default: * bodyd * \end{verbatim} * * The \verb+awk+ function provides input processing similar to \Cmd{awk}{1}, * but more limited. The \verb+input-files+ argument is a sequence of values, * each specifies an \verb+InChannel+, or the name of a file for input. * If called with no options and no file arguments, the input is taken from \verb+stdin+. * Output is always to \verb+stdout+. * * The variables \verb+RS+ and \verb+FS+ define record and field separators * as regular expressions. * The default value of \verb+RS+ is the regular expression \verb+\r|\n|\r\n+. * The default value of \verb+FS+ is the regular expression \verb+[ \t]++. * * The \verb+awk+ function operates by reading the input one record at a time, * and processing it according to the following algorithm. * * For each line, * the record is first split into fields using the field separator \verb+FS+, and * the fields are bound to the variables \verb+$1, $2, ...+. The variable * \verb+$0+ is defined to be the entire line, and \verb+$*+ is an array * of all the field values. The \verb+$(NF)+ variable is defined to be the number * of fields. * * Next, the cases are evaluated in order. * For each case, if the regular expression \verb+pattern_i+ matches the record \verb+$0+, * then \verb+body_i+ is evaluated. If the body ends in an \verb+export+, the state * is passed to the next clause. Otherwise the value is discarded. If the regular * expression contains \verb+\(r\)+ expression, those expression override the * fields \verb+$1, $2, ...+. * * For example, here is an \verb+awk+ function to print the text between two * delimiters \verb+\begin{<name>}+ and \verb+\end{<name>}+, where the \verb+<name>+ * must belong to a set passed as an argument to the \verb+filter+ function. * * \begin{verbatim} * filter(names) = * print = false * * awk(Awk.in) * case $"^\\end\{\([[:alpha:]]+\)\}" * if $(mem $1, $(names)) * print = false * export * export * default * if $(print) * println($0) * case $"^\\begin\{\([[:alpha:]]+\)\}" * print = $(mem $1, $(names)) * export * \end{verbatim} * * Note, if you want to redirect the output to a file, the easiest way is to * redefine the \verb+stdout+ variable. The \verb+stdout+ variable is scoped the * same way as other variables, so this definition does not affect the meaning of * \verb+stdout+ outside the \verb+filter+ function. * * \begin{verbatim} * filter(names) = * stdout = $(fopen file.out, w) * awk(Awk.in) * ... * close($(stdout)) * \end{verbatim} * * Options. * \begin{description} * \item[b] ``Break'' when evaluating cases. Only the first case that matches will be selected. * \end{description} * * The \hyperfun{break} can be used to abort the loop, * exiting the \verb+awk+ function immediately. * \end{doc} *) (* * Evaluate all the cases that match. *) let rec awk_eval_cases venv pos loc break line cases = match cases with (None, body, export) :: cases -> let venv_new, _ = Omake_eval.eval_sequence_exp venv pos body in let venv = Omake_env.add_exports venv venv_new pos export in if break then venv else awk_eval_cases venv pos loc break line cases | (Some lex, body, export) :: cases -> let channel = Lm_channel.of_string line in let venv, stop = match Omake_lexer.Lexer.search lex channel with Some (_, _, _, _, args) -> let venv_new = Omake_env.venv_add_match_args venv args in let venv_new, _ = Omake_eval.eval_sequence_exp venv_new pos body in let venv = Omake_env.add_exports venv venv_new pos export in venv, break | None -> venv, false in if stop then venv else awk_eval_cases venv pos loc break line cases | [] -> venv (* * The arguments. *) let awk_args venv pos loc (args : Omake_value_type.t list) = let pos = string_pos "awk_args" pos in match args with | [ValCases cases] -> cases, [Omake_env.venv_find_var venv pos loc Omake_var.stdin_var] | [ValCases cases; files] -> cases, Omake_value.values_of_value venv pos files | (ValBody _) :: _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, SyntaxError("Unexpected body (bad indentation?)"))) | [_] | [_; _] -> raise (Omake_value_type.OmakeException (loc_pos loc pos, SyntaxError("No cases"))) | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityRange (1, 2), List.length args))) let awk_option_args venv pos loc (args : Omake_value_type.t list) = let pos = string_pos "awk_args" pos in match args with | [ValCases cases] -> cases, "", [Omake_env.venv_find_var venv pos loc Omake_var.stdin_var] | [ValCases cases; files] -> cases, "", Omake_value.values_of_value venv pos files | [ValCases cases; options; files] -> cases, Omake_value.string_of_value venv pos options, Omake_value.values_of_value venv pos files | (ValBody _) :: _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, SyntaxError("Unexpected body (bad indentation?)"))) | [_] | [_; _] | [_; _; _] -> raise (Omake_value_type.OmakeException (loc_pos loc pos, SyntaxError("No cases"))) | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityRange (1, 3), List.length args))) type awk_flag = AwkBreak let awk_flags pos loc s = let len = String.length s in let rec collect flags i = if i = len then flags else let flag = match s.[i] with 'b' -> AwkBreak | c -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError ("illegal awk option", String.make 1 c))) in collect (flag :: flags) (succ i) in collect [] 0 (* * Awk the value. *) let awk venv pos loc args _ = let pos = string_pos "awk" pos in let cases, flags, files = awk_option_args venv pos loc args in let flags = awk_flags pos loc flags in let break = List.mem AwkBreak flags in (* Separator expressions *) let rs = try Omake_value.string_of_value venv pos (Omake_env.venv_find_var_exn venv Omake_var.rs_var) with Not_found -> "\r|\n|\r\n" in let fs = try Omake_value.string_of_value venv pos (Omake_env.venv_find_var_exn venv Omake_var.fs_var) with Not_found -> "[ \t]+" in let rs_lex = try Omake_lexer.lexer_of_string rs with Failure err -> let msg = Lm_printf.sprintf "Malformed regular expression '%s'" rs in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in let fs_lex = try Omake_lexer.lexer_of_string fs with Failure err -> let msg = Lm_printf.sprintf "Malformed regular expression '%s'" fs in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in (* Get lexers for all the cases *) let cases = List.map (fun (v, test, body, export) -> if Lm_symbol.eq v Omake_symbol.case_sym then let s = Omake_value.string_of_value venv pos test in let _, lex = try Omake_lexer.Lexer.add_clause Omake_lexer.Lexer.empty v s with Failure err -> let msg = Lm_printf.sprintf "Malformed regular expression '%s'" s in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in Some lex, body, export else if Lm_symbol.eq v Omake_symbol.default_sym then None, body, export else raise (Omake_value_type.OmakeException (loc_pos loc pos, StringVarError ("unknown case", v)))) cases in (* Split a line into words *) let collect_words line = let channel = Lm_channel.of_string line in let rec collect words = match Omake_lexer.Lexer.searchto fs_lex channel with Omake_lexer.Lexer.LexEOF -> List.rev words | Omake_lexer.Lexer.LexSkipped (_, skipped) | Omake_lexer.Lexer.LexMatched (_, _, skipped, _, _) -> collect (skipped :: words) in collect [] in (* Read the file a line at a time *) let rec line_loop venv inx lineno = match Omake_lexer.Lexer.searchto rs_lex inx with Omake_lexer.Lexer.LexEOF -> venv | Omake_lexer.Lexer.LexSkipped (_, line) | Omake_lexer.Lexer.LexMatched (_, _, line, _, _) -> (* Split into words *) let words = collect_words line in let venv = Omake_env.venv_add_match venv line words in let venv = Omake_env.venv_add_var venv Omake_var.fnr_var (ValInt lineno) in let venv = awk_eval_cases venv pos loc break line cases in line_loop venv inx (lineno + 1) in let rec file_loop venv args = match args with arg :: args -> let inp, close_in = Omake_value.in_channel_of_any_value venv pos arg in let inx = Omake_env.venv_find_channel venv pos inp in let venv = Omake_env.venv_add_var venv Omake_var.filename_var (ValData (Lm_channel.name inx)) in let venv = try line_loop venv inx 1 with exn when close_in -> Omake_env.venv_close_channel venv pos inp; raise exn in if close_in then Omake_env.venv_close_channel venv pos inp; file_loop venv args | [] -> venv in let venv = try file_loop venv files with Omake_env.Break (_, venv) -> venv in venv, Omake_value_type.ValNone (* * \begin{doc} * \fun{fsubst} * * \begin{verbatim} * fsubst(files) * case pattern1 [options] * body1 * case pattern2 [options] * body2 * ... * default * bodyd * \end{verbatim} * * The \verb+fsubst+ function provides a \Cmd{sed}{1}-like substitution * function. Similar to \verb+awk+, if \verb+fsubst+ is called with no * arguments, the input is taken from \verb+stdin+. If arguments are provided, * each specifies an \verb+InChannel+, or the name of a file for input. * * The \verb+RS+ variable defines a regular expression that determines a record separator, * The default value of \verb+RS+ is the regular expression \verb+\r|\n|\r\n+. * * The \verb+fsubst+ function reads the file one record at a time. * * For each record, the cases are evaluated in order. Each case defines * a substitution from a substring matching the \verb+pattern+ to * replacement text defined by the body. * * Currently, there is only one option: \verb+g+. * If specified, each clause specifies a global replacement, * and all instances of the pattern define a substitution. * Otherwise, the substitution is applied only once. * * Output can be redirected by redefining the \verb+stdout+ variable. * * For example, the following program replaces all occurrences of * an expression \verb+word.+ with its capitalized form. * * \begin{verbatim} * section * stdout = $(fopen Subst.out, w) * fsubst(Subst.in) * case $"\<\([[:alnum:]]+\)\." g * value $(capitalize $1). * close($(stdout)) * \end{verbatim} * \end{doc} *) (* * Substitution options. *) let subst_global_opt = 1 let subst_options _ pos loc options s = let len = String.length s in let rec collect options i = if i = len then options else let flag = match s.[i] with 'g' -> subst_global_opt | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError ("illegal option", s))) in collect (options lor flag) (succ i) in collect options 0 (* * Sed function performs a substitution line-by-line. *) let rec subst_eval_case venv pos loc buf channel lex options body = match Omake_lexer.Lexer.searchto lex channel with Omake_lexer.Lexer.LexEOF -> () | Omake_lexer.Lexer.LexSkipped (_, skipped) -> Buffer.add_string buf skipped | Omake_lexer.Lexer.LexMatched (_, _, skipped, matched, args) -> let venv' = Omake_env.venv_add_match venv matched args in let _, result = Omake_eval.eval_sequence_exp venv' pos body in Buffer.add_string buf skipped; Buffer.add_string buf (Omake_value.string_of_value venv pos result); if (options land subst_global_opt) <> 0 then subst_eval_case venv pos loc buf channel lex options body else Lm_channel.LexerInput.lex_buffer channel buf let subst_eval_line venv pos loc line cases = let buffer = Buffer.create (String.length line) in List.fold_left (fun line (lex, options, body) -> let channel = Lm_channel.of_string line in Buffer.clear buffer; subst_eval_case venv pos loc buffer channel lex options body; Buffer.contents buffer) line cases let fsubst venv pos loc args _ = let pos = string_pos "fsubst" pos in let cases, files = awk_args venv pos loc args in let outp = Omake_value.prim_channel_of_var venv pos loc Omake_var.stdout_var in let outx = Omake_env.venv_find_channel venv pos outp in (* Record separator *) let rs = try Omake_value.string_of_value venv pos (Omake_env.venv_find_var_exn venv Omake_var.rs_var) with Not_found -> "\r|\n|\r\n" in let rs_lex = try Omake_lexer.lexer_of_string rs with Failure err -> let msg = Lm_printf.sprintf "Malformed regular expression '%s'" rs in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in (* Get lexers for all the cases *) let cases = List.map (fun (v, test, body, _) -> let args = Omake_value.values_of_value venv pos test in let pattern, options = match args with pattern :: options -> Omake_value.string_of_value venv pos pattern, options | [] -> "", [] in let pattern, options = if Lm_symbol.eq v Omake_symbol.case_sym then pattern, options else if Lm_symbol.eq v Omake_symbol.default_sym then ".*", [] else raise (Omake_value_type.OmakeException (loc_pos loc pos, StringVarError ("unknown case", v))) in let options = List.fold_left (fun options arg -> subst_options venv pos loc options (Omake_value.string_of_value venv pos arg)) 0 options in let _, lex = try Omake_lexer.Lexer.add_clause Omake_lexer.Lexer.empty v pattern with Failure err -> let msg = Lm_printf.sprintf "Malformed regular expression '%s'" pattern in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in lex, options, body) cases in (* Read the file a line at a time *) let rec line_loop inx = match Omake_lexer.Lexer.searchto rs_lex inx with Omake_lexer.Lexer.LexEOF -> () | Omake_lexer.Lexer.LexSkipped (_, line) -> let line = subst_eval_line venv pos loc line cases in Lm_channel.output_string outx line | Omake_lexer.Lexer.LexMatched (_, _, line, term, _) -> let line = subst_eval_line venv pos loc line cases in Lm_channel.output_string outx line; Lm_channel.output_string outx term; line_loop inx in let rec file_loop files = match files with file :: files -> let inp, close_in = Omake_value.in_channel_of_any_value venv pos file in let inx = Omake_env.venv_find_channel venv pos inp in let () = try line_loop inx with exn when close_in -> Omake_env.venv_close_channel venv pos inp; raise exn in if close_in then Omake_env.venv_close_channel venv pos inp; file_loop files | [] -> () in let venv = try file_loop files; venv with Omake_env.Break (_, venv) -> venv in Lm_channel.flush outx; venv, Omake_value_type.ValNone (* * \begin{doc} * \fun{lex} * * \begin{verbatim} * lex(files) * case pattern1 * body1 * case pattern2 * body2 * ... * default * bodyd * \end{verbatim} * * The \verb+lex+ function provides a simple lexical-style scanner * function. The input is a sequence of files or channels. The cases * specify regular expressions. Each time the input is read, the regular * expression that matches the \emph{longest prefix} of the input is selected, * and the body is evaluated. * * If two clauses both match the same input, the \emph{last} one is selected * for execution. The \verb+default+ case matches the regular expression \verb+.+; * you probably want to place it first in the pattern list. * * If the body end with an \verb+export+ directive, * the state is passed to the next clause. * * For example, the following program collects all occurrences of alphanumeric * words in an input file. * * \begin{verbatim} * collect-words(files) = * words[] = * lex($(files)) * default * # empty * case $"[[:alnum:]]+" g * words[] += $0 * export * value $(words) * \end{verbatim} * * The \verb+default+ case, if one exists, matches single characters. Since * * It is an error if the input does not match any of the regular expressions. * * The \hyperfun{break} can be used to abort the loop. * \end{doc} *) let eof_sym = Lm_symbol.add "eof" let lex venv pos loc args _ = let pos = string_pos "lex" pos in let cases, files = awk_args venv pos loc args in (* Add a clause for EOF *) let _, lex = Omake_lexer.Lexer.add_clause Omake_lexer.Lexer.empty eof_sym "\\'" in (* Get lexers for all the cases *) let lex, cases, _ = List.fold_left (fun (lex, cases, index) (v, test, body, export) -> let args = Omake_value.values_of_value venv pos test in let pattern = match args with pattern :: _ -> Omake_value.string_of_value venv pos pattern | [] -> "" in let pattern = if Lm_symbol.eq v Omake_symbol.case_sym then pattern else if Lm_symbol.eq v Omake_symbol.default_sym then "." else raise (Omake_value_type.OmakeException (loc_pos loc pos, StringVarError ("unknown case", v))) in let action_sym = Lm_symbol.make "action" index in let _, lex = try Omake_lexer.Lexer.add_clause lex action_sym pattern with Failure err -> let msg = Lm_printf.sprintf "Malformed regular expression '%s'" pattern in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in let cases = Lm_symbol.SymbolTable.add cases action_sym (body, export) in lex, cases, succ index) (lex, Lm_symbol.SymbolTable.empty, 0) cases in (* Process the files *) let rec input_loop venv inx = let action_sym, lexeme_loc, lexeme, args = Omake_lexer.Lexer.lex lex inx in if Lm_symbol.eq action_sym eof_sym then venv else let venv_new = Omake_env.venv_add_match venv lexeme args in let venv_new = Omake_env.venv_add_var venv_new Omake_var.parse_loc_var (ValOther (ValLocation lexeme_loc)) in let body, export = try Lm_symbol.SymbolTable.find cases action_sym with Not_found -> raise (Invalid_argument "lex") in let venv_new, _ = Omake_eval.eval_sequence_exp venv_new pos body in let venv = Omake_env.add_exports venv venv_new pos export in input_loop venv inx in let rec file_loop venv files = match files with file :: files -> let inp, close_in = Omake_value.in_channel_of_any_value venv pos file in let inx = Omake_env.venv_find_channel venv pos inp in let venv = try input_loop venv inx with (Omake_env.Break _ | Omake_value_type.Return _ ) as exn -> if close_in then Omake_env.venv_close_channel venv pos inp; raise exn | exn -> if close_in then Omake_env.venv_close_channel venv pos inp; Omake_eval.raise_uncaught_exception pos exn in if close_in then Omake_env.venv_close_channel venv pos inp; file_loop venv files | [] -> venv in let venv = try file_loop venv files with Omake_env.Break (_, venv) -> venv in venv, Omake_value_type.ValNone (* * \begin{doc} * \fun{lex-search} * * \begin{verbatim} * lex-search(files) * case pattern1 * body1 * case pattern2 * body2 * ... * default * bodyd * \end{verbatim} * * The \verb+lex-search+ function is like the \verb+lex+ function, but input that * does not match any of the regular expressions is skipped. If the clauses include * a \verb+default+ case, then the \verb+default+ matches any skipped text. * * For example, the following program collects all occurrences of alphanumeric * words in an input file, skipping any other text. * * \begin{verbatim} * collect-words($(files)) = * words[] = * lex-search($(files)) * default * eprintln(Skipped $0) * case $"[[:alnum:]]+" g * words[] += $0 * export * \end{verbatim} * * The \verb+default+ case, if one exists, matches single characters. Since * * It is an error if the input does not match any of the regular expressions. * * The \hyperfun{break} can be used to abort the loop. * \end{doc} *) let lex_search venv pos loc args _ = let pos = string_pos "lex-search" pos in let cases, files = awk_args venv pos loc args in (* Get lexers for all the cases *) let lex, cases, default, _ = List.fold_left (fun (lex, cases, default, index) (v, test, body, export) -> let args = Omake_value.values_of_value venv pos test in let pattern = match args with pattern :: _ -> Omake_value.string_of_value venv pos pattern | [] -> "" in if Lm_symbol.eq v Omake_symbol.case_sym then let action_sym = Lm_symbol.make "action" index in let _, lex = try Omake_lexer.Lexer.add_clause lex action_sym pattern with Failure err -> let msg = Lm_printf.sprintf "Malformed regular expression '%s'" pattern in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in let cases = Lm_symbol.SymbolTable.add cases action_sym (body, export) in lex, cases, default, succ index else if Lm_symbol.eq v Omake_symbol.default_sym then lex, cases, Some (body, export), index else raise (Omake_value_type.OmakeException (loc_pos loc pos, StringVarError ("unknown case", v)))) (**) (Omake_lexer.Lexer.empty, Lm_symbol.SymbolTable.empty, None, 0) cases in (* What to do for skipped text *) let skip venv lexeme_loc lexeme = match lexeme, default with "", _ | _, None -> venv | _, Some (body, export) -> let venv_new = Omake_env.venv_add_match venv lexeme [] in let venv_new = Omake_env.venv_add_var venv_new Omake_var.parse_loc_var (ValOther (ValLocation lexeme_loc)) in let venv_new, _ = Omake_eval.eval_sequence_exp venv_new pos body in Omake_env.add_exports venv venv_new pos export in (* Process the files *) let rec input_loop venv inx = match Omake_lexer.Lexer.searchto lex inx with Omake_lexer.Lexer.LexEOF -> venv | Omake_lexer.Lexer.LexSkipped (_, lexeme) -> skip venv loc lexeme | Omake_lexer.Lexer.LexMatched (action_sym, lexeme_loc, skipped, lexeme, args) -> (* Process skipped text *) let venv = skip venv lexeme_loc skipped in (* Process the matched text *) let venv_new = Omake_env.venv_add_match venv lexeme args in let venv_new = Omake_env.venv_add_var venv_new Omake_var.parse_loc_var (ValOther (ValLocation lexeme_loc)) in let body, export = try Lm_symbol.SymbolTable.find cases action_sym with Not_found -> raise (Invalid_argument "lex") in let venv_new, _ = Omake_eval.eval_sequence_exp venv_new pos body in let venv = Omake_env.add_exports venv venv_new pos export in input_loop venv inx in (* Process each file *) let rec file_loop venv files = match files with file :: files -> let inp, close_in = Omake_value.in_channel_of_any_value venv pos file in let inx = Omake_env.venv_find_channel venv pos inp in let venv = try input_loop venv inx with (Omake_env.Break _ | Omake_value_type.Return _) as exn -> if close_in then Omake_env.venv_close_channel venv pos inp; raise exn | exn -> if close_in then Omake_env.venv_close_channel venv pos inp; Omake_eval.raise_uncaught_exception pos exn in if close_in then Omake_env.venv_close_channel venv pos inp; file_loop venv files | [] -> venv in let venv = try file_loop venv files with Omake_env.Break (_, venv) -> venv in venv, Omake_value_type.ValNone (* * \begin{doc} * \obj{Lexer} * * The \verb+Omake_lexer.Lexer+ object defines a facility for lexical analysis, similar to the * \Cmd{lex}{1} and \Cmd{flex}{1} programs. * * In \Prog{omake}, lexical analyzers can be constructed dynamically by extending * the \verb+Omake_lexer.Lexer+ class. A lexer definition consists of a set of directives specified * with method calls, and set of clauses specified as rules. * * For example, consider the following lexer definition, which is intended * for lexical analysis of simple arithmetic expressions for a desktop * calculator. * * \begin{verbatim} * lexer1. = * extends $(Omake_lexer.Lexer) * * other: . * eprintln(Illegal character: $* ) * lex() * * white: $"[[:space:]]+" * lex() * * op: $"[-+*/()]" * switch $* * case + * Token.unit($(loc), plus) * case - * Token.unit($(loc), minus) * case * * Token.unit($(loc), mul) * case / * Token.unit($(loc), div) * case $"(" * Token.unit($(loc), lparen) * case $")" * Token.unit($(loc), rparen) * * number: $"[[:digit:]]+" * Token.pair($(loc), exp, $(int $* )) * * eof: $"\'" * Token.unit($(loc), eof) * \end{verbatim} * * This program defines an object \verb+lexer1+ the extends the \verb+Omake_lexer.Lexer+ * object, which defines lexing environment. * * The remainder of the definition consists of a set of clauses, * each with a method name before the colon; a regular expression * after the colon; and in this case, a body. The body is optional, * if it is not specified, the method with the given name should * already exist in the lexer definition. * * \emph{NB} The clause that matches the \emph{longest} prefix of the input * is selected. If two clauses match the same input prefix, then the \emph{last} * one is selected. This is unlike most standard lexers, but makes more sense * for extensible grammars. * * The first clause matches any input that is not matched by the other clauses. * In this case, an error message is printed for any unknown character, and * the input is skipped. Note that this clause is selected only if no other * clause matches. * * The second clause is responsible for ignoring white space. * If whitespace is found, it is ignored, and the lexer is called * recursively. * * The third clause is responsible for the arithmetic operators. * It makes use of the \verb+Token+ object, which defines three * fields: a \verb+loc+ field that represents the source location; * a \verb+name+; and a \verb+value+. * * The lexer defines the \verb+loc+ variable to be the location * of the current lexeme in each of the method bodies, so we can use * that value to create the tokens. * * The \verb+Token.unit($(loc), name)+ * method constructs a new \verb+Token+ object with the given name, * and a default value. * * The \verb+number+ clause matches nonnegative integer constants. * The \verb+Token.pair($(loc), name, value)+ constructs a token with the * given name and value. * * \verb+Omake_lexer.Lexer+ object operate on \verb+InChannel+ objects. * The method \verb+lexer1.lex-channel(channel)+ reads the next * token from the channel argument. * * \subsection{Omake\textunderscore{}lexer.Lexer matching} * * During lexical analysis, clauses are selected by longest match. * That is, the clause that matches the longest sequence of input * characters is chosen for evaluation. If no clause matches, the * lexer raises a \verb+RuntimeException+. If more than one clause * matches the same amount of input, the first one is chosen * for evaluation. * * \subsection{Extending lexer definitions} * * Suppose we wish to augment the lexer example so that it ignores * comments. We will define comments as any text that begins with * the string \verb+(*+, ends with \verb+*)+, and comments may * be nested. * * One convenient way to do this is to define a separate lexer * just to skip comments. * * \begin{verbatim} * lex-comment. = * extends $(Omake_lexer.Lexer) * * level = 0 * * other: . * lex() * * term: $"[*][)]" * if $(not $(eq $(level), 0)) * level = $(sub $(level), 1) * lex() * * next: $"[(][*]" * level = $(add $(level), 1) * lex() * * eof: $"\'" * eprintln(Unterminated comment) * \end{verbatim} * * This lexer contains a field \verb+level+ that keeps track of the nesting * level. On encountering a \verb+(*+ string, it increments the level, * and for \verb+*)+, it decrements the level if nonzero, and continues. * * Next, we need to modify our previous lexer to skip comments. * We can do this by extending the lexer object \verb+lexer1+ * that we just created. * * \begin{verbatim} * lexer1. += * comment: $"[(][*]" * lex-comment.lex-channel($(channel)) * lex() * \end{verbatim} * * The body for the comment clause calls the \verb+lex-comment+ lexer when * a comment is encountered, and continues lexing when that lexer returns. * * \subsection{Threading the lexer object} * * Clause bodies may also end with an \verb+export+ directive. In this case * the lexer object itself is used as the returned token. If used with * the \verb+Parser+ object below, the lexer should define the \verb+loc+, \verb+name+ * and \verb+value+ fields in each \verb+export+ clause. Each time * the \verb+Parser+ calls the lexer, it calls it with the lexer returned * from the previous lex invocation. * \end{doc} *) (* * Add a lexer clause. *) let lex_rule venv pos loc (args : Omake_value_type.t list) kargs = let pos = string_pos "lex-rule" pos in match args, kargs with [_; action; _; pattern; _; ValBody (_, [], [], body, export)], [] -> let lexer = Omake_value.current_lexer venv pos in let action_name = Omake_value.string_of_value venv pos action in let action_sym = Lm_symbol.add action_name in let pattern = Omake_value.string_of_value venv pos pattern in let _, lexer = try Omake_lexer.Lexer.add_clause lexer action_sym pattern with Failure err -> let msg = Lm_printf.sprintf "Malformed regular expression '%s'" pattern in raise (Omake_value_type.OmakeException (loc_pos loc pos, StringStringError (msg, err))) in (* Add the method *) let action_var = Omake_ir.VarThis (loc, action_sym) in let venv = Omake_env.venv_add_var venv action_var (ValFun (Omake_env.venv_get_env venv, [], [], body, export)) in let venv = Omake_env.venv_add_var venv Omake_var.builtin_field_var (ValOther (ValLexer lexer)) in venv, Omake_value_type.ValNone | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 6, List.length args))) (* * Perform the lexing. *) let lex_engine venv pos loc args kargs = let pos = string_pos "lex" pos in match args, kargs with [arg], [] -> let lexer = Omake_value.current_lexer venv pos in let inp, close_flag = Omake_value.in_channel_of_any_value venv pos arg in let inx = Omake_env.venv_find_channel venv pos inp in let action, lexeme_loc, lexeme, args = try Omake_lexer.Lexer.lex lexer inx with Failure _ as exn -> let loc = Lm_channel.loc inx in let pos = loc_pos loc pos in if close_flag then Omake_env.venv_close_channel venv pos inp; raise (Omake_value_type.UncaughtException (pos, exn)) in let () = if close_flag then Omake_env.venv_close_channel venv pos inp in let venv = Omake_env.venv_add_match venv lexeme args in let venv = Omake_env.venv_add_var venv Omake_var.parse_loc_var (ValOther (ValLocation lexeme_loc)) in let action = Omake_env.venv_find_var venv pos loc (Omake_ir.VarThis (loc, action)) in Omake_eval.eval_apply venv pos loc action [] [] | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 1, List.length args))) (* * \begin{doc} * \obj{Parser} * * The \verb+Parser+ object provides a facility for syntactic analysis based * on context-free grammars. * * \verb+Parser+ objects are specified as a sequence of directives, * specified with method calls; and productions, specified as rules. * * For example, let's finish building the desktop calculator started * in the \verb+Lexer+ example. * * \begin{verbatim} * parser1. = * extends $(Parser) * * # * # Use the main lexer * # * lexer = $(lexer1) * * # * # Precedences, in ascending order * # * left(plus minus) * left(mul div) * right(uminus) * * # * # A program * # * start(prog) * * prog: exp eof * return $1 * * # * # Simple arithmetic expressions * # * exp: minus exp :prec: uminus * neg($2) * * exp: exp plus exp * add($1, $3) * * exp: exp minus exp * sub($1, $3) * * exp: exp mul exp * mul($1, $3) * * exp: exp div exp * div($1, $3) * * exp: lparen exp rparen * return $2 * \end{verbatim} * * Parsers are defined as extensions of the \verb+Parser+ class. * A \verb+Parser+ object must have a \verb+lexer+ field. The \verb+lexer+ * is not required to be a \verb+Lexer+ object, but it must provide * a \verb+lexer.lex()+ method that returns a token object with * \verb+name+ and \verb+value+ fields. For this example, we use the * \verb+lexer1+ object that we defined previously. * * The next step is to define precedences for the terminal symbols. * The precedences are defined with the \verb+left+, \verb+right+, * and \verb+nonassoc+ methods in order of increasing precedence. * * The grammar must have at least one start symbol, declared with * the \verb+start+ method. * * Next, the productions in the grammar are listed as rules. * The name of the production is listed before the colon, and * a sequence of variables is listed to the right of the colon. * The body is a semantic action to be evaluated when the production * is recognized as part of the input. * * In this example, these are the productions for the arithmetic * expressions recognized by the desktop calculator. The semantic * action performs the calculation. The variables \verb+$1, $2, ...+ * correspond to the values associated with each of the variables * on the right-hand-side of the production. * * \subsection{Calling the parser} * * The parser is called with the \verb+$(parser1.parse-channel start, channel)+ * or \verb+$(parser1.parse-file start, file)+ functions. The \verb+start+ * argument is the start symbol, and the \verb+channel+ or \verb+file+ * is the input to the parser. * * \subsection{Parsing control} * * The parser generator generates a pushdown automation based on LALR(1) * tables. As usual, if the grammar is ambiguous, this may generate shift/reduce * or reduce/reduce conflicts. These conflicts are printed to standard * output when the automaton is generated. * * By default, the automaton is not constructed until the parser is * first used. * * The \verb+build(debug)+ method forces the construction of the automaton. * While not required, it is wise to finish each complete parser with * a call to the \verb+build(debug)+ method. If the \verb+debug+ variable * is set, this also prints with parser table together with any conflicts. * * The \verb+loc+ variable is defined within action bodies, and represents * the input range for all tokens on the right-hand-side of the production. * * \subsection{Extending parsers} * * Parsers may also be extended by inheritance. * For example, let's extend the grammar so that it also recognizes * the \verb+<<+ and \verb+>>+ shift operations. * * First, we extend the lexer so that it recognizes these tokens. * This time, we choose to leave \verb+lexer1+ intact, instead of * using the += operator. * * \begin{verbatim} * lexer2. = * extends $(lexer1) * * lsl: $"<<" * Token.unit($(loc), lsl) * * asr: $">>" * Token.unit($(loc), asr) * \end{verbatim} * * Next, we extend the parser to handle these new operators. * We intend that the bitwise operators have lower precedence * than the other arithmetic operators. The two-argument form * of the \verb+left+ method accomplishes this. * * \begin{verbatim} * parser2. = * extends $(parser1) * * left(plus, lsl lsr asr) * * lexer = $(lexer2) * * exp: exp lsl exp * lsl($1, $3) * * exp: exp asr exp * asr($1, $3) * \end{verbatim} * * In this case, we use the new lexer \verb+lexer2+, and we add productions * for the new shift operations. * \end{doc} *) (* * Add start symbols. *) let parse_start venv pos loc args kargs = let pos = string_pos "parse-start" pos in let parse = Omake_value.current_parser venv pos in let args = match args, kargs with [arg], [] -> Omake_value.strings_of_value venv pos arg | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 1, List.length args))) in let parse = List.fold_left (fun parse s -> Omake_parser.Parser.add_start parse (Lm_symbol.add s)) parse args in (* Redefine the parser *) let venv = Omake_env.venv_add_var venv Omake_var.builtin_field_var (ValOther (ValParser parse)) in venv, Omake_value_type.ValNone (* * Precedence operations. *) let parse_prec venv pos loc args kargs assoc = let pos = string_pos "parse-prec" pos in let this = Omake_env.venv_this venv in let parse = Omake_value.current_parser venv pos in let parse, level, args = match args, kargs with [before; args], [] -> let current_prec = Lm_symbol.add (Omake_value.string_of_value venv pos before) in let level = try Omake_parser.Parser.find_prec parse current_prec with Not_found -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringVarError ("no such precedence", current_prec))) in let parse, level = Omake_parser.Parser.create_prec_lt parse level assoc in parse, level, args | [args], [] -> let current_prec = Lm_symbol.add (Omake_value.string_of_value venv pos (Omake_env.venv_find_field_internal this pos Omake_symbol.current_prec_sym)) in let level = try Omake_parser.Parser.find_prec parse current_prec with Not_found -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringVarError ("current precedence is not found", current_prec))) in let parse, level = Omake_parser.Parser.create_prec_gt parse level assoc in parse, level, args | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityRange (1, 2), List.length args))) in let args = Omake_value.strings_of_value venv pos args in let parse = List.fold_left (fun parse s -> Omake_parser.Parser.add_prec parse level (Lm_symbol.add s)) parse args in (* Reset the current precedence *) let venv = match args with arg :: _ -> Omake_env.venv_add_var venv Omake_var.current_prec_field_var (ValString arg) | [] -> venv in (* Redefine the parser *) let venv = Omake_env.venv_add_var venv Omake_var.builtin_field_var (ValOther (ValParser parse)) in venv, Omake_value_type.ValNone let parse_left venv pos loc args kargs = let pos = string_pos "parse-left" pos in parse_prec venv pos loc args kargs LeftAssoc let parse_right venv pos loc args kargs = let pos = string_pos "parse-right" pos in parse_prec venv pos loc args kargs RightAssoc let parse_nonassoc venv pos loc args kargs = let pos = string_pos "parse-nonassoc" pos in parse_prec venv pos loc args kargs NonAssoc (* * Build the parser. *) let parse_build venv pos loc args = let pos = string_pos "parse-build" pos in match args with [arg] -> let par = Omake_value.current_parser venv pos in let debug = Omake_value.bool_of_value venv pos arg in Omake_parser.Parser.build par debug; Omake_value_type.ValNone | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 1, List.length args))) (* * Get the precedence option. *) let prec_option venv pos _ options = Omake_env.venv_map_fold (fun _ optname optval -> let s = Omake_value.string_of_value venv pos optname in if s = ":prec:" then Some (Lm_symbol.add (Omake_value.string_of_value venv pos optval)) else raise (Omake_value_type.OmakeException (pos, StringValueError ("illegal option", optname)))) None options (* * Compute an action name that is not defined in the current object. *) let action_sym = Lm_symbol.add "action" let find_action_name venv loc = Lm_symbol.new_name action_sym (fun v -> Omake_env.venv_defined venv (Omake_ir.VarThis (loc, v))) (* * Add a parser clause. *) let parse_rule venv pos loc (args : Omake_value_type.t list) kargs = let pos = string_pos "parse-rule" pos in let action, head, rhs, options, body, export = match args, kargs with [_; action; head; rhs; ValMap options; ValBody (_, [], [], body, export)], [] -> let action = Omake_value.string_of_value venv pos action in let head = Omake_value.string_of_value venv pos head in if head = "" then (* Action name was omitted *) find_action_name venv loc, Lm_symbol.add action, rhs, options, body, export else Lm_symbol.add action, Lm_symbol.add head, rhs, options, body, export | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 6, List.length args))) in let par = Omake_value.current_parser venv pos in let rhs = List.map Lm_symbol.add (Omake_value.strings_of_value venv pos rhs) in let pre = prec_option venv pos loc options in let par = Omake_parser.Parser.add_production par action head rhs pre in (* Add the method if there is a body *) let venv = match body with _ :: _ -> let body : Omake_ir.exp list = LetVarExp (loc, Omake_ir.VarThis (loc, Omake_symbol.val_sym), [], VarDefNormal, ConstString (loc, "")) :: body in Omake_env.venv_add_var venv (Omake_ir.VarThis (loc, action)) (ValFun (Omake_env.venv_get_env venv, [], [], body, export)) | [] -> venv in (* Add back the parser *) let venv = Omake_env.venv_add_var venv Omake_var.builtin_field_var (ValOther (ValParser par)) in venv, Omake_value_type.ValNone (* * Perform the lexing. *) let parse_engine venv pos loc args = let pos = string_pos "parse-engine" pos in match args with [start] -> let dfa = Omake_value.current_parser venv pos in let start = Lm_symbol.add (Omake_value.string_of_value venv pos start) in let lexer = Omake_env.venv_find_var venv pos loc Omake_var.lexer_field_var in let lexer = Omake_eval.eval_object venv pos lexer in let parser_obj = Omake_env.venv_this venv in let lex (venv, parser_obj, lexer) = let lex = Omake_env.venv_find_field_internal lexer pos Omake_symbol.lex_sym in let venv = Omake_env.venv_with_object venv lexer in let venv, result = Omake_eval.eval_apply venv pos loc lex [] [] in let obj = Omake_eval.eval_object venv pos result in try let lex_loc = Omake_env.venv_find_field_internal_exn obj Omake_symbol.loc_sym in let lex_loc = Omake_value.loc_of_value venv pos lex_loc in let name = Omake_env.venv_find_field_internal_exn obj Omake_symbol.name_sym in let name = Lm_symbol.add (Omake_value.string_of_value venv pos name) in let value = Omake_env.venv_find_field_internal_exn obj Omake_symbol.val_sym in name, lex_loc, (venv, parser_obj, lexer), value with Not_found -> let print_error buf = Format.fprintf buf "@[<v 3>The lexer returned a malformed object.\ @ @[<v 3>The result of a lexer action should be an object with at least 3 fields:\ @ loc: the location of the token\ @ name: the name of the token\ @ val: the value of the token@]\ @ %a@]" Omake_value_print.pp_print_value (ValObject obj) in raise (Omake_value_type.OmakeException (pos, LazyError print_error)) in let eval (venv, parser_obj, lexer) action loc args = let pos = loc_pos loc pos in let venv = Omake_env.venv_add_match_values venv args in let action = Omake_env.venv_find_field_internal parser_obj pos action in let venv = Omake_env.venv_with_object venv parser_obj in let venv = Omake_env.venv_add_var venv Omake_var.parse_loc_var (ValOther (ValLocation loc)) in let venv, result = Omake_eval.eval_apply venv pos loc action [] [] in (venv, parser_obj, lexer), result in let _, value = try Omake_parser.Parser.parse dfa start lex eval (venv, parser_obj, lexer) with Failure _ as exn -> raise (Omake_value_type.UncaughtException (pos, exn)) | Lm_parser.ParseError (loc, s) -> raise (Omake_value_type.OmakeException (loc_pos loc pos, StringError s)) in value | _ -> raise (Omake_value_type.OmakeException (loc_pos loc pos, ArityMismatch (ArityExact 2, List.length args))) (************************************************************************ * External interface. *) let () = let builtin_funs = [true, "grep", grep, Omake_ir.ArityRange (1, 3); true, "builtin-grep", builtin_grep, ArityExact 1; true, "cat", cat, ArityExact 1; true, "parse-engine", parse_engine, ArityExact 1; true, "parse-build", parse_build, ArityExact 1; ] in let builtin_kfuns = [true, "lex-rule", lex_rule, Omake_ir.ArityRange (3, 4); true, "lex-engine", lex_engine, ArityExact 1; true, "parse-rule", parse_rule, ArityRange (3, 5); true, "parse-start", parse_start, ArityExact 1; true, "parse-left", parse_left, ArityExact 1; true, "parse-right", parse_right, ArityExact 1; true, "parse-nonassoc", parse_nonassoc, ArityExact 1; true, "scan", scan, ArityRange (1, 3); true, "awk", awk, ArityExact 3; true, "fsubst", fsubst, ArityExact 3; true, "lex", lex, ArityExact 3; true, "lex-search", lex_search, ArityExact 3; ] in let builtin_info = {Omake_builtin_type.builtin_empty with builtin_funs = builtin_funs; builtin_kfuns = builtin_kfuns } in Omake_builtin.register_builtin builtin_info