Plasma GitLab Archive
Projects Blog Knowledge

(* $Id: char_classes_wlex.def 662 2004-05-25 20:57:28Z gerd $
 * ----------------------------------------------------------------------
 *
 *)

(* This file is divided up into sections, marked by (* [SECTION] *).
 * Sections are processed by lexpp.
 *)

(* ---------------------------------------------------------------------- *)
(* [TYPE_WLEX] *)

(* TYPE_WLEX announces that this file consists of two sections,
 * CLASSES and LET. The contents of this section are ignored.
 *)

(* ---------------------------------------------------------------------- *)
(* [CLASSES] *)

(* Declare the character classes for wlex:
 * - invalid: all invalid code points, e.g. illegal control characters
 * - unicode_baseChar: baseChar characters outside the ASCII range.
 *   baseChar is defined by XML.
 * - ideographic: defined by XML.
 * - extender: defined by XML.
 * - ascii_digit: ASCII 0 to 9
 * - unicode_digit: the digits outside of the ASCII range
 * - combiningChar: defined by XML
 * - otherChar: other legal characters that may occur in XML text.
 *
 * Character classes marked as PRIVATE must not be used outside
 * of this file.
 *)


(* Class declaration for wlex.  The first eight entries are the named
 * classes described in the comment above; the quoted entries after them
 * are literal characters that lexer rules may mention directly.
 * NOTE(review): the order of the entries presumably determines how the
 * classes are numbered in the generated lexer - confirm before reordering.
 *)
classes 
  invalid            (* PRIVATE *)
  unicode_baseChar   (* PRIVATE *)
  ideographic 
  extender
  ascii_digit
  unicode_digit      (* PRIVATE *)
  combiningChar
  otherChar          (* PRIVATE *)
  (* Now characters that may be referenced by the lexer definition directly: *)
  (* ASCII upper-case letters, then ASCII lower-case letters: *)
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  (* ASCII punctuation; the double quote is given as a separate char literal: *)
  "<>?!-/[]&#;%+*|,()'=.:_{}" '"'
  (* Whitespace, written as decimal character codes:
   * 010 = line feed, 013 = carriage return, 009 = horizontal tab, 032 = space
   *)
  '\010'
  '\013'
  '\009'
  '\032'

(* ---------------------------------------------------------------------- *)
(* [LET] *)

(* The section LET consists of further "let" definitions that are copied
 * to the output files.
 *)

(* The following definitions assume that the character encoding is
 * ASCII-compatible.
 *)

(* A hexadecimal digit: one of the ASCII letters A to F in either case,
 * or any member of the ascii_digit class.
 *)
let ascii_hexdigit = [ "ABCDEFabcdef" ] | ascii_digit

(* A letter: an ASCII letter of either case, or any member of the classes
 * unicode_baseChar or ideographic.
 *)
let letter = [ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	       "abcdefghijklmnopqrstuvwxyz"
             ] | unicode_baseChar | ideographic

(* A digit: the union of the ASCII digits and the non-ASCII digits. *)
let digit = ascii_digit | unicode_digit

(* Any single character that is not in the invalid class. *)
let character = [^ invalid ]

(* Any valid character except the question mark. *)
let char_but_qmark =            (* '?' = '\063' *)
  [^ '?' invalid]


(* Any valid character except the right angle bracket. *)
let char_but_rangle =           (* '>' = '\062' *)
  [^ '>' invalid]


(* Any valid character except the minus sign.  Here the minus is a quoted
 * character literal, so it stands for itself and is not a range operator.
 *)
let char_but_minus =            (* '-' = '\045' *)
  [^ '-' invalid]

(* Any valid character except the double quote. *)
let char_but_quot =             (* '"' = '\034' *)
  [^ '"' invalid]

(* Any valid character except the apostrophe.  The apostrophe is written
 * as a one-character string here instead of an escaped char literal.
 *)
let char_but_apos =             (* '\'' = '\039' *)
  [^ "'" invalid]


(* Any valid character except tab (009), line feed (010), carriage
 * return (013), ampersand, left angle bracket, right square bracket and
 * the two curly braces.
 * NOTE(review): presumably the ordinary characters of text content, with
 * the excluded ones handled by separate lexer rules - confirm against the
 * lexer definitions that use this name.
 *)
let pchar_text = 
  [^ '\009' '\010' '\013' '&' '<' ']' '{' '}' invalid]

(* Any valid character except tab (009), line feed (010), carriage
 * return (013), ampersand, left angle bracket, double quote, apostrophe
 * and the two curly braces.
 * NOTE(review): presumably the ordinary characters of an attribute value
 * in some extended syntax - the meaning of the ebatt suffix is not visible
 * in this file; confirm against the lexer definitions that use this name.
 *)
let pchar_ebatt = 
  [^ '\009' '\010' '\013' '&' '<' '"' '\'' '{' '}' invalid]

(* Any valid character except the right square bracket. *)
let char_but_rbracket =         (* ']' = '\093' *)
  [^ ']' invalid]

(* Any valid character except the right square bracket and the right
 * angle bracket.
 *)
let char_but_rbracket_rangle =  (* ']' = '\093', '>' = '\062' *)
  [^ ']' '>' invalid]


(* Any valid character except ampersand, left angle bracket, and the
 * whitespace characters tab (009), line feed (010) and carriage
 * return (013).  The space character is not excluded.
 *)
let pchar_but_amp_lt =
  (* '&' = '\038', '<' = '\060' *)
  [^ '&' '<' '\009' '\010' '\013' invalid]

(* Any valid character except ampersand, percent sign, and the
 * whitespace characters tab (009), line feed (010) and carriage
 * return (013).  The space character is not excluded.
 *)
let pchar_but_amp_percent =
  (* '%' = '\037', '&' = '\038' *)
  [^ '&' '%' '\009' '\010' '\013' invalid]

(* Any valid character except left angle bracket, right square bracket,
 * double quote and apostrophe.
 * NOTE(review): judging by the name, this is for characters a lexer rule
 * may skip over - confirm against the lexer definitions that use it.
 *)
let char_ignore =
  (* '<'=060, ']'=093, '"'=034, '\''=039 *)
  [^ '<' ']' '"' "'" invalid]
  
(* [END] *)


This web site is published by Informatikbüro Gerd Stolpmann
Powered by Caml