(* $Id: char_classes_wlex.def 662 2004-05-25 20:57:28Z gerd $
* ----------------------------------------------------------------------
*
*)
(* This file is divided up into sections, marked by (* [SECTION] *).
* Sections are processed by lexpp.
*)
(* ---------------------------------------------------------------------- *)
(* [TYPE_WLEX] *)
(* TYPE_WLEX announces that this file consists of two sections,
* CLASSES, and LET. The contents of this section are ignored.
*)
(* ---------------------------------------------------------------------- *)
(* [CLASSES] *)
(* Declare the character classes for wlex:
* - invalid: all invalid code points, e.g. illegal control characters
* - unicode_baseChar: baseChar characters outside the ASCII range.
* baseChar is defined by XML.
* - ideographic: defined by XML.
* - extender: defined by XML.
* - ascii_digit: ASCII 0 to 9
* - unicode_digit: the digits outside of the ASCII range
* - combiningChar: defined by XML
* - otherChar: other legal characters that may occur in XML text.
*
* Character classes marked as PRIVATE must not be used outside
* of this file.
*)
(* Declaration of the character classes: first the named classes, then
 * the individual characters that get a class of their own.
 *)
classes
invalid (* PRIVATE *)
unicode_baseChar (* PRIVATE *)
ideographic
extender
ascii_digit
unicode_digit (* PRIVATE *)
combiningChar
otherChar (* PRIVATE *)
(* Now characters that may be referenced by the lexer definition directly: *)
(* The upper-case and the lower-case ASCII letters: *)
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
(* ASCII punctuation. The double quote cannot be written inside the
 * string without escaping, so it follows as a separate character literal:
 *)
"<>?!-/[]&#;%+*|,()'=.:_{}" '"'
(* Whitespace characters, written as decimal ASCII codes: *)
'\010' (* line feed *)
'\013' (* carriage return *)
'\009' (* horizontal tab *)
'\032' (* space *)
(* ---------------------------------------------------------------------- *)
(* [LET] *)
(* The section LET consists of further "let" definitions that are copied
* to the output files.
*)
(* The following definitions assume that the character encoding is
* ASCII-compatible.
*)
(* A single ASCII hexadecimal digit: a member of ascii_digit, or one of
 * the letters a to f in either case.
 *)
let ascii_hexdigit = ascii_digit | [ "abcdef" "ABCDEF" ]
(* A letter: a member of the classes unicode_baseChar or ideographic,
 * or one of the 52 ASCII letters.
 *)
let letter =
  unicode_baseChar
  | ideographic
  | [ "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" ]
(* Any digit: a member of unicode_digit or of ascii_digit. *)
let digit = unicode_digit | ascii_digit
(* Any single character that is not a member of the class invalid. *)
let character = [^ invalid ]
(* Any single character except the question mark and the members of the
 * class invalid.
 *)
let char_but_qmark = (* '?' = '\063' *)
[^ '?' invalid]
(* Any single character except the right angle bracket and the members
 * of the class invalid.
 *)
let char_but_rangle = (* '>' = '\062' *)
[^ '>' invalid]
(* Any single character except the minus sign and the members of the
 * class invalid.
 *)
let char_but_minus = (* '-' = '\045' *)
[^ '-' invalid]
(* Any single character except the double quote and the members of the
 * class invalid.
 *)
let char_but_quot = (* '"' = '\034' *)
[^ '"' invalid]
(* Any single character except the apostrophe and the members of the
 * class invalid.
 *)
let char_but_apos =
  (* '\'' = '\039' *)
  [^ '\'' invalid]
(* Any single character except: ampersand, left angle bracket, right
 * square bracket, left and right curly brace, tab, line feed, carriage
 * return, and the members of the class invalid.
 *)
let pchar_text =
  [^ '&' '<' ']' '{' '}' '\009' '\010' '\013' invalid]
(* Any single character except: ampersand, left angle bracket, double
 * quote, apostrophe, left and right curly brace, tab, line feed,
 * carriage return, and the members of the class invalid.
 *)
let pchar_ebatt =
  [^ '&' '<' '"' '\'' '{' '}' '\009' '\010' '\013' invalid]
(* Any single character except the right square bracket and the members
 * of the class invalid.
 *)
let char_but_rbracket = (* ']' = '\093' *)
[^ ']' invalid]
(* Any single character except the right angle bracket, the right square
 * bracket, and the members of the class invalid.
 *)
let char_but_rbracket_rangle =
  (* '>' = '\062', ']' = '\093' *)
  [^ '>' ']' invalid]
(* Any single character except tab, line feed, carriage return,
 * ampersand, left angle bracket, and the members of the class invalid.
 *)
let pchar_but_amp_lt =
  (* '&' = '\038', '<' = '\060' *)
  [^ '\009' '\010' '\013' '&' '<' invalid]
(* Any single character except tab, line feed, carriage return, percent
 * sign, ampersand, and the members of the class invalid.
 *)
let pchar_but_amp_percent =
  (* '%' = '\037', '&' = '\038' *)
  [^ '\009' '\010' '\013' '%' '&' invalid]
(* Any single character except double quote, apostrophe, left angle
 * bracket, right square bracket, and the members of the class invalid.
 *)
let char_ignore =
  (* '"' = '\034', '\'' = '\039', '<' = '\060', ']' = '\093' *)
  [^ '"' '\'' '<' ']' invalid]
(* [END] *)