(* $Id$ *)
exception SASLprepError
open Netsaslprep_data
let map (u : int array) =
let to_space = Hashtbl.create 41 in
let to_nothing = Hashtbl.create 41 in
Array.iter (fun p -> Hashtbl.add to_space p ()) map_to_space;
Array.iter (fun p -> Hashtbl.add to_nothing p ()) map_to_nothing;
let u0 = Array.to_list u in
let u1 =
List.filter
(fun p -> not (Hashtbl.mem to_nothing p))
u0 in
let u2 =
List.map
(fun p ->
if Hashtbl.mem to_space p then
32
else
p
)
u1 in
Array.of_list u2
(* The KC normalizer follows roughly
https://web.archive.org/web/20070514031407/http://www.unicode.org/unicode/reports/tr15/Normalizer.html
*)
type buffer =
{ mutable buf : int array;
mutable len : int
}
let create_buffer() =
{ buf = Array.make 42 0;
len = 0
}
let buffer_at buf k =
assert(k >= 0 && k < buf.len);
buf.buf.(k)
let set_buffer_at buf k ch =
assert(k >= 0 && k < buf.len);
buf.buf.(k) <- ch
let resize buf =
let nbuf = Array.make (Array.length buf.buf * 2) 0 in
Array.blit buf.buf 0 nbuf 0 (Array.length buf.buf);
buf.buf <- nbuf
let insert_at buf k ch =
assert(k >= 0 && k <= buf.len);
if buf.len = Array.length buf.buf then resize buf;
if k < buf.len then
Array.blit buf.buf k buf.buf (k+1) (buf.len - k);
buf.buf.(k) <- ch;
buf.len <- buf.len + 1
let length buf =
buf.len
let contents buf =
Array.sub buf.buf 0 buf.len
let get_cano_tab() =
let cano_tab = Hashtbl.create 41 in
let last = ref 0 in
Array.iter
(fun p ->
if p < 0 then
last := -p
else
Hashtbl.add cano_tab p !last
)
cano_classes;
cano_tab
let cano_tab =
(* this table is pretty small *)
get_cano_tab()
(* Hangul *)
let h_SBase = 0xAC00
let h_LBase = 0x1100
let h_VBase = 0x1161
let h_TBase = 0x11A7
let h_LCount = 19
let h_VCount = 21
let h_TCount = 28
let h_NCount = h_VCount * h_TCount
let h_SCount = h_LCount * h_NCount
let decompose_hangul code =
if code < h_SBase || code >= h_SBase + h_SCount then raise Not_found;
let si = code - h_SBase in
let l = h_LBase + si/h_NCount in
let v = h_VBase + (si mod h_NCount) / h_TCount in
let t = h_TBase + si mod h_TCount in
if t = h_TBase then
[ l; v ]
else
[ l; v; t ]
let compose_hangul first second =
(* check for L and V *)
if first >= h_LBase &&
first < h_LBase + h_LCount &&
second >= h_VBase &&
second < h_VBase + h_VCount
then
(* create LV syllable *)
let l = first - h_LBase in
let v = second - h_VBase in
h_SBase + (l*h_VCount + v) * h_TCount
else
(* check for LV and T *)
let si = first - h_SBase in
if first >= h_SBase &&
first < h_SBase + h_SCount &&
si mod h_TCount = 0
then
let ti = second - h_TBase in
first + ti
else
raise Not_found
let decompose (u : int array) =
(* "compatibility decomposition" as required for NFKC *)
let decomp_tab = Hashtbl.create 41 in
let last = ref (ref []) in
Array.iter
(fun p ->
if p < 0 then (
last := ref [];
Hashtbl.add decomp_tab ((-p) lsr 1) !last
)
else
!last := p :: ! !last
)
decompositions;
let rec get_recursive_decomp ch =
try
let chars = List.rev (! (Hashtbl.find decomp_tab ch)) in
List.flatten (List.map get_recursive_decomp chars)
with
| Not_found ->
try
decompose_hangul ch
with
| Not_found -> [ch] in
let get_cc ch = try Hashtbl.find cano_tab ch with Not_found -> 0 in
let target = create_buffer() in
for i = 0 to Array.length u - 1 do
let decomp = get_recursive_decomp u.(i) in
List.iter
(fun ch ->
let cc = get_cc ch in
let k = ref (length target) in
if cc <> 0 then (
while
!k > 0 &&
get_cc (buffer_at target (!k-1)) > cc
do
decr k
done
);
insert_at target !k ch
)
decomp
done;
contents target
let compose_1 (u : int array) =
(* "canonical composition" as required for NFKC *)
(* u <> [| |] required *)
let excl_tab = Hashtbl.create 41 in
Array.iter
(fun p -> Hashtbl.add excl_tab p ())
exclusions;
let comp_tab = Hashtbl.create 41 in
let last_p = ref 0 in
let last = ref [] in
Array.iter
(fun p ->
if p < 0 then (
if !last <> [] then (
let q = (- !last_p) lsr 1 in
let is_canonical = (- !last_p) land 1 = 0 in
let is_excluded = Hashtbl.mem excl_tab q in
if is_canonical && not is_excluded then (
match !last with
| [ c0 ] ->
()
| [ c1; c0 ] ->
(* NB. We can at most support 15 bits *)
assert(c0 < 16384);
assert(c1 < 16384);
Hashtbl.add comp_tab ((c0 lsl 14) lor c1) q
| _ ->
assert false
)
);
last_p := p;
last := [];
)
else
last := p :: !last
)
decompositions;
let get_cc ch = try Hashtbl.find cano_tab ch with Not_found -> 0 in
let target = create_buffer() in
let starter_pos = ref 0 in
let starter_ch = ref u.(!starter_pos) in
let last_class = ref (get_cc !starter_ch) in
if !last_class <> 0 then last_class := 256;
insert_at target 0 !starter_ch;
for i = 1 to Array.length u - 1 do
let ch = u.(i) in
let cc = get_cc ch in
try
let composite =
try
if !starter_ch >= 16384 || ch >= 16384 then raise Not_found;
Hashtbl.find comp_tab ((!starter_ch lsl 14) lor ch)
with
| Not_found ->
compose_hangul !starter_ch ch in
if !last_class >= cc && !last_class <> 0 then raise Not_found;
set_buffer_at target !starter_pos composite;
starter_ch := composite
with
| Not_found ->
if cc = 0 then (
starter_pos := length target;
starter_ch := ch;
);
last_class := cc;
insert_at target (length target) ch
done;
contents target
let compose u =
if u = [| |] then [| |] else compose_1 u
let exists f a =
try
Array.iter (fun p -> if f p then raise Exit) a;
false
with Exit -> true
let norm_needed u =
(* If the string uses only certain characters we don't need to normalize.
These are practically all Latin, Greek and Cyrillic characters.
*)
let quick_need_norm c =
not
((c >= 0x20 && c <= 0x7e) ||
(c >= 0xc0 && c <= 0x131) ||
(c >= 0x134 && c <= 0x13e) ||
(c >= 0x141 && c <= 0x148) ||
(c >= 0x14a && c <= 0x17e) ||
(c >= 0x180 && c <= 0x1c3) ||
(c >= 0x1cd && c <= 0x1f0) ||
(c >= 0x1f4 && c <= 0x2ad) ||
(c >= 0x374 && c <= 0x375) ||
(c = 0x37e) ||
(c >= 0x385 && c <= 0x3ce) ||
(c >= 0x400 && c <= 0x482) ||
(c >= 0x48a && c <= 0x50f)
) in
exists quick_need_norm u
let normalize u =
(* normalization form KC (NFKC) *)
if norm_needed u then
compose (decompose u)
else
u
let prohibited u =
Array.iter
(fun p ->
Array.iter
(fun (p0, p1) ->
if p >= p0 && p <= p1 then raise SASLprepError;
)
forbidden;
)
u;
u
let is_randalcat c =
exists
(fun (c0,c1) -> c >= c0 && c <= c1)
randalcat
let is_lcat c =
exists
(fun (c0,c1) -> c >= c0 && c <= c1)
lcat
let bidicheck u =
let u_randalcat =
Array.map is_randalcat u in
let u_lcat =
Array.map is_lcat u in
let has_randalcat = exists (fun p -> p) u_randalcat in
let has_lcat = exists (fun p -> p) u_lcat in
if has_randalcat && has_lcat then raise SASLprepError;
if has_randalcat && u <> [| |] then (
if not u_randalcat.(0) || not u_randalcat.(Array.length u - 1) then
raise SASLprepError
);
u
let basecheck u =
if exists (fun p -> p < 0 || p > 0x10ffff) u then raise SASLprepError;
()
let saslprep_a u =
basecheck u;
bidicheck ( prohibited (normalize (map u)))
let saslprep s =
Netconversion.ustring_of_uarray
`Enc_utf8
(saslprep_a
(Netconversion.uarray_of_ustring
`Enc_utf8
s
)
)