Skip to content

Commit 29ca97d

Browse files
committed
[TMP] Reinstate caml_utf16_of_utf8 and its inverse
1 parent 85067fc commit 29ca97d

File tree

1 file changed

+109
-0
lines changed

1 file changed

+109
-0
lines changed

runtime/js/mlBytes.js

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,115 @@ function caml_sub_uint8_array_to_jsbytes(a, i, len) {
7979
return s;
8080
}
8181

82+
//Provides: caml_utf8_of_utf16
function caml_utf8_of_utf16(s) {
  // Re-encodes the UTF-16 string `s` as UTF-8 and returns a string in which
  // every char code is one byte of that encoding. A surrogate that is not
  // part of a well-formed high/low pair is emitted as the three bytes of
  // U+FFFD (replacement character).
  //
  // Output is gathered in two pieces: `pending` collects recent fragments
  // and is moved into `done` once it grows past 1024 chars (or just before
  // a long ASCII run is appended directly to `done`).
  var total = s.length;
  var done = "";
  var pending = "";
  var pos = 0;
  while (pos < total) {
    var unit = s.charCodeAt(pos);
    if (unit < 0x80) {
      // Copy the whole run of ASCII code units at once.
      var stop = pos + 1;
      while (stop < total && s.charCodeAt(stop) < 0x80) stop++;
      var ascii = s.slice(pos, stop);
      if (stop - pos > 512) {
        // The value of this slice is discarded; presumably a hint for the
        // engine to flatten the concatenated string -- TODO confirm.
        pending.slice(0, 1);
        done += pending;
        pending = "";
        done += ascii;
      } else {
        pending += ascii;
      }
      if (stop === total) break;
      // `stop` now indexes the first non-ASCII code unit after the run.
      pos = stop;
      unit = s.charCodeAt(pos);
    }
    if (unit < 0x800) {
      // Two-byte sequence.
      pending += String.fromCharCode(0xc0 | (unit >> 6), 0x80 | (unit & 0x3f));
    } else if (unit < 0xd800 || unit > 0xdfff) {
      // Three-byte sequence for a non-surrogate BMP code unit.
      pending += String.fromCharCode(
        0xe0 | (unit >> 12),
        0x80 | ((unit >> 6) & 0x3f),
        0x80 | (unit & 0x3f),
      );
    } else {
      // Surrogate range: only a high surrogate immediately followed by a low
      // surrogate forms a valid pair.
      var low = -1;
      if (unit <= 0xdbff && pos + 1 < total) low = s.charCodeAt(pos + 1);
      if (low >= 0xdc00 && low <= 0xdfff) {
        pos++;
        // Equivalent to 0x10000 + ((unit - 0xd800) << 10) + (low - 0xdc00).
        var cp = (unit << 10) + low - 0x35fdc00;
        pending += String.fromCharCode(
          0xf0 | (cp >> 18),
          0x80 | ((cp >> 12) & 0x3f),
          0x80 | ((cp >> 6) & 0x3f),
          0x80 | (cp & 0x3f),
        );
      } else {
        // Unmatched surrogate pair, replaced by \ufffd (replacement character)
        pending += "\xef\xbf\xbd";
      }
    }
    if (pending.length > 1024) {
      // Same discarded slice as above; kept unchanged.
      pending.slice(0, 1);
      done += pending;
      pending = "";
    }
    pos++;
  }
  return done + pending;
}
132+
133+
//Provides: caml_utf16_of_utf8
function caml_utf16_of_utf8(s) {
  // Decodes `s`, a string whose char codes are the bytes of a UTF-8
  // encoding, into a UTF-16 string. Any byte that does not start a valid
  // sequence (stray continuation, overlong form, encoded surrogate, value
  // above U+10FFFF, truncated sequence) yields one U+FFFD and decoding
  // resumes at the very next byte.
  //
  // Output is gathered in two pieces: `pending` collects recent fragments
  // and is moved into `done` once it grows past 1024 chars (or just before
  // a long ASCII run is appended directly to `done`).
  var total = s.length;
  var done = "";
  var pending = "";
  var pos = 0;

  // True when index `k` is inside `s` and holds a byte in 0x80..0xbf.
  function isContinuationAt(k) {
    if (k >= total) return false;
    var byte = s.charCodeAt(k);
    return byte >= 0x80 && byte <= 0xbf;
  }

  while (pos < total) {
    var lead = s.charCodeAt(pos);
    if (lead < 0x80) {
      // Copy the whole run of ASCII bytes at once.
      var stop = pos + 1;
      while (stop < total && s.charCodeAt(stop) < 0x80) stop++;
      var ascii = s.slice(pos, stop);
      if (stop - pos > 512) {
        // The value of this slice is discarded; presumably a hint for the
        // engine to flatten the concatenated string -- TODO confirm.
        pending.slice(0, 1);
        done += pending;
        pending = "";
        done += ascii;
      } else {
        pending += ascii;
      }
      if (stop === total) break;
      // `stop` now indexes the first non-ASCII byte after the run.
      pos = stop;
      lead = s.charCodeAt(pos);
    }

    // `cp` stays negative unless a well-formed sequence starts at `pos`;
    // `last` is then the index of that sequence's final byte.
    var cp = -1;
    var last = pos;
    if (isContinuationAt(pos + 1)) {
      var acc = (lead << 6) + s.charCodeAt(pos + 1);
      if (lead < 0xe0) {
        // Two-byte form; results below 0x80 (stray continuation used as a
        // lead, or overlong encoding) are rejected.
        var two = acc - 0x3080;
        if (two >= 0x80) {
          cp = two;
          last = pos + 1;
        }
      } else if (isContinuationAt(pos + 2)) {
        acc = (acc << 6) + s.charCodeAt(pos + 2);
        if (lead < 0xf0) {
          // Three-byte form; overlong encodings and surrogates are rejected.
          var three = acc - 0xe2080;
          if (three >= 0x800 && (three <= 0xd7ff || three >= 0xe000)) {
            cp = three;
            last = pos + 2;
          }
        } else if (lead < 0xf5 && isContinuationAt(pos + 3)) {
          // Four-byte form; must land in U+10000..U+10FFFF.
          var four = (acc << 6) + s.charCodeAt(pos + 3) - 0x3c82080;
          if (four >= 0x10000 && four <= 0x10ffff) {
            cp = four;
            last = pos + 3;
          }
        }
      }
    }

    if (cp < 0) {
      // Invalid sequence
      pending += "\ufffd";
    } else {
      if (cp > 0xffff) {
        // Split into a surrogate pair; 0xd7c0 is 0xd800 - (0x10000 >> 10).
        pending += String.fromCharCode(0xd7c0 + (cp >> 10), 0xdc00 + (cp & 0x3ff));
      } else {
        pending += String.fromCharCode(cp);
      }
      pos = last;
    }

    if (pending.length > 1024) {
      // Same discarded slice as above; kept unchanged.
      pending.slice(0, 1);
      done += pending;
      pending = "";
    }
    pos++;
  }
  return done + pending;
}
190+
82191
//Provides: jsoo_is_ascii
83192
function jsoo_is_ascii(s) {
84193
// The regular expression gets better at around this point for all browsers

0 commit comments

Comments
 (0)