@@ -79,6 +79,115 @@ function caml_sub_uint8_array_to_jsbytes(a, i, len) {
7979 return s ;
8080}
8181
82+ //Provides: caml_utf8_of_utf16
// Re-encodes a JavaScript (UTF-16) string as UTF-8 and returns the result as a
// string in which every character holds one byte value (0..255).
//
// - Runs of ASCII characters are copied through unchanged with a single slice.
// - A valid high/low surrogate pair becomes one 4-byte sequence.
// - A surrogate that is not part of a valid pair becomes the three bytes
//   EF BF BD (the UTF-8 encoding of U+FFFD).
//
// Output is accumulated in two strings: `chunk` collects recent output and is
// moved into `flushed` once it exceeds 1024 characters, or when an ASCII run
// longer than 512 characters is appended directly to `flushed`.
function caml_utf8_of_utf16(s) {
  var total = s.length;
  var flushed = "";
  var chunk = "";
  for (var pos = 0; pos < total; pos++) {
    var unit = s.charCodeAt(pos);
    if (unit < 0x80) {
      // Find the end of the ASCII run. When the scan stops early, `unit`
      // holds the first non-ASCII code unit, which is encoded below.
      var stop = pos + 1;
      while (stop < total) {
        unit = s.charCodeAt(stop);
        if (unit >= 0x80) break;
        stop++;
      }
      var ascii = s.slice(pos, stop);
      if (stop - pos > 512) {
        // The result of this slice is deliberately discarded.
        // NOTE(review): presumably a hint that makes the engine flatten the
        // concatenated string before it grows further - confirm before removing.
        chunk.slice(0, 1);
        flushed += chunk;
        chunk = "";
        flushed += ascii;
      } else {
        chunk += ascii;
      }
      if (stop === total) break;
      pos = stop;
    }
    var encoded;
    if (unit < 0x800) {
      // Two bytes: 110xxxxx 10xxxxxx
      encoded = String.fromCharCode(0xc0 | (unit >> 6), 0x80 | (unit & 0x3f));
    } else if (unit < 0xd800 || unit > 0xdfff) {
      // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      encoded = String.fromCharCode(
        0xe0 | (unit >> 12),
        0x80 | ((unit >> 6) & 0x3f),
        0x80 | (unit & 0x3f)
      );
    } else {
      // `unit` is a surrogate. It can only be encoded when it is a high
      // surrogate immediately followed by a low surrogate.
      var low = -1;
      if (unit <= 0xdbff && pos + 1 < total) {
        var following = s.charCodeAt(pos + 1);
        if (following >= 0xdc00 && following <= 0xdfff) low = following;
      }
      if (low < 0) {
        // Unmatched surrogate, replaced by U+FFFD (replacement character)
        encoded = "\xef\xbf\xbd";
      } else {
        pos++; // the low surrogate is consumed as well
        // Same value as 0x10000 + ((unit - 0xd800) << 10) + (low - 0xdc00).
        var codePoint = (unit << 10) + low - 0x35fdc00;
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        encoded = String.fromCharCode(
          0xf0 | (codePoint >> 18),
          0x80 | ((codePoint >> 12) & 0x3f),
          0x80 | ((codePoint >> 6) & 0x3f),
          0x80 | (codePoint & 0x3f)
        );
      }
    }
    chunk += encoded;
    if (chunk.length > 1024) {
      // Discarded slice: see the note in the ASCII branch above.
      chunk.slice(0, 1);
      flushed += chunk;
      chunk = "";
    }
  }
  return flushed + chunk;
}
132+
133+ //Provides: caml_utf16_of_utf8
// Decodes a UTF-8 "byte string" (one byte value per character) into a regular
// JavaScript (UTF-16) string.
//
// - Runs of ASCII bytes are copied through unchanged with a single slice.
// - Code points above U+FFFF are emitted as a surrogate pair.
// - When the sequence starting at a byte is rejected (missing or bad
//   continuation byte, overlong form, encoded surrogate, value above U+10FFFF,
//   or an unusable lead byte), one U+FFFD is emitted and decoding resumes at
//   the very next byte.
//
// Output is accumulated in two strings: `chunk` collects recent output and is
// moved into `flushed` once it exceeds 1024 characters, or when an ASCII run
// longer than 512 characters is appended directly to `flushed`.
function caml_utf16_of_utf8(s) {
  var total = s.length;
  var flushed = "";
  var chunk = "";

  // Returns the code at index k when k is in range and the value has the
  // continuation-byte form 10xxxxxx; returns -1 otherwise.
  function continuationAt(k) {
    if (k >= total) return -1;
    var code = s.charCodeAt(k);
    return (code & -64) === 128 ? code : -1;
  }

  for (var pos = 0; pos < total; pos++) {
    var lead = s.charCodeAt(pos);
    if (lead < 0x80) {
      // Find the end of the ASCII run. When the scan stops early, `lead`
      // holds the first non-ASCII byte, which is decoded below.
      var stop = pos + 1;
      while (stop < total) {
        lead = s.charCodeAt(stop);
        if (lead >= 0x80) break;
        stop++;
      }
      var ascii = s.slice(pos, stop);
      if (stop - pos > 512) {
        // The result of this slice is deliberately discarded.
        // NOTE(review): presumably a hint that makes the engine flatten the
        // concatenated string before it grows further - confirm before removing.
        chunk.slice(0, 1);
        flushed += chunk;
        chunk = "";
        flushed += ascii;
      } else {
        chunk += ascii;
      }
      if (stop === total) break;
      pos = stop;
    }

    // Try to decode the sequence starting at `pos`. `codePoint` stays negative
    // when the sequence is rejected; `consumed` is the number of bytes used by
    // an accepted sequence.
    var codePoint = -1;
    var consumed = 1;
    var second = continuationAt(pos + 1);
    if (second >= 0) {
      var acc = second + (lead << 6);
      if (lead < 0xe0) {
        // Two-byte form. The bias removes the 110xxxxx / 10xxxxxx tag bits.
        // Lead values below 0xc2 give a result under 0x80 and are rejected.
        var two = acc - 0x3080;
        if (two >= 0x80) {
          codePoint = two;
          consumed = 2;
        }
      } else {
        var third = continuationAt(pos + 2);
        if (third >= 0) {
          acc = third + (acc << 6);
          if (lead < 0xf0) {
            // Three-byte form; overlong forms and surrogates are rejected.
            var three = acc - 0xe2080;
            if (three >= 0x800 && (three <= 0xd7ff || three >= 0xe000)) {
              codePoint = three;
              consumed = 3;
            }
          } else if (lead < 0xf5) {
            var fourth = continuationAt(pos + 3);
            if (fourth >= 0) {
              // Four-byte form; must land in U+10000..U+10FFFF.
              var four = fourth - 0x3c82080 + (acc << 6);
              if (four >= 0x10000 && four <= 0x10ffff) {
                codePoint = four;
                consumed = 4;
              }
            }
          }
        }
      }
    }

    if (codePoint < 0) {
      // Invalid sequence: `pos` stays on the lead byte so that the loop
      // increment resumes decoding at the following byte.
      chunk += "\ufffd";
    } else {
      // Leave `pos` on the last byte of the accepted sequence.
      pos += consumed - 1;
      if (codePoint > 0xffff) {
        // 0xd7c0 is 0xd800 - (0x10000 >> 10): builds the high surrogate.
        chunk += String.fromCharCode(
          0xd7c0 + (codePoint >> 10),
          0xdc00 + (codePoint & 0x3ff)
        );
      } else {
        chunk += String.fromCharCode(codePoint);
      }
    }

    if (chunk.length > 1024) {
      // Discarded slice: see the note in the ASCII branch above.
      chunk.slice(0, 1);
      flushed += chunk;
      chunk = "";
    }
  }
  return flushed + chunk;
}
190+
82191//Provides: jsoo_is_ascii
83192function jsoo_is_ascii ( s ) {
84193 // The regular expression gets better at around this point for all browsers
0 commit comments