Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Samba charset module for Mac OS X/Darwin
4 : Copyright (C) Benjamin Riefenstahl 2003
5 :
6 : This program is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3 of the License, or
9 : (at your option) any later version.
10 :
11 : This program is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with this program. If not, see <http://www.gnu.org/licenses/>.
18 : */
19 :
20 : /*
21 : * modules/charset_macosxfs.c
22 : *
23 : * A Samba charset module to use on Mac OS X/Darwin as the filesystem
24 : * and display encoding.
25 : *
26 : * Actually two implementations are provided here. The default
27 : * implementation is based on the official CFString API. The other is
28 : * based on internal CFString APIs as defined in the OpenDarwin
29 : * source.
30 : */
31 :
32 : #include "replace.h"
33 : #include "charset.h"
34 : #include "charset_proto.h"
35 : #include "lib/util/debug.h"
36 : #undef realloc
37 :
38 : #ifdef DARWINOS
39 :
40 : /*
41 : * Include OS frameworks. These are only needed in this module.
42 : */
43 : #include <CoreFoundation/CFString.h>
44 :
45 : /*
46 : * See if autoconf has found us the internal headers in some form.
47 : */
48 : #if defined(HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H)
49 : # include <CoreFoundation/CFStringEncodingConverter.h>
50 : # include <CoreFoundation/CFUnicodePrecomposition.h>
51 : # define USE_INTERNAL_API 1
52 : #elif defined(HAVE_CFSTRINGENCODINGCONVERTER_H)
53 : # include <CFStringEncodingConverter.h>
54 : # include <CFUnicodePrecomposition.h>
55 : # define USE_INTERNAL_API 1
56 : #endif
57 :
58 : /*
59 : * Compile time configuration: Do we want debug output?
60 : */
61 : /* #define DEBUG_STRINGS 1 */
62 :
63 : /*
64 : * A simple, but efficient memory provider for our buffers.
65 : */
/**
 * Grow a heap buffer so that it can hold at least newsize bytes.
 *
 * @param buffer   Current allocation, or NULL if nothing is allocated yet.
 * @param size     In/out: capacity of buffer in bytes; updated on growth.
 * @param newsize  Minimum number of bytes the caller needs.
 *
 * @return The (possibly moved) buffer.  If newsize already fits, buffer
 *         is returned untouched.  If the request would overflow or the
 *         allocation fails, the old buffer is released, *size is reset
 *         to 0 and NULL is returned, so the pointer/size pair kept by
 *         the caller never describes memory that does not exist.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	/* Extra bytes added on every growth to reduce realloc traffic. */
	const size_t slack = 128;
	void *grown;

	if (newsize <= *size) {
		return buffer;
	}

	/* newsize + slack must not wrap around to a tiny allocation. */
	if (newsize > (size_t)-1 - slack) {
		free(buffer);
		*size = 0;
		return NULL;
	}

	grown = realloc(buffer, newsize + slack);
	if (grown == NULL) {
		/* realloc left the old block alive; drop it explicitly. */
		free(buffer);
		*size = 0;
		return NULL;
	}

	*size = newsize + slack;
	return grown;
}
74 :
75 : /*
76 : * While there is a version of OpenDarwin for intel, the usual case is
77 : * big-endian PPC. So we need byte swapping to handle the
78 : * little-endian byte order of the network protocol. We also need an
79 : * additional dynamic buffer to do this work for incoming data blocks,
80 : * because we have to consider the original data as constant.
81 : *
82 : * We abstract the differences away by providing a simple facade with
83 : * these functions/macros:
84 : *
85 : * le_to_native(dst,src,len)
86 : * native_to_le(cp,len)
87 : * set_ucbuffer_with_le(buffer,bufsize,data,size)
88 : * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
89 : */
90 : #ifdef WORDS_BIGENDIAN
91 :
/*
 * Copy len bytes from src to dst, exchanging the two bytes of every
 * complete 16-bit unit.  dst and src must not overlap.
 *
 * If len is odd, the last byte has no partner; it is copied unchanged
 * instead of being paired with a byte beyond the end of either buffer.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	/* Number of bytes that belong to complete byte pairs. */
	const size_t paired = len & ~(size_t)1;
	size_t i;

	for (i = 0; i < paired; i += 2) {
		dst[i] = src[i + 1];
		dst[i + 1] = src[i];
	}
	if (paired != len) {
		dst[paired] = src[paired];
	}
}
/*
 * Exchange the two bytes of every complete 16-bit unit in the len
 * bytes starting at cp.
 *
 * If len is odd, the trailing byte is left untouched; it is never
 * swapped with the byte that follows the buffer.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	/* Stop after the last complete byte pair. */
	char *const end = cp + (len & ~(size_t)1);

	for (; cp < end; cp += 2) {
		const char first = cp[0];

		cp[0] = cp[1];
		cp[1] = first;
	}
}
113 :
/*
 * Big-endian variant of the byte order facade: every conversion is
 * routed to one of the byte swapping helpers.
 */
/* Copy len bytes from src to dst through swap_bytes(). */
#define le_to_native(dst,src,len) swap_bytes(dst,src,len)
/* Convert the len bytes at cp in place through swap_bytes_inplace(). */
#define native_to_le(cp,len) swap_bytes_inplace(cp,len)
/* Always goes through set_ucbuffer_with_le_copy(), with no reserve. */
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
	set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
118 :
119 : #else /* ! WORDS_BIGENDIAN */
120 :
/*
 * Little-endian variant of the byte order facade: no byte swapping is
 * performed.
 */
/* Plain memcpy() of len bytes from src to dst. */
#define le_to_native(dst,src,len) memcpy(dst,src,len)
/* Expands to nothing, so the call site is left as an empty statement. */
#define native_to_le(cp,len) /* nothing */
/*
 * No copy is made: bufsize is evaluated and discarded, buffer and size
 * are not referenced, and the result is data cast to UniChar*.
 */
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
	(((void)(bufsize)),(UniChar*)(data))
125 :
126 : #endif
127 :
128 : static inline UniChar *set_ucbuffer_with_le_copy (
129 : UniChar *buffer, size_t *bufsize,
130 : const void *data, size_t size, size_t reserve)
131 : {
132 : buffer = resize_buffer(buffer, bufsize, size+reserve);
133 : le_to_native((char*)buffer,data,size);
134 : return buffer;
135 : }
136 :
137 :
138 : /*
139 : * A simple hexdump function for debugging error conditions.
140 : */
/*
 * Emit a message at debug level 0.  The text is handed over as the
 * argument of a fixed "%s" format instead of being used as the format
 * itself: hexdump() lines echo raw input bytes, so a '%' in the data
 * must be printed literally rather than parsed as a conversion.
 */
#define debug_out(s) DEBUG(0,("%s", (s)))
142 :
143 : #ifdef DEBUG_STRINGS
144 :
/*
 * Dump len bytes starting at s through debug_out(), at most 16 bytes
 * per output line.  The dump is preceded by a "<<<<<<<" line and the
 * label, and followed by a ">>>>>>>" line.
 *
 * Every line consists of the offset of its first byte, two groups of
 * up to eight hex values, and the same bytes as text, where anything
 * outside printable ASCII is shown as '.'.
 *
 * NOTE(review): label and the formatted line are given to debug_out()
 * as-is; confirm how debug_out() treats '%' characters in them.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;	/* bytes that still have to be dumped */
	debug_out("<<<<<<<\n");
	debug_out(label);
	debug_out("\n");
	while (restlen > 0) {
		char line[100];
		size_t i, j;
		char * d = line;	/* write position inside line */
		/* Presumably a project header redefines sprintf -- TODO confirm. */
#undef sprintf
		/* Offset of this line's first byte within the dumped data. */
		d += sprintf(d, "%04X ", (unsigned)(len-restlen));
		*d++ = ' ';
		/* First group: bytes 0..7 as hex. */
		for( i = 0; i<restlen && i<8; ++i ) {
			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
		}
		/*
		 * Pad the cells of missing bytes.
		 * NOTE(review): a hex cell is three columns wide; confirm
		 * that the padding string has the intended width.
		 */
		for( j = i; j<8; ++j ) {
			d += sprintf(d, " ");
		}
		*d++ = ' ';
		/* Second group: bytes 8..15 as hex (skipped for short lines). */
		for( i = 8; i<restlen && i<16; ++i ) {
			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
		}
		/* Pad the second group the same way. */
		for( j = i; j<16; ++j ) {
			d += sprintf(d, " ");
		}
		*d++ = ' ';
		/* Text column; i ends up as the number of bytes on this line. */
		for( i = 0; i<restlen && i<16; ++i ) {
			if(s[i] < ' ' || s[i] >= 0x7F || !isprint(s[i]))
				*d++ = '.';
			else
				*d++ = s[i];
		}
		*d++ = '\n';
		*d = 0;
		/* Advance past the bytes that were just printed. */
		restlen -= i;
		s += i;
		debug_out(line);
	}
	debug_out(">>>>>>>\n");
}
186 :
187 : #else /* !DEBUG_STRINGS */
188 :
189 : #define hexdump(label,s,len) /* nothing */
190 :
191 : #endif
192 :
193 :
194 : #if !USE_INTERNAL_API
195 :
/*
 * An implementation based on documented Mac OS X APIs.
 *
 * This does a certain amount of memory management, creating and
 * manipulating CFString objects. We try to minimize the impact by
 * keeping those objects around and re-using them. We also use
 * external backing store for the CFStrings where this is possible and
 * beneficial.
 *
 * The Unicode normalization forms available at this level are
 * generic, not specifically for the file system. So they may not be
 * perfect fits.
 */
208 : */
209 : size_t macosxfs_encoding_pull(
210 : void *cd, /* Encoder handle */
211 : const char **inbuf, size_t *inbytesleft, /* Script string */
212 : char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
213 : {
214 : static const int script_code = kCFStringEncodingUTF8;
215 : static CFMutableStringRef cfstring = NULL;
216 : size_t outsize;
217 : CFRange range;
218 :
219 : (void) cd; /* UNUSED */
220 :
221 : if (0 == *inbytesleft) {
222 : return 0;
223 : }
224 :
225 : if (NULL == cfstring) {
226 : /*
227 : * A version with an external backing store as in the
228 : * push function should have been more efficient, but
229 : * testing shows, that it is actually slower (!).
230 : * Maybe kCFAllocatorDefault gets shortcut evaluation
231 : * internally, while kCFAllocatorNull doesn't.
232 : */
233 : cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
234 : }
235 :
236 : /*
237 : * Three methods of appending to a CFString, choose the most
238 : * efficient.
239 : */
240 : if (0 == (*inbuf)[*inbytesleft-1]) {
241 : CFStringAppendCString(cfstring, *inbuf, script_code);
242 : } else if (*inbytesleft <= 255) {
243 : Str255 buffer;
244 : buffer[0] = *inbytesleft;
245 : memcpy(buffer+1, *inbuf, buffer[0]);
246 : CFStringAppendPascalString(cfstring, buffer, script_code);
247 : } else {
248 : /*
249 : * We would like to use a fixed buffer and a loop
250 : * here, but than we can't garantee that the input is
251 : * well-formed UTF-8, as we are supposed to do.
252 : */
253 : static char *buffer = NULL;
254 : static size_t buflen = 0;
255 : buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
256 : memcpy(buffer, *inbuf, *inbytesleft);
257 : buffer[*inbytesleft] = 0;
258 : CFStringAppendCString(cfstring, *inbuf, script_code);
259 : }
260 :
261 : /*
262 : * Compose characters, using the non-canonical composition
263 : * form.
264 : */
265 : CFStringNormalize(cfstring, kCFStringNormalizationFormC);
266 :
267 : outsize = CFStringGetLength(cfstring);
268 : range = CFRangeMake(0,outsize);
269 :
270 : if (outsize == 0) {
271 : /*
272 : * HACK: smbd/mangle_hash2.c:is_legal_name() expects
273 : * errors here. That function will always pass 2
274 : * characters. smbd/open.c:check_for_pipe() cuts a
275 : * patchname to 10 characters blindly. Suppress the
276 : * debug output in those cases.
277 : */
278 : if(2 != *inbytesleft && 10 != *inbytesleft) {
279 : debug_out("String conversion: "
280 : "An unknown error occurred\n");
281 : hexdump("UTF8->UTF16LE (old) input",
282 : *inbuf, *inbytesleft);
283 : }
284 : errno = EILSEQ; /* Not sure, but this is what we have
285 : * actually seen. */
286 : return -1;
287 : }
288 : if (outsize*2 > *outbytesleft) {
289 : CFStringDelete(cfstring, range);
290 : debug_out("String conversion: "
291 : "Output buffer too small\n");
292 : hexdump("UTF8->UTF16LE (old) input",
293 : *inbuf, *inbytesleft);
294 : errno = E2BIG;
295 : return -1;
296 : }
297 :
298 : CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
299 : CFStringDelete(cfstring, range);
300 :
301 : native_to_le(*outbuf, outsize*2);
302 :
303 : /*
304 : * Add a converted null byte, if the CFString conversions
305 : * prevented that until now.
306 : */
307 : if (0 == (*inbuf)[*inbytesleft-1] &&
308 : (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
309 :
310 : if ((outsize*2+2) > *outbytesleft) {
311 : debug_out("String conversion: "
312 : "Output buffer too small\n");
313 : hexdump("UTF8->UTF16LE (old) input",
314 : *inbuf, *inbytesleft);
315 : errno = E2BIG;
316 : return -1;
317 : }
318 :
319 : (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
320 : outsize += 2;
321 : }
322 :
323 : *inbuf += *inbytesleft;
324 : *inbytesleft = 0;
325 : *outbuf += outsize*2;
326 : *outbytesleft -= outsize*2;
327 :
328 : return 0;
329 : }
330 :
331 : size_t macosxfs_encoding_push(
332 : void *cd, /* Encoder handle */
333 : const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
334 : char **outbuf, size_t *outbytesleft) /* Script string */
335 : {
336 : static const int script_code = kCFStringEncodingUTF8;
337 : static CFMutableStringRef cfstring = NULL;
338 : static UniChar *buffer = NULL;
339 : static size_t buflen = 0;
340 : CFIndex outsize, cfsize, charsconverted;
341 :
342 : (void) cd; /* UNUSED */
343 :
344 : if (0 == *inbytesleft) {
345 : return 0;
346 : }
347 :
348 : /*
349 : * We need a buffer that can hold 4 times the original data,
350 : * because that is the theoretical maximum that decomposition
351 : * can create currently (in Unicode 4.0).
352 : */
353 : buffer = set_ucbuffer_with_le_copy(
354 : buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
355 :
356 : if (NULL == cfstring) {
357 : cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
358 : kCFAllocatorDefault,
359 : buffer, *inbytesleft/2, buflen/2,
360 : kCFAllocatorNull);
361 : } else {
362 : CFStringSetExternalCharactersNoCopy(
363 : cfstring,
364 : buffer, *inbytesleft/2, buflen/2);
365 : }
366 :
367 : /*
368 : * Decompose characters, using the non-canonical decomposition
369 : * form.
370 : *
371 : * NB: This isn't exactly what HFS+ wants (see note on
372 : * kCFStringEncodingUseHFSPlusCanonical in
373 : * CFStringEncodingConverter.h), but AFAIK it's the best that
374 : * the official API can do.
375 : */
376 : CFStringNormalize(cfstring, kCFStringNormalizationFormD);
377 :
378 : cfsize = CFStringGetLength(cfstring);
379 : charsconverted = CFStringGetBytes(
380 : cfstring, CFRangeMake(0,cfsize),
381 : script_code, 0, false,
382 : *(UInt8 **)outbuf, *outbytesleft, &outsize);
383 :
384 : if (0 == charsconverted) {
385 : debug_out("String conversion: "
386 : "Buffer too small or not convertable\n");
387 : hexdump("UTF16LE->UTF8 (old) input",
388 : *inbuf, *inbytesleft);
389 : errno = EILSEQ; /* Probably more likely. */
390 : return -1;
391 : }
392 :
393 : /*
394 : * Add a converted null byte, if the CFString conversions
395 : * prevented that until now.
396 : */
397 : if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
398 : (0 != (*outbuf)[outsize-1])) {
399 :
400 : if (((size_t)outsize+1) > *outbytesleft) {
401 : debug_out("String conversion: "
402 : "Output buffer too small\n");
403 : hexdump("UTF16LE->UTF8 (old) input",
404 : *inbuf, *inbytesleft);
405 : errno = E2BIG;
406 : return -1;
407 : }
408 :
409 : (*outbuf)[outsize] = 0;
410 : ++outsize;
411 : }
412 :
413 : *inbuf += *inbytesleft;
414 : *inbytesleft = 0;
415 : *outbuf += outsize;
416 : *outbytesleft -= outsize;
417 :
418 : return 0;
419 : }
420 :
421 : #else /* USE_INTERNAL_API */
422 :
/*
 * An implementation based on internal code as known from the
 * OpenDarwin CVS.
 *
 * This code doesn't need much memory management because it uses
 * functions that operate on the raw memory directly.
 *
 * The push routine here is faster and more compatible with HFS+ than
 * the other implementation above. The pull routine is only faster
 * for some strings, slightly slower for others. The pull routine
 * loses because it has to iterate over the data twice, once to
 * decode UTF-8 and then to do the character composition required by
 * Windows.
 */
436 : */
437 : static size_t macosxfs_encoding_pull(
438 : void *cd, /* Encoder handle */
439 : const char **inbuf, size_t *inbytesleft, /* Script string */
440 : char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
441 : {
442 : static const int script_code = kCFStringEncodingUTF8;
443 : UInt32 srcCharsUsed = 0;
444 : UInt32 dstCharsUsed = 0;
445 : UInt32 result;
446 : uint32_t dstDecomposedUsed = 0;
447 : uint32_t dstPrecomposedUsed = 0;
448 :
449 : (void) cd; /* UNUSED */
450 :
451 : if (0 == *inbytesleft) {
452 : return 0;
453 : }
454 :
455 : result = CFStringEncodingBytesToUnicode(
456 : script_code, kCFStringEncodingComposeCombinings,
457 : *inbuf, *inbytesleft, &srcCharsUsed,
458 : (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
459 :
460 : switch(result) {
461 : case kCFStringEncodingConversionSuccess:
462 : if (*inbytesleft == srcCharsUsed) {
463 : break;
464 : }
465 :
466 : FALL_THROUGH;
467 : case kCFStringEncodingInsufficientOutputBufferLength:
468 : debug_out("String conversion: "
469 : "Output buffer too small\n");
470 : hexdump("UTF8->UTF16LE (new) input",
471 : *inbuf, *inbytesleft);
472 : errno = E2BIG;
473 : return -1;
474 : case kCFStringEncodingInvalidInputStream:
475 : /*
476 : * HACK: smbd/mangle_hash2.c:is_legal_name() expects
477 : * errors here. That function will always pass 2
478 : * characters. smbd/open.c:check_for_pipe() cuts a
479 : * patchname to 10 characters blindly. Suppress the
480 : * debug output in those cases.
481 : */
482 : if(2 != *inbytesleft && 10 != *inbytesleft) {
483 : debug_out("String conversion: "
484 : "Invalid input sequence\n");
485 : hexdump("UTF8->UTF16LE (new) input",
486 : *inbuf, *inbytesleft);
487 : }
488 : errno = EILSEQ;
489 : return -1;
490 : case kCFStringEncodingConverterUnavailable:
491 : debug_out("String conversion: "
492 : "Unknown encoding\n");
493 : hexdump("UTF8->UTF16LE (new) input",
494 : *inbuf, *inbytesleft);
495 : errno = EINVAL;
496 : return -1;
497 : }
498 :
499 : /*
500 : * It doesn't look like CFStringEncodingBytesToUnicode() can
501 : * produce precomposed characters (flags=ComposeCombinings
502 : * doesn't do it), so we need another pass over the data here.
503 : * We can do this in-place, as the string can only get
504 : * shorter.
505 : *
506 : * (Actually in theory there should be an internal
507 : * decomposition and reordering before the actual composition
508 : * step. But we should be able to rely on that we always get
509 : * fully decomposed strings for input, so this can't create
510 : * problems in reality.)
511 : */
512 : CFUniCharPrecompose(
513 : (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
514 : (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
515 :
516 : native_to_le(*outbuf, dstPrecomposedUsed*2);
517 :
518 : *inbuf += srcCharsUsed;
519 : *inbytesleft -= srcCharsUsed;
520 : *outbuf += dstPrecomposedUsed*2;
521 : *outbytesleft -= dstPrecomposedUsed*2;
522 :
523 : return 0;
524 : }
525 :
526 : static size_t macosxfs_encoding_push(
527 : void *cd, /* Encoder handle */
528 : const char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
529 : char **outbuf, size_t *outbytesleft) /* Script string */
530 : {
531 : static const int script_code = kCFStringEncodingUTF8;
532 : static UniChar *buffer = NULL;
533 : static size_t buflen = 0;
534 : UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
535 :
536 : (void) cd; /* UNUSED */
537 :
538 : if (0 == *inbytesleft) {
539 : return 0;
540 : }
541 :
542 : buffer = set_ucbuffer_with_le(
543 : buffer, &buflen, *inbuf, *inbytesleft);
544 :
545 : result = CFStringEncodingUnicodeToBytes(
546 : script_code, kCFStringEncodingUseHFSPlusCanonical,
547 : buffer, *inbytesleft/2, &srcCharsUsed,
548 : *outbuf, *outbytesleft, &dstCharsUsed);
549 :
550 : switch(result) {
551 : case kCFStringEncodingConversionSuccess:
552 : if (*inbytesleft/2 == srcCharsUsed) {
553 : break;
554 : }
555 :
556 : FALL_THROUGH;
557 : case kCFStringEncodingInsufficientOutputBufferLength:
558 : debug_out("String conversion: "
559 : "Output buffer too small\n");
560 : hexdump("UTF16LE->UTF8 (new) input",
561 : *inbuf, *inbytesleft);
562 : errno = E2BIG;
563 : return -1;
564 : case kCFStringEncodingInvalidInputStream:
565 : /*
566 : * HACK: smbd/open.c:check_for_pipe():is_legal_name()
567 : * cuts a pathname to 10 characters blindly. Suppress
568 : * the debug output in those cases.
569 : */
570 : if(10 != *inbytesleft) {
571 : debug_out("String conversion: "
572 : "Invalid input sequence\n");
573 : hexdump("UTF16LE->UTF8 (new) input",
574 : *inbuf, *inbytesleft);
575 : }
576 : errno = EILSEQ;
577 : return -1;
578 : case kCFStringEncodingConverterUnavailable:
579 : debug_out("String conversion: "
580 : "Unknown encoding\n");
581 : hexdump("UTF16LE->UTF8 (new) input",
582 : *inbuf, *inbytesleft);
583 : errno = EINVAL;
584 : return -1;
585 : }
586 :
587 : *inbuf += srcCharsUsed*2;
588 : *inbytesleft -= srcCharsUsed*2;
589 : *outbuf += dstCharsUsed;
590 : *outbytesleft -= dstCharsUsed;
591 :
592 : return 0;
593 : }
594 :
595 : #endif /* USE_INTERNAL_API */
596 :
597 : #else /* DARWIN */
598 :
void charset_macosfs_dummy(void);

/*
 * Placeholder that is compiled when DARWINOS is not defined; it takes
 * no arguments and does nothing.  Presumably it only exists so that
 * this file still defines a symbol on other platforms.
 */
void charset_macosfs_dummy(void)
{
}
604 :
605 : #endif /* DARWIN */
|