Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Samba utility functions
4 : Copyright (C) Andrew Tridgell 1992-2001
5 : Copyright (C) Simo Sorce 2001
6 : Copyright (C) Andrew Bartlett 2011
7 : Copyright (C) Jeremy Allison 1992-2007
8 : Copyright (C) Martin Pool 2003
9 : Copyright (C) James Peach 2006
10 :
11 : This program is free software; you can redistribute it and/or modify
12 : it under the terms of the GNU General Public License as published by
13 : the Free Software Foundation; either version 3 of the License, or
14 : (at your option) any later version.
15 :
16 : This program is distributed in the hope that it will be useful,
17 : but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 : GNU General Public License for more details.
20 :
21 : You should have received a copy of the GNU General Public License
22 : along with this program. If not, see <http://www.gnu.org/licenses/>.
23 : */
24 :
25 : #include "replace.h"
26 : #include "system/locale.h"
27 : #include "charset.h"
28 : #include "lib/util/fault.h"
29 :
30 : #ifdef strcasecmp
31 : #undef strcasecmp
32 : #endif
33 : #ifdef strncasecmp
34 : #undef strncasecmp
35 : #endif
36 :
37 :
38 : /**
39 : Case insensitive string compararison, handle specified for testing
40 : **/
41 68520579 : _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
42 : const char *s1, const char *s2)
43 : {
44 68520579 : codepoint_t c1=0, c2=0;
45 68520579 : codepoint_t u1=0, u2=0;
46 68520579 : codepoint_t l1=0, l2=0;
47 : size_t size1, size2;
48 :
49 : /* handle null ptr comparisons to simplify the use in qsort */
50 68520579 : if (s1 == s2) return 0;
51 68520123 : if (s1 == NULL) return -1;
52 68520123 : if (s2 == NULL) return 1;
53 :
54 220193404 : while (*s1 && *s2) {
55 158637234 : c1 = next_codepoint_handle(iconv_handle, s1, &size1);
56 158637234 : c2 = next_codepoint_handle(iconv_handle, s2, &size2);
57 :
58 158637234 : if (c1 == INVALID_CODEPOINT ||
59 : c2 == INVALID_CODEPOINT) {
60 8 : return strcasecmp(s1, s2);
61 : }
62 :
63 158637226 : s1 += size1;
64 158637226 : s2 += size2;
65 :
66 158637226 : if (c1 == c2) {
67 91325288 : continue;
68 : }
69 :
70 67311938 : u1 = toupper_m(c1);
71 67311938 : u2 = toupper_m(c2);
72 67311938 : if (u1 == u2) {
73 373986 : continue;
74 : }
75 :
76 66937952 : l1 = tolower_m(c1);
77 66937952 : l2 = tolower_m(c2);
78 66937952 : if (l1 == l2) {
79 0 : continue;
80 : }
81 :
82 66937952 : return l1 - l2;
83 : }
84 :
85 1582163 : return *s1 - *s2;
86 : }
87 :
88 : /**
89 : Case insensitive string compararison
90 : **/
91 68520579 : _PUBLIC_ int strcasecmp_m(const char *s1, const char *s2)
92 : {
93 68520579 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
94 68520579 : return strcasecmp_m_handle(iconv_handle, s1, s2);
95 : }
96 :
97 : /**
98 : Case insensitive string compararison, length limited, handle specified for testing
99 : **/
100 356497 : _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
101 : const char *s1, const char *s2, size_t n)
102 : {
103 356497 : codepoint_t c1=0, c2=0;
104 356497 : codepoint_t u1=0, u2=0;
105 356497 : codepoint_t l1=0, l2=0;
106 : size_t size1, size2;
107 :
108 : /* handle null ptr comparisons to simplify the use in qsort */
109 356497 : if (s1 == s2) return 0;
110 356309 : if (s1 == NULL) return -1;
111 356309 : if (s2 == NULL) return 1;
112 :
113 1338644 : while (*s1 && *s2 && n) {
114 1080653 : n--;
115 :
116 1080653 : c1 = next_codepoint_handle(iconv_handle, s1, &size1);
117 1080653 : c2 = next_codepoint_handle(iconv_handle, s2, &size2);
118 :
119 1080653 : if (c1 == INVALID_CODEPOINT ||
120 : c2 == INVALID_CODEPOINT) {
121 : /*
122 : * n was specified in characters,
123 : * now we must convert it to bytes.
124 : * As bytes are the smallest
125 : * character unit, the following
126 : * increment and strncasecmp is always
127 : * safe.
128 : *
129 : * The source string was already known
130 : * to be n characters long, so we are
131 : * guaranteed to be able to look at the
132 : * (n remaining + size1) bytes from the
133 : * s1 position).
134 : */
135 0 : n += size1;
136 0 : return strncasecmp(s1, s2, n);
137 : }
138 :
139 1080653 : s1 += size1;
140 1080653 : s2 += size2;
141 :
142 1080653 : if (c1 == c2) {
143 770937 : continue;
144 : }
145 :
146 309716 : u1 = toupper_m(c1);
147 309716 : u2 = toupper_m(c2);
148 309716 : if (u1 == u2) {
149 383 : continue;
150 : }
151 :
152 309333 : l1 = tolower_m(c1);
153 309333 : l2 = tolower_m(c2);
154 309333 : if (l1 == l2) {
155 0 : continue;
156 : }
157 :
158 309333 : return l1 - l2;
159 : }
160 :
161 46976 : if (n == 0) {
162 45752 : return 0;
163 : }
164 :
165 1224 : return *s1 - *s2;
166 : }
167 :
168 : /**
169 : Case insensitive string compararison, length limited
170 : **/
171 356497 : _PUBLIC_ int strncasecmp_m(const char *s1, const char *s2, size_t n)
172 : {
173 356497 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
174 356497 : return strncasecmp_m_handle(iconv_handle, s1, s2, n);
175 : }
176 :
177 : /**
178 : * Compare 2 strings.
179 : *
180 : * @note The comparison is case-insensitive.
181 : **/
182 5687 : _PUBLIC_ bool strequal_m(const char *s1, const char *s2)
183 : {
184 5687 : return strcasecmp_m(s1,s2) == 0;
185 : }
186 :
187 : /**
188 : Compare 2 strings (case sensitive).
189 : **/
190 176870 : _PUBLIC_ bool strcsequal(const char *s1,const char *s2)
191 : {
192 176870 : if (s1 == s2)
193 0 : return true;
194 176870 : if (!s1 || !s2)
195 0 : return false;
196 :
197 176870 : return strcmp(s1,s2) == 0;
198 : }
199 :
200 : /**
201 : * Calculate the number of units (8 or 16-bit, depending on the
202 : * destination charset), that would be needed to convert the input
203 : * string which is expected to be in in src_charset encoding to the
204 : * destination charset (which should be a unicode charset).
205 : */
206 14049892 : _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic,
207 : const char *s, charset_t src_charset, charset_t dst_charset)
208 : {
209 14049892 : size_t count = 0;
210 :
211 : #ifdef DEVELOPER
212 14049892 : switch (dst_charset) {
213 0 : case CH_DOS:
214 : case CH_UNIX:
215 0 : smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)");
216 14049892 : default:
217 14049892 : break;
218 : }
219 :
220 14049892 : switch (src_charset) {
221 0 : case CH_UTF16LE:
222 : case CH_UTF16BE:
223 0 : smb_panic("cannot call strlen_m_ext() with a UTF16 src charset (must be DOS, UNIX, DISPLAY or UTF8)");
224 14049892 : default:
225 14049892 : break;
226 : }
227 : #endif
228 14049892 : if (!s) {
229 48968 : return 0;
230 : }
231 :
232 699797894 : while (*s && !(((uint8_t)*s) & 0x80)) {
233 672884785 : s++;
234 672884785 : count++;
235 : }
236 :
237 14000924 : if (!*s) {
238 14000529 : return count;
239 : }
240 :
241 39586 : while (*s) {
242 : size_t c_size;
243 38796 : codepoint_t c = next_codepoint_handle_ext(ic, s, strnlen(s, 5),
244 : src_charset, &c_size);
245 38796 : s += c_size;
246 :
247 38796 : switch (dst_charset) {
248 38796 : case CH_UTF16LE:
249 : case CH_UTF16BE:
250 : case CH_UTF16MUNGED:
251 38796 : if (c < 0x10000) {
252 : /* Unicode char fits into 16 bits. */
253 38796 : count += 1;
254 : } else {
255 : /* Double-width unicode char - 32 bits. */
256 0 : count += 2;
257 : }
258 38796 : break;
259 0 : case CH_UTF8:
260 : /*
261 : * this only checks ranges, and does not
262 : * check for invalid codepoints
263 : */
264 0 : if (c < 0x80) {
265 0 : count += 1;
266 0 : } else if (c < 0x800) {
267 0 : count += 2;
268 0 : } else if (c < 0x10000) {
269 0 : count += 3;
270 : } else {
271 0 : count += 4;
272 : }
273 0 : break;
274 0 : default:
275 : /*
276 : * non-unicode encoding:
277 : * assume that each codepoint fits into
278 : * one unit in the destination encoding.
279 : */
280 0 : count += 1;
281 : }
282 : }
283 :
284 395 : return count;
285 : }
286 :
287 : /**
288 : * Calculate the number of units (8 or 16-bit, depending on the
289 : * destination charset), that would be needed to convert the input
290 : * string which is expected to be in in src_charset encoding to the
291 : * destination charset (which should be a unicode charset).
292 : */
293 14049892 : _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset)
294 : {
295 14049892 : struct smb_iconv_handle *ic = get_iconv_handle();
296 14049892 : return strlen_m_ext_handle(ic, s, src_charset, dst_charset);
297 : }
298 :
299 3227254 : _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset,
300 : const charset_t dst_charset)
301 : {
302 3227254 : if (!s) {
303 12034 : return 0;
304 : }
305 3215220 : return strlen_m_ext(s, src_charset, dst_charset) + 1;
306 : }
307 :
308 48960 : _PUBLIC_ size_t strlen_m_ext_term_null(const char *s,
309 : const charset_t src_charset,
310 : const charset_t dst_charset)
311 : {
312 : size_t len;
313 48960 : if (!s) {
314 528 : return 0;
315 : }
316 48432 : len = strlen_m_ext(s, src_charset, dst_charset);
317 48432 : if (len == 0) {
318 14281 : return 0;
319 : }
320 :
321 34151 : return len+1;
322 : }
323 :
324 : /**
325 : * Calculate the number of 16-bit units that would be needed to convert
326 : * the input string which is expected to be in CH_UNIX encoding to UTF16.
327 : *
328 : * This will be the same as the number of bytes in a string for single
329 : * byte strings, but will be different for multibyte.
330 : */
331 10786240 : _PUBLIC_ size_t strlen_m(const char *s)
332 : {
333 10786240 : return strlen_m_ext(s, CH_UNIX, CH_UTF16LE);
334 : }
335 :
336 : /**
337 : Work out the number of multibyte chars in a string, including the NULL
338 : terminator.
339 : **/
340 361120 : _PUBLIC_ size_t strlen_m_term(const char *s)
341 : {
342 361120 : return strlen_m_ext_term(s, CH_UNIX, CH_UTF16LE);
343 : }
344 :
345 : /*
346 : * Weird helper routine for the winreg pipe: If nothing is around, return 0,
347 : * if a string is there, include the terminator.
348 : */
349 :
350 48960 : _PUBLIC_ size_t strlen_m_term_null(const char *s)
351 : {
352 48960 : return strlen_m_ext_term_null(s, CH_UNIX, CH_UTF16LE);
353 : }
354 :
355 : /**
356 : Strchr and strrchr_m are a bit complex on general multi-byte strings.
357 : **/
358 57021937 : _PUBLIC_ char *strchr_m(const char *src, char c)
359 : {
360 : const char *s;
361 57021937 : struct smb_iconv_handle *ic = get_iconv_handle();
362 57021937 : if (src == NULL) {
363 0 : return NULL;
364 : }
365 : /* characters below 0x3F are guaranteed to not appear in
366 : non-initial position in multi-byte charsets */
367 57021937 : if ((c & 0xC0) == 0) {
368 18002515 : return strchr(src, c);
369 : }
370 :
371 : /* this is quite a common operation, so we want it to be
372 : fast. We optimise for the ascii case, knowing that all our
373 : supported multi-byte character sets are ascii-compatible
374 : (ie. they match for the first 128 chars) */
375 :
376 271155218 : for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
377 232152914 : if (*s == c)
378 17118 : return discard_const_p(char, s);
379 : }
380 :
381 39002304 : if (!*s)
382 39002304 : return NULL;
383 :
384 : #ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
385 : /* With compose characters we must restart from the beginning. JRA. */
386 : s = src;
387 : #endif
388 :
389 0 : while (*s) {
390 : size_t size;
391 0 : codepoint_t c2 = next_codepoint_handle(ic, s, &size);
392 0 : if (c2 == c) {
393 0 : return discard_const_p(char, s);
394 : }
395 0 : s += size;
396 : }
397 :
398 0 : return NULL;
399 : }
400 :
401 : /**
402 : * Multibyte-character version of strrchr
403 : */
404 662680 : _PUBLIC_ char *strrchr_m(const char *s, char c)
405 : {
406 : struct smb_iconv_handle *ic;
407 662680 : char *ret = NULL;
408 :
409 662680 : if (s == NULL) {
410 0 : return NULL;
411 : }
412 :
413 : /* characters below 0x3F are guaranteed to not appear in
414 : non-initial position in multi-byte charsets */
415 662680 : if ((c & 0xC0) == 0) {
416 649311 : return strrchr(s, c);
417 : }
418 :
419 : /* this is quite a common operation, so we want it to be
420 : fast. We optimise for the ascii case, knowing that all our
421 : supported multi-byte character sets are ascii-compatible
422 : (ie. they match for the first 128 chars). Also, in Samba
423 : we only search for ascii characters in 'c' and that
424 : in all mb character sets with a compound character
425 : containing c, if 'c' is not a match at position
426 : p, then p[-1] > 0x7f. JRA. */
427 :
428 : {
429 13369 : size_t len = strlen(s);
430 13369 : const char *cp = s;
431 13369 : bool got_mb = false;
432 :
433 13369 : if (len == 0)
434 20 : return NULL;
435 13349 : cp += (len - 1);
436 : do {
437 74489 : if (c == *cp) {
438 : /* Could be a match. Part of a multibyte ? */
439 22471 : if ((cp > s) &&
440 11252 : (((unsigned char)cp[-1]) & 0x80)) {
441 : /* Yep - go slow :-( */
442 0 : got_mb = true;
443 0 : break;
444 : }
445 : /* No - we have a match ! */
446 11806 : return discard_const_p(char , cp);
447 : }
448 62683 : } while (cp-- != s);
449 1543 : if (!got_mb)
450 1543 : return NULL;
451 : }
452 :
453 0 : ic = get_iconv_handle();
454 :
455 0 : while (*s) {
456 : size_t size;
457 0 : codepoint_t c2 = next_codepoint_handle(ic, s, &size);
458 0 : if (c2 == c) {
459 0 : ret = discard_const_p(char, s);
460 : }
461 0 : s += size;
462 : }
463 :
464 0 : return ret;
465 : }
466 :
467 : /**
468 : return True if any (multi-byte) character is lower case
469 : */
470 0 : _PUBLIC_ bool strhaslower_handle(struct smb_iconv_handle *ic,
471 : const char *string)
472 : {
473 0 : while (*string) {
474 : size_t c_size;
475 : codepoint_t s;
476 : codepoint_t t;
477 :
478 0 : s = next_codepoint_handle(ic, string, &c_size);
479 0 : string += c_size;
480 :
481 0 : t = toupper_m(s);
482 :
483 0 : if (s != t) {
484 0 : return true; /* that means it has lower case chars */
485 : }
486 : }
487 :
488 0 : return false;
489 : }
490 :
491 0 : _PUBLIC_ bool strhaslower(const char *string)
492 : {
493 0 : struct smb_iconv_handle *ic = get_iconv_handle();
494 0 : return strhaslower_handle(ic, string);
495 : }
496 :
497 : /**
498 : return True if any (multi-byte) character is upper case
499 : */
500 0 : _PUBLIC_ bool strhasupper_handle(struct smb_iconv_handle *ic,
501 : const char *string)
502 : {
503 0 : while (*string) {
504 : size_t c_size;
505 : codepoint_t s;
506 : codepoint_t t;
507 :
508 0 : s = next_codepoint_handle(ic, string, &c_size);
509 0 : string += c_size;
510 :
511 0 : t = tolower_m(s);
512 :
513 0 : if (s != t) {
514 0 : return true; /* that means it has upper case chars */
515 : }
516 : }
517 :
518 0 : return false;
519 : }
520 :
521 0 : _PUBLIC_ bool strhasupper(const char *string)
522 : {
523 0 : struct smb_iconv_handle *ic = get_iconv_handle();
524 0 : return strhasupper_handle(ic, string);
525 : }
526 :
527 : /***********************************************************************
528 : strstr_m - We convert via ucs2 for now.
529 : ***********************************************************************/
530 :
531 172494 : char *strstr_m(const char *src, const char *findstr)
532 : {
533 172494 : TALLOC_CTX *mem_ctx = NULL;
534 : smb_ucs2_t *p;
535 : smb_ucs2_t *src_w, *find_w;
536 : const char *s;
537 : char *s2;
538 172494 : char *retp = NULL;
539 172494 : size_t converted_size, findstr_len = 0;
540 :
541 : /* for correctness */
542 172494 : if (!findstr[0]) {
543 0 : return discard_const_p(char, src);
544 : }
545 :
546 : /* Samba does single character findstr calls a *lot*. */
547 172494 : if (findstr[1] == '\0')
548 15834 : return strchr_m(src, *findstr);
549 :
550 : /* We optimise for the ascii case, knowing that all our
551 : supported multi-byte character sets are ascii-compatible
552 : (ie. they match for the first 128 chars) */
553 :
554 3433838 : for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) {
555 3337061 : if (*s == *findstr) {
556 101243 : if (!findstr_len)
557 85957 : findstr_len = strlen(findstr);
558 :
559 101243 : if (strncmp(s, findstr, findstr_len) == 0) {
560 59883 : return discard_const_p(char, s);
561 : }
562 : }
563 : }
564 :
565 96777 : if (!*s)
566 96777 : return NULL;
567 :
568 : #if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */
569 : /* 'make check' fails unless we do this */
570 :
571 : /* With compose characters we must restart from the beginning. JRA. */
572 0 : s = src;
573 : #endif
574 :
575 : /*
576 : * Use get_iconv_handle() just as a non-NULL talloc ctx. In
577 : * case we leak memory, this should then be more obvious in
578 : * the talloc report.
579 : */
580 0 : mem_ctx = talloc_new(get_iconv_handle());
581 0 : if (mem_ctx == NULL) {
582 0 : return NULL;
583 : }
584 :
585 0 : if (!push_ucs2_talloc(mem_ctx, &src_w, src, &converted_size)) {
586 0 : goto done;
587 : }
588 :
589 0 : if (!push_ucs2_talloc(mem_ctx, &find_w, findstr, &converted_size)) {
590 0 : goto done;
591 : }
592 :
593 0 : p = strstr_w(src_w, find_w);
594 :
595 0 : if (!p) {
596 0 : goto done;
597 : }
598 :
599 0 : *p = 0;
600 0 : if (!pull_ucs2_talloc(mem_ctx, &s2, src_w, &converted_size)) {
601 0 : goto done;
602 : }
603 0 : retp = discard_const_p(char, (s+strlen(s2)));
604 0 : done:
605 0 : TALLOC_FREE(mem_ctx);
606 0 : return retp;
607 : }
|