LCOV - code coverage report
Current view: top level - lib/util/charset - util_unistr.c (source / functions) Hit Total Coverage
Test: coverage report for v4-17-test 1498b464 Lines: 96 159 60.4 %
Date: 2024-06-13 04:01:37 Functions: 11 17 64.7 %

          Line data    Source code
       1             : /* 
       2             :    Unix SMB/CIFS implementation.
       3             :    Samba utility functions
       4             :    Copyright (C) Andrew Tridgell 1992-2001
       5             :    Copyright (C) Simo Sorce 2001
       6             :    
       7             :    This program is free software; you can redistribute it and/or modify
       8             :    it under the terms of the GNU General Public License as published by
       9             :    the Free Software Foundation; either version 3 of the License, or
      10             :    (at your option) any later version.
      11             :    
      12             :    This program is distributed in the hope that it will be useful,
      13             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15             :    GNU General Public License for more details.
      16             :    
      17             :    You should have received a copy of the GNU General Public License
      18             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      19             : */
      20             : 
      21             : #include "replace.h"
      22             : #include "system/locale.h"
      23             : #include "charset.h"
      24             : #include "lib/util/byteorder.h"
      25             : #include "lib/util/fault.h"
      26             : 
      27             : /**
      28             :  String replace.
      29             :  NOTE: oldc and newc must be 7 bit characters
      30             : **/
      31           0 : _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
      32             : {
      33           0 :         struct smb_iconv_handle *ic = get_iconv_handle();
      34           0 :         while (s && *s) {
      35             :                 size_t size;
      36           0 :                 codepoint_t c = next_codepoint_handle(ic, s, &size);
      37           0 :                 if (c == oldc) {
      38           0 :                         *s = newc;
      39             :                 }
      40           0 :                 s += size;
      41             :         }
      42           0 : }
      43             : 
      44             : /**
      45             :  Convert a string to lower case, allocated with talloc
      46             : **/
      47     1012051 : _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
      48             :                                       TALLOC_CTX *ctx, const char *src)
      49             : {
      50     1012051 :         size_t size=0;
      51             :         char *dest;
      52             : 
      53     1012051 :         if(src == NULL) {
      54          44 :                 return NULL;
      55             :         }
      56             : 
      57             :         /* this takes advantage of the fact that upper/lower can't
      58             :            change the length of a character by more than 1 byte */
      59     1012007 :         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
      60     1012007 :         if (dest == NULL) {
      61           0 :                 return NULL;
      62             :         }
      63             : 
      64    24292622 :         while (*src) {
      65             :                 size_t c_size;
      66    22512478 :                 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
      67    22512478 :                 src += c_size;
      68             : 
      69    22512478 :                 c = tolower_m(c);
      70             : 
      71    22512478 :                 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
      72    22512478 :                 if (c_size == -1) {
      73           0 :                         talloc_free(dest);
      74           0 :                         return NULL;
      75             :                 }
      76    22512478 :                 size += c_size;
      77             :         }
      78             : 
      79     1012007 :         dest[size] = 0;
      80             : 
      81             :         /* trim it so talloc_append_string() works */
      82     1012007 :         dest = talloc_realloc(ctx, dest, char, size+1);
      83             : 
      84     1012007 :         talloc_set_name_const(dest, dest);
      85             : 
      86     1012007 :         return dest;
      87             : }
      88             : 
      89     1012051 : _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
      90             : {
      91     1012051 :         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
      92     1012051 :         return strlower_talloc_handle(iconv_handle, ctx, src);
      93             : }
      94             : 
      95             : /**
      96             :  Convert a string to UPPER case, allocated with talloc
      97             :  source length limited to n bytes, iconv handle supplied
      98             : **/
      99   490389673 : _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
     100             :                                         TALLOC_CTX *ctx, const char *src, size_t n)
     101             : {
     102   490389673 :         size_t size=0;
     103             :         char *dest;
     104             : 
     105   490389673 :         if (!src) {
     106      109834 :                 return NULL;
     107             :         }
     108             : 
     109             :         /* this takes advantage of the fact that upper/lower can't
     110             :            change the length of a character by more than 1 byte */
     111   490279839 :         dest = talloc_array(ctx, char, 2*(n+1));
     112   490279839 :         if (dest == NULL) {
     113           0 :                 return NULL;
     114             :         }
     115             : 
     116  7782172924 :         while (n && *src) {
     117             :                 size_t c_size;
     118  6922270098 :                 codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
     119             :                                                           CH_UNIX, &c_size);
     120  6922270098 :                 src += c_size;
     121  6922270098 :                 n -= c_size;
     122             : 
     123  6922270098 :                 c = toupper_m(c);
     124             : 
     125  6922270098 :                 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
     126  6922270098 :                 if (c_size == -1) {
     127           2 :                         talloc_free(dest);
     128           2 :                         return NULL;
     129             :                 }
     130  6922270096 :                 size += c_size;
     131             :         }
     132             : 
     133   490279837 :         dest[size] = 0;
     134             : 
     135             :         /* trim it so talloc_append_string() works */
     136   490279837 :         dest = talloc_realloc(ctx, dest, char, size+1);
     137             : 
     138   490279837 :         talloc_set_name_const(dest, dest);
     139             : 
     140   490279837 :         return dest;
     141             : }
     142             : 
     143             : /**
     144             :  Convert a string to UPPER case, allocated with talloc
     145             :  source length limited to n bytes
     146             : **/
     147   490389673 : _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
     148             : {
     149   490389673 :         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
     150   490389673 :         return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
     151             : }
     152             : /**
     153             :  Convert a string to UPPER case, allocated with talloc
     154             : **/
     155     1709934 : _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
     156             : {
     157     1709934 :         return strupper_talloc_n(ctx, src, src?strlen(src):0);
     158             : }
     159             : 
     160             : /**
     161             :  talloc_strdup() a unix string to upper case.
     162             : **/
     163       69000 : _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
     164             : {
     165       69000 :         return strupper_talloc(ctx, src);
     166             : }
     167             : 
     168             : /**
     169             :  Find the number of 'c' chars in a string
     170             : **/
     171           0 : _PUBLIC_ size_t count_chars_m(const char *s, char c)
     172             : {
     173           0 :         struct smb_iconv_handle *ic = get_iconv_handle();
     174           0 :         size_t count = 0;
     175             : 
     176           0 :         while (*s) {
     177             :                 size_t size;
     178           0 :                 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
     179           0 :                 if (c2 == c) count++;
     180           0 :                 s += size;
     181             :         }
     182             : 
     183           0 :         return count;
     184             : }
     185             : 
     186      756410 : size_t ucs2_align(const void *base_ptr, const void *p, int flags)
     187             : {
     188      756410 :         if (flags & (STR_NOALIGN|STR_ASCII)) {
     189       21367 :                 return 0;
     190             :         }
     191      735043 :         return PTR_DIFF(p, base_ptr) & 1;
     192             : }
     193             : 
     194             : /**
     195             : return the number of bytes occupied by a buffer in CH_UTF16 format
     196             : the result includes the null termination
     197             : **/
     198           0 : size_t utf16_len(const void *buf)
     199             : {
     200             :         size_t len;
     201             : 
     202           0 :         for (len = 0; SVAL(buf,len); len += 2) ;
     203             : 
     204           0 :         return len + 2;
     205             : }
     206             : 
     207             : /**
     208             : return the number of bytes occupied by a buffer in CH_UTF16 format
     209             : the result includes the null termination
     210             : limited by 'n' bytes
     211             : **/
     212      314147 : size_t utf16_len_n(const void *src, size_t n)
     213             : {
     214             :         size_t len;
     215             : 
     216      323253 :         for (len = 0; (len+2 < n) && SVAL(src, len); len += 2) ;
     217             : 
     218      314147 :         if (len+2 <= n) {
     219      314004 :                 len += 2;
     220             :         }
     221             : 
     222      314147 :         return len;
     223             : }
     224             : 
     225             : /**
     226             :  * Copy a string from a char* unix src to a dos codepage string destination.
     227             :  *
     228             :  * @param converted_size the number of bytes occupied by the string in the destination.
     229             :  * @return bool true if success.
     230             :  *
     231             :  * @param flags can include
     232             :  * <dl>
     233             :  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
     234             :  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
     235             :  * </dl>
     236             :  *
     237             :  * @param dest_len the maximum length in bytes allowed in the
     238             :  * destination.  If @p dest_len is -1 then no maximum is used.
     239             :  **/
     240        2103 : static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
     241             : {
     242             :         size_t src_len;
     243             :         bool ret;
     244             : 
     245        2103 :         if (flags & STR_UPPER) {
     246           4 :                 char *tmpbuf = strupper_talloc(NULL, src);
     247           4 :                 if (tmpbuf == NULL) {
     248           0 :                         return false;
     249             :                 }
     250           4 :                 ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
     251           4 :                 talloc_free(tmpbuf);
     252           4 :                 return ret;
     253             :         }
     254             : 
     255        2099 :         src_len = strlen(src);
     256             : 
     257        2099 :         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
     258        2075 :                 src_len++;
     259             : 
     260        2099 :         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
     261             : }
     262             : 
     263             : /**
     264             :  * Copy a string from a dos codepage source to a unix char* destination.
     265             :  *
     266             :  * The resulting string in "dest" is always null terminated.
     267             :  *
     268             :  * @param flags can have:
     269             :  * <dl>
     270             :  * <dt>STR_TERMINATE</dt>
     271             :  * <dd>STR_TERMINATE means the string in @p src
     272             :  * is null terminated, and src_len is ignored.</dd>
     273             :  * </dl>
     274             :  *
     275             :  * @param src_len is the length of the source area in bytes.
     276             :  * @returns the number of bytes occupied by the string in @p src.
     277             :  **/
     278           0 : static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     279             : {
     280           0 :         size_t size = 0;
     281             : 
     282           0 :         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
     283           0 :                 if (src_len == (size_t)-1) {
     284           0 :                         src_len = strlen((const char *)src) + 1;
     285             :                 } else {
     286           0 :                         size_t len = strnlen((const char *)src, src_len);
     287           0 :                         if (len < src_len)
     288           0 :                                 len++;
     289           0 :                         src_len = len;
     290             :                 }
     291             :         }
     292             : 
     293             :         /* We're ignoring the return here.. */
     294           0 :         (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
     295             : 
     296           0 :         if (dest_len)
     297           0 :                 dest[MIN(size, dest_len-1)] = 0;
     298             : 
     299           0 :         return src_len;
     300             : }
     301             : 
     302             : /**
     303             :  * Copy a string from a char* src to a unicode destination.
     304             :  *
     305             :  * @returns the number of bytes occupied by the string in the destination.
     306             :  *
     307             :  * @param flags can have:
     308             :  *
     309             :  * <dl>
     310             :  * <dt>STR_TERMINATE <dd>means include the null termination.
     311             :  * <dt>STR_UPPER     <dd>means uppercase in the destination.
     312             :  * <dt>STR_NOALIGN   <dd>means don't do alignment.
     313             :  * </dl>
     314             :  *
     315             :  * @param dest_len is the maximum length allowed in the
     316             :  * destination. If dest_len is -1 then no maximum is used.
     317             :  **/
     318      303586 : static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
     319             : {
     320      303586 :         size_t len=0;
     321      303586 :         size_t src_len = strlen(src);
     322      303586 :         size_t size = 0;
     323             :         bool ret;
     324             : 
     325      303586 :         if (flags & STR_UPPER) {
     326         924 :                 char *tmpbuf = strupper_talloc(NULL, src);
     327             :                 ssize_t retval;
     328         924 :                 if (tmpbuf == NULL) {
     329           0 :                         return -1;
     330             :                 }
     331         924 :                 retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
     332         924 :                 talloc_free(tmpbuf);
     333         924 :                 return retval;
     334             :         }
     335             : 
     336      302662 :         if (flags & STR_TERMINATE)
     337      162142 :                 src_len++;
     338             : 
     339      302662 :         if (ucs2_align(NULL, dest, flags)) {
     340       85368 :                 *(char *)dest = 0;
     341       85368 :                 dest = (void *)((char *)dest + 1);
     342       85368 :                 if (dest_len) dest_len--;
     343       85368 :                 len++;
     344             :         }
     345             : 
     346             :         /* ucs2 is always a multiple of 2 bytes */
     347      302662 :         dest_len &= ~1;
     348             : 
     349      302662 :         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
     350      302662 :         if (ret == false) {
     351           0 :                 return 0;
     352             :         }
     353             : 
     354      302662 :         len += size;
     355             : 
     356      302662 :         return (ssize_t)len;
     357             : }
     358             : 
     359             : 
     360             : /**
     361             :  Copy a string from a ucs2 source to a unix char* destination.
     362             :  Flags can have:
     363             :   STR_TERMINATE means the string in src is null terminated.
     364             :   STR_NOALIGN   means don't try to align.
     365             :  if STR_TERMINATE is set then src_len is ignored if it is -1.
     366             :  src_len is the length of the source area in bytes
     367             :  Return the number of bytes occupied by the string in src.
     368             :  The resulting string in "dest" is always null terminated.
     369             : **/
     370             : 
     371           0 : static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     372             : {
     373           0 :         size_t size = 0;
     374             : 
     375           0 :         if (ucs2_align(NULL, src, flags)) {
     376           0 :                 src = (const void *)((const char *)src + 1);
     377           0 :                 if (src_len > 0)
     378           0 :                         src_len--;
     379             :         }
     380             : 
     381           0 :         if (flags & STR_TERMINATE) {
     382           0 :                 if (src_len == (size_t)-1) {
     383           0 :                         src_len = utf16_len(src);
     384             :                 } else {
     385           0 :                         src_len = utf16_len_n(src, src_len);
     386             :                 }
     387             :         }
     388             : 
     389             :         /* ucs2 is always a multiple of 2 bytes */
     390           0 :         if (src_len != (size_t)-1)
     391           0 :                 src_len &= ~1;
     392             : 
     393             :         /* We're ignoring the return here.. */
     394           0 :         (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
     395           0 :         if (dest_len)
     396           0 :                 dest[MIN(size, dest_len-1)] = 0;
     397             : 
     398           0 :         return src_len;
     399             : }
     400             : 
     401             : /**
     402             :  Copy a string from a char* src to a unicode or ascii
     403             :  dos codepage destination choosing unicode or ascii based on the 
     404             :  flags in the SMB buffer starting at base_ptr.
     405             :  Return the number of bytes occupied by the string in the destination.
     406             :  flags can have:
     407             :   STR_TERMINATE means include the null termination.
     408             :   STR_UPPER     means uppercase in the destination.
     409             :   STR_ASCII     use ascii even with unicode packet.
     410             :   STR_NOALIGN   means don't do alignment.
     411             :  dest_len is the maximum length allowed in the destination. If dest_len
     412             :  is -1 then no maximum is used.
     413             : **/
     414             : 
     415      304761 : _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
     416             : {
     417      304761 :         if (flags & STR_ASCII) {
     418        2099 :                 size_t size = 0;
     419        2099 :                 if (push_ascii_string(dest, src, dest_len, flags, &size)) {
     420        2099 :                         return (ssize_t)size;
     421             :                 } else {
     422           0 :                         return (ssize_t)-1;
     423             :                 }
     424      302662 :         } else if (flags & STR_UNICODE) {
     425      302662 :                 return push_ucs2(dest, src, dest_len, flags);
     426             :         } else {
     427           0 :                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
     428             :                 return -1;
     429             :         }
     430             : }
     431             : 
     432             : 
     433             : /**
     434             :  Copy a string from a unicode or ascii source (depending on
     435             :  the packet flags) to a char* destination.
     436             :  Flags can have:
     437             :   STR_TERMINATE means the string in src is null terminated.
     438             :   STR_UNICODE   means to force as unicode.
     439             :   STR_ASCII     use ascii even with unicode packet.
     440             :   STR_NOALIGN   means don't do alignment.
     441             :  if STR_TERMINATE is set then src_len is ignored if it is -1
     442             :  src_len is the length of the source area in bytes.
     443             :  Return the number of bytes occupied by the string in src.
     444             :  The resulting string in "dest" is always null terminated.
     445             : **/
     446             : 
     447           0 : _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     448             : {
     449           0 :         if (flags & STR_ASCII) {
     450           0 :                 return pull_ascii_string(dest, src, dest_len, src_len, flags);
     451           0 :         } else if (flags & STR_UNICODE) {
     452           0 :                 return pull_ucs2(dest, src, dest_len, src_len, flags);
     453             :         } else {
     454           0 :                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
     455             :                 return -1;
     456             :         }
     457             : }

Generated by: LCOV version 1.13