VirtualBox

Ignore:
Timestamp:
Sep 21, 2022 10:26:43 PM (2 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
153714
Message:

IPRT/RTProcCreateEx/posix: Do not assume locale order in composite locale string on linux and OS/2. Skip the memmove stuff. Check for NULL and empty nl_langinfo_l returns on all platforms. Restored old encoding extraction logic for the newlocale failure path & for hosts (like OS/2) which don't have newlocale. bugref:10153

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/r3/posix/process-creation-posix.cpp

    r96609 r96820  
    14791479        /*
    14801480         * LC_ALL overrides everything else.  The LC_* environment variables are often set
    1481          * to the empty string so move on the next variable if that is the case.
     1481         * to the empty string so move on to the next variable if that is the case (that's
     1482         * what setlocale in glibc does).
    14821483         */
    14831484        const char *pszVar;
    14841485        int rc = RTEnvGetEx(hEnvToUse, pszVar = "LC_ALL", szEncoding, sizeof(szEncoding), NULL);
    1485         if (rc == VERR_ENV_VAR_NOT_FOUND || (RT_SUCCESS(rc) && !*szEncoding))
     1486        if (rc == VERR_ENV_VAR_NOT_FOUND || (RT_SUCCESS(rc) && szEncoding[0] == '\0'))
    14861487            rc = RTEnvGetEx(hEnvToUse, pszVar = "LC_CTYPE", szEncoding, sizeof(szEncoding), NULL);
    1487         if (rc == VERR_ENV_VAR_NOT_FOUND || (RT_SUCCESS(rc) && !*szEncoding))
     1488        if (rc == VERR_ENV_VAR_NOT_FOUND || (RT_SUCCESS(rc) && szEncoding[0] == '\0'))
    14881489            rc = RTEnvGetEx(hEnvToUse, pszVar = "LANG", szEncoding, sizeof(szEncoding), NULL);
    1489         if (RT_SUCCESS(rc) && *szEncoding)
     1490        if (RT_SUCCESS(rc) && szEncoding[0] != '\0')
    14901491        {
    14911492            /*
     
    14971498             * e.g.:
    14981499             *   en_US.UTF-8/POSIX/el_GR.UTF-8/el_CY.UTF-8/en_GB.UTF-8/es_ES.UTF-8
    1499              * N.B. On Solaris there is also a leading slash.
    1500              * On Linux the composite locale format is made up of key-value pairs of category
    1501              * names and locales of the form 'name=value' with each element separated by a
    1502              * semicolon in the same order as above with following additional categories
    1503              * included as well:
     1500             *
     1501             * On Solaris there is also a leading slash.
     1502             *
     1503             * On Linux and OS/2 the composite locale format is made up of key-value pairs
     1504             * of category names and locales of the form 'name=value' with each element
     1505             * separated by a semicolon in the same order as above with following additional
     1506             * categories included as well:
    15041507             *   LC_PAPER/LC_NAME/LC_ADDRESS/LC_TELEPHONE/LC_MEASUREMENT/LC_IDENTIFICATION
    15051508             * e.g.
     
    15091512             *   LC_IDENTIFICATION=fr_LU.utf8
    15101513             */
    1511 #if !defined(RT_OS_LINUX)
    1512 # if defined(RT_OS_SOLARIS)
    1513             if (RTPATH_IS_SLASH(*szEncoding))
    1514                 (void) memmove(szEncoding, szEncoding + 1, strlen(szEncoding));
    1515 # endif
    1516             char *pszSlash = strchr(szEncoding, '/');
     1514            char *pszEncodingStart = szEncoding;
     1515#if !defined(RT_OS_LINUX) && !defined(RT_OS_OS2)
     1516            if (*pszEncodingStart == '/')
     1517                pszEncodingStart++;
     1518            char *pszSlash = strchr(pszEncodingStart, '/');
    15171519            if (pszSlash)
    1518                 *pszSlash = '\0';
     1520                *pszSlash = '\0';       /* This ASSUMES the first one is LC_CTYPE! */
    15191521#else
    1520             char *pszSemicolon = strchr(szEncoding, ';');
    1521             if (pszSemicolon)
    1522             {
    1523                 *pszSemicolon = '\0';
    1524                 size_t cchPrefix = strlen("LC_CTYPE=");
    1525                 if (!RTStrNCmp(szEncoding, "LC_CTYPE=", cchPrefix))
    1526                     (void) memmove(szEncoding, szEncoding + cchPrefix, strlen(szEncoding));
    1527             }
    1528 #endif
     1522            char *pszCType = strstr(pszEncodingStart, "LC_CTYPE=");
     1523            if (pszCType)
     1524            {
     1525                pszEncodingStart = pszCType + sizeof("LC_CTYPE=") - 1;
     1526
     1527                char *pszSemiColon = strchr(pszEncodingStart, ';');
     1528                if (pszSemiColon)
     1529                    *pszSemiColon = '\0';
     1530            }
     1531#endif
     1532
    15291533            /*
    15301534             * Use newlocale and nl_langinfo_l to determine the default codeset for the locale
     
    15321536             * ancient days on Linux and for quite a long time on macOS, Solaris, and *BSD but
    15331537             * to ensure their availability check that LC_CTYPE_MASK is defined.
     1538             *
     1539             * Note! The macOS nl_langinfo(3)/nl_langinfo_l(3) routines return a pointer to an
     1540             *       empty string for "short" locale names like en_NZ, it_IT, el_GR, etc. so use
     1541             *       UTF-8 in those cases as it is the default for short name locales on macOS
     1542             *       (see also rtStrGetLocaleCodeset).
    15341543             */
    15351544#ifdef LC_CTYPE_MASK
    1536             locale_t hLocale = newlocale(LC_CTYPE_MASK, szEncoding, (locale_t)0);
     1545            locale_t hLocale = newlocale(LC_CTYPE_MASK, pszEncodingStart, (locale_t)0);
    15371546            if (hLocale != (locale_t)0)
    15381547            {
    15391548                const char *pszCodeset = nl_langinfo_l(CODESET, hLocale);
     1549                Log2Func(("nl_langinfo_l(CODESET, %s=%s) -> %s\n", pszVar, pszEncodingStart, pszCodeset));
     1550                if (!pszCodeset || *pszCodeset == '\0')
    15401551# ifdef RT_OS_DARWIN
    1541                 /*
    1542                  * The macOS nl_langinfo(3)/nl_langinfo_l(3) routines return a pointer to an
    1543                  * empty string for "short" locale names like en_NZ, it_IT, el_GR, etc. so
    1544                  * fallback to UTF-8 in those cases which is the default for short name locales
    1545                  * on macOS anyhow.
    1546                  */
    1547                 if (pszCodeset && !*pszCodeset)
    1548                     pszCodeset = "UTF-8";
     1552                    pszEncoding = "UTF-8";
     1553# else
     1554                    pszEncoding = "ASCII";
    15491555# endif
    1550                 Log2Func(("nl_langinfo_l(CODESET, %s=%s) -> %s\n", pszVar, szEncoding, pszCodeset));
    1551                 Assert(pszCodeset && *pszCodeset != '\0');
    1552 
    1553                 rc = RTStrCopy(szEncoding, sizeof(szEncoding), pszCodeset);
    1554                 AssertRC(rc); /* cannot possibly overflow */
     1556                else
     1557                {
     1558                    rc = RTStrCopy(szEncoding, sizeof(szEncoding), pszCodeset);
     1559                    AssertRC(rc); /* cannot possibly overflow */
     1560                }
    15551561
    15561562                freelocale(hLocale);
     
    15601566#endif
    15611567             {
    1562                  /* This is mostly wrong, but I cannot think of anything better now: */
    1563                  pszEncoding = rtStrGetLocaleCodeset();
    1564                  LogFunc(("No newlocale or it failed (on '%s=%s', errno=%d), falling back on %s that we're using...\n",
    1565                           pszVar, szEncoding, errno, pszEncoding));
     1568                 /* If there is something that ought to be a character set encoding, try to use it: */
     1569                 const char *pszDot = strchr(pszEncodingStart, '.');
     1570                 if (pszDot)
     1571                     pszDot = RTStrStripL(pszDot + 1);
     1572                 if (pszDot && *pszDot != '\0')
     1573                 {
     1574                     pszEncoding = pszDot;
     1575                     Log2Func(("%s=%s -> %s (simple)\n", pszVar, szEncoding, pszEncoding));
     1576                 }
     1577                 else
     1578                 {
     1579                     /* This is mostly wrong, but I cannot think of anything better now: */
     1580                     pszEncoding = rtStrGetLocaleCodeset();
     1581                     LogFunc(("No newlocale or it failed (on '%s=%s', errno=%d), falling back on %s that we're using...\n",
     1582                              pszVar, pszEncodingStart, errno, pszEncoding));
     1583                 }
    15661584             }
    15671585             RT_NOREF_PV(pszVar);
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette