Skip to content

Commit 649be43

Browse files
committed
Add locale_charset() function.
1 parent 92af7a5 commit 649be43

File tree

10 files changed

+760
-10
lines changed

10 files changed

+760
-10
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ lib/generated/check-encodings
8383

8484
# Submodules.
8585
build-aux/
86+
cppp-platform/
8687

8788
# Temp tests files.
8889
tests/data/UTF-8.TXT

CMakeLists.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ get_filename_component(BUILD_AUX "${BUILD_AUX}" ABSOLUTE)
3939
include("${BUILD_AUX}/cmake/cppp.cmake")
4040
# ----------------------------------------------------------------------------------
4141

42+
add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/cppp-platform")
43+
4244
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
4345
add_compile_options(-Wno-unused-parameter)
4446
add_compile_options(-Wno-missing-field-initializers)
@@ -99,12 +101,22 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/include/iconv.h.in" "${output_includ
99101
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/lib")
100102
include_directories("${output_includedir}")
101103

104+
# Add definitions.
102105
add_compile_definitions(VERSION_MAJOR=${PROJECT_VERSION_MAJOR})
103106
add_compile_definitions(VERSION_MINOR=${PROJECT_VERSION_MINOR})
104107
add_compile_definitions(VERSION_PATCH=${PROJECT_VERSION_PATCH})
105108

109+
# Detect a usable nl_langinfo(CODESET).
# Probing for the nl_langinfo symbol alone is not sufficient: the C sources also
# need <langinfo.h> and the CODESET item, so compile and link a snippet that
# uses all three before defining HAVE_LANGINFO_CODESET.
include(CheckFunctionExists)
include(CheckCSourceCompiles)
check_c_source_compiles("
#include <langinfo.h>
int main(void)
{
    char *cs = nl_langinfo(CODESET);
    return !cs;
}
" HAVE_LANGINFO_CODESET)
if(HAVE_LANGINFO_CODESET)
    add_compile_definitions(HAVE_LANGINFO_CODESET=1)
endif()
114+
106115
# Add library.
107-
set(SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/lib/iconv.c" "${CMAKE_CURRENT_SOURCE_DIR}/lib/reiconv.cpp")
116+
set(SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/lib/iconv.c"
117+
"${CMAKE_CURRENT_SOURCE_DIR}/lib/reiconv.cpp"
118+
"${CMAKE_CURRENT_SOURCE_DIR}/lib/locale_charset.c"
119+
"${CMAKE_CURRENT_SOURCE_DIR}/lib/locale_charset.cpp")
108120
cppp_build_library(${PROJECT_NAME} SOURCES TRUE TRUE "${CMAKE_BINARY_DIR}/windows/cppp-reiconv.rc")
109121

110122
# Include test suite.

include/cppp/reiconv.h.in

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,13 @@ extern _CPPP_API size_t reiconv_iconv(reiconv_t cd, char **inbuf, size_t *inbyte
201201
*/
202202
extern _CPPP_API int reiconv_handle_close(reiconv_t cd);
203203

204+
/**
205+
* @brief Determine the current locale's character encoding.
206+
* @note The result is statically allocated.
207+
* @return The canonicalized encoding name.
208+
*/
209+
extern _CPPP_API const char *locale_charset();
210+
204211
#ifdef __cplusplus
205212
}
206213
#endif

include/cppp/reiconv.hpp.in

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,9 @@ namespace reiconv
142142
extern _CPPP_API std::string convert(Encoding from, Encoding to, const std::string_view input, bool strict = true);
143143

144144
/**
145-
* @brief Determine the current locale's character encoding, and canonicalize it into one of
146-
* the canonical names.
147-
* @note The result must not be freed. It is statically allocated.
148-
* @note The result becomes invalid when `setlocale` is used to change the global locale, or
149-
* when the value of one of the environment variables `LC_ALL`, `LC_CTYPE`, `LANG` is changed.
150-
* Threads in multithreaded programs should not do this. If the canonical name cannot be
151-
* determined, the result is a non-canonical name.
152-
* @return The name of the current locale's character encoding.
153-
* @see Libiconv's `locale_charset`.
145+
* @brief Determine the current locale's character encoding.
146+
* @note The result must not be freed; it may become invalid when `setlocale` is called or when `LC_ALL`, `LC_CTYPE` or `LANG` is changed.
147+
* @return The canonicalized encoding name.
154148
*/
155149
extern _CPPP_API std::string_view locale_charset();
156150

lib/locale_charset.c

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/**
2+
* @file locale_charset.c
3+
* @brief Detect the locale charset.
4+
* @copyright Copyright (C) 2024 The C++ Plus Project.
5+
*/
6+
/*
7+
* This file is part of the cppp-reiconv library.
8+
*
9+
* The cppp-reiconv library is free software; you can redistribute it
10+
* and/or modify it under the terms of the GNU Lesser General Public
11+
* License as published by the Free Software Foundation; either version 3
12+
* of the License, or (at your option) any later version.
13+
*
14+
* The cppp-reiconv library is distributed in the hope that it will be
15+
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17+
* Lesser General Public License for more details.
18+
*
19+
* You should have received a copy of the GNU Lesser General Public
20+
* License along with the cppp-reiconv library; see the file LICENSE.
21+
* If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
24+
#include "cppp/cppp-platform.h"
25+
26+
#include <locale.h>
27+
#include <stddef.h>
28+
#include <stdio.h>
29+
#include <stdlib.h>
30+
#include <string.h>
31+
32+
#include "localecharset/lc_types.h"
33+
#include "localecharset/lc_utils.h"
34+
35+
#if IS_DARWIN7
36+
#include <xlocale.h>
37+
#endif // IS_DARWIN7
38+
39+
#if HAVE_LANGINFO_CODESET
40+
#include <langinfo.h>
41+
#endif // HAVE_LANGINFO_CODESET
42+
43+
#if IS_WINDOWS_NATIVE || __has_cygwin__
44+
#define WIN32_LEAN_AND_MEAN
45+
#include "localecharset/windows_getcp.h"
46+
47+
#include <Windows.h>
48+
49+
/*
50+
* For the use of setlocale() below, the Gnulib override in setlocale.c is
51+
* not needed; see the platform lists in setlocale_null.m4.
52+
*/
53+
#undef setlocale
54+
#endif // IS_WINDOWS_NATIVE || __has_cygwin__
55+
56+
/*
 * The locale table is only consulted by the environment-based fallback in
 * locale_charset(), i.e. when neither nl_langinfo(CODESET) nor the native
 * Windows code page query is used.
 */
#if !HAVE_LANGINFO_CODESET && !IS_WINDOWS_NATIVE
#include "localecharset/locale_table.h"
#endif // !HAVE_LANGINFO_CODESET && !IS_WINDOWS_NATIVE
59+
60+
const char *locale_charset()
61+
{
62+
#if IS_WINDOWS_NATIVE
63+
return windows_getcp();
64+
#endif // IS_WINDOWS_NATIVE
65+
66+
#if IS_MACOSX || __has_beos__ || __has_haiku__ || __has_android__
67+
/*
68+
* On Mac OS X, all modern locales use the UTF-8 encoding.
69+
* BeOS, Haiku and Android have a single locale, and it has UTF-8 encoding.
70+
*/
71+
return "UTF-8";
72+
#endif // IS_MACOSX || __has_beos__ || __has_haiku__ || __has_android__
73+
74+
const char *codeset = NULL;
75+
76+
#if HAVE_LANGINFO_CODESET
77+
return nl_langinfo(CODESET);
78+
#endif // HAVE_LANGINFO_CODESET
79+
80+
#if __has_cygwin__
81+
return windows_getcp();
82+
#endif // __has_cygwin__
83+
84+
const char *locale = NULL;
85+
GET_LOCALE(locale);
86+
87+
#if locale_table_defined
88+
// The locale_table is sorted. Perform a binary search.
89+
size_t hi = LOCALE_TABLE_SIZE;
90+
size_t lo = 0;
91+
while (lo < hi)
92+
{
93+
/*
94+
* Invariant:
95+
* for i < lo, strcmp (locale_table[i].locale, locale) < 0,
96+
* for i >= hi, strcmp (locale_table[i].locale, locale) > 0.
97+
*/
98+
size_t mid = (hi + lo) >> 1; // >= lo, < hi
99+
int cmp = strcmp(locale_table[mid].locale, locale);
100+
if (cmp < 0)
101+
{
102+
lo = mid + 1;
103+
}
104+
else if (cmp > 0)
105+
{
106+
hi = mid;
107+
}
108+
else
109+
{
110+
// Found an i with strcmp (locale_table[i].locale, locale) == 0.
111+
codeset = locale_table[mid].canonical;
112+
goto done_table_lookup;
113+
}
114+
}
115+
#endif // locale_table_defined
116+
// Cannot find the locale in the table, try to split it.
117+
const char *p = strchr(locale, '.');
118+
if (p == NULL)
119+
{
120+
// No dot.
121+
return DEFAULT_CHARSET;
122+
}
123+
124+
// Split the locale into two parts.
125+
codeset = p + 1;
126+
return codeset;
127+
128+
done_table_lookup:
129+
130+
#if IS_DARWIN7
131+
/* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
132+
(the default codeset) does not work when MB_CUR_MAX is 1. */
133+
if (strcmp(codeset, "UTF-8") == 0 && MB_CUR_MAX_L(uselocale(NULL)) <= 1)
134+
{
135+
codeset = "ASCII";
136+
}
137+
#endif // DARWIN7
138+
139+
return codeset;
140+
}

lib/locale_charset.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/**
2+
* @file locale_charset.cpp
3+
* @brief Detect the locale charset.
4+
* @copyright Copyright (C) 2024 The C++ Plus Project.
5+
*/
6+
/*
7+
* This file is part of the cppp-reiconv library.
8+
*
9+
* The cppp-reiconv library is free software; you can redistribute it
10+
* and/or modify it under the terms of the GNU Lesser General Public
11+
* License as published by the Free Software Foundation; either version 3
12+
* of the License, or (at your option) any later version.
13+
*
14+
* The cppp-reiconv library is distributed in the hope that it will be
15+
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17+
* Lesser General Public License for more details.
18+
*
19+
* You should have received a copy of the GNU Lesser General Public
20+
* License along with the cppp-reiconv library; see the file LICENSE.
21+
* If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
24+
#include <cppp/reiconv.hpp>
25+
26+
#include <cppp/reiconv.h>
27+
28+
#include <string_view>
29+
30+
namespace reiconv
31+
{
32+
_CPPP_API std::string_view locale_charset()
33+
{
34+
// The return value of C function is statically allocated.
35+
return ::locale_charset();
36+
}
37+
} // namespace reiconv

lib/localecharset/lc_types.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* @file lc_types.h
3+
* @brief Types for locale charset.
4+
* @copyright Copyright (C) 2024 The C++ Plus Project.
5+
*/
6+
/*
7+
* This file is part of the cppp-reiconv library.
8+
*
9+
* The cppp-reiconv library is free software; you can redistribute it
10+
* and/or modify it under the terms of the GNU Lesser General Public
11+
* License as published by the Free Software Foundation; either version 3
12+
* of the License, or (at your option) any later version.
13+
*
14+
* The cppp-reiconv library is distributed in the hope that it will be
15+
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17+
* Lesser General Public License for more details.
18+
*
19+
* You should have received a copy of the GNU Lesser General Public
20+
* License along with the cppp-reiconv library; see the file LICENSE.
21+
* If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
24+
#ifndef _LC_TYPES_H_
25+
#define _LC_TYPES_H_
26+
27+
#define DEFAULT_CHARSET "UTF-8"
28+
29+
struct locale_table_entry
30+
{
31+
const char locale[17 + 1];
32+
const char canonical[11 + 1];
33+
};
34+
35+
#endif // _LC_TYPES_H_

lib/localecharset/lc_utils.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/**
2+
* @file lc_utils.h
3+
* @brief Utilities for locale charset.
4+
* @copyright Copyright (C) 2024 The C++ Plus Project.
5+
*/
6+
/*
7+
* This file is part of the cppp-reiconv library.
8+
*
9+
* The cppp-reiconv library is free software; you can redistribute it
10+
* and/or modify it under the terms of the GNU Lesser General Public
11+
* License as published by the Free Software Foundation; either version 3
12+
* of the License, or (at your option) any later version.
13+
*
14+
* The cppp-reiconv library is distributed in the hope that it will be
15+
* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17+
* Lesser General Public License for more details.
18+
*
19+
* You should have received a copy of the GNU Lesser General Public
20+
* License along with the cppp-reiconv library; see the file LICENSE.
21+
* If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
24+
#ifndef _LC_UTILS_H_
25+
#define _LC_UTILS_H_
26+
27+
#include <cppp/cppp-platform.h>
28+
29+
/* IS_MACOSX is 1 when the compiler predefines both __APPLE__ and __MACH__, else 0. */
#if !defined(__APPLE__) || !defined(__MACH__)
#define IS_MACOSX 0
#else
#define IS_MACOSX 1
#endif // !defined(__APPLE__) || !defined(__MACH__)

/* IS_DARWIN7 additionally requires __has_macos__ and HAVE_LANGINFO_CODESET. */
#define IS_DARWIN7 (__has_macos__ && IS_MACOSX && HAVE_LANGINFO_CODESET)

/* IS_WINDOWS_NATIVE is true for Windows targets that are not Cygwin. */
#define IS_WINDOWS_NATIVE (__has_windows__ && !__has_cygwin__)
38+
39+
/**
 * @brief True when a locale name is unusable: a null pointer or an empty string.
 * @param locale Expression of type `const char *`. It is evaluated up to twice,
 *               so it must not have side effects.
 */
#define IS_INVALID_LOCALE(locale) ((locale) == NULL || (locale)[0] == '\0')

/**
 * @brief Store the name of the locale that governs character classification.
 *
 * The first non-empty value wins, in this order: the environment variables
 * `LC_ALL`, `LC_CTYPE`, `LANG`, and finally `setlocale(LC_CTYPE, NULL)`.
 * The value from `setlocale` is stored unchecked.
 *
 * @param locale Modifiable lvalue of type `const char *` that receives the name.
 * @note The including file must provide `getenv` (<stdlib.h>) and
 *       `setlocale` / `LC_CTYPE` (<locale.h>).
 */
#define GET_LOCALE(locale)                        \
    do                                            \
    {                                             \
        (locale) = getenv("LC_ALL");              \
        if (IS_INVALID_LOCALE(locale))            \
        {                                         \
            (locale) = getenv("LC_CTYPE");        \
        }                                         \
        if (IS_INVALID_LOCALE(locale))            \
        {                                         \
            (locale) = getenv("LANG");            \
        }                                         \
        if (IS_INVALID_LOCALE(locale))            \
        {                                         \
            (locale) = setlocale(LC_CTYPE, NULL); \
        }                                         \
    } while (0)
58+
59+
#endif // _LC_UTILS_H_

0 commit comments

Comments
 (0)