/* Encoding utility from UTF-8 to locale and vice versa
   Copyright (C) 2001, 2002 Peter Verthez

   Permission granted to do anything with this file that you want, as long
   as the above copyright is retained in all copies.
   THERE IS NO WARRANTY - USE AT YOUR OWN RISK
*/
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <stdlib.h>
#include <string.h>

#include "libcharset.h"
#include "utf8-locale.h"
/* Size in bytes given to the shared output buffer when it is first
   allocated; the converters double it whenever iconv reports E2BIG. */
#define INITIAL_OUTSIZE 256

/* Conversion descriptors, one per direction.  The value (iconv_t) -1 is
   used throughout this file to mean "not opened". */
static iconv_t utf8_to_locale = (iconv_t) -1;
static iconv_t locale_to_utf8 = (iconv_t) -1;
/* Output buffer shared by both conversion functions (each call overwrites
   the previous result), and its currently allocated size in bytes. */
static char* outbuffer = NULL;
static size_t outbufsize = 0;
/* Text written to the output in place of a character that cannot be
   converted from UTF-8 to the locale encoding. */
static const char* the_unknown = "?";
28 void convert_set_unknown(const char* unknown)
30 the_unknown = unknown;
33 void close_conversion_contexts()
35 iconv_close(utf8_to_locale);
36 iconv_close(locale_to_utf8);
37 utf8_to_locale = (iconv_t) -1;
38 locale_to_utf8 = (iconv_t) -1;
42 int open_conversion_contexts()
44 assert(utf8_to_locale == (iconv_t) -1);
45 assert(locale_to_utf8 == (iconv_t) -1);
46 utf8_to_locale = iconv_open(locale_charset(), "UTF-8");
47 if (utf8_to_locale == (iconv_t) -1)
50 locale_to_utf8 = iconv_open("UTF-8", locale_charset());
51 if (locale_to_utf8 == (iconv_t) -1) {
52 close_conversion_contexts();
56 outbufsize = INITIAL_OUTSIZE;
57 outbuffer = (char*)malloc(outbufsize);
58 atexit(close_conversion_contexts);
64 char* convert_utf8_to_locale(const char* input, int *conv_fails)
66 size_t insize = strlen(input);
68 ICONV_CONST char *inptr = (ICONV_CONST char*) input;
72 if (utf8_to_locale == (iconv_t) -1 && (open_conversion_contexts() == -1))
74 assert(utf8_to_locale != (iconv_t) -1);
75 /* make sure we start from an empty state */
76 iconv(utf8_to_locale, NULL, NULL, NULL, NULL);
77 if (conv_fails != NULL) *conv_fails = 0;
78 /* set up output buffer (empty it) */
81 memset(outbuffer, 0, outbufsize);
82 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
83 while (nconv == (size_t)-1) {
85 /* grow the output buffer */
87 outlen = outptr - outbuffer;
89 outbuffer = realloc(outbuffer, outbufsize);
90 outptr = outbuffer + outlen;
91 outsize = outbufsize - outlen;
92 memset(outptr, 0, outsize);
94 else if (errno == EILSEQ) {
95 /* skip over character */
96 const char* unkn_ptr = the_unknown;
97 if (conv_fails != NULL) (*conv_fails)++;
98 if ((*inptr & 0x80) == 0) {
99 /* an ASCII character, just skip one (this case is very improbable) */
103 /* a general UTF-8 character, skip all 0x10xxxxxx bytes */
105 while ((*inptr & 0xC0) == 0x80) {
109 /* append the "unknown" string to the output */
110 while (*unkn_ptr) { *outptr++ = *unkn_ptr++; outsize--; }
113 /* EINVAL should not happen, since we convert entire strings */
114 /* EBADF is an error which should be captured by the assert above */
117 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
122 char* convert_locale_to_utf8(const char* input)
124 size_t insize = strlen(input);
126 ICONV_CONST char *inptr = (ICONV_CONST char*) input;
130 if (locale_to_utf8 == (iconv_t) -1 && (open_conversion_contexts() == -1))
132 assert(locale_to_utf8 != (iconv_t) -1);
133 /* make sure we start from an empty state */
134 iconv(locale_to_utf8, NULL, NULL, NULL, NULL);
135 /* set up output buffer (empty it) */
137 outsize = outbufsize;
138 memset(outbuffer, 0, outbufsize);
139 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);
140 while (nconv == (size_t)-1) {
141 if (errno == E2BIG) {
142 /* grow the output buffer */
144 outlen = outptr - outbuffer;
146 outbuffer = realloc(outbuffer, outbufsize);
147 outptr = outbuffer + outlen;
148 outsize = outbufsize - outlen;
149 memset(outptr, 0, outsize);
152 /* EILSEQ should not happen, because UTF-8 can represent anything */
153 /* EINVAL should not happen, since we convert entire strings */
154 /* EBADF is an error which should be captured by the assert above */
157 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);