/* Encoding utility from UTF-8 to locale and vice versa
   Copyright (C) 2001, 2002 Peter Verthez

   Permission granted to do anything with this file that you want, as long
   as the above copyright is retained in all copies.
   THERE IS NO WARRANTY - USE AT YOUR OWN RISK
*/
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <langinfo.h>
#include <stdlib.h>
#include <string.h>

#include "utf8-locale.h"
/* Size in bytes given to the shared output buffer when it is first
   allocated; the conversion routines enlarge it on demand. */
#define INITIAL_OUTSIZE 256

/* iconv descriptors for the two conversion directions.
   The value (iconv_t) -1 means "not opened". */
static iconv_t utf8_to_locale = (iconv_t) -1;
static iconv_t locale_to_utf8 = (iconv_t) -1;

/* Output buffer shared by both conversion routines, and its allocated
   size in bytes.  Both are reused from one conversion to the next. */
static char* outbuffer = NULL;
static size_t outbufsize = 0;

/* Text written to the output in place of a character that cannot be
   converted to the locale encoding. */
static const char* the_unknown = "?";
27 void convert_set_unknown(const char* unknown)
29 the_unknown = unknown;
32 void close_conversion_contexts()
34 iconv_close(utf8_to_locale);
35 iconv_close(locale_to_utf8);
36 utf8_to_locale = (iconv_t) -1;
37 locale_to_utf8 = (iconv_t) -1;
41 int open_conversion_contexts()
43 assert(utf8_to_locale == (iconv_t) -1);
44 assert(locale_to_utf8 == (iconv_t) -1);
45 utf8_to_locale = iconv_open(nl_langinfo(CODESET), "UTF-8");
46 if (utf8_to_locale == (iconv_t) -1)
49 locale_to_utf8 = iconv_open("UTF-8", nl_langinfo(CODESET));
50 if (locale_to_utf8 == (iconv_t) -1) {
51 close_conversion_contexts();
55 outbufsize = INITIAL_OUTSIZE;
56 outbuffer = (char*)malloc(outbufsize);
57 atexit(close_conversion_contexts);
63 char* convert_utf8_to_locale(char* input)
65 size_t insize = strlen(input);
71 if (utf8_to_locale == (iconv_t) -1 && (open_conversion_contexts() == -1))
73 assert(utf8_to_locale != (iconv_t) -1);
74 /* make sure we start from an empty state */
75 iconv(utf8_to_locale, NULL, NULL, NULL, NULL);
76 /* set up output buffer (empty it) */
79 memset(outbuffer, 0, outbufsize);
80 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
83 /* grow the output buffer */
85 outlen = outptr - outbuffer;
87 outbuffer = realloc(outbuffer, outbufsize);
88 outptr = outbuffer + outlen;
89 outsize = outbufsize - outlen;
90 memset(outptr, 0, outsize);
92 else if (errno == EILSEQ) {
93 /* skip over character */
94 const char* unkn_ptr = the_unknown;
95 if ((*inptr & 0x80) == 0) {
96 /* an ASCII character, just skip one (this case is very improbable) */
100 /* a general UTF-8 character, skip all 0x10xxxxxx bytes */
102 while ((*inptr & 0xC0) == 0x80) {
106 /* append the "unknown" string to the output */
107 while (*unkn_ptr) { *outptr++ = *unkn_ptr++; outsize--; }
110 /* EINVAL should not happen, since we convert entire strings */
111 /* EBADF is an error which should be captured by the assert above */
114 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
119 char* convert_locale_to_utf8(char* input)
121 size_t insize = strlen(input);
127 if (locale_to_utf8 == (iconv_t) -1 && (open_conversion_contexts() == -1))
129 assert(locale_to_utf8 != (iconv_t) -1);
130 /* make sure we start from an empty state */
131 iconv(locale_to_utf8, NULL, NULL, NULL, NULL);
132 /* set up output buffer (empty it) */
134 outsize = outbufsize;
135 memset(outbuffer, 0, outbufsize);
136 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);
137 while (nconv == -1) {
138 if (errno == E2BIG) {
139 /* grow the output buffer */
141 outlen = outptr - outbuffer;
143 outbuffer = realloc(outbuffer, outbufsize);
144 outptr = outbuffer + outlen;
145 outsize = outbufsize - outlen;
146 memset(outptr, 0, outsize);
149 /* EILSEQ should not happen, because UTF-8 can represent anything */
150 /* EINVAL should not happen, since we convert entire strings */
151 /* EBADF is an error which should be captured by the assert above */
154 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);