/* Encoding utility from UTF-8 to locale and vice versa
   Copyright (C) 2001, 2002 Peter Verthez

   Permission granted to do anything with this file that you want, as long
   as the above copyright is retained in all copies.
   THERE IS NO WARRANTY - USE AT YOUR OWN RISK
*/
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <langinfo.h>
#include <stdlib.h>
#include <string.h>
#include "utf8-locale.h"
/* Size in bytes given to the shared output buffer when it is first
   allocated; the conversion functions enlarge it on demand. */
#define INITIAL_OUTSIZE 256

/* iconv descriptors for the two conversion directions.
   The value (iconv_t) -1 marks a descriptor that is not open. */
static iconv_t utf8_to_locale = (iconv_t) -1;
static iconv_t locale_to_utf8 = (iconv_t) -1;

/* Output buffer shared by both conversion functions, and its current
   allocated size in bytes.  Both functions return a pointer to it. */
static char* outbuffer = NULL;
static size_t outbufsize = 0;

/* Text written to the output in place of each input character that
   cannot be converted from UTF-8 to the locale encoding. */
static const char* the_unknown = "?";
27 void convert_set_unknown(const char* unknown)
29 the_unknown = unknown;
32 void close_conversion_contexts()
34 iconv_close(utf8_to_locale);
35 iconv_close(locale_to_utf8);
36 utf8_to_locale = (iconv_t) -1;
37 locale_to_utf8 = (iconv_t) -1;
41 int open_conversion_contexts()
43 assert(utf8_to_locale == (iconv_t) -1);
44 assert(locale_to_utf8 == (iconv_t) -1);
45 utf8_to_locale = iconv_open(nl_langinfo(CODESET), "UTF-8");
46 if (utf8_to_locale == (iconv_t) -1)
49 locale_to_utf8 = iconv_open("UTF-8", nl_langinfo(CODESET));
50 if (locale_to_utf8 == (iconv_t) -1) {
51 close_conversion_contexts();
55 outbufsize = INITIAL_OUTSIZE;
56 outbuffer = (char*)malloc(outbufsize);
57 atexit(close_conversion_contexts);
63 char* convert_utf8_to_locale(char* input, int *conv_fails)
65 size_t insize = strlen(input);
71 if (utf8_to_locale == (iconv_t) -1 && (open_conversion_contexts() == -1))
73 assert(utf8_to_locale != (iconv_t) -1);
74 /* make sure we start from an empty state */
75 iconv(utf8_to_locale, NULL, NULL, NULL, NULL);
76 if (conv_fails != NULL) *conv_fails = 0;
77 /* set up output buffer (empty it) */
80 memset(outbuffer, 0, outbufsize);
81 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
84 /* grow the output buffer */
86 outlen = outptr - outbuffer;
88 outbuffer = realloc(outbuffer, outbufsize);
89 outptr = outbuffer + outlen;
90 outsize = outbufsize - outlen;
91 memset(outptr, 0, outsize);
93 else if (errno == EILSEQ) {
94 /* skip over character */
95 const char* unkn_ptr = the_unknown;
96 if (conv_fails != NULL) (*conv_fails)++;
97 if ((*inptr & 0x80) == 0) {
98 /* an ASCII character, just skip one (this case is very improbable) */
102 /* a general UTF-8 character, skip all 0x10xxxxxx bytes */
104 while ((*inptr & 0xC0) == 0x80) {
108 /* append the "unknown" string to the output */
109 while (*unkn_ptr) { *outptr++ = *unkn_ptr++; outsize--; }
112 /* EINVAL should not happen, since we convert entire strings */
113 /* EBADF is an error which should be captured by the assert above */
116 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
121 char* convert_locale_to_utf8(char* input)
123 size_t insize = strlen(input);
129 if (locale_to_utf8 == (iconv_t) -1 && (open_conversion_contexts() == -1))
131 assert(locale_to_utf8 != (iconv_t) -1);
132 /* make sure we start from an empty state */
133 iconv(locale_to_utf8, NULL, NULL, NULL, NULL);
134 /* set up output buffer (empty it) */
136 outsize = outbufsize;
137 memset(outbuffer, 0, outbufsize);
138 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);
139 while (nconv == -1) {
140 if (errno == E2BIG) {
141 /* grow the output buffer */
143 outlen = outptr - outbuffer;
145 outbuffer = realloc(outbuffer, outbufsize);
146 outptr = outbuffer + outlen;
147 outsize = outbufsize - outlen;
148 memset(outptr, 0, outsize);
151 /* EILSEQ should not happen, because UTF-8 can represent anything */
152 /* EINVAL should not happen, since we convert entire strings */
153 /* EBADF is an error which should be captured by the assert above */
156 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);