/* Encoding utility from UTF-8 to locale and vice versa
   Copyright (C) 2001, 2002 Peter Verthez

   Permission granted to do anything with this file that you want, as long
   as the above copyright is retained in all copies.
   THERE IS NO WARRANTY - USE AT YOUR OWN RISK
*/
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <stdlib.h>
#include <string.h>

#include "libcharset.h"
#include "utf8-locale.h"
/* Initial size, in bytes, of the shared output buffer.  The conversion
   functions grow the buffer on demand when iconv reports E2BIG. */
#define INITIAL_OUTSIZE 256

/* iconv conversion descriptors, one per direction.  The value (iconv_t) -1
   marks a descriptor that is not currently open. */
static iconv_t utf8_to_locale = (iconv_t) -1;
static iconv_t locale_to_utf8 = (iconv_t) -1;

/* Output buffer shared by both conversion directions, and its allocated
   size in bytes. */
static char *outbuffer = NULL;
static size_t outbufsize = 0;

/* Replacement text written to the output in place of UTF-8 input that the
   locale character set cannot represent. */
static const char *the_unknown = "?";
29 void convert_set_unknown(const char* unknown)
31 the_unknown = unknown;
34 void close_conversion_contexts()
36 iconv_close(utf8_to_locale);
37 iconv_close(locale_to_utf8);
38 utf8_to_locale = (iconv_t) -1;
39 locale_to_utf8 = (iconv_t) -1;
43 int open_conversion_contexts()
45 assert(utf8_to_locale == (iconv_t) -1);
46 assert(locale_to_utf8 == (iconv_t) -1);
47 utf8_to_locale = iconv_open(locale_charset(), "UTF-8");
48 if (utf8_to_locale == (iconv_t) -1)
51 locale_to_utf8 = iconv_open("UTF-8", locale_charset());
52 if (locale_to_utf8 == (iconv_t) -1) {
53 close_conversion_contexts();
57 outbufsize = INITIAL_OUTSIZE;
58 outbuffer = (char*)malloc(outbufsize);
59 atexit(close_conversion_contexts);
65 char* convert_utf8_to_locale(const char* input, int *conv_fails)
67 size_t insize = strlen(input);
69 ICONV_CONST char *inptr = (ICONV_CONST char*) input;
73 if (utf8_to_locale == (iconv_t) -1 && (open_conversion_contexts() == -1)) {
74 if (conv_fails != NULL) *conv_fails = insize;
77 assert(utf8_to_locale != (iconv_t) -1);
78 /* make sure we start from an empty state */
79 iconv(utf8_to_locale, NULL, NULL, NULL, NULL);
80 if (conv_fails != NULL) *conv_fails = 0;
81 /* set up output buffer (empty it) */
84 memset(outbuffer, 0, outbufsize);
85 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
86 while (nconv == (size_t)-1) {
88 /* grow the output buffer */
90 outlen = outptr - outbuffer;
92 outbuffer = realloc(outbuffer, outbufsize);
93 outptr = outbuffer + outlen;
94 outsize = outbufsize - outlen;
95 memset(outptr, 0, outsize);
97 else if (errno == EILSEQ) {
98 /* skip over character */
99 const char* unkn_ptr = the_unknown;
100 if (conv_fails != NULL) (*conv_fails)++;
101 if ((*inptr & 0x80) == 0) {
102 /* an ASCII character, just skip one (this case is very improbable) */
106 /* a general UTF-8 character, skip all 0x10xxxxxx bytes */
108 while ((*inptr & 0xC0) == 0x80) {
112 /* append the "unknown" string to the output */
113 while (*unkn_ptr) { *outptr++ = *unkn_ptr++; outsize--; }
116 /* EINVAL should not happen, since we convert entire strings */
117 /* EBADF is an error which should be captured by the assert above */
118 if (conv_fails != NULL) *conv_fails += insize;
121 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
126 char* convert_locale_to_utf8(const char* input)
128 size_t insize = strlen(input);
130 ICONV_CONST char *inptr = (ICONV_CONST char*) input;
134 if (locale_to_utf8 == (iconv_t) -1 && (open_conversion_contexts() == -1))
136 assert(locale_to_utf8 != (iconv_t) -1);
137 /* make sure we start from an empty state */
138 iconv(locale_to_utf8, NULL, NULL, NULL, NULL);
139 /* set up output buffer (empty it) */
141 outsize = outbufsize;
142 memset(outbuffer, 0, outbufsize);
143 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);
144 while (nconv == (size_t)-1) {
145 if (errno == E2BIG) {
146 /* grow the output buffer */
148 outlen = outptr - outbuffer;
150 outbuffer = realloc(outbuffer, outbufsize);
151 outptr = outbuffer + outlen;
152 outsize = outbufsize - outlen;
153 memset(outptr, 0, outsize);
156 /* EILSEQ should not happen, because UTF-8 can represent anything */
157 /* EINVAL should not happen, since we convert entire strings */
158 /* EBADF is an error which should be captured by the assert above */
161 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);