/* Encoding utility from UTF-8 to locale and vice versa
   Copyright (C) 2001, 2002 Peter Verthez

   Permission granted to do anything with this file that you want, as long
   as the above copyright is retained in all copies.
   THERE IS NO WARRANTY - USE AT YOUR OWN RISK
*/
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include "libcharset.h"
#include "utf8-locale.h"
/* Size in bytes of the shared output buffer when it is first allocated. */
#define INITIAL_OUTSIZE 256

/* Conversion descriptors; the value (iconv_t) -1 marks "not open". */
static iconv_t utf8_to_locale = (iconv_t) -1;
static iconv_t locale_to_utf8 = (iconv_t) -1;

/* Output buffer used by both conversion directions, and its capacity. */
static char *outbuffer = NULL;
static size_t outbufsize = 0;

/* Text written to the output in place of input that fails to convert. */
static const char *the_unknown = "?";
29 void convert_set_unknown(const char* unknown)
31 the_unknown = unknown;
34 void close_conversion_contexts()
36 iconv_close(utf8_to_locale);
37 iconv_close(locale_to_utf8);
38 utf8_to_locale = (iconv_t) -1;
39 locale_to_utf8 = (iconv_t) -1;
43 int open_conversion_contexts()
45 assert(utf8_to_locale == (iconv_t) -1);
46 assert(locale_to_utf8 == (iconv_t) -1);
47 utf8_to_locale = iconv_open(locale_charset(), "UTF-8");
48 if (utf8_to_locale == (iconv_t) -1)
51 locale_to_utf8 = iconv_open("UTF-8", locale_charset());
52 if (locale_to_utf8 == (iconv_t) -1) {
53 close_conversion_contexts();
57 outbufsize = INITIAL_OUTSIZE;
58 outbuffer = (char*)malloc(outbufsize);
59 atexit(close_conversion_contexts);
65 char* convert_utf8_to_locale(const char* input, int *conv_fails)
67 size_t insize = strlen(input);
69 ICONV_CONST char *inptr = (ICONV_CONST char*) input;
73 if (utf8_to_locale == (iconv_t) -1 && (open_conversion_contexts() == -1))
75 assert(utf8_to_locale != (iconv_t) -1);
76 /* make sure we start from an empty state */
77 iconv(utf8_to_locale, NULL, NULL, NULL, NULL);
78 if (conv_fails != NULL) *conv_fails = 0;
79 /* set up output buffer (empty it) */
82 memset(outbuffer, 0, outbufsize);
83 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
84 while (nconv == (size_t)-1) {
86 /* grow the output buffer */
88 outlen = outptr - outbuffer;
90 outbuffer = realloc(outbuffer, outbufsize);
91 outptr = outbuffer + outlen;
92 outsize = outbufsize - outlen;
93 memset(outptr, 0, outsize);
95 else if (errno == EILSEQ) {
96 /* skip over character */
97 const char* unkn_ptr = the_unknown;
98 if (conv_fails != NULL) (*conv_fails)++;
99 if ((*inptr & 0x80) == 0) {
100 /* an ASCII character, just skip one (this case is very improbable) */
104 /* a general UTF-8 character, skip all 0x10xxxxxx bytes */
106 while ((*inptr & 0xC0) == 0x80) {
110 /* append the "unknown" string to the output */
111 while (*unkn_ptr) { *outptr++ = *unkn_ptr++; outsize--; }
114 /* EINVAL should not happen, since we convert entire strings */
115 /* EBADF is an error which should be captured by the assert above */
118 nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
123 char* convert_locale_to_utf8(const char* input)
125 size_t insize = strlen(input);
127 ICONV_CONST char *inptr = (ICONV_CONST char*) input;
131 if (locale_to_utf8 == (iconv_t) -1 && (open_conversion_contexts() == -1))
133 assert(locale_to_utf8 != (iconv_t) -1);
134 /* make sure we start from an empty state */
135 iconv(locale_to_utf8, NULL, NULL, NULL, NULL);
136 /* set up output buffer (empty it) */
138 outsize = outbufsize;
139 memset(outbuffer, 0, outbufsize);
140 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);
141 while (nconv == (size_t)-1) {
142 if (errno == E2BIG) {
143 /* grow the output buffer */
145 outlen = outptr - outbuffer;
147 outbuffer = realloc(outbuffer, outbufsize);
148 outptr = outbuffer + outlen;
149 outsize = outbufsize - outlen;
150 memset(outptr, 0, outsize);
153 /* EILSEQ should not happen, because UTF-8 can represent anything */
154 /* EINVAL should not happen, since we convert entire strings */
155 /* EBADF is an error which should be captured by the assert above */
158 nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);