X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=utf8%2Futf8-locale.c;h=115c66ee7312e66bec78e797abaa4f2cc95bc9e2;hb=af581cf02cbbed0d24636be0b3533587448e7e0f;hp=28eeb4b24c8c1b7806c2b2a258f748fc7c9c6aad;hpb=f063286d11379bef709dc47d696e4f5b8e9b20c1;p=gedcom-parse.git diff --git a/utf8/utf8-locale.c b/utf8/utf8-locale.c index 28eeb4b..115c66e 100644 --- a/utf8/utf8-locale.c +++ b/utf8/utf8-locale.c @@ -9,156 +9,49 @@ /* $Id$ */ /* $Name$ */ +#include "utf8.h" #include -#include #include -#include -#include -#include "config.h" #include "libcharset.h" -#include "utf8.h" - -#define INITIAL_OUTSIZE 256 -static iconv_t utf8_to_locale = (iconv_t) -1; -static iconv_t locale_to_utf8 = (iconv_t) -1; -static char* outbuffer = NULL; -static size_t outbufsize = 0; -static const char* the_unknown = "?"; +static convert_t locale_conv = NULL; void convert_set_unknown(const char* unknown) { - the_unknown = unknown; + conversion_set_unknown(locale_conv, unknown); } void close_conversion_contexts() { - iconv_close(utf8_to_locale); - iconv_close(locale_to_utf8); - utf8_to_locale = (iconv_t) -1; - locale_to_utf8 = (iconv_t) -1; - free(outbuffer); + cleanup_utf8_conversion(locale_conv); } int open_conversion_contexts() { - assert(utf8_to_locale == (iconv_t) -1); - assert(locale_to_utf8 == (iconv_t) -1); - utf8_to_locale = iconv_open(locale_charset(), "UTF-8"); - if (utf8_to_locale == (iconv_t) -1) - return -1; + assert (locale_conv == NULL); + locale_conv = initialize_utf8_conversion(locale_charset()); + + if (locale_conv) { + atexit(close_conversion_contexts); + return 0; + } else { - locale_to_utf8 = iconv_open("UTF-8", locale_charset()); - if (locale_to_utf8 == (iconv_t) -1) { - close_conversion_contexts(); - return -1; - } - else { - outbufsize = INITIAL_OUTSIZE; - outbuffer = (char*)malloc(outbufsize); - atexit(close_conversion_contexts); - return 0; - } + return -1; } } char* convert_utf8_to_locale(const char* input, int *conv_fails) { - size_t insize = strlen(input); - size_t outsize; - ICONV_CONST char *inptr = (ICONV_CONST char*) input; - char *outptr; - size_t nconv; + if (!locale_conv) + open_conversion_contexts(); - if (utf8_to_locale == (iconv_t) -1 && (open_conversion_contexts() == -1)) { - if (conv_fails != NULL) *conv_fails = insize; - return NULL; - } - assert(utf8_to_locale != (iconv_t) -1); - /* make sure we start from an empty state */ - iconv(utf8_to_locale, NULL, NULL, NULL, NULL); - if (conv_fails != NULL) *conv_fails = 0; - /* set up output buffer (empty it) */ - outptr = outbuffer; - outsize = outbufsize; - memset(outbuffer, 0, outbufsize); - nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize); - while (nconv == (size_t)-1) { - if (errno == E2BIG) { - /* grow the output buffer */ - size_t outlen; - outlen = outptr - outbuffer; - outbufsize *= 2; - outbuffer = realloc(outbuffer, outbufsize); - outptr = outbuffer + outlen; - outsize = outbufsize - outlen; - memset(outptr, 0, outsize); - } - else if (errno == EILSEQ) { - /* skip over character */ - const char* unkn_ptr = the_unknown; - if (conv_fails != NULL) (*conv_fails)++; - if ((*inptr & 0x80) == 0) { - /* an ASCII character, just skip one (this case is very improbable) */ - inptr++; insize--; - } - else { - /* a general UTF-8 character, skip all 0x10xxxxxx bytes */ - inptr++; insize--; - while ((*inptr & 0xC0) == 0x80) { - inptr++; insize--; - } - } - /* append the "unknown" string to the output */ - while (*unkn_ptr) { *outptr++ = *unkn_ptr++; outsize--; } - } - else { - /* EINVAL should not happen, since we convert entire strings */ - /* EBADF is an error which should be captured by the assert above */ - if (conv_fails != NULL) *conv_fails += insize; - return NULL; - } - nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize); - } - return outbuffer; + return convert_from_utf8(locale_conv, input, conv_fails); } char* convert_locale_to_utf8(const char* input) { - size_t insize = strlen(input); - size_t outsize; - ICONV_CONST char *inptr = (ICONV_CONST char*) input; - char *outptr; - size_t nconv; + if (!locale_conv) + open_conversion_contexts(); - if (locale_to_utf8 == (iconv_t) -1 && (open_conversion_contexts() == -1)) - return NULL; - assert(locale_to_utf8 != (iconv_t) -1); - /* make sure we start from an empty state */ - iconv(locale_to_utf8, NULL, NULL, NULL, NULL); - /* set up output buffer (empty it) */ - outptr = outbuffer; - outsize = outbufsize; - memset(outbuffer, 0, outbufsize); - nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize); - while (nconv == (size_t)-1) { - if (errno == E2BIG) { - /* grow the output buffer */ - size_t outlen; - outlen = outptr - outbuffer; - outbufsize *= 2; - outbuffer = realloc(outbuffer, outbufsize); - outptr = outbuffer + outlen; - outsize = outbufsize - outlen; - memset(outptr, 0, outsize); - } - else { - /* EILSEQ should not happen, because UTF-8 can represent anything */ - /* EINVAL should not happen, since we convert entire strings */ - /* EBADF is an error which should be captured by the assert above */ - return NULL; - } - nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize); - } - return outbuffer; + return convert_to_utf8(locale_conv, input); }