/* Encoding utility from UTF-8 to locale and vice versa
Copyright (C) 2001, 2002 Peter Verthez
- Permission granted to do anything with this file that you want, as long
- as the above copyright is retained in all copies.
- THERE IS NO WARRANTY - USE AT YOUR OWN RISK
+ The UTF8 tools library is free software; you can redistribute it
+ and/or modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The UTF8 tools library is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the UTF8 tools library; if not, write to the
+ Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA.
*/
/* $Id$ */
/* $Name$ */
+#include "utf8tools.h"
#include <stdlib.h>
-#include <iconv.h>
#include <assert.h>
-#include <errno.h>
#include <string.h>
-#include "config.h"
#include "libcharset.h"
-#include "utf8-locale.h"
-#define INITIAL_OUTSIZE 256
+static convert_t locale_conv = NULL; /* converter shared by the functions below; stays NULL until open_conversion_contexts() creates it */
-static iconv_t utf8_to_locale = (iconv_t) -1;
-static iconv_t locale_to_utf8 = (iconv_t) -1;
-static char* outbuffer = NULL;
-static size_t outbufsize = 0;
-static const char* the_unknown = "?";
-
-void convert_set_unknown(const char* unknown)
+/* Exit-time cleanup: hands the shared converter to cleanup_utf8_conversion()
+   and then clears the pointer, so that a NULL locale_conv always means
+   "no converter open" -- the state open_conversion_contexts() asserts on
+   entry.  Without the reset the pointer would keep referring to a converter
+   that has already been cleaned up. */
+static void close_conversion_contexts(void)
{
- the_unknown = unknown;
+  cleanup_utf8_conversion(locale_conv);
+  locale_conv = NULL;
}
-void close_conversion_contexts()
+static int open_conversion_contexts() /* Create the shared converter for the locale charset; returns 0 on success, -1 when no converter was obtained. */
{
- iconv_close(utf8_to_locale);
- iconv_close(locale_to_utf8);
- utf8_to_locale = (iconv_t) -1;
- locale_to_utf8 = (iconv_t) -1;
- free(outbuffer);
-}
+ assert (locale_conv == NULL); /* callers only come here while no converter exists */
+ locale_conv = initialize_utf8_conversion(locale_charset(), 0); /* NOTE(review): the meaning of the second argument (0) is defined by utf8tools.h -- confirm */
-int open_conversion_contexts()
-{
- assert(utf8_to_locale == (iconv_t) -1);
- assert(locale_to_utf8 == (iconv_t) -1);
- utf8_to_locale = iconv_open(locale_charset(), "UTF-8");
- if (utf8_to_locale == (iconv_t) -1)
- return -1;
+ if (locale_conv) {
+ atexit(close_conversion_contexts); /* run the cleanup handler at normal program exit */
+ return 0;
+ }
else {
- locale_to_utf8 = iconv_open("UTF-8", locale_charset());
- if (locale_to_utf8 == (iconv_t) -1) {
- close_conversion_contexts();
- return -1;
- }
- else {
- outbufsize = INITIAL_OUTSIZE;
- outbuffer = (char*)malloc(outbufsize);
- atexit(close_conversion_contexts);
- return 0;
- }
+ return -1; /* initialize_utf8_conversion() gave back NULL */
}
}
+/* Forward UNKNOWN to conversion_set_unknown() for the shared locale
+   converter (presumably the replacement text for unconvertible
+   characters, as in the old implementation -- confirm against
+   utf8tools.h).
+   The converter is opened on first use.  If it cannot be opened, the
+   request is dropped instead of passing a NULL converter on to
+   conversion_set_unknown(). */
+void convert_set_unknown(const char* unknown)
+{
+  if (!locale_conv && open_conversion_contexts() == -1)
+    return;
+  conversion_set_unknown(locale_conv, unknown);
+}
+
+/* Convert the UTF-8 string INPUT to the character set of the current
+   locale by delegating to convert_from_utf8() on the shared converter.
+   CONV_FAILS is handed through unchanged to convert_from_utf8().
+   The converter is opened on first use.
+   Returns whatever convert_from_utf8() returns, or NULL when the converter
+   could not be opened (the same failure result the previous iconv-based
+   implementation gave); in that case CONV_FAILS is left untouched. */
char* convert_utf8_to_locale(const char* input, int *conv_fails)
{
- size_t insize = strlen(input);
- size_t outsize;
- ICONV_CONST char *inptr = (ICONV_CONST char*) input;
- char *outptr;
- size_t nconv;
+  /* never hand a NULL converter to convert_from_utf8() */
+  if (!locale_conv && open_conversion_contexts() == -1)
+    return NULL;
- if (utf8_to_locale == (iconv_t) -1 && (open_conversion_contexts() == -1))
- return NULL;
- assert(utf8_to_locale != (iconv_t) -1);
- /* make sure we start from an empty state */
- iconv(utf8_to_locale, NULL, NULL, NULL, NULL);
- if (conv_fails != NULL) *conv_fails = 0;
- /* set up output buffer (empty it) */
- outptr = outbuffer;
- outsize = outbufsize;
- memset(outbuffer, 0, outbufsize);
- nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
- while (nconv == (size_t)-1) {
- if (errno == E2BIG) {
- /* grow the output buffer */
- size_t outlen;
- outlen = outptr - outbuffer;
- outbufsize *= 2;
- outbuffer = realloc(outbuffer, outbufsize);
- outptr = outbuffer + outlen;
- outsize = outbufsize - outlen;
- memset(outptr, 0, outsize);
- }
- else if (errno == EILSEQ) {
- /* skip over character */
- const char* unkn_ptr = the_unknown;
- if (conv_fails != NULL) (*conv_fails)++;
- if ((*inptr & 0x80) == 0) {
- /* an ASCII character, just skip one (this case is very improbable) */
- inptr++; insize--;
- }
- else {
- /* a general UTF-8 character, skip all 0x10xxxxxx bytes */
- inptr++; insize--;
- while ((*inptr & 0xC0) == 0x80) {
- inptr++; insize--;
- }
- }
- /* append the "unknown" string to the output */
- while (*unkn_ptr) { *outptr++ = *unkn_ptr++; outsize--; }
- }
- else {
- /* EINVAL should not happen, since we convert entire strings */
- /* EBADF is an error which should be captured by the assert above */
- return NULL;
- }
- nconv = iconv(utf8_to_locale, &inptr, &insize, &outptr, &outsize);
- }
- return outbuffer;
+  return convert_from_utf8(locale_conv, input, conv_fails, NULL);
}
+/* Convert the NUL-terminated string INPUT from the character set of the
+   current locale to UTF-8 by delegating to convert_to_utf8() on the shared
+   converter; the length passed on is strlen(INPUT).
+   The converter is opened on first use.
+   Returns whatever convert_to_utf8() returns, or NULL when the converter
+   could not be opened (the same failure result the previous iconv-based
+   implementation gave). */
char* convert_locale_to_utf8(const char* input)
{
- size_t insize = strlen(input);
- size_t outsize;
- ICONV_CONST char *inptr = (ICONV_CONST char*) input;
- char *outptr;
- size_t nconv;
+  /* never hand a NULL converter to convert_to_utf8() */
+  if (!locale_conv && open_conversion_contexts() == -1)
+    return NULL;
- if (locale_to_utf8 == (iconv_t) -1 && (open_conversion_contexts() == -1))
- return NULL;
- assert(locale_to_utf8 != (iconv_t) -1);
- /* make sure we start from an empty state */
- iconv(locale_to_utf8, NULL, NULL, NULL, NULL);
- /* set up output buffer (empty it) */
- outptr = outbuffer;
- outsize = outbufsize;
- memset(outbuffer, 0, outbufsize);
- nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);
- while (nconv == (size_t)-1) {
- if (errno == E2BIG) {
- /* grow the output buffer */
- size_t outlen;
- outlen = outptr - outbuffer;
- outbufsize *= 2;
- outbuffer = realloc(outbuffer, outbufsize);
- outptr = outbuffer + outlen;
- outsize = outbufsize - outlen;
- memset(outptr, 0, outsize);
- }
- else {
- /* EILSEQ should not happen, because UTF-8 can represent anything */
- /* EINVAL should not happen, since we convert entire strings */
- /* EBADF is an error which should be captured by the assert above */
- return NULL;
- }
- nconv = iconv(locale_to_utf8, &inptr, &insize, &outptr, &outsize);
- }
- return outbuffer;
+  return convert_to_utf8(locale_conv, input, strlen(input));
}