/* Conversion between encodings.
- Copyright (C) 2001 The Genes Development Team
+ Copyright (C) 2001,2002 The Genes Development Team
This file is part of the Gedcom parser library.
Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
/* $Name$ */
#include <string.h>
-#include <iconv.h>
-#include <search.h>
#include <stdio.h>
#include <limits.h>
#include <stdlib.h>
#include "gedcom_internal.h"
+#include "gedcom.h"
#include "encoding.h"
+#include "hash.h"
+#include "utf8.h"
-#define INTERNAL_ENCODING "UTF8"
#define ENCODING_CONF_FILE "gedcom.enc"
#define GCONV_SEARCH_PATH "GCONV_PATH"
#define MAXBUF 255
-static iconv_t cd_to_internal = (iconv_t) -1;
-static void *encoding_mapping = NULL;
-static ENCODING the_enc = ONE_BYTE;
+static Encoding the_enc = ONE_BYTE; /* Character width used when looking up an encoding; changed by set_encoding_width(). */
+static hash_t *encodings = NULL; /* Table of "NAME(WIDTH)" keys to iconv names; NULL until init_encodings() creates it. */
-struct node {
- char *gedcom_name;
- char *iconv_name;
-};
+const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" }; /* Width suffixes for lookup keys, indexed by an Encoding value in get_encoding(). */
-char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
+/* Node allocator installed on the encodings table by init_encodings():
+   returns uninitialised storage for one hnode_t, or NULL when malloc
+   fails.  The context argument is not used. */
+hnode_t *node_alloc(void *c UNUSED)
+{
+ hnode_t *fresh = (hnode_t *)malloc(sizeof(hnode_t));
+ return fresh;
+}
-int node_compare(const void *node1, const void *node2)
+void node_free(hnode_t *n, void *c UNUSED) /* Node deallocator for the encodings table: frees the node's key, its value, then the node; c is not used. */
{
- return strcmp(((const struct node *) node1)->gedcom_name,
- ((const struct node *) node2)->gedcom_name);
+ free((void*)hnode_getkey(n)); /* key string malloc'ed in add_encoding(); the cast only matches free()'s parameter type */
+ free(hnode_get(n)); /* value string malloc'ed in add_encoding() */
+ free(n);
}
-void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n)
+void add_encoding(const char *gedcom_n, const char* charwidth, /* Stores a copy of iconv_n in the encodings table under the key "gedcom_n(charwidth)". */
+ const char *iconv_n)
{
- void **datum;
- struct node *nodeptr = (struct node *) malloc(sizeof *nodeptr);
- nodeptr->gedcom_name = (char *) malloc(strlen(gedcom_n)
- + strlen(charwidth) + 3);
- nodeptr->iconv_name = (char *) malloc(strlen(iconv_n) + 1);
- /* sprintf is safe here (malloc'ed before) */
- sprintf(nodeptr->gedcom_name, "%s(%s)", gedcom_n, charwidth);
- strcpy(nodeptr->iconv_name, iconv_n);
- datum = tsearch(nodeptr, &encoding_mapping, node_compare);
- if ((datum == NULL) || (*datum != nodeptr)) {
- gedcom_warning("Duplicate entry found for encoding '%s', ignoring",
- gedcom_n);
+ char *key, *val;
+
+ key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3); /* +3 covers '(', ')' and the terminating NUL */
+ val = (char *) malloc(strlen(iconv_n) + 1);
+
+ if (key && val) {
+ /* sprintf is safe here (malloc'ed before) */
+ sprintf(key, "%s(%s)", gedcom_n, charwidth);
+ strcpy(val, iconv_n);
+
+ if (hash_lookup(encodings, key)) { /* an entry with this key already exists: warn and drop the new copies */
+ gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
+ gedcom_n);
+ free(key);
+ free(val);
+ }
+ else {
+ hash_alloc_insert(encodings, key, val); /* key and val now belong to the table; presumably released through node_free() */
+ }
}
+ else
+ MEMORY_ERROR; /* NOTE(review): if only one of the two allocations failed, the other buffer is not freed on this path */
}
-char* get_encoding(const char* gedcom_n, ENCODING enc)
+char* get_encoding(const char* gedcom_n, Encoding enc) /* Returns the value stored under "gedcom_n(width string of enc)", or NULL if the table is absent, the key is unknown, or allocation fails. */
{
- void **datum;
- struct node search_node;
- char *buffer;
- buffer = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
- /* sprintf is safe here (malloc'ed before) */
- sprintf(buffer, "%s(%s)", gedcom_n, charwidth_string[enc]);
- search_node.gedcom_name = buffer;
- datum = tfind(&search_node, &encoding_mapping, node_compare);
- free(buffer);
- if (datum == NULL) {
- gedcom_error("No encoding found for '%s'", gedcom_n);
- return NULL;
+ char *key;
+ hnode_t *node;
+
+ if (encodings == NULL) return NULL; /* table not created: nothing to look up, and no error is reported */
+
+ key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3); /* +3 covers '(', ')' and the terminating NUL */
+
+ if (key) {
+ /* sprintf is safe here (malloc'ed before) */
+ sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
+
+ node = hash_lookup(encodings, key);
+ free(key); /* the temporary key is only needed for the lookup */
+ if (node) {
+ return hnode_get(node); /* value string stored by add_encoding(); it stays in the table, so callers should not free it */
+ }
+ else {
+ gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
+ return NULL;
+ }
}
else {
- return ((const struct node *) *datum)->iconv_name;
+ MEMORY_ERROR;
+ return NULL;
+ }
+}
+
+/* atexit handler registered by init_encodings(): releases the encodings
+   table.  The pointer is cleared afterwards because get_encoding() and
+   init_encodings() treat a non-NULL value as "table present"; leaving it
+   set would make them follow a dangling pointer.  The NULL check covers
+   the case where the handler was registered but the table was never
+   created. */
+void cleanup_encodings()
+{
+ if (encodings != NULL) {
+  hash_free(encodings);
+  encodings = NULL;
+ }
+}
+
+#ifdef USE_GLIBC_ICONV
+
+/* String handed to putenv() by update_gconv_search_path(); NULL while the
+   environment has not been modified and again after cleanup. */
+static char *new_gconv_path;
+
+/* atexit handler: takes GCONV_PATH out of the environment (glibc removes a
+   variable when putenv() is given a name without '=') and then releases
+   the string that backed it.  The variable is removed first because
+   putenv() makes the environment refer to the string itself, not a copy. */
+void cleanup_gconv_path()
+{
+ /* Clean up environment */
+ putenv(GCONV_SEARCH_PATH);
+ /* free(NULL) is a no-op, so no guard is needed.  The pointer is reset
+    because update_gconv_search_path() may register this handler more than
+    once; a second run must not free the same block again. */
+ free(new_gconv_path);
+ new_gconv_path = NULL;
+}
+
+/* Let function be called before main() */
+void update_gconv_search_path() __attribute__ ((constructor));
+
+#endif /* USE_GLIBC_ICONV */
+
+/* Note:
+
+ The environment variable GCONV_PATH has to be adjusted before the very
+ first call of iconv_open. For the most general case, it means that we
+ have to make our own constructor here (in case some of the other library
+ constructors would use iconv_open).
+
+ However, it looks like a change of an environment variable in a constructor
+ doesn't always survive until the main() function. This is the case if
+ the environment variable is a new one, for which there was no room yet
+ in the initial environment. The initial environment is located on the
+ stack, but when variables are added, it is moved to the heap (to be able
+ to grow). Now, the main function takes again the one from the stack, not
+ from the heap, so changes are lost.
+
+ For this, the function below will also be called in gedcom_init(), which
+ needs to be called as early as possible in the program.
+ */
+
+void update_gconv_search_path() /* Adds PKGDATADIR to GCONV_PATH (creating the variable if unset) unless the path already mentions it; does nothing without USE_GLIBC_ICONV. */
+{
+#ifdef USE_GLIBC_ICONV
+ char *gconv_path;
+ /* Add gedcom data directory to gconv search path */
+ gconv_path = getenv(GCONV_SEARCH_PATH);
+ if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
+ if (gconv_path == NULL) {
+ new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
+ + strlen(PKGDATADIR)
+ + 2); /* +2 covers '=' and the terminating NUL */
+ if (new_gconv_path)
+ sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
+ }
+ else {
+ new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
+ + strlen(gconv_path)
+ + strlen(PKGDATADIR)
+ + 3); /* +3 covers '=', ':' and the terminating NUL */
+ if (new_gconv_path)
+ sprintf(new_gconv_path, "%s=%s:%s",
+ GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
+ }
+ if (new_gconv_path)
+ /* Ignore failures of putenv (can't do anything about it anyway) */
+ putenv(new_gconv_path); /* the string is kept allocated here; cleanup_gconv_path() frees it */
+ else {
+ fprintf(stderr, "Could not allocate memory at %s, %d\n",
+ __FILE__, __LINE__);
+ abort();
+ }
}
+ if (init_called && atexit(cleanup_gconv_path) != 0) { /* cleanup is registered only when init_called (declared outside this view) is set; NOTE(review): it is registered again on every such call */
+ gedcom_warning(_("Could not register path cleanup function"));
+ }
+#endif /* USE_GLIBC_ICONV */
}
void init_encodings()
{
- if (encoding_mapping == NULL) {
+ if (encodings == NULL) { /* build the table only while none exists */
FILE *in;
char buffer[MAXBUF + 1];
char gedcom_n[MAXBUF + 1];
char charwidth[MAXBUF + 1];
char iconv_n[MAXBUF + 1];
- char *gconv_path;
-
- /* Add gedcom data directory to gconv search path */
- gconv_path = getenv(GCONV_SEARCH_PATH);
- if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
- char *new_gconv_path;
- if (gconv_path == NULL) {
- new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
- + strlen(PKGDATADIR)
- + 2);
- sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
- }
- else {
- new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
- + strlen(gconv_path)
- + strlen(PKGDATADIR)
- + 3);
- sprintf(new_gconv_path, "%s=%s:%s",
- GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
- }
- if (putenv(new_gconv_path) != 0) {
- gedcom_warning("Failed updating environment variable %s",
- GCONV_SEARCH_PATH);
- }
+
+ if (atexit(cleanup_encodings) != 0) { /* arrange for the table to be released at program exit */
+ gedcom_warning(_("Could not register encoding cleanup function"));
}
+ encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL); /* NOTE(review): the result is used below without a NULL check */
+ hash_set_allocator(encodings, node_alloc, node_free, NULL); /* table nodes come from node_alloc() and go back through node_free() */
+
/* Open gedcom configuration file and read */
in = fopen(ENCODING_CONF_FILE, "r");
if (in == NULL) {
in = fopen(path, "r");
}
if (in == NULL) {
- gedcom_warning("Could not open encoding configuration file '%s'",
- ENCODING_CONF_FILE);
+ gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
+ ENCODING_CONF_FILE, strerror(errno));
}
else {
+ line_no = 1; /* line_no is declared outside this view; presumably used when reporting diagnostics — TODO confirm */
while (fgets(buffer, sizeof(buffer), in) != NULL) {
if (buffer[strlen(buffer) - 1] != '\n') {
- gedcom_error("Line too long in encoding configuration file '%s'",
+ gedcom_error(_("Line too long in encoding configuration file '%s'"),
ENCODING_CONF_FILE);
+ line_no = 0; /* NOTE(review): the return that follows leaves `in` open in the lines visible here */
return;
}
else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
add_encoding(gedcom_n, charwidth, iconv_n);
}
else {
- gedcom_error("Missing data in encoding configuration file '%s'",
+ gedcom_error(_("Missing data in encoding configuration file '%s'"),
ENCODING_CONF_FILE);
+ line_no = 0; /* NOTE(review): the return that follows leaves `in` open in the lines visible here */
return;
}
}
}
- fclose(in);
+ line_no = 0; /* reset once reading is finished */
+ if (fclose(in) != 0) { /* a close failure is only reported as a warning */
+ gedcom_warning(_("Error closing file '%s': %s"),
+ ENCODING_CONF_FILE, strerror(errno));
+ }
}
}
}
-void set_encoding_width(ENCODING enc)
+void set_encoding_width(Encoding enc) /* Records enc as the character width that open_conv_to_internal() passes to get_encoding(). */
{
the_enc = enc;
}
-static char conv_buf[MAXGEDCLINELEN * 2];
-static size_t conv_buf_size;
+static convert_t to_int = NULL; /* Conversion context used by to_internal(); NULL when none is open. */
+static char* error_value = "<error>"; /* Returned by to_internal() when the output buffer cannot be attached. */
-int open_conv_to_internal(char* fromcode)
+int open_conv_to_internal(const char* fromcode) /* Opens a conversion context for fromcode at the current width; returns nonzero on success, 0 otherwise. */
{
- char *encoding = get_encoding(fromcode, the_enc);
- if (cd_to_internal != (iconv_t) -1)
- iconv_close(cd_to_internal);
- if (encoding == NULL) {
- cd_to_internal = (iconv_t) -1;
- }
- else {
- memset(conv_buf, 0, sizeof(conv_buf));
- conv_buf_size = 0;
- cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
- if (cd_to_internal == (iconv_t) -1) {
- gedcom_error("Error opening conversion context for encoding %s: %s",
+ convert_t new_to_int = NULL;
+ const char *encoding = get_encoding(fromcode, the_enc);
+
+ if (encoding != NULL) {
+ new_to_int = initialize_utf8_conversion(encoding, 1); /* NOTE(review): the meaning of the second argument is not visible here; presumably documented in utf8.h */
+ if (new_to_int == NULL) {
+ gedcom_error(_("Error opening conversion context for encoding %s: %s"),
encoding, strerror(errno));
}
}
- return (cd_to_internal != (iconv_t) -1);
+
+ if (new_to_int != NULL) { /* swap contexts only on success; after a failure the previous context stays active */
+ if (to_int != NULL)
+ cleanup_utf8_conversion(to_int);
+ to_int = new_to_int;
+ }
+
+ return (new_to_int != NULL);
}
void close_conv_to_internal()
{
- iconv_close(cd_to_internal);
- cd_to_internal = (iconv_t) -1;
+ if (to_int != NULL) { /* nothing to do when no conversion context is open */
+ cleanup_utf8_conversion(to_int);
+ to_int = NULL; /* cleared so that a repeated call skips the cleanup */
+ }
}
-char* to_internal(char* str, size_t len,
- char* output_buffer, size_t out_len)
+
+char* to_internal(const char* str, size_t len, struct conv_buffer* output_buf) /* Passes len bytes of str through the open conversion context using output_buf; returns "<error>" if the buffer cannot be attached. */
{
- size_t outsize = out_len;
- char *wrptr = output_buffer;
- char *rdptr = conv_buf;
- /* set up input buffer (concatenate to what was left previous time) */
- /* can't use strcpy, because possible null bytes from unicode */
- memcpy(conv_buf + conv_buf_size, str, len);
- conv_buf_size += len;
- /* set up output buffer (empty it) */
- memset(output_buffer, 0, out_len);
- /* do the conversion */
- iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
- /* then shift what is left over to the head of the input buffer */
- memmove(conv_buf, rdptr, conv_buf_size);
- memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);
- return output_buffer;
+ if (conversion_set_output_buffer(to_int, output_buf)) /* attach the caller's buffer before converting */
+ return convert_to_utf8_incremental(to_int, str, len);
+ else
+ return error_value;
}