X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fencoding.c;h=c2c3e8980686ae72d30f79d29057f988de278bb1;hb=6103dd898c4de86c68891cc0222543988a2caab7;hp=07a95891e6bfa29c9a52c6fbc619479e29975031;hpb=fffe9618ff8a5cfcac207f231b531615dfb0e38f;p=gedcom-parse.git diff --git a/gedcom/encoding.c b/gedcom/encoding.c index 07a9589..c2c3e89 100644 --- a/gedcom/encoding.c +++ b/gedcom/encoding.c @@ -22,31 +22,30 @@ /* $Name$ */ #include -#include #include #include #include #include "gedcom_internal.h" #include "gedcom.h" #include "encoding.h" +#include "encoding_state.h" #include "hash.h" +#include "utf8tools.h" #define ENCODING_CONF_FILE "gedcom.enc" #define GCONV_SEARCH_PATH "GCONV_PATH" #define MAXBUF 255 -static iconv_t cd_to_internal = (iconv_t) -1; -static ENCODING the_enc = ONE_BYTE; static hash_t *encodings = NULL; const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" }; -hnode_t *node_alloc(void *c __attribute__((unused))) +hnode_t *node_alloc(void *c UNUSED) { return (hnode_t *)malloc(sizeof *node_alloc(NULL)); } -void node_free(hnode_t *n, void *c __attribute__((unused))) +void node_free(hnode_t *n, void *c UNUSED) { free((void*)hnode_getkey(n)); free(hnode_get(n)); @@ -80,14 +79,20 @@ void add_encoding(const char *gedcom_n, const char* charwidth, MEMORY_ERROR; } -char* get_encoding(const char* gedcom_n, ENCODING enc) +char* get_encoding(const char* gedcom_n, Encoding enc) { char *key; hnode_t *node; + + if (encodings == NULL) return NULL; key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3); if (key) { + char* sp_pos = NULL; + while ((sp_pos = strchr(gedcom_n, ' ')) != NULL) { + *sp_pos = '_'; + } /* sprintf is safe here (malloc'ed before) */ sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]); @@ -112,9 +117,23 @@ void cleanup_encodings() hash_free(encodings); } +#ifdef USE_GLIBC_ICONV + +static char *new_gconv_path; + +void cleanup_gconv_path() +{ + /* Clean up environment */ + putenv(GCONV_SEARCH_PATH); + if (new_gconv_path) + free(new_gconv_path); +} + /* Let function be called before main() */ void update_gconv_search_path() __attribute__ ((constructor)); +#endif /* USE_GLIBC_ICONV */ + /* Note: The environment variable GCONV_PATH has to be adjusted before the very @@ -136,11 +155,11 @@ void update_gconv_search_path() __attribute__ ((constructor)); void update_gconv_search_path() { +#ifdef USE_GLIBC_ICONV char *gconv_path; /* Add gedcom data directory to gconv search path */ gconv_path = getenv(GCONV_SEARCH_PATH); if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) { - char *new_gconv_path; if (gconv_path == NULL) { new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH) + strlen(PKGDATADIR) @@ -166,6 +185,10 @@ void update_gconv_search_path() abort(); } } + if (init_called && atexit(cleanup_gconv_path) != 0) { + gedcom_warning(_("Could not register path cleanup function")); + } +#endif /* USE_GLIBC_ICONV */ } void init_encodings() @@ -201,6 +224,7 @@ void init_encodings() if (buffer[strlen(buffer) - 1] != '\n') { gedcom_error(_("Line too long in encoding configuration file '%s'"), ENCODING_CONF_FILE); + line_no = 0; return; } else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) { @@ -210,10 +234,12 @@ void init_encodings() else { gedcom_error(_("Missing data in encoding configuration file '%s'"), ENCODING_CONF_FILE); + line_no = 0; return; } } } + line_no = 0; if (fclose(in) != 0) { gedcom_warning(_("Error closing file '%s': %s"), ENCODING_CONF_FILE, strerror(errno)); @@ -222,75 +248,45 @@ void init_encodings() } } -void set_encoding_width(ENCODING enc) -{ - the_enc = enc; -} - -static char conv_buf[MAXGEDCLINELEN * 2]; -static size_t conv_buf_size; +static convert_t to_int = NULL; +static char* error_value = ""; int open_conv_to_internal(const char* fromcode) { - const char *encoding = get_encoding(fromcode, the_enc); - if (cd_to_internal != (iconv_t) -1) - iconv_close(cd_to_internal); - if (encoding == NULL) { - cd_to_internal = (iconv_t) -1; - } - else { - memset(conv_buf, 0, sizeof(conv_buf)); - conv_buf_size = 0; - cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding); - if (cd_to_internal == (iconv_t) -1) { + convert_t new_to_int = NULL; + const char *encoding = get_encoding(fromcode, read_encoding.width); + + if (encoding != NULL) { + new_to_int = initialize_utf8_conversion(encoding, 1); + if (new_to_int == NULL) { gedcom_error(_("Error opening conversion context for encoding %s: %s"), encoding, strerror(errno)); } } - return (cd_to_internal != (iconv_t) -1); + + if (new_to_int != NULL) { + if (to_int != NULL) + cleanup_utf8_conversion(to_int); + to_int = new_to_int; + set_read_encoding(fromcode, encoding); + } + + return (new_to_int != NULL); } void close_conv_to_internal() { - if (iconv_close(cd_to_internal) != 0) { - gedcom_warning(_("Error closing conversion context: %s"), strerror(errno)); + if (to_int != NULL) { + cleanup_utf8_conversion(to_int); + to_int = NULL; } - cd_to_internal = (iconv_t) -1; } -char* to_internal(const char* str, size_t len, - char* output_buffer, size_t out_len) + +char* to_internal(const char* str, size_t len, struct conv_buffer* output_buf) { - size_t res; - size_t outsize = out_len; - char *wrptr = output_buffer; - char *rdptr = conv_buf; - char *retval = output_buffer; - /* set up input buffer (concatenate to what was left previous time) */ - /* can't use strcpy, because possible null bytes from unicode */ - memcpy(conv_buf + conv_buf_size, str, len); - conv_buf_size += len; - /* set up output buffer (empty it) */ - memset(output_buffer, 0, out_len); - /* do the conversion */ - res = iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize); - if (res == (size_t)-1) { - if (errno == EILSEQ) { - /* restart from an empty state and return NULL */ - iconv(cd_to_internal, NULL, NULL, NULL, NULL); - retval = NULL; - rdptr++; - conv_buf_size--; - } - else if (errno == EINVAL) { - /* Do nothing, leave it to next iteration */ - } - else { - gedcom_error(_("Error in converting characters: %s"), strerror(errno)); - } - } - /* then shift what is left over to the head of the input buffer */ - memmove(conv_buf, rdptr, conv_buf_size); - memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size); - return retval; + if (conversion_set_output_buffer(to_int, output_buf)) + return convert_to_utf8_incremental(to_int, str, len); + else + return error_value; }