1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 #include "gedcom_internal.h"
/* Name of the encoding configuration file that init_encodings() opens. */
35 #define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that update_gconv_search_path() reads
   and extends. */
36 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* NOTE(review): cd_to_internal is not referenced in the visible part of this
   file; (iconv_t) -1 is presumably the "not opened" marker -- confirm. */
40 static iconv_t cd_to_internal = (iconv_t) -1;
/* Character width handed to get_encoding() by open_conv_to_internal(). */
42 static ENCODING the_enc = ONE_BYTE;
/* Hash table created in init_encodings() and filled by add_encoding();
   NULL until init_encodings() has run. */
43 static hash_t *encodings = NULL;
/* Character-width names; get_encoding() indexes this array with an ENCODING
   value to build its lookup key. */
45 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
/* Node allocator installed on the encodings hash via hash_set_allocator()
   in init_encodings().  The context argument is unused.  Returns whatever
   malloc() returns (no NULL check in this function). */
47 hnode_t *node_alloc(void *c UNUSED)
/* sizeof does not evaluate its operand, so node_alloc() is not called
   recursively here: the expression only yields the size of the hnode_t that
   the function's return type points to. */
49 return (hnode_t *)malloc(sizeof *node_alloc(NULL));
/* Node deallocator installed on the encodings hash via hash_set_allocator()
   in init_encodings().  The context argument is unused.
   NOTE(review): only the release of the key is visible in this excerpt;
   confirm that the stored value and the node itself are freed as well. */
52 void node_free(hnode_t *n, void *c UNUSED)
/* The key was malloc'ed by add_encoding(); the cast drops the qualifier of
   the pointer returned by hnode_getkey() so it can be passed to free(). */
54 free((void*)hnode_getkey(n));
/* Register one mapping in the encodings hash.
   The key is the string "<gedcom_n>(<charwidth>)"; the value buffer is sized
   to hold a copy of iconv_n.  If the key is already present a warning is
   issued, otherwise key and value are inserted into the hash.
   NOTE(review): the remaining parameter line, the check of the malloc()
   results and the copy into val are not visible in this excerpt. */
59 void add_encoding(const char *gedcom_n, const char* charwidth,
/* +3 leaves room for '(' and ')' plus the terminating NUL. */
64 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
65 val = (char *) malloc(strlen(iconv_n) + 1);
68 /* sprintf is safe here (malloc'ed before) */
69 sprintf(key, "%s(%s)", gedcom_n, charwidth);
/* hash_lookup() returning non-NULL means this name/width pair was already
   registered. */
72 if (hash_lookup(encodings, key)) {
73 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
79 hash_alloc_insert(encodings, key, val);
/* Look up the value stored by add_encoding() for a GEDCOM encoding name and
   character width.
   The lookup key is built in the same "<name>(<width>)" form that
   add_encoding() uses, with the width string taken from charwidth_string[enc].
   Returns the value stored in the hash node when the lookup succeeds; an
   error "No encoding defined for ..." is reported for the failing case.
   NOTE(review): enc is used as an array index without a range check in the
   visible lines, and neither the release of key nor the return value of the
   failing path is visible in this excerpt -- confirm both. */
86 char* get_encoding(const char* gedcom_n, ENCODING enc)
/* +3 leaves room for '(' and ')' plus the terminating NUL. */
91 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
94 /* sprintf is safe here (malloc'ed before) */
95 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
97 node = hash_lookup(encodings, key);
100 return hnode_get(node);
103 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
/* Exit handler registered with atexit() in init_encodings(); releases the
   encodings hash through hash_free(). */
113 void cleanup_encodings()
115 hash_free(encodings);
118 #ifdef USE_GLIBC_ICONV
/* Buffer holding the "GCONV_PATH=..." string that update_gconv_search_path()
   hands to putenv(); kept at file scope so cleanup_gconv_path() can free it. */
120 static char *new_gconv_path;
/* Exit handler registered with atexit() in update_gconv_search_path(). */
122 void cleanup_gconv_path()
124 /* Clean up environment */
/* The string passed here is the bare variable name, without '='.
   NOTE(review): presumably relies on glibc removing the variable in that
   case (this code is only built under USE_GLIBC_ICONV) -- confirm. */
125 putenv(GCONV_SEARCH_PATH);
127 free(new_gconv_path);
130 /* Let function be called before main() */
131 void update_gconv_search_path() __attribute__ ((constructor));
133 #endif /* USE_GLIBC_ICONV */
137 The environment variable GCONV_PATH has to be adjusted before the very
138 first call of iconv_open. For the most general case, it means that we
139 have to make our own constructor here (in case some of the other library
140 constructors would use iconv_open).
142 However, it looks like a change of an environment variable in a constructor
143 doesn't always survive until the main() function. This is the case if
144 the environment variable is a new one, for which there was no room yet
145 in the initial environment. The initial environment is located on the
146 stack, but when variables are added, it is moved to the heap (to be able
147 to grow). However, the main function again uses the environment on the
148 stack, not the one on the heap, so the changes are lost.
150 For this reason, the function below is also called in gedcom_init(), which
151 needs to be called as early as possible in the program.
/* Make sure PKGDATADIR is part of the GCONV_PATH environment variable.
   The body is only compiled when USE_GLIBC_ICONV is defined.  If the variable
   is unset, it is set to PKGDATADIR; if it is set but does not contain
   PKGDATADIR, PKGDATADIR is appended after a ':'.  The new string is stored
   in new_gconv_path and installed with putenv().  An allocation failure is
   reported on stderr.
   NOTE(review): several lines (local declarations, malloc size expressions,
   else branches) are not visible in this excerpt. */
154 void update_gconv_search_path()
156 #ifdef USE_GLIBC_ICONV
158 /* Add gedcom data directory to gconv search path */
159 gconv_path = getenv(GCONV_SEARCH_PATH);
/* NOTE(review): strstr() is a plain substring test, so a path entry that
   merely contains PKGDATADIR as part of a longer directory name also counts
   as "already present". */
160 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
161 if (gconv_path == NULL) {
162 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
166 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
169 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
174 sprintf(new_gconv_path, "%s=%s:%s",
175 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
178 /* Ignore failures of putenv (can't do anything about it anyway) */
/* putenv() keeps the pointer it is given rather than copying the string,
   which is why the buffer is only freed in cleanup_gconv_path(). */
179 putenv(new_gconv_path);
181 fprintf(stderr, "Could not allocate memory at %s, %d\n",
/* The exit handler is only registered when init_called is set. */
186 if (init_called && atexit(cleanup_gconv_path) != 0) {
187 gedcom_warning(_("Could not register path cleanup function"));
189 #endif /* USE_GLIBC_ICONV */
/* Build the encodings hash from the encoding configuration file.
   Does nothing when the hash already exists (encodings != NULL).  Otherwise
   it registers cleanup_encodings() with atexit(), creates the hash with
   node_alloc()/node_free() as allocator, and reads ENCODING_CONF_FILE line by
   line.  Lines starting with '#' and empty lines are skipped; every other
   line must contain three whitespace-separated fields, which are passed to
   add_encoding().
   NOTE(review): several lines (declarations of in and path, the conditions
   around the two fopen() calls, the continuation lines of the error calls)
   are not visible in this excerpt. */
192 void init_encodings()
194 if (encodings == NULL) {
196 char buffer[MAXBUF + 1];
/* Each field buffer is as large as the line buffer, so the unbounded %s
   conversions in the sscanf() below cannot write past them: a field can
   never be longer than the line it was read from. */
197 char gedcom_n[MAXBUF + 1];
198 char charwidth[MAXBUF + 1];
199 char iconv_n[MAXBUF + 1];
201 if (atexit(cleanup_encodings) != 0) {
202 gedcom_warning(_("Could not register encoding cleanup function"));
205 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
206 hash_set_allocator(encodings, node_alloc, node_free, NULL);
208 /* Open gedcom configuration file and read */
/* First attempt: a relative path, i.e. the current working directory. */
209 in = fopen(ENCODING_CONF_FILE, "r");
/* Second attempt: the copy under PKGDATADIR (presumably only when the
   first fopen() failed -- the condition is not visible here).
   NOTE(review): the size of path is not visible; confirm that it can hold
   PKGDATADIR, '/', the file name and the terminating NUL. */
212 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
213 in = fopen(path, "r");
216 gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
217 ENCODING_CONF_FILE, strerror(errno));
221 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* A line that does not end in '\n' did not fit into buffer.
   NOTE(review): a final line without a trailing newline is also reported as
   too long, and if buffer is an empty string (e.g. the line starts with a
   NUL byte) the index strlen(buffer) - 1 is out of bounds. */
222 if (buffer[strlen(buffer) - 1] != '\n') {
223 gedcom_error(_("Line too long in encoding configuration file '%s'"),
227 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
228 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
229 add_encoding(gedcom_n, charwidth, iconv_n);
232 gedcom_error(_("Missing data in encoding configuration file '%s'"),
238 if (fclose(in) != 0) {
239 gedcom_warning(_("Error closing file '%s': %s"),
240 ENCODING_CONF_FILE, strerror(errno));
/* NOTE(review): only the signature is visible in this excerpt; presumably
   stores enc in the_enc, which open_conv_to_internal() later passes to
   get_encoding() -- confirm against the full file. */
246 void set_encoding_width(ENCODING enc)
/* Current conversion context: used by to_internal(), replaced in
   open_conv_to_internal() and released in close_conv_to_internal().
   NULL means no context is open. */
251 static convert_t to_int = NULL;
/* NOTE(review): not referenced in the visible lines; presumably what
   to_internal() returns when it cannot convert -- confirm. */
252 static char* error_value = "<error>";
/* Open a conversion context for the given source encoding.
   fromcode is resolved with get_encoding() using the current the_enc, and
   the result is passed to initialize_utf8_conversion().  An error is
   reported when that call returns NULL.
   Returns non-zero when a new context was created, 0 otherwise.
   NOTE(review): the assignment of the new context to to_int is not visible
   in this excerpt. */
254 int open_conv_to_internal(const char* fromcode)
256 convert_t new_to_int = NULL;
257 const char *encoding = get_encoding(fromcode, the_enc);
/* Skip the open attempt when get_encoding() found no mapping. */
259 if (encoding != NULL) {
260 new_to_int = initialize_utf8_conversion(encoding, 1);
261 if (new_to_int == NULL) {
262 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
263 encoding, strerror(errno));
/* The previous context is only released once a new one has been created. */
267 if (new_to_int != NULL) {
269 cleanup_utf8_conversion(to_int);
273 return (new_to_int != NULL);
/* Release the current conversion context, if one is open.
   NOTE(review): whether to_int is reset to NULL afterwards is not visible
   in this excerpt -- confirm. */
276 void close_conv_to_internal()
278 if (to_int != NULL) {
279 cleanup_utf8_conversion(to_int);
/* Convert len bytes of str with the current conversion context to_int,
   using output_buf as the output buffer.
   Returns the result of convert_to_utf8_incremental() when
   conversion_set_output_buffer() reports success.
   NOTE(review): the return value of the failing path is not visible in this
   excerpt (presumably error_value) -- confirm. */
285 char* to_internal(const char* str, size_t len, struct conv_buffer* output_buf)
287 if (conversion_set_output_buffer(to_int, output_buf))
288 return convert_to_utf8_incremental(to_int, str, len);