1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 #include "gedcom_internal.h"
/* Name of the encoding configuration file.  init_encodings opens it under
   this bare name and also as PKGDATADIR "/" ENCODING_CONF_FILE. */
34 #define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that update_gconv_search_path reads
   and extends. */
35 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* Conversion descriptor used by to_internal; the value (iconv_t) -1 means
   that no conversion context is currently open. */
38 static iconv_t cd_to_internal = (iconv_t) -1;
/* Encoding width that open_conv_to_internal passes to get_encoding. */
39 static ENCODING the_enc = ONE_BYTE;
/* Hash table filled by add_encoding: key "name(charwidth)", value the
   corresponding iconv encoding name.  NULL until init_encodings ran. */
40 static hash_t *encodings = NULL;
/* Character width strings; get_encoding indexes this array with an
   ENCODING value to build the lookup key. */
42 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
44 hnode_t *node_alloc(void *c UNUSED)
46 return (hnode_t *)malloc(sizeof *node_alloc(NULL));
49 void node_free(hnode_t *n, void *c UNUSED)
51 free((void*)hnode_getkey(n));
56 void add_encoding(const char *gedcom_n, const char* charwidth,
61 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
62 val = (char *) malloc(strlen(iconv_n) + 1);
65 /* sprintf is safe here (malloc'ed before) */
66 sprintf(key, "%s(%s)", gedcom_n, charwidth);
69 if (hash_lookup(encodings, key)) {
70 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
76 hash_alloc_insert(encodings, key, val);
83 char* get_encoding(const char* gedcom_n, ENCODING enc)
88 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
91 /* sprintf is safe here (malloc'ed before) */
92 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
94 node = hash_lookup(encodings, key);
97 return hnode_get(node);
100 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
110 void cleanup_encodings()
112 hash_free(encodings);
#ifdef USE_GLIBC_ICONV

/* The "GCONV_PATH=..." string built by update_gconv_search_path and
   handed to putenv.  It has to stay allocated for as long as it is part
   of the environment. */
static char *new_gconv_path;

/* Removes the search path variable from the environment again and
   releases the string that was built for it.

   update_gconv_search_path registers this function with atexit and may
   do so on more than one call, so it has to be safe to run repeatedly:
   the pointer is reset after the free to rule out a double free. */
void cleanup_gconv_path()
{
  /* Clean up environment.  This must precede the free below, because the
     environment may still refer to the string.
     NOTE(review): relies on putenv removing the variable when it is given
     a bare name without '=' (documented glibc behaviour) -- confirm for
     other C libraries. */
  putenv(GCONV_SEARCH_PATH);

  free(new_gconv_path);
  new_gconv_path = NULL;
}

/* Let function be called before main() */
void update_gconv_search_path() __attribute__ ((constructor));

#endif /* USE_GLIBC_ICONV */
/*
   The environment variable GCONV_PATH has to be adjusted before the very
   first call of iconv_open.  In the most general case, this means that we
   have to provide our own constructor here (in case one of the other
   library constructors uses iconv_open).

   However, it looks like a change to an environment variable made in a
   constructor doesn't always survive until the main() function.  This is
   the case if the environment variable is a new one, for which there was
   no room yet in the initial environment.  The initial environment is
   located on the stack, but when variables are added, it is moved to the
   heap (to be able to grow).  The main function then takes the one from
   the stack again, not the one from the heap, so the changes are lost.

   For this reason, the function below is also called from gedcom_init(),
   which needs to be called as early as possible in the program.
*/
/* Makes sure that PKGDATADIR is part of the GCONV_PATH environment
   variable, so that it is searched when conversion modules are looked up.
   Without USE_GLIBC_ICONV the function body is compiled out.

   The new "GCONV_PATH=..." string is kept in new_gconv_path, because a
   string handed to putenv has to remain allocated while it is part of
   the environment. */
151 void update_gconv_search_path()
153 #ifdef USE_GLIBC_ICONV
155 /* Add gedcom data directory to gconv search path */
156 gconv_path = getenv(GCONV_SEARCH_PATH);
/* Only act when the variable is unset or does not mention PKGDATADIR yet.
   NOTE(review): this is a plain substring test, so a longer directory
   name that merely contains PKGDATADIR also counts as "present". */
157 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
/* Variable not set at all: the new value is just PKGDATADIR */
158 if (gconv_path == NULL) {
159 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
163 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
/* Variable already set: append PKGDATADIR after a ':' separator */
166 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
171 sprintf(new_gconv_path, "%s=%s:%s",
172 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
175 /* Ignore failures of putenv (can't do anything about it anyway) */
176 putenv(new_gconv_path);
/* Allocation failure is reported directly on stderr */
178 fprintf(stderr, "Could not allocate memory at %s, %d\n",
/* init_called is defined outside this file; presumably it is set once
   gedcom_init has run -- TODO confirm.
   NOTE(review): the cleanup function is registered on every call that
   sees init_called set, so it may end up registered more than once. */
183 if (init_called && atexit(cleanup_gconv_path) != 0) {
184 gedcom_warning(_("Could not register path cleanup function"));
186 #endif /* USE_GLIBC_ICONV */
/* Builds the 'encodings' hash table from the encoding configuration
   file.  The work is only done while 'encodings' is still NULL, so
   repeated calls are harmless.

   Each line of the file that is neither a comment (first character '#')
   nor empty must hold three whitespace-separated fields, which are passed
   to add_encoding in the order: gedcom name, character width, iconv
   name. */
189 void init_encodings()
191 if (encodings == NULL) {
/* One buffer for the raw line and one per field.  A field can never be
   longer than the line it was read from, so the unbounded %s conversions
   in the sscanf call below cannot overflow the field buffers. */
193 char buffer[MAXBUF + 1];
194 char gedcom_n[MAXBUF + 1];
195 char charwidth[MAXBUF + 1];
196 char iconv_n[MAXBUF + 1];
/* Failing to register the cleanup is only worth a warning */
198 if (atexit(cleanup_encodings) != 0) {
199 gedcom_warning(_("Could not register encoding cleanup function"));
/* Create the table and install the allocator pair defined in this file,
   so that nodes, keys and values are released together with the table */
202 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
203 hash_set_allocator(encodings, node_alloc, node_free, NULL);
205 /* Open gedcom configuration file and read */
206 in = fopen(ENCODING_CONF_FILE, "r");
/* Second location: the same file name below PKGDATADIR.
   NOTE(review): sprintf does not bound the write; the size of 'path' is
   not visible here -- confirm that it can hold the full name. */
209 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
210 in = fopen(path, "r");
213 gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
214 ENCODING_CONF_FILE, strerror(errno));
218 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* fgets keeps the newline, so a line without one did not fit the buffer
   (or is an unterminated last line).
   NOTE(review): if the line starts with a null byte, strlen is 0 and
   this reads buffer[-1]. */
219 if (buffer[strlen(buffer) - 1] != '\n') {
220 gedcom_error(_("Line too long in encoding configuration file '%s'"),
/* Skip comment lines and empty lines */
224 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
225 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
226 add_encoding(gedcom_n, charwidth, iconv_n);
/* Fewer than three fields on the line */
229 gedcom_error(_("Missing data in encoding configuration file '%s'"),
235 if (fclose(in) != 0) {
236 gedcom_warning(_("Error closing file '%s': %s"),
237 ENCODING_CONF_FILE, strerror(errno));
/* Setter for the encoding width.
   NOTE(review): only the signature is visible here; presumably the body
   stores enc in the_enc, which open_conv_to_internal later passes to
   get_encoding -- confirm. */
243 void set_encoding_width(ENCODING enc)
/* Input staging buffer of to_internal: new input is appended to it, and
   whatever the conversion did not consume is moved back to its start for
   the next call. */
248 static char conv_buf[MAXGEDCLINELEN * 2];
/* Number of bytes in conv_buf that still wait to be converted */
249 static size_t conv_buf_size;
251 int open_conv_to_internal(const char* fromcode)
253 iconv_t new_cd_to_internal;
254 const char *encoding = get_encoding(fromcode, the_enc);
255 if (encoding == NULL) {
256 new_cd_to_internal = (iconv_t) -1;
259 memset(conv_buf, 0, sizeof(conv_buf));
261 new_cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
262 if (new_cd_to_internal == (iconv_t) -1) {
263 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
264 encoding, strerror(errno));
267 if (new_cd_to_internal != (iconv_t) -1) {
268 if (cd_to_internal != (iconv_t) -1)
269 iconv_close(cd_to_internal);
270 cd_to_internal = new_cd_to_internal;
272 return (new_cd_to_internal != (iconv_t) -1);
275 void close_conv_to_internal()
277 if (cd_to_internal != (iconv_t) -1) {
278 if (iconv_close(cd_to_internal) != 0) {
279 gedcom_warning(_("Error closing conversion context: %s"),
282 cd_to_internal = (iconv_t) -1;
/* Converts a chunk of input to the internal encoding, using the context
   in cd_to_internal.

   str, len       : input bytes; they are appended to what conv_buf still
                    holds from the previous call
   output_buffer,
   out_len        : destination; it is zeroed completely and then filled
                    by iconv

   retval starts out as output_buffer.  According to the comment in the
   EILSEQ branch, NULL is returned for an invalid input sequence; the
   return statement itself is not part of the lines visible here.

   NOTE(review): nothing visible here checks that conv_buf_size + len
   fits into conv_buf before the memcpy -- confirm that callers guarantee
   this. */
286 char* to_internal(const char* str, size_t len,
287 char* output_buffer, size_t out_len)
/* iconv moves the read and write pointers and counts down both sizes,
   so it works on local copies of the output arguments */
290 size_t outsize = out_len;
291 char *wrptr = output_buffer;
292 char *rdptr = conv_buf;
293 char *retval = output_buffer;
294 /* set up input buffer (concatenate to what was left previous time) */
295 /* can't use strcpy, because possible null bytes from unicode */
296 memcpy(conv_buf + conv_buf_size, str, len);
297 conv_buf_size += len;
298 /* set up output buffer (empty it) */
299 memset(output_buffer, 0, out_len);
300 /* do the conversion */
/* On return rdptr points behind the consumed input, and conv_buf_size is
   the number of input bytes that were not consumed */
301 res = iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
302 if (res == (size_t)-1) {
/* EILSEQ: the input holds a sequence that is invalid in the source
   encoding */
303 if (errno == EILSEQ) {
304 /* restart from an empty state and return NULL */
305 iconv(cd_to_internal, NULL, NULL, NULL, NULL);
/* EINVAL: the input ends in the middle of a multibyte sequence; the
   missing bytes are expected with the next call */
310 else if (errno == EINVAL) {
311 /* Do nothing, leave it to next iteration */
314 gedcom_error(_("Error in converting characters: %s"), strerror(errno));
317 /* then shift what is left over to the head of the input buffer */
/* memmove because source and destination lie in the same buffer; the
   tail behind the remaining bytes is cleared afterwards */
318 memmove(conv_buf, rdptr, conv_buf_size);
319 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);