1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 #include "gedcom_internal.h"
/* Name of the encoding configuration file.  init_encodings() tries it as a
   relative path first and then under PKGDATADIR. */
34 #define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that update_gconv_search_path() reads
   with getenv() and rewrites with putenv(). */
35 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* Active iconv descriptor for conversion to the internal encoding;
   (iconv_t) -1 means that no conversion context is open. */
38 static iconv_t cd_to_internal = (iconv_t) -1;
/* Character width passed to get_encoding() when a conversion context is
   opened; starts out as ONE_BYTE. */
39 static ENCODING the_enc = ONE_BYTE;
/* Hash table of known encodings, keyed by "<gedcom name>(<charwidth>)".
   Created on the first call of init_encodings(). */
40 static hash_t *encodings = NULL;
/* Character-width suffix used in the hash keys, indexed by an ENCODING
   value in get_encoding().
   NOTE(review): assumes the ENCODING enumerators map to indices 0..2 in
   this order -- confirm against the enum definition. */
42 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
/* Node allocator installed on the encodings hash by init_encodings().
   The context argument is unused.  The result of malloc() is returned
   unchanged, so it may be NULL. */
44 hnode_t *node_alloc(void *c __attribute__((unused)))
/* The operand of sizeof is not evaluated, so this is not a recursive call:
   it only yields the size of the type node_alloc() points to (hnode_t). */
46 return (hnode_t *)malloc(sizeof *node_alloc(NULL));
/* Node deallocator installed on the encodings hash by init_encodings().
   The context argument is unused. */
49 void node_free(hnode_t *n, void *c __attribute__((unused)))
/* Keys are heap strings built in add_encoding(), so they are released here.
   The cast presumably drops a const qualifier on the return type of
   hnode_getkey() -- confirm against the hash header. */
51 free((void*)hnode_getkey(n));
/* Register one encoding mapping in the encodings hash.

   gedcom_n   encoding name as it appears in the configuration file
   charwidth  character-width suffix; it has to match an entry of
              charwidth_string for get_encoding() to find the mapping
   iconv_n    encoding name associated with that pair; the value buffer is
              sized for it (the copy itself is not among the visible
              lines -- presumably it is copied into val)

   The hash key has the form "<gedcom_n>(<charwidth>)".  A key that is
   already present is reported with a warning and the new entry is
   ignored. */
56 void add_encoding(const char *gedcom_n, const char* charwidth,
/* +3 leaves room for the two parentheses and the terminating NUL. */
61 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
62 val = (char *) malloc(strlen(iconv_n) + 1);
65 /* sprintf is safe here (malloc'ed before) */
66 sprintf(key, "%s(%s)", gedcom_n, charwidth);
69 if (hash_lookup(encodings, key)) {
70 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
76 hash_alloc_insert(encodings, key, val);
/* Look up the value registered for a GEDCOM encoding name and character
   width.

   gedcom_n  encoding name to look up
   enc       character width; used directly as an index into
             charwidth_string (no range check is visible here)

   Returns the value stored in the encodings hash under the key
   "<gedcom_n>(<charwidth_string[enc]>)".  When there is no such entry an
   error is reported through gedcom_error(); open_conv_to_internal() tests
   the result against NULL for that case.
   NOTE(review): the release of the temporary key is not among the visible
   lines -- confirm that it is freed on both paths. */
83 char* get_encoding(const char* gedcom_n, ENCODING enc)
/* Same key layout as in add_encoding(): +3 covers "(", ")" and the NUL. */
88 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
91 /* sprintf is safe here (malloc'ed before) */
92 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
94 node = hash_lookup(encodings, key);
97 return hnode_get(node);
100 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
/* "GCONV_PATH=..." string built by update_gconv_search_path() and handed to
   putenv(); released in cleanup_encodings(). */
110 static char *new_gconv_path;
/* Exit-time cleanup, registered with atexit() by init_encodings(): releases
   the encodings hash and the environment string owned by this file. */
112 void cleanup_encodings()
114 hash_free(encodings);
115 /* Clean up environment */
/* NOTE(review): putenv() with a bare name (no '=') removes the variable on
   glibc; that is an extension, unsetenv() is the portable spelling. */
116 putenv(GCONV_SEARCH_PATH);
/* putenv() makes the caller's string part of the environment instead of
   copying it, so the string may only be freed after the variable has been
   taken out of the environment above. */
118 free(new_gconv_path);
121 /* Let function be called before main() */
/* The constructor attribute is a GCC extension.  Per the note that follows
   in this file, an environment change made this early does not always
   survive until main(), so the same function is also meant to be called
   from gedcom_init(). */
122 void update_gconv_search_path() __attribute__ ((constructor));
126 The environment variable GCONV_PATH has to be adjusted before the very
127 first call of iconv_open. For the most general case, it means that we
128 have to make our own constructor here (in case some of the other library
129 constructors would use iconv_open).
131 However, it looks like a change of an environment variable in a constructor
132 doesn't always survive until the main() function. This is the case if
133 the environment variable is a new one, for which there was no room yet
134 in the initial environment. The initial environment is located on the
135 stack, but when variables are added, it is moved to the heap (to be able
136 to grow). Now, the main function takes again the one from the stack, not
137 from the heap, so changes are lost.
139 For this, the function below will also be called in gedcom_init(), which
140 needs to be called as early as possible in the program.
/* Make sure PKGDATADIR is on the gconv module search path.

   Nothing is changed when GCONV_PATH already contains PKGDATADIR.  The
   check is a plain substring test, so a longer directory name that merely
   contains PKGDATADIR also counts as present.  Otherwise GCONV_PATH is set
   to PKGDATADIR, or to "<old value>:PKGDATADIR" when it already had a
   value.  The new string is kept in new_gconv_path because it stays in use
   by the environment after putenv().
   NOTE(review): no release of an earlier new_gconv_path is visible before
   it is reassigned, which matters because this function can run more than
   once -- confirm. */
143 void update_gconv_search_path()
146 /* Add gedcom data directory to gconv search path */
147 gconv_path = getenv(GCONV_SEARCH_PATH);
148 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
149 if (gconv_path == NULL) {
150 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
154 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
157 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
162 sprintf(new_gconv_path, "%s=%s:%s",
163 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
166 /* Ignore failures of putenv (can't do anything about it anyway) */
167 putenv(new_gconv_path);
/* Allocation failure is reported directly on stderr; the condition that
   selects this branch is not among the visible lines. */
169 fprintf(stderr, "Could not allocate memory at %s, %d\n",
/* Load the encoding table from the configuration file.

   The body is guarded by encodings == NULL, so only the first call does
   any work.  Each data line of the file holds three whitespace-separated
   fields, which are passed in order to add_encoding() as gedcom_n,
   charwidth and iconv_n.  Lines starting with '#' and empty lines are
   skipped. */
176 void init_encodings()
178 if (encodings == NULL) {
/* All four buffers have the same size, so the unbounded %s conversions in
   the sscanf() call below cannot write more into a field than one full
   line holds. */
180 char buffer[MAXBUF + 1];
181 char gedcom_n[MAXBUF + 1];
182 char charwidth[MAXBUF + 1];
183 char iconv_n[MAXBUF + 1];
185 if (atexit(cleanup_encodings) != 0) {
186 gedcom_warning(_("Could not register encoding cleanup function"));
/* The custom allocator pair lets node_free() release the heap-allocated
   keys when the hash is torn down. */
189 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
190 hash_set_allocator(encodings, node_alloc, node_free, NULL);
192 /* Open gedcom configuration file and read */
193 in = fopen(ENCODING_CONF_FILE, "r");
/* Second location: the package data directory.
   NOTE(review): the declaration and size of path are not among the
   visible lines, and sprintf() does not bound the write -- confirm that
   path is large enough for PKGDATADIR plus the file name. */
196 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
197 in = fopen(path, "r");
/* NOTE(review): the messages below always name ENCODING_CONF_FILE, even
   when the copy under PKGDATADIR was the one tried last. */
200 gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
201 ENCODING_CONF_FILE, strerror(errno));
205 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* fgets() keeps the newline, so a line without one did not fit in buffer.
   NOTE(review): a final line without a trailing newline is reported the
   same way, and an empty string from fgets() (a line starting with a NUL
   byte) would make strlen(buffer) - 1 wrap around. */
206 if (buffer[strlen(buffer) - 1] != '\n') {
207 gedcom_error(_("Line too long in encoding configuration file '%s'"),
211 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
212 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
213 add_encoding(gedcom_n, charwidth, iconv_n);
216 gedcom_error(_("Missing data in encoding configuration file '%s'"),
222 if (fclose(in) != 0) {
223 gedcom_warning(_("Error closing file '%s': %s"),
224 ENCODING_CONF_FILE, strerror(errno));
/* NOTE(review): the body is not among the visible lines.  Presumably it
   stores enc in the_enc, which open_conv_to_internal() passes on to
   get_encoding() -- confirm. */
230 void set_encoding_width(ENCODING enc)
/* Input staging buffer for to_internal(): new input is appended behind the
   bytes that the previous iconv() call left unconverted.
   NOTE(review): the append in to_internal() shows no bounds check against
   this size -- confirm that callers never exceed it. */
235 static char conv_buf[MAXGEDCLINELEN * 2];
/* Number of pending (not yet converted) bytes at the start of conv_buf. */
236 static size_t conv_buf_size;
/* Open an iconv context that converts from the given GEDCOM encoding to
   INTERNAL_ENCODING, replacing the currently open context on success.

   fromcode  GEDCOM encoding name; together with the_enc it selects the
             iconv encoding name through get_encoding()

   Returns non-zero when a new context was opened and 0 otherwise.  On
   failure cd_to_internal keeps its previous value. */
238 int open_conv_to_internal(const char* fromcode)
240 iconv_t new_cd_to_internal;
241 const char *encoding = get_encoding(fromcode, the_enc);
242 if (encoding == NULL) {
243 new_cd_to_internal = (iconv_t) -1;
/* Discard input left over from an earlier conversion.
   NOTE(review): a matching reset of conv_buf_size is not among the
   visible lines -- confirm. */
246 memset(conv_buf, 0, sizeof(conv_buf));
248 new_cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
249 if (new_cd_to_internal == (iconv_t) -1) {
250 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
251 encoding, strerror(errno));
/* Swap descriptors only once the new one is known to be valid, closing the
   old one first if there was one. */
254 if (new_cd_to_internal != (iconv_t) -1) {
255 if (cd_to_internal != (iconv_t) -1)
256 iconv_close(cd_to_internal);
257 cd_to_internal = new_cd_to_internal;
259 return (new_cd_to_internal != (iconv_t) -1);
/* Close the current conversion context and mark it as not open.  A failing
   iconv_close() is reported as a warning only.
   NOTE(review): unlike open_conv_to_internal(), no check for
   cd_to_internal == (iconv_t) -1 is visible before iconv_close(), so
   calling this without an open context presumably ends in the warning
   path -- confirm. */
262 void close_conv_to_internal()
264 if (iconv_close(cd_to_internal) != 0) {
265 gedcom_warning(_("Error closing conversion context: %s"), strerror(errno));
267 cd_to_internal = (iconv_t) -1;
270 char* to_internal(const char* str, size_t len,
271 char* output_buffer, size_t out_len)
274 size_t outsize = out_len;
275 char *wrptr = output_buffer;
276 char *rdptr = conv_buf;
277 char *retval = output_buffer;
278 /* set up input buffer (concatenate to what was left previous time) */
279 /* can't use strcpy, because possible null bytes from unicode */
280 memcpy(conv_buf + conv_buf_size, str, len);
281 conv_buf_size += len;
282 /* set up output buffer (empty it) */
283 memset(output_buffer, 0, out_len);
284 /* do the conversion */
285 res = iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
286 if (res == (size_t)-1) {
287 if (errno == EILSEQ) {
288 /* restart from an empty state and return NULL */
289 iconv(cd_to_internal, NULL, NULL, NULL, NULL);
294 else if (errno == EINVAL) {
295 /* Do nothing, leave it to next iteration */
298 gedcom_error(_("Error in converting characters: %s"), strerror(errno));
301 /* then shift what is left over to the head of the input buffer */
302 memmove(conv_buf, rdptr, conv_buf_size);
303 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);