1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 #include "gedcom_internal.h"
/* File name of the encoding configuration file. init_encodings() opens it by
   this bare name and also builds a PKGDATADIR-prefixed path for a second
   fopen() attempt. */
34 #define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that update_gconv_search_path() reads and
   extends with PKGDATADIR. */
35 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* Conversion descriptor currently in use; (iconv_t) -1 marks "none open". */
38 static iconv_t cd_to_internal = (iconv_t) -1;
/* Character width passed to get_encoding() by open_conv_to_internal(). */
39 static ENCODING the_enc = ONE_BYTE;
/* Hash of known encodings, keyed by "gedcom_n(charwidth)" strings; stays NULL
   until init_encodings() creates it. */
40 static hash_t *encodings = NULL;
/* Character width names, indexed by an ENCODING value when get_encoding()
   builds its lookup key. */
42 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
/* Node allocation callback; init_encodings() installs it on the encodings
   hash via hash_set_allocator(). The context argument is unused.
   The sizeof operand is the type of node_alloc's own result dereferenced,
   i.e. one hnode_t; sizeof does not evaluate its operand, so there is no
   recursive call. Returns whatever malloc() returns (NULL on failure). */
44 hnode_t *node_alloc(void *c __attribute__((unused)))
46 return (hnode_t *)malloc(sizeof *node_alloc(NULL));
/* Node release callback; init_encodings() installs it together with
   node_alloc. The context argument is unused.
   NOTE(review): only the statement that frees the key is visible in this
   excerpt; whether the stored value and the node itself are released here
   must be confirmed against the full file. */
49 void node_free(hnode_t *n, void *c __attribute__((unused)))
/* The key was malloc'ed in add_encoding(); the cast presumably strips a const
   qualifier from hnode_getkey()'s return type -- confirm. */
51 free((void*)hnode_getkey(n));
/* Registers one encoding in the encodings hash under the key
   "gedcom_n(charwidth)". The rest of the parameter list and the branch
   structure are not visible in this excerpt; the body also uses iconv_n. */
56 void add_encoding(const char *gedcom_n, const char* charwidth,
/* +3 leaves room for '(' , ')' and the terminating NUL of the key. */
61 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
/* The value buffer is sized for a copy of iconv_n plus its NUL. */
62 val = (char *) malloc(strlen(iconv_n) + 1);
65 /* sprintf is safe here (malloc'ed before) */
66 sprintf(key, "%s(%s)", gedcom_n, charwidth);
/* A key that is already present only produces a warning. */
69 if (hash_lookup(encodings, key)) {
70 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
/* NOTE(review): presumably the non-duplicate branch; ownership of key and val
   then passes to the hash (node_free() frees the key) -- confirm. */
76 hash_alloc_insert(encodings, key, val);
/* Looks up the entry registered for the GEDCOM encoding name gedcom_n at
   character width enc. Returns the value stored in the hash node when one is
   found; otherwise an error is reported through gedcom_error().
   NOTE(review): the return value of the not-found path and the release of the
   temporary key are not visible in this excerpt. */
83 char* get_encoding(const char* gedcom_n, ENCODING enc)
/* Same key layout as add_encoding(): name, '(' , width string, ')' , NUL. */
88 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
91 /* sprintf is safe here (malloc'ed before) */
92 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
94 node = hash_lookup(encodings, key);
/* The returned pointer is the value held by the hash, not a copy. */
97 return hnode_get(node);
100 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
/* Heap buffer holding the "GCONV_PATH=..." string that
   update_gconv_search_path() hands to putenv(); released in
   cleanup_gconv_path(). It is kept at file scope because putenv() keeps using
   the caller's string instead of copying it. */
110 static char *new_gconv_path;
/* Exit handler registered with atexit() by init_encodings(); hands the
   encodings hash to hash_free(). */
112 void cleanup_encodings()
114 hash_free(encodings);
/* Exit handler registered with atexit() by update_gconv_search_path(): undoes
   the environment change and releases the string that backed it. */
117 void cleanup_gconv_path()
119 /* Clean up environment */
/* NOTE(review): the argument is the bare variable name without '='. glibc
   documents that as removing the variable; other C libraries may behave
   differently -- confirm the targeted platforms. */
120 putenv(GCONV_SEARCH_PATH);
/* Freed only after the putenv() call above, while the environment may still
   have been pointing at this buffer. */
122 free(new_gconv_path);
125 /* Let function be called before main() */
/* The constructor attribute is a compiler extension, written here in
   GCC-style __attribute__ syntax. */
126 void update_gconv_search_path() __attribute__ ((constructor));
130 The environment variable GCONV_PATH has to be adjusted before the very
131 first call of iconv_open. For the most general case, it means that we
132 have to make our own constructor here (in case some of the other library
133 constructors would use iconv_open).
135 However, it looks like a change of an environment variable in a constructor
136 doesn't always survive until the main() function. This is the case if
137 the environment variable is a new one, for which there was no room yet
138 in the initial environment. The initial environment is located on the
139 stack, but when variables are added, it is moved to the heap (to be able
140 to grow). Now, the main function takes again the one from the stack, not
141 from the heap, so changes are lost.
143 For this reason, the function below is also called from gedcom_init(), which
144 therefore needs to be called as early as possible in the program.
/* Makes sure PKGDATADIR is part of the GCONV_PATH environment variable.
   Nothing is changed when the current value already contains PKGDATADIR.
   Several lines of the body (allocation size expressions, else branches,
   closing braces) are not visible in this excerpt. */
147 void update_gconv_search_path()
150 /* Add gedcom data directory to gconv search path */
151 gconv_path = getenv(GCONV_SEARCH_PATH);
/* strstr() is a plain substring test, so any existing value that contains
   PKGDATADIR anywhere counts as "already present". */
152 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
/* Variable not set at all: the new value is just PKGDATADIR. */
153 if (gconv_path == NULL) {
154 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
158 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
/* Variable already set: PKGDATADIR is appended after a ':' separator. */
161 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
166 sprintf(new_gconv_path, "%s=%s:%s",
167 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
170 /* Ignore failures of putenv (can't do anything about it anyway) */
171 putenv(new_gconv_path);
/* Reported straight to stderr rather than through gedcom_warning();
   NOTE(review): presumably the malloc-failure branch -- confirm. */
173 fprintf(stderr, "Could not allocate memory at %s, %d\n",
/* init_called is defined outside this excerpt; the cleanup handler is only
   registered when it is non-zero. */
178 if (init_called && atexit(cleanup_gconv_path) != 0) {
179 gedcom_warning(_("Could not register path cleanup function"));
/* Builds the encodings hash from the configuration file ENCODING_CONF_FILE.
   The work is guarded by encodings == NULL, so it is skipped once the hash
   exists. Declarations of in and path, and several branch lines, are not
   visible in this excerpt. */
183 void init_encodings()
185 if (encodings == NULL) {
/* All four buffers have the same size, so a token that sscanf() extracts from
   buffer cannot be longer than its destination. */
187 char buffer[MAXBUF + 1];
188 char gedcom_n[MAXBUF + 1];
189 char charwidth[MAXBUF + 1];
190 char iconv_n[MAXBUF + 1];
/* A failed registration is only warned about. */
192 if (atexit(cleanup_encodings) != 0) {
193 gedcom_warning(_("Could not register encoding cleanup function"));
/* NOTE(review): the NULL arguments presumably select the hash library's
   default comparison and hashing functions -- confirm. */
196 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
/* Route node allocation and release through node_alloc()/node_free(). */
197 hash_set_allocator(encodings, node_alloc, node_free, NULL);
199 /* Open gedcom configuration file and read */
200 in = fopen(ENCODING_CONF_FILE, "r");
/* Second location: the same file name below PKGDATADIR.
   NOTE(review): how path is sized is not visible here -- confirm that it can
   hold PKGDATADIR, '/', the file name and a NUL. */
203 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
204 in = fopen(path, "r");
207 gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
208 ENCODING_CONF_FILE, strerror(errno));
212 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* A line without a trailing newline is treated as longer than the buffer.
   NOTE(review): this index underflows if strlen(buffer) is 0, e.g. for a line
   starting with a NUL byte; a final line lacking '\n' is also reported as too
   long -- confirm both are acceptable. */
213 if (buffer[strlen(buffer) - 1] != '\n') {
214 gedcom_error(_("Line too long in encoding configuration file '%s'"),
/* Lines starting with '#' and lines consisting only of a newline are
   skipped. */
218 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
/* A data line needs three whitespace-separated fields. */
219 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
220 add_encoding(gedcom_n, charwidth, iconv_n);
223 gedcom_error(_("Missing data in encoding configuration file '%s'"),
229 if (fclose(in) != 0) {
230 gedcom_warning(_("Error closing file '%s': %s"),
231 ENCODING_CONF_FILE, strerror(errno));
/* Takes the character width to use for later conversions.
   NOTE(review): the body is not visible in this excerpt; presumably it stores
   enc in the_enc, which open_conv_to_internal() passes to get_encoding() --
   confirm. */
237 void set_encoding_width(ENCODING enc)
/* Input staging buffer for to_internal(): holds bytes left unconverted by the
   previous call followed by the newly supplied input. */
242 static char conv_buf[MAXGEDCLINELEN * 2];
/* Number of bytes in conv_buf still waiting to be converted; iconv() counts it
   down as input is consumed. */
243 static size_t conv_buf_size;
/* Opens a conversion descriptor from the encoding registered for fromcode (at
   the current width the_enc) to INTERNAL_ENCODING.
   Returns non-zero when a new descriptor was opened, 0 otherwise. On failure
   the previously installed descriptor is left as it was. Some lines of the
   body are not visible in this excerpt. */
245 int open_conv_to_internal(const char* fromcode)
247 iconv_t new_cd_to_internal;
248 const char *encoding = get_encoding(fromcode, the_enc);
/* Unknown encoding: get_encoding() has already reported the error. */
249 if (encoding == NULL) {
250 new_cd_to_internal = (iconv_t) -1;
/* Clear the staging buffer contents before opening the new descriptor. */
253 memset(conv_buf, 0, sizeof(conv_buf));
255 new_cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
256 if (new_cd_to_internal == (iconv_t) -1) {
257 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
258 encoding, strerror(errno));
/* Swap descriptors only after the new one is known to be valid, closing the
   old one if there was one. */
261 if (new_cd_to_internal != (iconv_t) -1) {
262 if (cd_to_internal != (iconv_t) -1)
263 iconv_close(cd_to_internal);
264 cd_to_internal = new_cd_to_internal;
266 return (new_cd_to_internal != (iconv_t) -1);
/* Closes the current conversion descriptor and marks it as not open. A
   failing iconv_close() is only warned about.
   NOTE(review): no check for cd_to_internal == (iconv_t) -1 is visible before
   the iconv_close() call -- confirm callers only invoke this after a
   successful open. */
269 void close_conv_to_internal()
271 if (iconv_close(cd_to_internal) != 0) {
272 gedcom_warning(_("Error closing conversion context: %s"), strerror(errno));
274 cd_to_internal = (iconv_t) -1;
/* Converts len bytes at str to the internal encoding, writing into
   output_buffer (capacity out_len bytes). Input that iconv() does not consume
   stays in conv_buf and is prepended to the input of the next call.
   retval starts out as output_buffer; the final return statement and the code
   that changes retval on error are not visible in this excerpt. */
277 char* to_internal(const char* str, size_t len,
278 char* output_buffer, size_t out_len)
281 size_t outsize = out_len;
282 char *wrptr = output_buffer;
283 char *rdptr = conv_buf;
284 char *retval = output_buffer;
285 /* set up input buffer (concatenate to what was left previous time) */
286 /* can't use strcpy, because possible null bytes from unicode */
/* NOTE(review): no visible check that conv_buf_size + len fits within
   sizeof(conv_buf) -- confirm callers bound len. */
287 memcpy(conv_buf + conv_buf_size, str, len);
288 conv_buf_size += len;
289 /* set up output buffer (empty it) */
290 memset(output_buffer, 0, out_len);
291 /* do the conversion */
/* iconv() advances rdptr/wrptr and decrements conv_buf_size/outsize, so after
   the call conv_buf_size is the number of unconsumed input bytes. */
292 res = iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
293 if (res == (size_t)-1) {
/* EILSEQ: invalid input sequence. */
294 if (errno == EILSEQ) {
295 /* restart from an empty state and return NULL */
296 iconv(cd_to_internal, NULL, NULL, NULL, NULL);
/* EINVAL: the input ends in the middle of a multibyte sequence. */
301 else if (errno == EINVAL) {
302 /* Do nothing, leave it to next iteration */
305 gedcom_error(_("Error in converting characters: %s"), strerror(errno));
308 /* then shift what is left over to the head of the input buffer */
/* memmove because source and destination both lie inside conv_buf. */
309 memmove(conv_buf, rdptr, conv_buf_size);
/* Zero everything behind the leftover bytes. */
310 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);