1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 #include "gedcom_internal.h"
/* File name of the encoding configuration file read by init_encodings(),
   which opens it by this bare name and also under PKGDATADIR. */
34 #define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that update_gconv_search_path()
   reads and extends. */
35 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* iconv descriptor used to convert input to INTERNAL_ENCODING;
   (iconv_t) -1 marks "no conversion context open". */
38 static iconv_t cd_to_internal = (iconv_t) -1;
/* Character width passed to get_encoding() by open_conv_to_internal(). */
39 static ENCODING the_enc = ONE_BYTE;
/* Hash table filled by add_encoding(): keys have the form
   "<gedcom name>(<charwidth>)".  NULL until init_encodings() creates it. */
40 static hash_t *encodings = NULL;
/* Text form of each character width, indexed by an ENCODING value
   (used by get_encoding() to build the lookup key). */
42 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
44 hnode_t *node_alloc(void *c __attribute__((unused)))
46 return (hnode_t *)malloc(sizeof *node_alloc(NULL));
/* Node deallocator installed on the encodings hash by init_encodings().
   Releases the key string of node n (add_encoding() malloc's every key).
   The context argument c is not used.
   NOTE(review): only the key release is visible in this extract; the
   stored value and the node itself are presumably freed on the lines
   that are not shown - confirm against the full file. */
49 void node_free(hnode_t *n, void *c __attribute__((unused)))
51 free((void*)hnode_getkey(n));
/* Registers one encoding mapping in the `encodings` hash.
   The key is built as "<gedcom_n>(<charwidth>)"; the value buffer is
   sized to hold a copy of iconv_n.  A key that is already present is
   reported with a "Duplicate entry" warning; otherwise the pair is
   inserted with hash_alloc_insert().
   NOTE(review): the remainder of the signature, the declarations of key
   and val, the allocation-failure handling and the copy into val are on
   lines not visible in this extract. */
56 void add_encoding(const char *gedcom_n, const char* charwidth,
/* +3 leaves room for '(', ')' and the terminating NUL of "%s(%s)" */
61 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
62 val = (char *) malloc(strlen(iconv_n) + 1);
65 /* sprintf is safe here (malloc'ed before) */
66 sprintf(key, "%s(%s)", gedcom_n, charwidth);
69 if (hash_lookup(encodings, key)) {
70 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
76 hash_alloc_insert(encodings, key, val);
/* Looks up the value registered for GEDCOM encoding name gedcom_n at
   character width enc.  The lookup key is rebuilt in the same
   "<name>(<charwidth>)" form that add_encoding() uses, taking the width
   text from charwidth_string[enc].  When a node is found, the value
   stored in it is returned directly (not a copy); when none is found,
   "No encoding defined for ..." is reported through gedcom_error().
   NOTE(review): enc indexes charwidth_string[] with no range check
   visible here - confirm callers only pass valid ENCODING values.
   NOTE(review): the allocation check, the release of key and the
   failure return value are on lines not visible in this extract. */
83 char* get_encoding(const char* gedcom_n, ENCODING enc)
/* +3 leaves room for '(', ')' and the terminating NUL of "%s(%s)" */
88 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
91 /* sprintf is safe here (malloc'ed before) */
92 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
94 node = hash_lookup(encodings, key);
97 return hnode_get(node);
100 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
110 void cleanup_encodings()
112 hash_free(encodings);
115 /* Let function be called before main() */
/* The constructor attribute is a compiler extension: it asks the runtime
   to invoke the function during program start-up, ahead of main(). */
116 void update_gconv_search_path() __attribute__ ((constructor));
120 The environment variable GCONV_PATH has to be adjusted before the very
121 first call of iconv_open. For the most general case, it means that we
122 have to make our own constructor here (in case some of the other library
123 constructors would use iconv_open).
125 However, it looks like a change of an environment variable in a constructor
126 doesn't always survive until the main() function. This is the case if
127 the environment variable is a new one, for which there was no room yet
128 in the initial environment. The initial environment is located on the
129 stack, but when variables are added, it is moved to the heap (to be able
130 to grow). Now, the main function takes again the one from the stack, not
131 from the heap, so changes are lost.
133 For this, the function below will also be called in gedcom_init(), which
134 needs to be called as early as possible in the program.
/* Makes sure PKGDATADIR is part of the GCONV_PATH environment variable.
   If the variable is unset it becomes "GCONV_PATH=<PKGDATADIR>"; if it is
   set but does not mention PKGDATADIR, PKGDATADIR is appended after a
   ':' separator.  Nothing is changed when PKGDATADIR already occurs in
   the current value.
   NOTE(review): the declaration of gconv_path, the full malloc size
   expressions and the allocation-failure branch structure are on lines
   not visible in this extract. */
137 void update_gconv_search_path()
140 /* Add gedcom data directory to gconv search path */
141 gconv_path = getenv(GCONV_SEARCH_PATH);
/* strstr() is a plain substring test, so any value that merely contains
   the PKGDATADIR text (e.g. as a prefix of a longer directory name) is
   treated as "already present". */
142 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
143 char *new_gconv_path;
144 if (gconv_path == NULL) {
145 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
149 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
152 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
157 sprintf(new_gconv_path, "%s=%s:%s",
158 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
/* putenv() makes the passed string itself part of the environment, so
   new_gconv_path must stay allocated and is intentionally not freed. */
161 /* Ignore failures of putenv (can't do anything about it anyway) */
162 putenv(new_gconv_path);
/* Reported on stderr directly rather than via gedcom_warning()/gedcom_error() */
164 fprintf(stderr, "Could not allocate memory at %s, %d\n",
/* Builds the encodings hash from the encoding configuration file.
   Runs only while `encodings` is still NULL.  It registers
   cleanup_encodings() with atexit(), creates the hash with the
   node_alloc/node_free allocator pair, and reads ENCODING_CONF_FILE line
   by line.  Lines starting with '#' and empty lines are skipped; every
   other line must hold three whitespace-separated fields (GEDCOM name,
   character width, iconv name), which are passed to add_encoding().
   Over-long lines and lines with fewer than three fields are reported
   through gedcom_error(); a failing fclose() only produces a warning.
   NOTE(review): the declarations of in and path and the conditions
   around the two fopen() calls are on lines not visible in this extract;
   the PKGDATADIR attempt is presumably a fallback when the first open
   fails - confirm. */
171 void init_encodings()
173 if (encodings == NULL) {
175 char buffer[MAXBUF + 1];
176 char gedcom_n[MAXBUF + 1];
177 char charwidth[MAXBUF + 1];
178 char iconv_n[MAXBUF + 1];
/* Failure to register the exit handler is not fatal: only a warning */
180 if (atexit(cleanup_encodings) != 0) {
181 gedcom_warning(_("Could not register encoding cleanup function"));
/* NULL, NULL: no comparison or hash function is supplied to hash_create() */
184 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
185 hash_set_allocator(encodings, node_alloc, node_free, NULL);
187 /* Open gedcom configuration file and read */
188 in = fopen(ENCODING_CONF_FILE, "r");
/* NOTE(review): sprintf() into path is unbounded; the size of path is
   not visible here - confirm it can hold PKGDATADIR "/" ENCODING_CONF_FILE. */
191 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
192 in = fopen(path, "r");
195 gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
196 ENCODING_CONF_FILE, strerror(errno));
200 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* A chunk that does not end in '\n' did not fit in buffer.
   NOTE(review): a final line without trailing newline is also reported
   as "too long" by this test. */
201 if (buffer[strlen(buffer) - 1] != '\n') {
202 gedcom_error(_("Line too long in encoding configuration file '%s'"),
206 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
/* Unbounded %s is acceptable here: each destination array is as large
   as buffer, so no single field can exceed its destination. */
207 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
208 add_encoding(gedcom_n, charwidth, iconv_n);
211 gedcom_error(_("Missing data in encoding configuration file '%s'"),
217 if (fclose(in) != 0) {
218 gedcom_warning(_("Error closing file '%s': %s"),
219 ENCODING_CONF_FILE, strerror(errno));
/* NOTE(review): the body of this function is not visible in this extract.
   Presumably it stores enc in the_enc, the width that
   open_conv_to_internal() passes to get_encoding() - confirm. */
225 void set_encoding_width(ENCODING enc)
/* Input staging buffer for to_internal(): new input is appended to it and
   bytes iconv has not consumed are kept at its head for the next call. */
230 static char conv_buf[MAXGEDCLINELEN * 2];
/* Number of pending input bytes currently held in conv_buf. */
231 static size_t conv_buf_size;
/* Opens the iconv context that converts from the GEDCOM encoding
   fromcode, at the current character width the_enc, to INTERNAL_ENCODING.
   A context that is still open is closed first.  An iconv_open() failure
   is reported through gedcom_error().
   Returns non-zero when a conversion context is open afterwards and 0
   otherwise (no encoding registered for fromcode, or iconv_open() failed).
   NOTE(review): some lines of this function are not visible in this
   extract, including the else structure around the memset. */
233 int open_conv_to_internal(const char* fromcode)
235 const char *encoding = get_encoding(fromcode, the_enc);
/* The return value of this iconv_close() is not checked */
236 if (cd_to_internal != (iconv_t) -1)
237 iconv_close(cd_to_internal);
238 if (encoding == NULL) {
239 cd_to_internal = (iconv_t) -1;
/* Drop any input bytes left over from a previous conversion */
242 memset(conv_buf, 0, sizeof(conv_buf));
244 cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
245 if (cd_to_internal == (iconv_t) -1) {
246 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
247 encoding, strerror(errno));
250 return (cd_to_internal != (iconv_t) -1);
253 void close_conv_to_internal()
255 if (iconv_close(cd_to_internal) != 0) {
256 gedcom_warning(_("Error closing conversion context: %s"), strerror(errno));
258 cd_to_internal = (iconv_t) -1;
/* Converts len bytes of str to the internal encoding using the context
   opened by open_conv_to_internal(), writing into output_buffer (capacity
   out_len bytes, zero-filled before the conversion).
   The input is first appended to whatever conv_buf still holds from the
   previous call, so a multi-byte sequence split across two calls can be
   completed.  Bytes that iconv did not consume are moved to the head of
   conv_buf for the next call.
   NOTE(review): the declaration of res, the statements inside the EILSEQ
   branch after the reset, and the end of the function (including the
   return statement) are not visible in this extract; retval starts out
   as output_buffer and the EILSEQ comment says NULL is returned there. */
261 char* to_internal(const char* str, size_t len,
262 char* output_buffer, size_t out_len)
265 size_t outsize = out_len;
266 char *wrptr = output_buffer;
267 char *rdptr = conv_buf;
268 char *retval = output_buffer;
/* NOTE(review): no check is visible that conv_buf_size + len fits in
   sizeof(conv_buf) before the memcpy below - confirm the caller bounds
   len, otherwise this can write past the end of conv_buf. */
269 /* set up input buffer (concatenate to what was left previous time) */
270 /* can't use strcpy, because possible null bytes from unicode */
271 memcpy(conv_buf + conv_buf_size, str, len);
272 conv_buf_size += len;
273 /* set up output buffer (empty it) */
274 memset(output_buffer, 0, out_len);
/* iconv() advances rdptr/wrptr and decrements conv_buf_size/outsize, so
   afterwards conv_buf_size is the number of unconsumed input bytes. */
275 /* do the conversion */
276 res = iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
277 if (res == (size_t)-1) {
278 if (errno == EILSEQ) {
279 /* restart from an empty state and return NULL */
280 iconv(cd_to_internal, NULL, NULL, NULL, NULL);
285 else if (errno == EINVAL) {
286 /* Do nothing, leave it to next iteration */
289 gedcom_error(_("Error in converting characters: %s"), strerror(errno));
/* memmove, not memcpy: source and destination both lie inside conv_buf
   and may overlap.  The tail is then zeroed. */
292 /* then shift what is left over to the head of the input buffer */
293 memmove(conv_buf, rdptr, conv_buf_size);
294 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);