/* Conversion between encodings.
   Copyright (C) 2001,2002 The Genes Development Team
   This file is part of the Gedcom parser library.
   Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.

   The Gedcom parser library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The Gedcom parser library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the Gedcom parser library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
29 #include "gedcom_internal.h"
34 #define ENCODING_CONF_FILE "gedcom.enc"
35 #define GCONV_SEARCH_PATH "GCONV_PATH"
38 static iconv_t cd_to_internal = (iconv_t) -1;
39 static ENCODING the_enc = ONE_BYTE;
40 static hash_t *encodings = NULL;
42 char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
44 hnode_t *node_alloc(void *c __attribute__((unused)))
46 return (hnode_t *)malloc(sizeof *node_alloc(NULL));
49 void node_free(hnode_t *n, void *c __attribute__((unused)))
51 free((void*)hnode_getkey(n));
56 void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n)
60 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
61 val = (char *) malloc(strlen(iconv_n) + 1);
64 /* sprintf is safe here (malloc'ed before) */
65 sprintf(key, "%s(%s)", gedcom_n, charwidth);
68 if (hash_lookup(encodings, key)) {
69 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
75 hash_alloc_insert(encodings, key, val);
82 char* get_encoding(char* gedcom_n, ENCODING enc)
87 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
90 /* sprintf is safe here (malloc'ed before) */
91 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
93 node = hash_lookup(encodings, key);
96 return hnode_get(node);
99 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
109 void cleanup_encodings()
111 hash_free(encodings);
114 /* Let function be called before main() */
115 void update_gconv_search_path() __attribute__ ((constructor));
119 The environment variable GCONV_PATH has to be adjusted before the very
120 first call of iconv_open. For the most general case, it means that we
121 have to make our own constructor here (in case some of the other library
122 constructors would use iconv_open).
124 However, it looks like a change of an environment variable in a constructor
125 doesn't always survive until the main() function. This is the case if
126 the environment variable is a new one, for which there was no room yet
127 in the initial environment. The initial environment is located on the
128 stack, but when variables are added, it is moved to the heap (to be able
129 to grow). Now, the main function takes again the one from the stack, not
130 from the heap, so changes are lost.
132 For this, the function below will also be called in gedcom_init(), which
133 needs to be called as early as possible in the program.
136 void update_gconv_search_path()
139 /* Add gedcom data directory to gconv search path */
140 gconv_path = getenv(GCONV_SEARCH_PATH);
141 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
142 char *new_gconv_path;
143 if (gconv_path == NULL) {
144 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
148 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
151 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
156 sprintf(new_gconv_path, "%s=%s:%s",
157 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
160 /* Ignore failures of putenv (can't do anything about it anyway) */
161 putenv(new_gconv_path);
163 fprintf(stderr, "Could not allocate memory at %s, %d\n",
170 void init_encodings()
172 if (encodings == NULL) {
174 char buffer[MAXBUF + 1];
175 char gedcom_n[MAXBUF + 1];
176 char charwidth[MAXBUF + 1];
177 char iconv_n[MAXBUF + 1];
179 if (atexit(cleanup_encodings) != 0) {
180 gedcom_warning(_("Could not register encoding cleanup function"));
183 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
184 hash_set_allocator(encodings, node_alloc, node_free, NULL);
186 /* Open gedcom configuration file and read */
187 in = fopen(ENCODING_CONF_FILE, "r");
190 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
191 in = fopen(path, "r");
194 gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
195 ENCODING_CONF_FILE, strerror(errno));
199 while (fgets(buffer, sizeof(buffer), in) != NULL) {
200 if (buffer[strlen(buffer) - 1] != '\n') {
201 gedcom_error(_("Line too long in encoding configuration file '%s'"),
205 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
206 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
207 add_encoding(gedcom_n, charwidth, iconv_n);
210 gedcom_error(_("Missing data in encoding configuration file '%s'"),
216 if (fclose(in) != 0) {
217 gedcom_warning(_("Error closing file '%s': %s"),
218 ENCODING_CONF_FILE, strerror(errno));
224 void set_encoding_width(ENCODING enc)
229 static char conv_buf[MAXGEDCLINELEN * 2];
230 static size_t conv_buf_size;
232 int open_conv_to_internal(char* fromcode)
234 char *encoding = get_encoding(fromcode, the_enc);
235 if (cd_to_internal != (iconv_t) -1)
236 iconv_close(cd_to_internal);
237 if (encoding == NULL) {
238 cd_to_internal = (iconv_t) -1;
241 memset(conv_buf, 0, sizeof(conv_buf));
243 cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
244 if (cd_to_internal == (iconv_t) -1) {
245 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
246 encoding, strerror(errno));
249 return (cd_to_internal != (iconv_t) -1);
252 void close_conv_to_internal()
254 if (iconv_close(cd_to_internal) != 0) {
255 gedcom_warning(_("Error closing conversion context: %s"), strerror(errno));
257 cd_to_internal = (iconv_t) -1;
260 char* to_internal(char* str, size_t len,
261 char* output_buffer, size_t out_len)
264 size_t outsize = out_len;
265 char *wrptr = output_buffer;
266 char *rdptr = conv_buf;
267 char *retval = output_buffer;
268 /* set up input buffer (concatenate to what was left previous time) */
269 /* can't use strcpy, because possible null bytes from unicode */
270 memcpy(conv_buf + conv_buf_size, str, len);
271 conv_buf_size += len;
272 /* set up output buffer (empty it) */
273 memset(output_buffer, 0, out_len);
274 /* do the conversion */
275 res = iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
276 if (res == (size_t)-1) {
277 if (errno == EILSEQ) {
278 /* restart from an empty state and return NULL */
279 iconv(cd_to_internal, NULL, NULL, NULL, NULL);
284 else if (errno == EINVAL) {
285 /* Do nothing, leave it to next iteration */
288 gedcom_error(_("Error in converting characters: %s"), strerror(errno));
291 /* then shift what is left over to the head of the input buffer */
292 memmove(conv_buf, rdptr, conv_buf_size);
293 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);