1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 #include "gedcom_internal.h"
/* Name of the encoding configuration file.  init_encodings() tries it
   in the current directory first and then below PKGDATADIR. */
#define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that update_gconv_search_path()
   extends with PKGDATADIR. */
#define GCONV_SEARCH_PATH "GCONV_PATH"

/* Conversion descriptor towards INTERNAL_ENCODING; (iconv_t) -1 means that
   no conversion is currently open. */
static iconv_t cd_to_internal = (iconv_t) -1;
/* Width/byte-order selector that open_conv_to_internal() passes to
   get_encoding(). */
static ENCODING the_enc = ONE_BYTE;
/* Hash table keyed by "NAME(WIDTH)" strings whose values are passed to
   iconv_open() as encoding names; NULL until init_encodings() creates it. */
static hash_t *encodings = NULL;

/* Width suffixes used when building lookup keys; indexed by an ENCODING
   value (see get_encoding()). */
char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
43 hnode_t *node_alloc(void *c __attribute__((unused)))
45 return malloc(sizeof *node_alloc(NULL));
48 void node_free(hnode_t *n, void *c __attribute__((unused)))
50 free((void*)hnode_getkey(n));
55 void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n)
59 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
60 val = (char *) malloc(strlen(iconv_n) + 1);
62 /* sprintf is safe here (malloc'ed before) */
63 sprintf(key, "%s(%s)", gedcom_n, charwidth);
66 if (hash_lookup(encodings, key)) {
67 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
73 hash_alloc_insert(encodings, key, val);
77 char* get_encoding(char* gedcom_n, ENCODING enc)
82 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
83 /* sprintf is safe here (malloc'ed before) */
84 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
86 node = hash_lookup(encodings, key);
89 return hnode_get(node);
92 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
/* Exit-time hook: init_encodings() registers this function with atexit().
   NOTE(review): the body is not visible in this excerpt -- presumably it
   releases the 'encodings' hash table; confirm against the full file. */
void cleanup_encodings()
102 /* Let function be called before main() */
103 void update_gconv_search_path() __attribute__ ((constructor));
107 The environment variable GCONV_PATH has to be adjusted before the very
108 first call of iconv_open. For the most general case, it means that we
109 have to make our own constructor here (in case some of the other library
110 constructors would use iconv_open).
112 However, it looks like a change of an environment variable in a constructor
113 doesn't always survive until the main() function. This is the case if
114 the environment variable is a new one, for which there was no room yet
115 in the initial environment. The initial environment is located on the
116 stack, but when variables are added, it is moved to the heap (to be able
117 to grow). Now, the main function takes again the one from the stack, not
118 from the heap, so changes are lost.
120 For this, the function below will also be called in gedcom_init(), which
121 needs to be called as early as possible in the program.
124 void update_gconv_search_path()
127 /* Add gedcom data directory to gconv search path */
128 gconv_path = getenv(GCONV_SEARCH_PATH);
129 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
130 char *new_gconv_path;
131 if (gconv_path == NULL) {
132 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
135 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
138 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
142 sprintf(new_gconv_path, "%s=%s:%s",
143 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
145 /* Ignore failures of putenv (can't do anything about it anyway) */
146 putenv(new_gconv_path);
150 void init_encodings()
152 if (encodings == NULL) {
154 char buffer[MAXBUF + 1];
155 char gedcom_n[MAXBUF + 1];
156 char charwidth[MAXBUF + 1];
157 char iconv_n[MAXBUF + 1];
159 atexit(cleanup_encodings);
161 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
162 hash_set_allocator(encodings, node_alloc, node_free, NULL);
164 /* Open gedcom configuration file and read */
165 in = fopen(ENCODING_CONF_FILE, "r");
168 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
169 in = fopen(path, "r");
172 gedcom_warning(_("Could not open encoding configuration file '%s'"),
177 while (fgets(buffer, sizeof(buffer), in) != NULL) {
178 if (buffer[strlen(buffer) - 1] != '\n') {
179 gedcom_error(_("Line too long in encoding configuration file '%s'"),
183 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
184 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
185 add_encoding(gedcom_n, charwidth, iconv_n);
188 gedcom_error(_("Missing data in encoding configuration file '%s'"),
/* Select the character width / byte order used for encoding lookups.
   NOTE(review): the body is not visible in this excerpt -- presumably it
   stores 'enc' in the file-level 'the_enc', which open_conv_to_internal()
   passes to get_encoding(); confirm against the full file. */
void set_encoding_width(ENCODING enc)
/* Input staging buffer for to_internal(): new input is appended behind
   the bytes that the previous iconv() call left unconverted, and the
   leftover is moved back to the head of the buffer afterwards. */
static char conv_buf[MAXGEDCLINELEN * 2];
/* Number of pending (not yet converted) bytes at the head of conv_buf. */
static size_t conv_buf_size;
207 int open_conv_to_internal(char* fromcode)
209 char *encoding = get_encoding(fromcode, the_enc);
210 if (cd_to_internal != (iconv_t) -1)
211 iconv_close(cd_to_internal);
212 if (encoding == NULL) {
213 cd_to_internal = (iconv_t) -1;
216 memset(conv_buf, 0, sizeof(conv_buf));
218 cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
219 if (cd_to_internal == (iconv_t) -1) {
220 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
221 encoding, strerror(errno));
224 return (cd_to_internal != (iconv_t) -1);
227 void close_conv_to_internal()
229 iconv_close(cd_to_internal);
230 cd_to_internal = (iconv_t) -1;
233 char* to_internal(char* str, size_t len,
234 char* output_buffer, size_t out_len)
237 size_t outsize = out_len;
238 char *wrptr = output_buffer;
239 char *rdptr = conv_buf;
240 char *retval = output_buffer;
241 /* set up input buffer (concatenate to what was left previous time) */
242 /* can't use strcpy, because possible null bytes from unicode */
243 memcpy(conv_buf + conv_buf_size, str, len);
244 conv_buf_size += len;
245 /* set up output buffer (empty it) */
246 memset(output_buffer, 0, out_len);
247 /* do the conversion */
248 res = iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
249 if (res == (size_t)-1) {
250 if (errno == EILSEQ) {
251 /* restart from an empty state and return NULL */
252 iconv(cd_to_internal, NULL, NULL, NULL, NULL);
258 /* then shift what is left over to the head of the input buffer */
259 memmove(conv_buf, rdptr, conv_buf_size);
260 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);