1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 #include "gedcom_internal.h"
32 #include "utf8tools.h"
/* Name of the encoding configuration file.  init_encodings() opens it under
   this bare name and also as PKGDATADIR/<name>. */
34 #define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that update_gconv_search_path() reads and
   extends. */
35 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* Character width that open_conv_to_internal() passes to get_encoding(). */
38 static Encoding the_enc = ONE_BYTE;
/* Hash of known encodings, created by init_encodings().  Keys have the form
   "<gedcom name>(<charwidth>)" (see add_encoding()); NULL until created. */
39 static hash_t *encodings = NULL;
/* Width strings used to build lookup keys; get_encoding() indexes this array
   with an Encoding value. */
41 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
/* Node allocator installed on the encodings hash by init_encodings() (via
   hash_set_allocator()).
   Returns a malloc'ed block the size of one hnode_t, or NULL if malloc fails.
   The sizeof operand is only used for its type, so node_alloc() is not
   actually called recursively.  The context argument c is unused. */
43 hnode_t *node_alloc(void *c UNUSED)
45 return (hnode_t *)malloc(sizeof *node_alloc(NULL));
/* Node deallocator installed on the encodings hash by init_encodings() (via
   hash_set_allocator()).  The context argument c is unused.
   NOTE(review): only the release of the key is visible here; confirm that
   the stored value and the node itself are released as well. */
48 void node_free(hnode_t *n, void *c UNUSED)
/* The key is the string malloc'ed in add_encoding().  The cast presumably
   drops a const qualifier from hnode_getkey()'s result -- confirm. */
50 free((void*)hnode_getkey(n));
/* Adds one entry to the encodings hash.
   The key is built as "<gedcom_n>(<charwidth>)"; its allocation adds 3 bytes
   for "(", ")" and the terminating NUL.  The value buffer is sized to hold a
   copy of iconv_n (the third argument passed by init_encodings()).
   If the key is already present a warning is issued; per its message the
   duplicate is ignored.
   NOTE(review): the handling of a failed malloc and the release of key/val
   for a duplicate are not visible here -- confirm. */
55 void add_encoding(const char *gedcom_n, const char* charwidth,
60 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
61 val = (char *) malloc(strlen(iconv_n) + 1);
64 /* sprintf is safe here (malloc'ed before) */
65 sprintf(key, "%s(%s)", gedcom_n, charwidth);
/* An existing node for this key means the same name/width pair was already
   registered. */
68 if (hash_lookup(encodings, key)) {
69 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
/* The hash now refers to key and val; node_free() later frees the key. */
75 hash_alloc_insert(encodings, key, val);
/* Looks up the value stored by add_encoding() for GEDCOM encoding name
   gedcom_n at character width enc.
   Returns NULL if the encodings hash has not been created yet.  When a node
   is found, its stored value (hnode_get()) is returned; for a name without
   an entry an error is reported through gedcom_error().
   NOTE(review): the temporary key is malloc'ed here; its NULL check and its
   release are not visible -- confirm it is freed on every path. */
82 char* get_encoding(const char* gedcom_n, Encoding enc)
87 if (encodings == NULL) return NULL;
/* Same key layout as in add_encoding(): "<name>(<width>)"; the extra 3 bytes
   are "(", ")" and the terminating NUL. */
89 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
92 /* sprintf is safe here (malloc'ed before) */
93 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
95 node = hash_lookup(encodings, key);
/* The returned pointer is the value held by the hash, not a copy. */
98 return hnode_get(node);
101 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
/* Releases the encodings hash with hash_free().  init_encodings() registers
   this function with atexit(). */
111 void cleanup_encodings()
113 hash_free(encodings);
116 #ifdef USE_GLIBC_ICONV
/* malloc'ed "GCONV_PATH=..." string that update_gconv_search_path() hands to
   putenv(); freed again by cleanup_gconv_path(). */
118 static char *new_gconv_path;
/* Undoes update_gconv_search_path(): passes the bare variable name to
   putenv() and frees the string that was handed to putenv() earlier.
   update_gconv_search_path() registers this function with atexit().
   Only compiled when USE_GLIBC_ICONV is defined. */
120 void cleanup_gconv_path()
122 /* Clean up environment */
/* With glibc, a putenv() argument without '=' removes that variable from the
   environment. */
123 putenv(GCONV_SEARCH_PATH);
/* Freed only after the putenv() call above, because the environment may
   still have pointed at this string until then. */
125 free(new_gconv_path);
128 /* Let function be called before main() */
/* The constructor attribute is a compiler extension.  This declaration sits
   inside the USE_GLIBC_ICONV section, whereas the function definition itself
   starts outside of it, so without that macro the function still exists but
   is not declared as a constructor. */
129 void update_gconv_search_path() __attribute__ ((constructor));
131 #endif /* USE_GLIBC_ICONV */
135 The environment variable GCONV_PATH has to be adjusted before the very
136 first call of iconv_open. For the most general case, it means that we
137 have to make our own constructor here (in case some of the other library
138 constructors would use iconv_open).
140 However, it looks like a change of an environment variable in a constructor
141 doesn't always survive until the main() function. This is the case if
142 the environment variable is a new one, for which there was no room yet
143 in the initial environment. The initial environment is located on the
144 stack, but when variables are added, it is moved to the heap (to be able
145 to grow). Now, the main function takes again the one from the stack, not
146 from the heap, so changes are lost.
148 For this, the function below will also be called in gedcom_init(), which
149 needs to be called as early as possible in the program.
/* Makes sure PKGDATADIR is part of the GCONV_PATH environment variable.
   With USE_GLIBC_ICONV defined: if the variable is unset or does not contain
   PKGDATADIR, a new "GCONV_PATH=..." string is malloc'ed and passed to
   putenv() -- either PKGDATADIR alone, or the old value followed by
   ":" and PKGDATADIR.  A failed allocation is reported on stderr.
   All visible statements sit between the #ifdef and #endif below, so they
   are compiled only when USE_GLIBC_ICONV is defined. */
152 void update_gconv_search_path()
154 #ifdef USE_GLIBC_ICONV
156 /* Add gedcom data directory to gconv search path */
157 gconv_path = getenv(GCONV_SEARCH_PATH);
/* strstr() is a plain substring test: PKGDATADIR also counts as present when
   it only occurs as part of a longer path entry. */
158 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
/* Variable not set at all: the new value is just PKGDATADIR. */
159 if (gconv_path == NULL) {
160 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
164 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
/* Variable already set: keep the existing entries and append PKGDATADIR
   after a ':' separator. */
167 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
172 sprintf(new_gconv_path, "%s=%s:%s",
173 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
176 /* Ignore failures of putenv (can't do anything about it anyway) */
/* putenv() makes the given string itself part of the environment, which is
   why it is kept in the file-level new_gconv_path and only freed in
   cleanup_gconv_path(). */
177 putenv(new_gconv_path);
179 fprintf(stderr, "Could not allocate memory at %s, %d\n",
/* Because of the short-circuit &&, atexit() is attempted only when
   init_called is non-zero.  Presumably this skips the registration during
   the pre-main constructor call -- confirm against gedcom_init(). */
184 if (init_called && atexit(cleanup_gconv_path) != 0) {
185 gedcom_warning(_("Could not register path cleanup function"));
187 #endif /* USE_GLIBC_ICONV */
/* Builds the encodings hash from the encoding configuration file.
   Nothing is done when the hash already exists.  Otherwise
   cleanup_encodings() is registered with atexit(), the hash is created with
   node_alloc()/node_free() as its allocator, and the file is read line by
   line: lines starting with '#' and lines consisting of just a newline are
   skipped; every other line must yield three whitespace-separated fields,
   which are passed to add_encoding().  Problems are reported through
   gedcom_warning()/gedcom_error(). */
190 void init_encodings()
192 if (encodings == NULL) {
/* All four buffers have the same size, so a field scanned out of buffer
   with an unbounded %s can never be longer than its destination. */
194 char buffer[MAXBUF + 1];
195 char gedcom_n[MAXBUF + 1];
196 char charwidth[MAXBUF + 1];
197 char iconv_n[MAXBUF + 1];
199 if (atexit(cleanup_encodings) != 0) {
200 gedcom_warning(_("Could not register encoding cleanup function"));
/* NOTE(review): the two NULL arguments presumably select the hash library's
   default comparison and hashing functions -- confirm in hash.h. */
203 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
204 hash_set_allocator(encodings, node_alloc, node_free, NULL);
206 /* Open gedcom configuration file and read */
/* A bare file name is resolved relative to the current working directory. */
207 in = fopen(ENCODING_CONF_FILE, "r");
/* Second location: the package data directory (presumably tried only when
   the first fopen() failed -- the condition is not visible here).
   NOTE(review): the declaration of path is not visible; confirm it is large
   enough for PKGDATADIR, '/', the file name and the terminating NUL. */
210 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
211 in = fopen(path, "r");
214 gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
215 ENCODING_CONF_FILE, strerror(errno));
219 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* fgets() keeps the newline, so a missing one means the line did not fit
   into buffer.
   NOTE(review): a final line without a trailing newline takes this branch
   too, and a line that starts with a NUL byte makes strlen() return 0, so
   the index becomes (size_t)-1, which is out of bounds. */
220 if (buffer[strlen(buffer) - 1] != '\n') {
221 gedcom_error(_("Line too long in encoding configuration file '%s'"),
/* Skip comment lines ('#' in the first column) and empty lines. */
226 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
227 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
228 add_encoding(gedcom_n, charwidth, iconv_n);
231 gedcom_error(_("Missing data in encoding configuration file '%s'"),
239 if (fclose(in) != 0) {
240 gedcom_warning(_("Error closing file '%s': %s"),
241 ENCODING_CONF_FILE, strerror(errno));
/* NOTE(review): the body is not visible here.  Presumably it stores enc in
   the_enc, the width that open_conv_to_internal() passes to get_encoding()
   -- confirm. */
247 void set_encoding_width(Encoding enc)
/* Conversion context used by to_internal(); released by
   close_conv_to_internal().  Starts out as NULL. */
252 static convert_t to_int = NULL;
/* NOTE(review): not referenced in the visible code; presumably what
   to_internal() returns when no conversion can be done -- confirm. */
253 static char* error_value = "<error>";
/* Opens a UTF-8 conversion context for the GEDCOM encoding fromcode at the
   current character width the_enc.
   Returns non-zero if a new context could be created and 0 otherwise, i.e.
   when get_encoding() has no entry or initialize_utf8_conversion() fails (the
   latter is reported through gedcom_error()). */
255 int open_conv_to_internal(const char* fromcode)
257 convert_t new_to_int = NULL;
/* Map the GEDCOM name to the value registered in the encodings hash. */
258 const char *encoding = get_encoding(fromcode, the_enc);
260 if (encoding != NULL) {
261 new_to_int = initialize_utf8_conversion(encoding, 1);
262 if (new_to_int == NULL) {
263 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
264 encoding, strerror(errno));
/* The existing context is cleaned up only once the new one exists, so
   a failed open leaves the existing context in place.
   NOTE(review): the assignment of new_to_int to to_int is not visible here
   -- confirm. */
268 if (new_to_int != NULL) {
270 cleanup_utf8_conversion(to_int);
274 return (new_to_int != NULL);
/* Releases the current conversion context to_int, if there is one.
   NOTE(review): resetting to_int to NULL afterwards is not visible here --
   confirm, since a second call would otherwise clean up the same context
   again. */
277 void close_conv_to_internal()
279 if (to_int != NULL) {
280 cleanup_utf8_conversion(to_int);
/* Converts the len bytes at str with the current conversion context to_int,
   using output_buf as the output buffer.
   The conversion is performed only if conversion_set_output_buffer() returns
   non-zero; the result of convert_to_utf8_incremental() is then returned
   unchanged.
   NOTE(review): the return value for the other case is not visible here
   (presumably error_value) -- confirm. */
286 char* to_internal(const char* str, size_t len, struct conv_buffer* output_buf)
288 if (conversion_set_output_buffer(to_int, output_buf))
289 return convert_to_utf8_incremental(to_int, str, len);