1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 #include "gedcom_internal.h"
32 #include "utf8tools.h"
/* Name of the encoding configuration file.  init_encodings() opens it by
   this bare name and also builds a PKGDATADIR-prefixed path for it. */
34 #define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that update_gconv_search_path() reads
   and extends. */
35 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* Encoding state used while reading; the width, bom, terminator, charset
   and encoding members are written by functions in this file. */
38 struct encoding_state read_encoding;
/* Table of known encodings; NULL until init_encodings() creates it.
   Keys have the form <gedcom name>(<char width>), see add_encoding(). */
40 static hash_t *encodings = NULL;
/* Char-width strings used in lookup keys; get_encoding() indexes this
   array with an Encoding value. */
42 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
/* Node allocator that init_encodings() installs on the encodings hash via
   hash_set_allocator().  Returns malloc-ed storage for one hnode_t, or NULL
   if malloc fails.  The context argument is unused.
   The operand of sizeof is not evaluated, so the expression below only
   names the size of the pointed-to type and does not recurse. */
44 hnode_t *node_alloc(void *c UNUSED)
46 return (hnode_t *)malloc(sizeof *node_alloc(NULL));
/* Node deallocator installed together with node_alloc() on the encodings
   hash.  The context argument is unused.
   The visible statement releases the key string of the node; keys are
   malloc-ed in add_encoding().  The cast presumably drops a const qualifier
   on the value returned by hnode_getkey() - TODO confirm.
   NOTE(review): release of the node value and of the node itself is not
   visible in this excerpt. */
49 void node_free(hnode_t *n, void *c UNUSED)
51 free((void*)hnode_getkey(n));
/* Register one encoding mapping in the encodings hash.
   gedcom_n and charwidth are combined into a key of the form
   <gedcom_n>(<charwidth>); the extra 3 bytes in the key allocation cover
   the two parentheses and the terminating NUL.  A second buffer sized for
   the iconv name is allocated for the value.
   If the key is already present, a warning is issued; the insert on the
   last line is presumably the non-duplicate branch (the branch structure is
   not fully visible here).
   NOTE(review): checks of the malloc results are not visible in this
   excerpt - confirm they exist before the sprintf. */
56 void add_encoding(const char *gedcom_n, const char* charwidth,
61 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
62 val = (char *) malloc(strlen(iconv_n) + 1);
65 /* sprintf is safe here (malloc'ed before) */
66 sprintf(key, "%s(%s)", gedcom_n, charwidth);
69 if (hash_lookup(encodings, key)) {
70 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
76 hash_alloc_insert(encodings, key, val);
/* Look up the value registered for a GEDCOM charset name and char width.
   Builds the same <name>(<width>) key as add_encoding(), taking the width
   string from charwidth_string[enc], and searches the encodings hash.
   Returns NULL when the encodings table has not been created yet.  On a hit
   the value stored in the hash node is returned.  gedcom_error() reports
   that no encoding is defined for gedcom_n, presumably on a miss (the
   branch structure is not fully visible here).
   NOTE(review): the temporary key is malloc-ed below; its release and the
   check of the malloc result are not visible in this excerpt. */
83 char* get_encoding(const char* gedcom_n, Encoding enc)
88 if (encodings == NULL) return NULL;
90 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
93 /* sprintf is safe here (malloc'ed before) */
94 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
96 node = hash_lookup(encodings, key);
99 return hnode_get(node);
102 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
/* Release the encodings hash.  init_encodings() registers this function
   with atexit(). */
112 void cleanup_encodings()
114 hash_free(encodings);
117 #ifdef USE_GLIBC_ICONV
/* Heap string of the form GCONV_PATH=... that update_gconv_search_path()
   builds and hands to putenv(). */
119 static char *new_gconv_path;
/* Undo the environment change and free the string built by
   update_gconv_search_path(), which registers this function with atexit().
   putenv() is called with the bare variable name (no equals sign); the GNU
   C library documents that form as removing the variable from the
   environment.  The string is freed only after that call. */
121 void cleanup_gconv_path()
123 /* Clean up environment */
124 putenv(GCONV_SEARCH_PATH);
126 free(new_gconv_path);
129 /* Let function be called before main() */
130 void update_gconv_search_path() __attribute__ ((constructor));
132 #endif /* USE_GLIBC_ICONV */
136 The environment variable GCONV_PATH has to be adjusted before the very
137 first call of iconv_open. For the most general case, it means that we
138 have to make our own constructor here (in case some of the other library
139 constructors would use iconv_open).
141 However, it looks like a change of an environment variable in a constructor
142 doesn't always survive until the main() function. This is the case if
143 the environment variable is a new one, for which there was no room yet
144 in the initial environment. The initial environment is located on the
145 stack, but when variables are added, it is moved to the heap (to be able
146 to grow). Now, the main function takes again the one from the stack, not
147 from the heap, so changes are lost.
149 For this reason, the function below is also called from gedcom_init(),
150 which needs to be called as early as possible in the program.
/* Make sure PKGDATADIR is part of the GCONV_PATH environment variable.
   Only active when USE_GLIBC_ICONV is defined.
   When the variable is unset, it is set to PKGDATADIR alone; when it is set
   but does not contain PKGDATADIR, PKGDATADIR is appended after a colon.
   Nothing is changed when the current value already contains PKGDATADIR.
   The check uses strstr(), so any value containing PKGDATADIR as a
   substring counts as already present.
   A message is written to stderr about a failed memory allocation
   (presumably when malloc returns NULL; that branch is not fully visible).
   cleanup_gconv_path() is registered with atexit() only when init_called is
   nonzero; a failed registration produces a warning. */
153 void update_gconv_search_path()
155 #ifdef USE_GLIBC_ICONV
157 /* Add gedcom data directory to gconv search path */
158 gconv_path = getenv(GCONV_SEARCH_PATH);
159 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
160 if (gconv_path == NULL) {
161 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
165 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
168 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
173 sprintf(new_gconv_path, "%s=%s:%s",
174 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
177 /* Ignore failures of putenv (can't do anything about it anyway) */
/* putenv() places the passed string itself into the environment, which is
   why new_gconv_path stays allocated until cleanup_gconv_path() runs. */
178 putenv(new_gconv_path);
180 fprintf(stderr, "Could not allocate memory at %s, %d\n",
185 if (init_called && atexit(cleanup_gconv_path) != 0) {
186 gedcom_warning(_("Could not register path cleanup function"));
188 #endif /* USE_GLIBC_ICONV */
/* One-time setup of the encodings table from the configuration file.
   Does nothing when the encodings hash already exists.  Otherwise it
   registers cleanup_encodings() with atexit(), creates the hash with
   node_alloc()/node_free() as allocator functions, and reads the
   configuration file line by line.
   A line that does not start with a hash sign and is not just a newline
   must contain three whitespace-separated fields: GEDCOM name, char width
   and iconv name; these are passed to add_encoding().  Lines with fewer
   fields are reported as missing data.  A line that does not end in a
   newline after fgets() is reported as too long.  The file is closed at the
   end; a close failure produces a warning. */
191 void init_encodings()
193 if (encodings == NULL) {
/* All three field buffers are as large as the line buffer, so the
   unbounded %s conversions in the sscanf below cannot overrun them. */
195 char buffer[MAXBUF + 1];
196 char gedcom_n[MAXBUF + 1];
197 char charwidth[MAXBUF + 1];
198 char iconv_n[MAXBUF + 1];
200 if (atexit(cleanup_encodings) != 0) {
201 gedcom_warning(_("Could not register encoding cleanup function"));
204 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
205 hash_set_allocator(encodings, node_alloc, node_free, NULL);
207 /* Open gedcom configuration file and read */
208 in = fopen(ENCODING_CONF_FILE, "r");
/* NOTE(review): the declaration of path is not visible here; confirm it is
   large enough for PKGDATADIR, a slash and the file name.  The PKGDATADIR
   attempt is presumably a fallback when the first fopen fails. */
211 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
212 in = fopen(path, "r");
/* The warning always names the bare file name, not the PKGDATADIR path. */
215 gedcom_warning(_("Could not open encoding configuration file '%s': %s"),
216 ENCODING_CONF_FILE, strerror(errno));
/* NOTE(review): a final line without a trailing newline also takes the
   too-long path below; and strlen(buffer) - 1 would index before the
   buffer if fgets ever yielded an empty string (e.g. a line starting with
   a NUL byte) - confirm whether that can occur. */
220 while (fgets(buffer, sizeof(buffer), in) != NULL) {
221 if (buffer[strlen(buffer) - 1] != '\n') {
222 gedcom_error(_("Line too long in encoding configuration file '%s'"),
227 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
228 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
229 add_encoding(gedcom_n, charwidth, iconv_n);
232 gedcom_error(_("Missing data in encoding configuration file '%s'"),
240 if (fclose(in) != 0) {
241 gedcom_warning(_("Error closing file '%s': %s"),
242 ENCODING_CONF_FILE, strerror(errno));
/* Store enc as the width of the read encoding state.  get_encoding() is
   later called with this value from open_conv_to_internal(). */
248 void set_encoding_width(Encoding enc)
250 read_encoding.width = enc;
/* Store bom in the read encoding state. */
253 void set_encoding_bom(Enc_bom bom)
255 read_encoding.bom = bom;
/* Copy at most MAX_TERMINATOR_LEN bytes of term into the terminator field
   of the read encoding state.
   NOTE(review): strncpy does not write a terminating NUL when term is
   MAX_TERMINATOR_LEN bytes or longer; the size of the destination array is
   not visible here - confirm it leaves room for a terminator. */
258 void set_encoding_terminator(char* term)
260 strncpy(read_encoding.terminator, term, MAX_TERMINATOR_LEN);
/* Conversion context used by to_internal(); NULL when none is open.  It is
   released in open_conv_to_internal() and close_conv_to_internal(). */
263 static convert_t to_int = NULL;
/* Placeholder string; its use is not visible in this excerpt (presumably
   returned by to_internal() on failure - TODO confirm). */
264 static char* error_value = "<error>";
/* Open a conversion context from the charset named by fromcode.
   The encoding is looked up with get_encoding() using fromcode and the
   current read_encoding.width, then passed to
   initialize_utf8_conversion().  If that fails, an error including
   strerror(errno) is reported.  On success the previous context is
   released, fromcode and the looked-up encoding are recorded in
   read_encoding, and the resulting state is written to the debug output.
   Returns nonzero when a new context was created, 0 otherwise.
   NOTE(review): the meaning of the second argument (1) of
   initialize_utf8_conversion() and the assignment of the new context to
   to_int are not visible in this excerpt. */
266 int open_conv_to_internal(const char* fromcode)
268 convert_t new_to_int = NULL;
269 const char *encoding = get_encoding(fromcode, read_encoding.width);
271 if (encoding != NULL) {
272 new_to_int = initialize_utf8_conversion(encoding, 1);
273 if (new_to_int == NULL) {
/* Assumes initialize_utf8_conversion() leaves errno set on failure. */
274 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
275 encoding, strerror(errno));
279 if (new_to_int != NULL) {
/* Release the previously open context (any guard on to_int is not visible
   here). */
281 cleanup_utf8_conversion(to_int);
/* NOTE(review): strncpy leaves charset without a terminating NUL when
   fromcode is MAX_CHARSET_LEN bytes or longer - confirm the array size. */
283 strncpy(read_encoding.charset, fromcode, MAX_CHARSET_LEN);
/* Stores the pointer returned by get_encoding(), not a copy. */
284 read_encoding.encoding = encoding;
285 gedcom_debug_print("Encoding state is now: ");
286 gedcom_debug_print(" charset : %s", read_encoding.charset);
287 gedcom_debug_print(" encoding : %s", read_encoding.encoding);
288 gedcom_debug_print(" width : %d", read_encoding.width);
289 gedcom_debug_print(" BOM : %d", read_encoding.bom);
290 gedcom_debug_print(" terminator: 0x%02x 0x%02x",
291 read_encoding.terminator[0],
292 read_encoding.terminator[1]);
295 return (new_to_int != NULL);
/* Release the current conversion context, if one is open.
   NOTE(review): resetting to_int after the cleanup is not visible in this
   excerpt - confirm it happens, since open_conv_to_internal() releases
   to_int again later. */
298 void close_conv_to_internal()
300 if (to_int != NULL) {
301 cleanup_utf8_conversion(to_int);
/* Convert len bytes of str with the current conversion context to_int.
   output_buf is first attached to the context with
   conversion_set_output_buffer(); when that call returns nonzero, the
   result of convert_to_utf8_incremental() is returned.  The path taken
   when it returns zero is not visible in this excerpt. */
307 char* to_internal(const char* str, size_t len, struct conv_buffer* output_buf)
309 if (conversion_set_output_buffer(to_int, output_buf))
310 return convert_to_utf8_incremental(to_int, str, len);