1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 #include "gedcom_internal.h"
/* Name of the encoding configuration file that init_encodings() opens. */
33 #define ENCODING_CONF_FILE "gedcom.enc"
/* Name of the environment variable that init_encodings() reads with getenv()
   and extends with PKGDATADIR. */
34 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* Value passed as first argument to hash_create() for the encodings table. */
36 #define INIT_NR_ENCODINGS 10
/* iconv descriptor for conversion to INTERNAL_ENCODING; holds (iconv_t) -1
   while no conversion context is open. */
38 static iconv_t cd_to_internal = (iconv_t) -1;
/* Character width that open_conv_to_internal() passes to get_encoding(). */
39 static ENCODING the_enc = ONE_BYTE;
/* Hash table keyed by "<gedcom name>(<charwidth>)"; the stored values are
   what open_conv_to_internal() hands to iconv_open().  NULL until
   init_encodings() has created the table. */
40 static hash_t *encodings = NULL;
/* Character-width strings used to build lookup keys; get_encoding() indexes
   this array with an ENCODING value. */
42 char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
44 hnode_t *node_alloc(void *c __attribute__((unused)))
46 return malloc(sizeof *node_alloc(NULL));
/* Node deallocation callback installed on the encodings hash through
   hash_set_allocator(); releases the key string of node n.  The context
   argument c is unused. */
49 void node_free(hnode_t *n, void *c __attribute__((unused)))
51 free((void*)hnode_getkey(n));
/* Registers one encoding mapping in the encodings hash.
     gedcom_n  - encoding name, first part of the lookup key
     charwidth - character-width string, second part of the lookup key
     iconv_n   - name the value buffer is sized for (presumably copied into
                 it; the copy is not shown here - TODO confirm)
   A key that is already present in the hash triggers a warning. */
56 void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n)
/* + 3: the two parentheses and the terminating NUL of "name(width)" */
60 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
61 val = (char *) malloc(strlen(iconv_n) + 1);
63 /* sprintf is safe here (malloc'ed before) */
64 sprintf(key, "%s(%s)", gedcom_n, charwidth);
67 if (hash_lookup(encodings, key)) {
68 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
72 hash_alloc_insert(encodings, key, val);
/* Looks up the value registered for encoding name gedcom_n at character
   width enc.  The lookup key has the form "gedcom_n(width)", where width is
   charwidth_string[enc].  Returns the value stored in the matching hash
   node; when no encoding is defined an error is reported through
   gedcom_error() (the return value on that path is presumably NULL, as
   open_conv_to_internal() tests for it - TODO confirm). */
76 char* get_encoding(char* gedcom_n, ENCODING enc)
/* + 3: the two parentheses and the terminating NUL of "name(width)" */
81 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
82 /* sprintf is safe here (malloc'ed before) */
83 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
85 node = hash_lookup(encodings, key);
88 return hnode_get(node);
91 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
/* Cleanup hook that init_encodings() registers with atexit(). */
96 void cleanup_encodings()
/* One-time initialisation of the encoding table; does nothing when the
   encodings hash already exists.  Registers cleanup_encodings() with
   atexit(), makes sure PKGDATADIR is mentioned in the GCONV_PATH environment
   variable, creates the hash with node_alloc()/node_free() as allocator
   callbacks, and feeds every usable line of the encoding configuration file
   to add_encoding(). */
101 void init_encodings()
103 if (encodings == NULL) {
/* Line buffer plus the three whitespace-separated fields scanned from it */
105 char buffer[MAXBUF + 1];
106 char gedcom_n[MAXBUF + 1];
107 char charwidth[MAXBUF + 1];
108 char iconv_n[MAXBUF + 1];
111 atexit(cleanup_encodings);
113 /* Add gedcom data directory to gconv search path */
114 gconv_path = getenv(GCONV_SEARCH_PATH);
/* Only touch the variable when it is unset or does not contain PKGDATADIR */
115 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
116 char *new_gconv_path;
/* Unset: the new value is just PKGDATADIR */
117 if (gconv_path == NULL) {
118 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
121 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
124 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
/* Already set: PKGDATADIR is appended after the existing value */
128 sprintf(new_gconv_path, "%s=%s:%s",
129 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
131 if (putenv(new_gconv_path) != 0) {
132 gedcom_warning(_("Failed updating conversion module path"));
136 encodings = hash_create(INIT_NR_ENCODINGS, NULL, NULL);
137 hash_set_allocator(encodings, node_alloc, node_free, NULL);
139 /* Open gedcom configuration file and read */
140 in = fopen(ENCODING_CONF_FILE, "r");
/* Builds the path of the configuration file under PKGDATADIR and opens it */
143 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
144 in = fopen(path, "r");
147 gedcom_warning(_("Could not open encoding configuration file '%s'"),
152 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* Last character read is not a newline: reported as an over-long line.
   NOTE(review): strlen(buffer) - 1 would wrap around if fgets() ever
   delivered an empty string (e.g. a line starting with a NUL byte) -
   confirm whether that input can occur. */
153 if (buffer[strlen(buffer) - 1] != '\n') {
154 gedcom_error(_("Line too long in encoding configuration file '%s'"),
/* Lines starting with '#' and lines consisting of only a newline are skipped */
158 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
/* A usable line has exactly three fields: gedcom name, width, iconv name */
159 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
160 add_encoding(gedcom_n, charwidth, iconv_n);
163 gedcom_error(_("Missing data in encoding configuration file '%s'"),
/* Sets the character width to enc - presumably by storing it in the_enc,
   which open_conv_to_internal() later passes to get_encoding(); TODO confirm
   against the function body. */
174 void set_encoding_width(ENCODING enc)
/* Input staging buffer of to_internal(): bytes left unconverted by the
   previous iconv() call, followed by the newly appended input. */
179 static char conv_buf[MAXGEDCLINELEN * 2];
/* Number of bytes in conv_buf that still have to be converted. */
180 static size_t conv_buf_size;
/* Opens the conversion context from the encoding registered for fromcode
   (looked up at the current character width the_enc) to INTERNAL_ENCODING.
   A context that is still open is closed first.  Returns non-zero when a
   conversion context is open on return and zero otherwise. */
182 int open_conv_to_internal(char* fromcode)
184 char *encoding = get_encoding(fromcode, the_enc);
/* Release the previous context, if any, before replacing it */
185 if (cd_to_internal != (iconv_t) -1)
186 iconv_close(cd_to_internal);
/* No registered encoding: leave the context marked as not open */
187 if (encoding == NULL) {
188 cd_to_internal = (iconv_t) -1;
/* Wipe the staging buffer before the new context is used */
191 memset(conv_buf, 0, sizeof(conv_buf));
193 cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
194 if (cd_to_internal == (iconv_t) -1) {
195 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
196 encoding, strerror(errno));
199 return (cd_to_internal != (iconv_t) -1);
202 void close_conv_to_internal()
204 iconv_close(cd_to_internal);
205 cd_to_internal = (iconv_t) -1;
208 char* to_internal(char* str, size_t len,
209 char* output_buffer, size_t out_len)
211 size_t outsize = out_len;
212 char *wrptr = output_buffer;
213 char *rdptr = conv_buf;
214 /* set up input buffer (concatenate to what was left previous time) */
215 /* can't use strcpy, because possible null bytes from unicode */
216 memcpy(conv_buf + conv_buf_size, str, len);
217 conv_buf_size += len;
218 /* set up output buffer (empty it) */
219 memset(output_buffer, 0, out_len);
220 /* do the conversion */
221 iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
222 /* then shift what is left over to the head of the input buffer */
223 memmove(conv_buf, rdptr, conv_buf_size);
224 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);
225 return output_buffer;