1 /* Conversion between encodings.
2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 #include "gedcom_internal.h"
/* Name of the encoding configuration file.  init_encodings() opens it under
   this name and also under PKGDATADIR/<name>. */
33 #define ENCODING_CONF_FILE "gedcom.enc"
/* Environment variable that init_encodings() extends with PKGDATADIR. */
34 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* iconv descriptor converting into INTERNAL_ENCODING; holds (iconv_t) -1
   while no conversion is open. */
37 static iconv_t cd_to_internal = (iconv_t) -1;
/* Character width handed to get_encoding() when a conversion is opened. */
38 static ENCODING the_enc = ONE_BYTE;
/* Hash table keyed by "<gedcom name>(<char width>)"; the stored value is the
   encoding name later passed to iconv_open().  Created in init_encodings(). */
39 static hash_t *encodings = NULL;
/* Character width labels used inside the hash keys, indexed by an ENCODING
   value (see get_encoding()). */
41 char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
43 hnode_t *node_alloc(void *c __attribute__((unused)))
45 return malloc(sizeof *node_alloc(NULL));
/* Node release callback installed on the encodings hash through
   hash_set_allocator().  The context argument is not used. */
48 void node_free(hnode_t *n, void *c __attribute__((unused)))
/* Keys are malloc'ed in add_encoding(), hence released with free(); the cast
   passes the key pointer returned by hnode_getkey() to free(). */
50 free((void*)hnode_getkey(n));
/* Register one encoding mapping in the `encodings` hash.
     gedcom_n  - encoding name, first part of the hash key
     charwidth - character width label, second part of the hash key
     iconv_n   - name whose length sizes the stored value
   A key that is already present is reported with a warning
   ("... ignoring"). */
55 void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n)
/* Key layout is "<gedcom_n>(<charwidth>)": the extra 3 bytes hold the two
   parentheses and the terminating NUL. */
59 key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3);
/* Room for a copy of iconv_n including its terminating NUL. */
60 val = (char *) malloc(strlen(iconv_n) + 1);
62 /* sprintf is safe here (malloc'ed before) */
63 sprintf(key, "%s(%s)", gedcom_n, charwidth);
/* NOTE(review): neither malloc() result is checked before use in the lines
   shown here. */
66 if (hash_lookup(encodings, key)) {
67 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
/* The hash stores the key and value pointers themselves; node_free() later
   frees the key. */
73 hash_alloc_insert(encodings, key, val);
/* Look up the value registered for gedcom_n at character width enc.
   The key is rebuilt in the same "<name>(<width>)" form that add_encoding()
   uses, with the width label taken from charwidth_string[enc].
   Returns the value stored in the matching hash node.  An error
   "No encoding defined for ..." is reported for the not-found case.
   NOTE(review): the return value on that path is not visible here. */
77 char* get_encoding(char* gedcom_n, ENCODING enc)
/* +3: two parentheses plus the terminating NUL. */
82 key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
83 /* sprintf is safe here (malloc'ed before) */
84 sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]);
86 node = hash_lookup(encodings, key);
/* The returned pointer is the value owned by the hash, not a copy. */
89 return hnode_get(node);
92 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
/* Exit handler: init_encodings() registers this function with atexit().
   NOTE(review): presumably releases the `encodings` hash - confirm against
   the function body. */
97 void cleanup_encodings()
/* One-time set-up of the encoding tables, guarded by `encodings == NULL`.
   Steps:
     - register cleanup_encodings() with atexit();
     - make sure PKGDATADIR occurs in the GCONV_PATH environment variable;
     - create the `encodings` hash and install node_alloc()/node_free();
     - read the encoding configuration file line by line and pass every
       "<gedcom name> <char width> <iconv name>" triple to add_encoding().
   Problems are reported through gedcom_warning()/gedcom_error(). */
102 void init_encodings()
104 if (encodings == NULL) {
/* Line buffer plus one buffer per field.  Each field buffer is as large as
   the line buffer, so a %s token taken from `buffer` cannot overflow it. */
106 char buffer[MAXBUF + 1];
107 char gedcom_n[MAXBUF + 1];
108 char charwidth[MAXBUF + 1];
109 char iconv_n[MAXBUF + 1];
112 atexit(cleanup_encodings);
114 /* Add gedcom data directory to gconv search path */
115 gconv_path = getenv(GCONV_SEARCH_PATH);
/* strstr() is a plain substring test: the variable is left alone whenever
   PKGDATADIR already occurs anywhere in its value. */
116 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
117 char *new_gconv_path;
/* Variable not set: the new value is just "GCONV_PATH=<PKGDATADIR>". */
118 if (gconv_path == NULL) {
119 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
122 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
/* Variable already set: append PKGDATADIR after a ':' separator. */
125 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
129 sprintf(new_gconv_path, "%s=%s:%s",
130 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
/* putenv() places the string itself into the environment (POSIX), so the
   buffer must stay allocated after a successful call. */
132 if (putenv(new_gconv_path) != 0) {
133 gedcom_warning(_("Failed updating conversion module path"));
/* Node storage for the hash goes through node_alloc()/node_free(). */
137 encodings = hash_create(HASHCOUNT_T_MAX, NULL, NULL);
138 hash_set_allocator(encodings, node_alloc, node_free, NULL);
140 /* Open gedcom configuration file and read */
/* The configuration file is opened by its bare name and by its
   PKGDATADIR-prefixed path. */
141 in = fopen(ENCODING_CONF_FILE, "r");
144 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
145 in = fopen(path, "r");
148 gedcom_warning(_("Could not open encoding configuration file '%s'"),
153 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* fgets() keeps the newline; when it is missing, the line did not fit into
   `buffer` (or the file's last line is unterminated). */
154 if (buffer[strlen(buffer) - 1] != '\n') {
155 gedcom_error(_("Line too long in encoding configuration file '%s'"),
/* Lines starting with '#' and empty lines are skipped. */
159 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
/* A data line must consist of three whitespace-separated fields. */
160 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
161 add_encoding(gedcom_n, charwidth, iconv_n);
164 gedcom_error(_("Missing data in encoding configuration file '%s'"),
/* Select the character width used for subsequent encoding look-ups.
   NOTE(review): presumably stores enc in the_enc, which
   open_conv_to_internal() passes to get_encoding() - confirm against the
   function body. */
175 void set_encoding_width(ENCODING enc)
/* Input staging buffer for to_internal(): new input is appended here and
   bytes that iconv() did not consume are kept at its head for the next
   call.  Cleared in open_conv_to_internal(). */
180 static char conv_buf[MAXGEDCLINELEN * 2];
/* Number of bytes currently pending in conv_buf. */
181 static size_t conv_buf_size;
/* (Re)open the conversion from the encoding registered for `fromcode` (at
   the current the_enc character width) to INTERNAL_ENCODING.
   Returns non-zero when a conversion descriptor is open afterwards, 0 when
   it is not (unknown encoding or iconv_open() failure). */
183 int open_conv_to_internal(char* fromcode)
185 char *encoding = get_encoding(fromcode, the_enc);
/* Close a descriptor left open by an earlier call before replacing it. */
186 if (cd_to_internal != (iconv_t) -1)
187 iconv_close(cd_to_internal);
/* No mapping for fromcode: leave the descriptor marked as not open. */
188 if (encoding == NULL) {
189 cd_to_internal = (iconv_t) -1;
/* Discard input bytes left over from a previous conversion. */
192 memset(conv_buf, 0, sizeof(conv_buf));
194 cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
195 if (cd_to_internal == (iconv_t) -1) {
196 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
197 encoding, strerror(errno));
200 return (cd_to_internal != (iconv_t) -1);
203 void close_conv_to_internal()
205 iconv_close(cd_to_internal);
206 cd_to_internal = (iconv_t) -1;
209 char* to_internal(char* str, size_t len,
210 char* output_buffer, size_t out_len)
212 size_t outsize = out_len;
213 char *wrptr = output_buffer;
214 char *rdptr = conv_buf;
215 /* set up input buffer (concatenate to what was left previous time) */
216 /* can't use strcpy, because possible null bytes from unicode */
217 memcpy(conv_buf + conv_buf_size, str, len);
218 conv_buf_size += len;
219 /* set up output buffer (empty it) */
220 memset(output_buffer, 0, out_len);
221 /* do the conversion */
222 iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
223 /* then shift what is left over to the head of the input buffer */
224 memmove(conv_buf, rdptr, conv_buf_size);
225 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);
226 return output_buffer;