1 /* Conversion between encodings.
2 Copyright (C) 2001 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 #include "gedcom_internal.h"
33 #define ENCODING_CONF_FILE "gedcom.enc"
34 #define GCONV_SEARCH_PATH "GCONV_PATH"
37 static iconv_t cd_to_internal = (iconv_t) -1;
38 static void *encoding_mapping = NULL;
39 static ENCODING the_enc = ONE_BYTE;
46 char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
48 int node_compare(const void *node1, const void *node2)
50 return strcmp(((const struct node *) node1)->gedcom_name,
51 ((const struct node *) node2)->gedcom_name);
54 void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n)
57 struct node *nodeptr = (struct node *) malloc(sizeof *nodeptr);
58 nodeptr->gedcom_name = (char *) malloc(strlen(gedcom_n)
59 + strlen(charwidth) + 3);
60 nodeptr->iconv_name = (char *) malloc(strlen(iconv_n) + 1);
61 /* sprintf is safe here (malloc'ed before) */
62 sprintf(nodeptr->gedcom_name, "%s(%s)", gedcom_n, charwidth);
63 strcpy(nodeptr->iconv_name, iconv_n);
64 datum = tsearch(nodeptr, &encoding_mapping, node_compare);
65 if ((datum == NULL) || (*datum != nodeptr)) {
66 gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"),
71 char* get_encoding(char* gedcom_n, ENCODING enc)
74 struct node search_node;
76 buffer = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
77 /* sprintf is safe here (malloc'ed before) */
78 sprintf(buffer, "%s(%s)", gedcom_n, charwidth_string[enc]);
79 search_node.gedcom_name = buffer;
80 datum = tfind(&search_node, &encoding_mapping, node_compare);
83 gedcom_error(_("No encoding defined for '%s'"), gedcom_n);
87 return ((const struct node *) *datum)->iconv_name;
/* Body of the set-up code for the encoding lookup tree.  The function
   header and a number of short lines (declarations, closing braces,
   return statements) are not visible in this excerpt.
   The work is only done while the tree is still empty. */
93 if (encoding_mapping == NULL) {
/* Scratch space: one raw line of the configuration file, and the three
   fields that are parsed out of it. */
95 char buffer[MAXBUF + 1];
96 char gedcom_n[MAXBUF + 1];
97 char charwidth[MAXBUF + 1];
98 char iconv_n[MAXBUF + 1];
101 /* Add gedcom data directory to gconv search path */
102 gconv_path = getenv(GCONV_SEARCH_PATH);
/* The environment is only changed when PKGDATADIR does not occur yet in
   the current value of the variable (or when the variable is not set). */
103 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
104 char *new_gconv_path;
/* Variable not set: the new value is just PKGDATADIR. */
105 if (gconv_path == NULL) {
106 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
109 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
/* Variable already set: PKGDATADIR is appended after a ':' separator. */
112 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
116 sprintf(new_gconv_path, "%s=%s:%s",
117 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
/* putenv makes the given string itself part of the environment, so
   new_gconv_path has to stay allocated afterwards.
   NOTE(review): no check of the malloc results is visible before the
   sprintf calls above -- confirm against the complete file. */
119 if (putenv(new_gconv_path) != 0) {
120 gedcom_warning(_("Failed updating conversion module path"));
124 /* Open gedcom configuration file and read */
/* First attempt: the file name as is, i.e. relative to the current
   working directory. */
125 in = fopen(ENCODING_CONF_FILE, "r");
/* Second attempt: the file in PKGDATADIR (presumably only when the first
   attempt failed; the condition is not visible here).
   NOTE(review): 'path' is filled with an unbounded sprintf and its
   declaration is not visible -- confirm that it is large enough. */
128 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
129 in = fopen(path, "r");
132 gedcom_warning(_("Could not open encoding configuration file '%s'"),
/* Process the configuration file line by line. */
137 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* fgets keeps the newline if the complete line fitted in the buffer, so
   a missing newline is reported as a line that is too long.
   NOTE(review): a last line without newline is reported in the same way,
   and strlen(buffer) - 1 wraps around if a line starts with a null
   byte. */
138 if (buffer[strlen(buffer) - 1] != '\n') {
139 gedcom_error(_("Line too long in encoding configuration file '%s'"),
/* Lines that start with '#' and empty lines are skipped. */
143 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
/* A valid entry has three whitespace separated fields: the GEDCOM name,
   the character width and the iconv name.  Each field fits in its
   buffer, because it cannot be longer than the line it comes from. */
144 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
145 add_encoding(gedcom_n, charwidth, iconv_n);
/* Fewer than three fields on the line. */
148 gedcom_error(_("Missing data in encoding configuration file '%s'"),
/* Presumably selects the character width that is used for the next
   encoding lookup, by storing enc in the_enc (the value that
   open_conv_to_internal passes to get_encoding).  The body is not visible
   in this excerpt -- TODO confirm against the complete file. */
159 void set_encoding_width(ENCODING enc)
164 static char conv_buf[MAXGEDCLINELEN * 2];
165 static size_t conv_buf_size;
167 int open_conv_to_internal(char* fromcode)
169 char *encoding = get_encoding(fromcode, the_enc);
170 if (cd_to_internal != (iconv_t) -1)
171 iconv_close(cd_to_internal);
172 if (encoding == NULL) {
173 cd_to_internal = (iconv_t) -1;
176 memset(conv_buf, 0, sizeof(conv_buf));
178 cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
179 if (cd_to_internal == (iconv_t) -1) {
180 gedcom_error(_("Error opening conversion context for encoding %s: %s"),
181 encoding, strerror(errno));
184 return (cd_to_internal != (iconv_t) -1);
187 void close_conv_to_internal()
189 iconv_close(cd_to_internal);
190 cd_to_internal = (iconv_t) -1;
193 char* to_internal(char* str, size_t len,
194 char* output_buffer, size_t out_len)
196 size_t outsize = out_len;
197 char *wrptr = output_buffer;
198 char *rdptr = conv_buf;
199 /* set up input buffer (concatenate to what was left previous time) */
200 /* can't use strcpy, because possible null bytes from unicode */
201 memcpy(conv_buf + conv_buf_size, str, len);
202 conv_buf_size += len;
203 /* set up output buffer (empty it) */
204 memset(output_buffer, 0, out_len);
205 /* do the conversion */
206 iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
207 /* then shift what is left over to the head of the input buffer */
208 memmove(conv_buf, rdptr, conv_buf_size);
209 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);
210 return output_buffer;