1 /* Conversion between encodings.
2 Copyright (C) 2001 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 #include "gedcom_internal.h"
/* Destination encoding of every conversion: passed as the target code to
   iconv_open() in open_conv_to_internal(). */
33 #define INTERNAL_ENCODING "UTF8"
/* Name of the encoding configuration file; it is opened as-is first and also
   as PKGDATADIR/<name>. */
34 #define ENCODING_CONF_FILE "gedcom.enc"
/* Environment variable that is read with getenv() and extended with
   PKGDATADIR through putenv(). */
35 #define GCONV_SEARCH_PATH "GCONV_PATH"
/* Current conversion descriptor; (iconv_t) -1 means that none is open. */
38 static iconv_t cd_to_internal = (iconv_t) -1;
/* Root of the tsearch()/tfind() tree filled by add_encoding(); NULL as long
   as nothing has been added. */
39 static void *encoding_mapping = NULL;
/* Character width handed to get_encoding() when a conversion is opened. */
40 static ENCODING the_enc = ONE_BYTE;
/* Width suffixes, indexed by an ENCODING value, used to build the
   "name(width)" lookup keys. */
47 char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
49 int node_compare(const void *node1, const void *node2)
51 return strcmp(((const struct node *) node1)->gedcom_name,
52 ((const struct node *) node2)->gedcom_name);
55 void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n)
58 struct node *nodeptr = (struct node *) malloc(sizeof *nodeptr);
59 nodeptr->gedcom_name = (char *) malloc(strlen(gedcom_n)
60 + strlen(charwidth) + 3);
61 nodeptr->iconv_name = (char *) malloc(strlen(iconv_n) + 1);
62 /* sprintf is safe here (malloc'ed before) */
63 sprintf(nodeptr->gedcom_name, "%s(%s)", gedcom_n, charwidth);
64 strcpy(nodeptr->iconv_name, iconv_n);
65 datum = tsearch(nodeptr, &encoding_mapping, node_compare);
66 if ((datum == NULL) || (*datum != nodeptr)) {
67 gedcom_warning("Duplicate entry found for encoding '%s', ignoring",
72 char* get_encoding(char* gedcom_n, ENCODING enc)
75 struct node search_node;
77 buffer = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3);
78 /* sprintf is safe here (malloc'ed before) */
79 sprintf(buffer, "%s(%s)", gedcom_n, charwidth_string[enc]);
80 search_node.gedcom_name = buffer;
81 datum = tfind(&search_node, &encoding_mapping, node_compare);
84 gedcom_error("No encoding found for '%s'", gedcom_n);
88 return ((const struct node *) *datum)->iconv_name;
/* NOTE(review): the header of the enclosing function and several short
   statements are not visible in this excerpt; the comments below describe
   only what the visible lines do. */
/* The configuration is loaded only while the mapping tree is still empty. */
94 if (encoding_mapping == NULL) {
/* One line buffer plus one buffer per field; all have the same size, so a
   field scanned from the line cannot be longer than its destination. */
96 char buffer[MAXBUF + 1];
97 char gedcom_n[MAXBUF + 1];
98 char charwidth[MAXBUF + 1];
99 char iconv_n[MAXBUF + 1];
102 /* Add gedcom data directory to gconv search path */
103 gconv_path = getenv(GCONV_SEARCH_PATH);
/* Nothing to do when the variable already mentions PKGDATADIR. */
104 if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) {
/* putenv() keeps a pointer to the string it is given, so this allocation
   has to stay alive after the call. */
105 char *new_gconv_path;
/* Variable not set: define it as PKGDATADIR alone. */
106 if (gconv_path == NULL) {
107 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
110 sprintf(new_gconv_path, "%s=%s", GCONV_SEARCH_PATH, PKGDATADIR);
/* Variable set: append PKGDATADIR, separated by a colon.
   NOTE(review): no check of the malloc() result is visible on either
   path -- confirm against the full file. */
113 new_gconv_path = (char *)malloc(strlen(GCONV_SEARCH_PATH)
117 sprintf(new_gconv_path, "%s=%s:%s",
118 GCONV_SEARCH_PATH, gconv_path, PKGDATADIR);
/* A failing putenv() is only reported as a warning. */
120 if (putenv(new_gconv_path) != 0) {
121 gedcom_warning("Failed updating environment variable %s",
126 /* Open gedcom configuration file and read */
127 in = fopen(ENCODING_CONF_FILE, "r");
/* Second attempt under PKGDATADIR; presumably only taken when the first
   fopen() failed -- the guarding condition is not visible here.
   NOTE(review): the declaration (and size) of 'path' is not visible;
   confirm that it can hold PKGDATADIR "/" ENCODING_CONF_FILE. */
130 sprintf(path, "%s/%s", PKGDATADIR, ENCODING_CONF_FILE);
131 in = fopen(path, "r");
134 gedcom_warning("Could not open encoding configuration file '%s'",
/* Read the file line by line. */
138 while (fgets(buffer, sizeof(buffer), in) != NULL) {
/* A line that does not end in '\n' is treated as too long.
   NOTE(review): a final line without a trailing newline is reported the
   same way, and an empty string from fgets() (line starting with a NUL
   byte) would make this index buffer[-1]. */
139 if (buffer[strlen(buffer) - 1] != '\n') {
140 gedcom_error("Line too long in encoding configuration file '%s'",
/* Lines starting with '#' and empty lines are skipped. */
144 else if ((buffer[0] != '#') && (strcmp(buffer, "\n") != 0)) {
/* Every remaining line must hold three whitespace-separated fields:
   GEDCOM name, character width, iconv name. */
145 if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) {
146 add_encoding(gedcom_n, charwidth, iconv_n);
149 gedcom_error("Missing data in encoding configuration file '%s'",
/* Select the character width (an ENCODING value) for subsequent lookups.
   NOTE(review): the body is not visible in this excerpt; presumably it
   stores enc in the_enc, which open_conv_to_internal() passes on to
   get_encoding() -- confirm against the full file. */
160 void set_encoding_width(ENCODING enc)
/* Input staging buffer for to_internal(): new input is appended to the bytes
   that the previous iconv() call left unconverted.  It is cleared in
   open_conv_to_internal(). */
165 static char conv_buf[MAXGEDCLINELEN * 2];
/* Number of bytes currently pending in conv_buf. */
166 static size_t conv_buf_size;
168 int open_conv_to_internal(char* fromcode)
170 char *encoding = get_encoding(fromcode, the_enc);
171 if (cd_to_internal != (iconv_t) -1)
172 iconv_close(cd_to_internal);
173 if (encoding == NULL) {
174 cd_to_internal = (iconv_t) -1;
177 memset(conv_buf, 0, sizeof(conv_buf));
179 cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding);
180 if (cd_to_internal == (iconv_t) -1) {
181 gedcom_error("Error opening conversion context for encoding %s: %s",
182 encoding, strerror(errno));
185 return (cd_to_internal != (iconv_t) -1);
188 void close_conv_to_internal()
190 iconv_close(cd_to_internal);
191 cd_to_internal = (iconv_t) -1;
194 char* to_internal(char* str, size_t len,
195 char* output_buffer, size_t out_len)
197 size_t outsize = out_len;
198 char *wrptr = output_buffer;
199 char *rdptr = conv_buf;
200 /* set up input buffer (concatenate to what was left previous time) */
201 /* can't use strcpy, because possible null bytes from unicode */
202 memcpy(conv_buf + conv_buf_size, str, len);
203 conv_buf_size += len;
204 /* set up output buffer (empty it) */
205 memset(output_buffer, 0, out_len);
206 /* do the conversion */
207 iconv(cd_to_internal, &rdptr, &conv_buf_size, &wrptr, &outsize);
208 /* then shift what is left over to the head of the input buffer */
209 memmove(conv_buf, rdptr, conv_buf_size);
210 memset(conv_buf + conv_buf_size, 0, sizeof(conv_buf) - conv_buf_size);
211 return output_buffer;