From: Peter Verthez Date: Sat, 24 Nov 2001 22:28:59 +0000 (+0000) Subject: Better interface for encoding. X-Git-Url: https://git.dlugolecki.net.pl/?a=commitdiff_plain;h=f4faf7679e0ae7cc5e5cf25c1f070a790f5f4a40;hp=2c487bf2ff728c4015b46788d11ac9eb2f41af54;p=gedcom-parse.git Better interface for encoding. Bugfix in use of iconv: there can be characters left... --- diff --git a/encoding.c b/encoding.c index ba5aa1d..6c342e0 100644 --- a/encoding.c +++ b/encoding.c @@ -12,25 +12,29 @@ static iconv_t cd_to_internal = (iconv_t) -1; static char int_buf[MAXGEDCLINELEN*2]; static void *encoding_mapping = NULL; +static ENCODING the_enc = ONE_BYTE; struct node { char *gedcom_name; char *iconv_name; }; +char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" }; + int node_compare(const void *node1, const void *node2) { return strcmp(((const struct node *) node1)->gedcom_name, ((const struct node *) node2)->gedcom_name); } -void add_encoding(char *gedcom_n, char *iconv_n) +void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n) { void **datum; struct node *nodeptr = (struct node *) malloc(sizeof *nodeptr); - nodeptr->gedcom_name = (char *) malloc(strlen(gedcom_n) + 1); + nodeptr->gedcom_name = (char *) malloc(strlen(gedcom_n) + + strlen(charwidth) + 3); nodeptr->iconv_name = (char *) malloc(strlen(iconv_n) + 1); - strcpy(nodeptr->gedcom_name, gedcom_n); + sprintf(nodeptr->gedcom_name, "%s(%s)", gedcom_n, charwidth); strcpy(nodeptr->iconv_name, iconv_n); datum = tsearch(nodeptr, &encoding_mapping, node_compare); if ((datum == NULL) || (*datum != nodeptr)) { @@ -39,11 +43,13 @@ void add_encoding(char *gedcom_n, char *iconv_n) } } -char* get_encoding(char* gedcom_n) +char* get_encoding(char* gedcom_n, ENCODING enc) { void **datum; struct node search_node; - search_node.gedcom_name = gedcom_n; + char buffer[MAXBUF + 1]; + sprintf(buffer, "%s(%s)", gedcom_n, charwidth_string[enc]); + search_node.gedcom_name = buffer; datum = tfind(&search_node, &encoding_mapping, node_compare); if (datum == NULL) { gedcom_error("No encoding found for '%s'", gedcom_n); @@ -60,6 +66,7 @@ void init_encodings() FILE *in; char buffer[MAXBUF + 1]; char gedcom_n[MAXBUF + 1]; + char charwidth[MAXBUF + 1]; char iconv_n[MAXBUF + 1]; in = fopen(ENCODING_CONF_FILE, "r"); if (in != NULL) { @@ -70,8 +77,13 @@ void init_encodings() return; } else if (buffer[0] != '#') { - if (sscanf(buffer, "%s %s", gedcom_n, iconv_n) == 2) { - add_encoding(gedcom_n, iconv_n); + if (sscanf(buffer, "%s %s %s", gedcom_n, charwidth, iconv_n) == 3) { + add_encoding(gedcom_n, charwidth, iconv_n); + } + else { + gedcom_error("Missing data in encoding configuration file '%s'", + ENCODING_CONF_FILE); + return; } } } @@ -84,15 +96,25 @@ void init_encodings() } } +void set_encoding_width(ENCODING enc) +{ + the_enc = enc; +} + +static char conv_buf[MAXGEDCLINELEN * 2]; +static int conv_buf_size; + int open_conv_to_internal(char* fromcode) { - char *encoding = get_encoding(fromcode); + char *encoding = get_encoding(fromcode, the_enc); if (cd_to_internal != (iconv_t) -1) iconv_close(cd_to_internal); if (encoding == NULL) { cd_to_internal = (iconv_t) -1; } else { + memset(conv_buf, 0, sizeof(conv_buf)); + conv_buf_size = 0; cd_to_internal = iconv_open(INTERNAL_ENCODING, encoding); } return (cd_to_internal != (iconv_t) -1); @@ -106,12 +128,23 @@ void close_conv_to_internal() char* to_internal(char* str, size_t len) { - size_t insize = len; + size_t insize; size_t outsize = MAXGEDCLINELEN * 2; char *wrptr = int_buf; - char *rdptr = str; + char *rdptr = conv_buf; + /* set up input buffer (concatenate to what was left previous time) */ + /* can't use strcpy, because possible null bytes from unicode */ + memcpy(conv_buf + conv_buf_size, str, len); + conv_buf_size += len; + insize = conv_buf_size; + /* set up output buffer (empty it) */ memset(int_buf, 0, sizeof(int_buf)); + /* do the conversion */ iconv(cd_to_internal, &rdptr, &insize, &wrptr, &outsize); + /* then shift what is left over to the head of the input buffer */ + memmove(conv_buf, rdptr, insize); + memset(conv_buf + insize, 0, sizeof(conv_buf) - insize); + conv_buf_size = insize; return int_buf; }