From: Peter Verthez
Date: Sat, 24 Nov 2001 22:27:12 +0000 (+0000)
Subject: Better interface for selection of encoding.
X-Git-Url: https://git.dlugolecki.net.pl/?a=commitdiff_plain;h=2c487bf2ff728c4015b46788d11ac9eb2f41af54;p=gedcom-parse.git

Better interface for selection of encoding.
---

diff --git a/encoding.h b/encoding.h
index 287e478..12473d2 100644
--- a/encoding.h
+++ b/encoding.h
@@ -1,5 +1,17 @@
+/* Basic file encoding */
+#ifndef __ENCODING_H
+#define __ENCODING_H
+
+typedef enum _ENC {
+ ONE_BYTE = 0,
+ TWO_BYTE_HILO = 1,
+ TWO_BYTE_LOHI = 2
+} ENCODING;
+
 int open_conv_to_internal(char* fromcode);
 void close_conv_to_internal();
 char* to_internal(char* str, size_t len);
 void init_encodings();
-char* get_encoding(char* gedcom_name);
+void set_encoding_width(ENCODING enc);
+
+#endif /* __ENCODING_H */
diff --git a/gedcom.enc b/gedcom.enc
index 8c2a56b..b47dc56 100644
--- a/gedcom.enc
+++ b/gedcom.enc
@@ -1,8 +1,11 @@
 # Mapping of charsets for gedcom parsing
-# Each line contains the gedcom name, appended with (LOHI) or (HILO)
-# for 2 byte encodings, and the iconv name of the charset, separated
-# by whitespace
-UNICODE(LOHI) UTF16LE
-UNICODE(HILO) UTF16BE
-ASCII ASCII
-ANSI CP1252
+# Each line contains (separated by whitespace):
+# - the gedcom name
+# - a token identifying the width of characters and the ordering;
+# currently supported values: 1, 2_LOHI, 2_HILO
+# - the iconv name of the charset
+UNICODE 2_LOHI UTF16LE
+UNICODE 2_HILO UTF16BE
+ASCII 1 ASCII
+ANSI 1 CP1252
+ANSEL 1 ANSEL