2 Copyright (C) 2001,2002 The Genes Development Team
3 This file is part of the Gedcom parser library.
4 Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
6 The Gedcom parser library is free software; you can redistribute it
7 and/or modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The Gedcom parser library is distributed in the hope that it will be
12 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the Gedcom parser library; if not, write to the
18 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
21 #include "gedcom_internal.h"
24 #include "encoding_state.h"
/* Encoding state recorded for the file being read; it is filled in by the
   set_read_encoding*() functions in this file. */
27 struct encoding_state read_encoding;
28 /* SYS_NEWLINE is defined in config.h */
/* Encoding state used when writing.  The initializer values are presumably
   given in the order charset, encoding, width, BOM, terminator -- confirm
   against the struct declaration in encoding_state.h. */
29 struct encoding_state write_encoding =
30 { "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE };
/* Selects where the write encoding comes from.  With ENC_FROM_FILE,
   init_write_encoding() copies the read-side state. */
32 Enc_from write_encoding_from = ENC_FROM_FILE;
/* Selects where the write line terminator comes from.  With ENC_FROM_SYS,
   gedcom_write_set_line_terminator() uses SYS_NEWLINE. */
33 Enc_from write_terminator_from = ENC_FROM_SYS;
/* Line terminator byte sequences, indexed by an Enc_line_end value.
   NOTE(review): only the two-byte entries are visible in this view; the
   leading entries (presumably END_CR and END_LF) and the closing brace are
   not shown here -- confirm against the full file. */
35 const char* terminator[] = {
38 /* END_CR_LF */ "\x0D\x0A",
39 /* END_LF_CR */ "\x0A\x0D"
42 void set_read_encoding(const char* charset, const char* encoding)
44 strncpy(read_encoding.charset, charset, MAX_CHARSET_LEN);
45 read_encoding.encoding = encoding;
46 gedcom_debug_print("Encoding state is now: ");
47 gedcom_debug_print(" charset : %s", read_encoding.charset);
48 gedcom_debug_print(" encoding : %s", read_encoding.encoding);
49 gedcom_debug_print(" width : %d", read_encoding.width);
50 gedcom_debug_print(" BOM : %d", read_encoding.bom);
51 gedcom_debug_print(" terminator: 0x%02x 0x%02x",
52 read_encoding.terminator[0],
53 read_encoding.terminator[1]);
56 void set_read_encoding_width(Encoding enc)
58 read_encoding.width = enc;
61 void set_read_encoding_bom(Enc_bom bom)
63 read_encoding.bom = bom;
66 void set_read_encoding_terminator(char* term)
68 strncpy(read_encoding.terminator, term, MAX_TERMINATOR_LEN);
71 /** Allows to change the encoding for writing files. It should be called
72 \em before calling gedcom_write_open(), i.e. it affects all files that are
73 opened after it is being called.
75 Valid values for the character set are given in
76 the first column in the file \c gedcom.enc in the data directory of
77 gedcom-parse (\c $PREFIX/share/gedcom-parse). The character sets UNICODE,
78 ASCII and ANSEL are always supported (these are standard for GEDCOM), as
79 well as ANSI (not standard), but there may be others.
81 Note that you still need to pass the correct charset value for the
82 \c HEAD.CHAR tag, otherwise you will get a warning and the value will
83 be forced to the correct value.
85 \param from Indicates how you want the encoding to be set. When
86 ENC_FROM_FILE is selected, the other parameters in the function are ignored
87 (they can be passed as 0). ENC_FROM_SYS is not a valid value here.
88 The default setting is ENC_FROM_FILE.
89 \param charset The character set to be used.
90 \param width The width and endianness of the character set. You can
91 pass 0 for non-UNICODE encodings.
92 \param bom Determines whether a byte-order-mark should be written in
93 the file in case of UNICODE encoding (usually preferred because it then
94 clearly indicates the byte ordering). You can pass 0 for non-UNICODE
95 encodings, but the byte-order-mark can also be used for UTF-8.
97 \retval 0 in case of success
98 \retval >0 in case of error
100 int gedcom_write_set_encoding(Enc_from from, const char* charset,
101 Encoding width, Enc_bom bom)
103 const char* new_charset = charset;
104 char* new_encoding = NULL;
105 if (from == ENC_FROM_SYS) {
108 write_encoding_from = from;
109 if (from == ENC_MANUAL) {
110 if (!strcmp(new_charset, "UNICODE")) {
111 if (width == ONE_BYTE) {
112 gedcom_error(_("Unicode cannot be encoded into one byte"));
116 new_encoding = get_encoding(new_charset, width);
118 write_encoding.encoding = new_encoding;
119 write_encoding.width = width;
120 write_encoding.bom = bom;
121 strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
128 new_encoding = get_encoding(new_charset, ONE_BYTE);
130 write_encoding.encoding = new_encoding;
131 write_encoding.width = ONE_BYTE;
132 write_encoding.bom = bom;
133 strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
142 void init_write_encoding()
144 if (write_encoding_from == ENC_FROM_FILE
145 && read_encoding.charset[0] != '\0') {
146 strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN);
147 write_encoding.encoding = read_encoding.encoding;
148 write_encoding.width = read_encoding.width;
149 write_encoding.bom = read_encoding.bom;
153 /** Allows to change the line terminator to use on writing. It should be
155 \em before calling gedcom_write_open(), i.e. it affects all files that are
156 opened after it is being called.
158 By default, the line terminator is set to the appropriate line terminator
159 on the current platform, so it only needs to be changed if there is some
160 special reason for it.
162 \param from Indicates how you want the encoding to be set. When
163 ENC_FROM_FILE or ENC_FROM_SYS is selected, the other parameter in the
164 function is ignored (and can be passed as 0).
165 The default setting is ENC_FROM_SYS.
166 \param end The wanted line terminator.
169 \retval >0 if failure
171 int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end)
173 const char* new_term = NULL;
174 write_terminator_from = from;
175 if (from == ENC_FROM_SYS) {
176 new_term = SYS_NEWLINE;
178 else if (from == ENC_MANUAL) {
179 new_term = terminator[end];
182 strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN);
186 void init_write_terminator()
188 if (write_terminator_from == ENC_FROM_FILE
189 && read_encoding.terminator[0] != '\0') {
190 strncpy(write_encoding.terminator, read_encoding.terminator,