age.c \
compat.c \
buffer.c \
- write.c
+ write.c \
+ encoding_state.c
libgedcom_la_LDFLAGS = -export-dynamic -version-info $(LIBVERSION)
libgedcom_la_LIBADD = calendar/libcalendar.la @INTLLIBS@
BUILT_SOURCES = lex.gedcom_1byte_.c \
age.h \
compat.h \
buffer.h \
- tag_data.h
+ tag_data.h \
+ encoding_state.h
EXTRA_DIST = gedcom.y \
gedcom_date.y \
gedcom_1byte.lex \
#include "gedcom_internal.h"
#include "gedcom.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "hash.h"
#include "utf8tools.h"
#define GCONV_SEARCH_PATH "GCONV_PATH"
#define MAXBUF 255
-struct encoding_state read_encoding;
-
static hash_t *encodings = NULL;
const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
}
}
-void set_encoding_width(Encoding enc)
-{
- read_encoding.width = enc;
-}
-
-void set_encoding_bom(Enc_bom bom)
-{
- read_encoding.bom = bom;
-}
-
-void set_encoding_terminator(char* term)
-{
- strncpy(read_encoding.terminator, term, MAX_TERMINATOR_LEN);
-}
-
static convert_t to_int = NULL;
static char* error_value = "<error>";
if (to_int != NULL)
cleanup_utf8_conversion(to_int);
to_int = new_to_int;
- strncpy(read_encoding.charset, fromcode, MAX_CHARSET_LEN);
- read_encoding.encoding = encoding;
- gedcom_debug_print("Encoding state is now: ");
- gedcom_debug_print(" charset : %s", read_encoding.charset);
- gedcom_debug_print(" encoding : %s", read_encoding.encoding);
- gedcom_debug_print(" width : %d", read_encoding.width);
- gedcom_debug_print(" BOM : %d", read_encoding.bom);
- gedcom_debug_print(" terminator: 0x%02x 0x%02x",
- read_encoding.terminator[0],
- read_encoding.terminator[1]);
+ set_read_encoding(fromcode, encoding);
}
return (new_to_int != NULL);
#include "gedcom.h"
#include "utf8tools.h"
-#define MAX_CHARSET_LEN 32
-#define MAX_TERMINATOR_LEN 2
-
-struct encoding_state {
- char charset[MAX_CHARSET_LEN + 1];
- const char* encoding;
- Encoding width;
- Enc_bom bom;
- char terminator[MAX_TERMINATOR_LEN + 1];
-};
-
-struct encoding_state read_encoding;
-
void init_encodings();
char* get_encoding(const char* gedcom_n, Encoding enc);
void update_gconv_search_path();
int open_conv_to_internal(const char* fromcode);
void close_conv_to_internal();
char* to_internal(const char* str, size_t len, struct conv_buffer *output_buf);
-void set_encoding_width(Encoding enc);
-void set_encoding_bom(Enc_bom bom);
-void set_encoding_terminator(char* term);
#endif /* __ENCODING_H */
--- /dev/null
+/* Encoding state.
+ Copyright (C) 2001,2002 The Genes Development Team
+ This file is part of the Gedcom parser library.
+ Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
+
+ The Gedcom parser library is free software; you can redistribute it
+ and/or modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The Gedcom parser library is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the Gedcom parser library; if not, write to the
+ Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include "gedcom_internal.h"
+#include "gedcom.h"
+#include "encoding.h"
+#include "encoding_state.h"
+#include <string.h>
+
+/* Encoding state of the file being read; filled in by the
+   set_read_encoding* functions in this file. */
+struct encoding_state read_encoding;
+/* SYS_NEWLINE is defined in config.h */
+/* Encoding state used for writing.  The initializer is positional (charset,
+   encoding, width, bom, terminator), so it must follow the field order of
+   struct encoding_state. */
+struct encoding_state write_encoding =
+{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE };
+
+/* Where the write encoding comes from; init_write_encoding() copies the read
+   encoding only while this is ENC_FROM_FILE. */
+Enc_from write_encoding_from = ENC_FROM_FILE;
+/* Where the write line terminator comes from; init_write_terminator() copies
+   the read terminator only when this is ENC_FROM_FILE. */
+Enc_from write_terminator_from = ENC_FROM_SYS;
+
+/* Line terminator byte sequences, indexed by the Enc_line_end value passed to
+   gedcom_write_set_line_terminator().
+   NOTE(review): the per-entry labels assume the enumerator order of
+   Enc_line_end -- confirm against gedcom.h. */
+const char* terminator[] = {
+ /* END_CR */ "\x0D",
+ /* END_LF */ "\x0A",
+ /* END_CR_LF */ "\x0D\x0A",
+ /* END_LF_CR */ "\x0A\x0D"
+};
+
+/* Remember the character set and the conversion encoding of the file being
+   read, then dump the whole read-encoding state to the debug output.
+   At most MAX_CHARSET_LEN bytes of CHARSET are copied; ENCODING is kept by
+   pointer, not copied. */
+void set_read_encoding(const char* charset, const char* encoding)
+{
+  struct encoding_state* state = &read_encoding;
+
+  strncpy(state->charset, charset, MAX_CHARSET_LEN);
+  state->encoding = encoding;
+
+  gedcom_debug_print("Encoding state is now: ");
+  gedcom_debug_print(" charset : %s", state->charset);
+  gedcom_debug_print(" encoding : %s", state->encoding);
+  gedcom_debug_print(" width : %d", state->width);
+  gedcom_debug_print(" BOM : %d", state->bom);
+  gedcom_debug_print(" terminator: 0x%02x 0x%02x",
+                     state->terminator[0],
+                     state->terminator[1]);
+}
+
+/* Record the byte width detected for the file being read. */
+void set_read_encoding_width(Encoding enc)
+{
+  struct encoding_state* state = &read_encoding;
+
+  state->width = enc;
+}
+
+/* Record whether the file being read carries a byte order mark. */
+void set_read_encoding_bom(Enc_bom bom)
+{
+  struct encoding_state* state = &read_encoding;
+
+  state->bom = bom;
+}
+
+/* Record the line terminator found in the file being read.  At most
+   MAX_TERMINATOR_LEN bytes of TERM are copied. */
+void set_read_encoding_terminator(char* term)
+{
+  struct encoding_state* state = &read_encoding;
+
+  strncpy(state->terminator, term, MAX_TERMINATOR_LEN);
+}
+
+/* Choose the encoding used when writing.
+
+   FROM         ENC_FROM_SYS is rejected.  Any other value is recorded in
+                write_encoding_from; only ENC_MANUAL goes on to use the
+                remaining arguments.
+   NEW_CHARSET  character set name; "UNICODE" needs a two-byte WIDTH, every
+                other character set is always written one byte wide.
+   WIDTH, BOM   byte width and byte-order-mark setting to store.
+
+   Returns 0 on success and 1 on failure (ENC_FROM_SYS, one-byte UNICODE, or
+   get_encoding() returning no encoding for the request). */
+int gedcom_write_set_encoding(Enc_from from, const char* new_charset,
+                              Encoding width, Enc_bom bom)
+{
+  Encoding chosen_width = ONE_BYTE;
+  char* conv_name = NULL;
+
+  if (from == ENC_FROM_SYS)
+    return 1;
+
+  write_encoding_from = from;
+  if (from != ENC_MANUAL)
+    return 0;
+
+  if (strcmp(new_charset, "UNICODE") == 0) {
+    if (width == ONE_BYTE) {
+      gedcom_error(_("Unicode cannot be encoded into one byte"));
+      return 1;
+    }
+    /* Only UNICODE honours the requested width. */
+    chosen_width = width;
+  }
+
+  conv_name = get_encoding(new_charset, chosen_width);
+  if (conv_name == NULL)
+    return 1;
+
+  write_encoding.encoding = conv_name;
+  write_encoding.width = chosen_width;
+  write_encoding.bom = bom;
+  strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
+  return 0;
+}
+
+/* Take over the encoding of the file that was read as the write encoding.
+   Does nothing unless the write encoding source is ENC_FROM_FILE and a read
+   charset has been recorded. */
+void init_write_encoding()
+{
+  if (write_encoding_from != ENC_FROM_FILE)
+    return;
+  if (read_encoding.charset[0] == '\0')
+    return;
+
+  strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN);
+  write_encoding.encoding = read_encoding.encoding;
+  write_encoding.width = read_encoding.width;
+  write_encoding.bom = read_encoding.bom;
+}
+
+/* Choose the line terminator used when writing.
+
+   FROM  ENC_FROM_SYS selects SYS_NEWLINE; ENC_MANUAL selects the entry of
+         terminator[] indexed by END; for any other value only the source is
+         recorded and the stored terminator is left as it is.
+   END   line ending to use; only looked at for ENC_MANUAL.
+
+   Returns 0 on success.  Returns 1, without changing any state, when FROM
+   is ENC_MANUAL and END is not a valid index into terminator[]. */
+int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end)
+{
+  const char* new_term = NULL;
+
+  if (from == ENC_FROM_SYS) {
+    new_term = SYS_NEWLINE;
+  }
+  else if (from == ENC_MANUAL) {
+    /* END comes straight from the caller: refuse values outside the table
+       instead of reading past its end.  The cast also rejects negative
+       values. */
+    if ((size_t)end >= sizeof(terminator) / sizeof(terminator[0]))
+      return 1;
+    new_term = terminator[end];
+  }
+
+  /* Commit only once the request is known to be valid. */
+  write_terminator_from = from;
+  if (new_term)
+    strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN);
+  return 0;
+}
+
+/* Take over the line terminator of the file that was read as the write
+   terminator.  Does nothing unless the terminator source is ENC_FROM_FILE
+   and a read terminator has been recorded. */
+void init_write_terminator()
+{
+  if (write_terminator_from != ENC_FROM_FILE)
+    return;
+  if (read_encoding.terminator[0] == '\0')
+    return;
+
+  strncpy(write_encoding.terminator, read_encoding.terminator,
+          MAX_TERMINATOR_LEN);
+}
+
--- /dev/null
+/* Header file for encoding_state.c.
+ Copyright (C) 2001 The Genes Development Team
+ This file is part of the Gedcom parser library.
+ Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
+
+ The Gedcom parser library is free software; you can redistribute it
+ and/or modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The Gedcom parser library is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the Gedcom parser library; if not, write to the
+ Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* $Id$ */
+/* $Name$ */
+
+#ifndef __ENCODING_STATE_H
+#define __ENCODING_STATE_H
+
+#include "gedcom.h"
+
+#define MAX_CHARSET_LEN 32
+#define MAX_TERMINATOR_LEN 2
+
+/* Encoding parameters of a Gedcom file, kept once for reading and once for
+   writing.  Field order matters: write_encoding is set up with a positional
+   initializer. */
+struct encoding_state {
+ /* Character set name, e.g. "ASCII" or "UNICODE"; the extra byte leaves
+    room for the terminating NUL. */
+ char charset[MAX_CHARSET_LEN + 1];
+ /* Name of the conversion encoding; stored by pointer, not copied. */
+ const char* encoding;
+ /* Byte width: ONE_BYTE, TWO_BYTE_HILO or TWO_BYTE_LOHI. */
+ Encoding width;
+ /* WITH_BOM or WITHOUT_BOM. */
+ Enc_bom bom;
+ /* Line terminator bytes; the extra byte leaves room for the
+    terminating NUL. */
+ char terminator[MAX_TERMINATOR_LEN + 1];
+};
+
+/* Declarations only: both objects are defined in encoding_state.c.
+   Without `extern`, every file including this header would carry its own
+   tentative definition, which fails to link with "multiple definition"
+   errors when the compiler does not merge common symbols (-fno-common). */
+extern struct encoding_state read_encoding;
+extern struct encoding_state write_encoding;
+
+/* Setters for the encoding state of the file being read. */
+/* Stores charset and conversion encoding, then debug-prints the state. */
+void set_read_encoding(const char* charset, const char* encoding);
+void set_read_encoding_width(Encoding enc);
+void set_read_encoding_bom(Enc_bom bom);
+/* Copies at most MAX_TERMINATOR_LEN bytes of term. */
+void set_read_encoding_terminator(char* term);
+
+/* Copy the read encoding / read line terminator into the write state when
+   the corresponding source is ENC_FROM_FILE and a value has been read. */
+void init_write_encoding();
+void init_write_terminator();
+
+#endif /* __ENCODING_STATE_H */
#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "gedcom.h"
#include "gedcom.tabgen.h"
#include "compat.h"
{ CHECK_LINE_LEN; \
INIT_LINE_LEN; \
if (line_no == 1) \
- set_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \
+ set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \
BEGIN(INITIAL); \
}
#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "xref.h"
int line_no = 0;
if (enc == ONE_BYTE) {
lf = &gedcom_1byte_lex;
gedcom_1byte_myinit(f);
- set_encoding_width(enc);
+ set_read_encoding_width(enc);
return open_conv_to_internal("ASCII");
}
else if (enc == TWO_BYTE_HILO) {
lf = &gedcom_hilo_lex;
gedcom_hilo_myinit(f);
- set_encoding_width(enc);
+ set_read_encoding_width(enc);
return open_conv_to_internal("UNICODE");
}
else if (enc == TWO_BYTE_LOHI) {
lf = &gedcom_lohi_lex;
gedcom_lohi_myinit(f);
- set_encoding_width(enc);
+ set_read_encoding_width(enc);
return open_conv_to_internal("UNICODE");
}
else {
char first[2];
int read;
- set_encoding_bom(WITHOUT_BOM);
+ set_read_encoding_bom(WITHOUT_BOM);
read = fread(first, 1, 2, f);
if (read != 2) {
gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
}
else if ((first[0] == '\xFE') && (first[1] == '\xFF')) {
gedcom_debug_print("Two-byte encoding, high-low, with BOM");
- set_encoding_bom(WITH_BOM);
+ set_read_encoding_bom(WITH_BOM);
return TWO_BYTE_HILO;
}
else if ((first[0] == '0') && (first[1] == '\0')) {
}
else if ((first[0] == '\xFF') && (first[1] == '\xFE')) {
gedcom_debug_print("Two-byte encoding, low-high, with BOM");
- set_encoding_bom(WITH_BOM);
+ set_read_encoding_bom(WITH_BOM);
return TWO_BYTE_LOHI;
}
else if ((first[0] == '\xEF') && (first[1] == '\xBB')) {
rewind_file(f);
}
else if (first[0] == '\xBF') {
- set_encoding_bom(WITH_BOM);
+ set_read_encoding_bom(WITH_BOM);
gedcom_debug_print("UTF-8 encoding, with BOM");
}
else {
#include "gedcom_internal.h"
#include "gedcom.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "tag_data.h"
#include "buffer.h"
#include "utf8tools.h"
#define MAXWRITELEN MAXGEDCLINELEN
-/* SYS_NEWLINE is defined in config.h */
-struct encoding_state write_encoding =
-{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE };
-Enc_from write_encoding_from = ENC_FROM_FILE;
-Enc_from write_terminator_from = ENC_FROM_SYS;
-
struct Gedcom_write_struct {
int filedesc;
convert_t conv;
int ctxt_level;
};
-const char* default_encoding[] = {
- /* ONE_BYTE */ "ASCII",
- /* TWO_BYTE_HILO */ "UCS-2BE",
- /* TWO_BYTE_LOHI */ "UCS-2LE"
-};
-
-const char* terminator[] = {
- /* END_CR */ "\x0D",
- /* END_LF */ "\x0A",
- /* END_CR_LF */ "\x0D\x0A",
- /* END_LF_CR */ "\x0A\x0D"
-};
-
void cleanup_write_buffer();
struct safe_buffer write_buffer = { NULL, 0, NULL, 0, cleanup_write_buffer };
return 0;
}
-int gedcom_write_set_encoding(Enc_from from, const char* new_charset,
- Encoding width, Enc_bom bom)
-{
- char* new_encoding = NULL;
- if (from == ENC_FROM_SYS) {
- return 1;
- }
- write_encoding_from = from;
- if (from == ENC_MANUAL) {
- if (!strcmp(new_charset, "UNICODE")) {
- if (width == ONE_BYTE) {
- gedcom_error(_("Unicode cannot be encoded into one byte"));
- return 1;
- }
- else {
- new_encoding = get_encoding(new_charset, width);
- if (new_encoding) {
- write_encoding.encoding = new_encoding;
- write_encoding.width = width;
- write_encoding.bom = bom;
- strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
- }
- else
- return 1;
- }
- }
- else {
- new_encoding = get_encoding(new_charset, ONE_BYTE);
- if (new_encoding) {
- write_encoding.encoding = new_encoding;
- write_encoding.width = ONE_BYTE;
- write_encoding.bom = bom;
- strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
- }
- else
- return 1;
- }
- }
- return 0;
-}
-
-void copy_write_encoding_from_file()
-{
- if (read_encoding.charset[0] != '\0') {
- strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN);
- write_encoding.encoding = read_encoding.encoding;
- write_encoding.width = read_encoding.width;
- write_encoding.bom = read_encoding.bom;
- }
-}
-
-int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end)
-{
- const char* new_term = NULL;
- write_terminator_from = from;
- if (from == ENC_FROM_SYS) {
- new_term = SYS_NEWLINE;
- }
- else if (from == ENC_MANUAL) {
- new_term = terminator[end];
- }
- if (new_term)
- strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN);
- return 0;
-}
-
-void copy_write_terminator_from_file()
-{
- if (read_encoding.terminator[0] != '\0') {
- strncpy(write_encoding.terminator, read_encoding.terminator,
- MAX_TERMINATOR_LEN);
- }
-}
-
Gedcom_write_hndl gedcom_write_open(const char *filename)
{
Gedcom_write_hndl hndl;
if (!hndl)
MEMORY_ERROR;
else {
- if (write_encoding_from == ENC_FROM_FILE)
- copy_write_encoding_from_file();
- if (write_terminator_from == ENC_FROM_FILE)
- copy_write_terminator_from_file();
+ init_write_encoding();
+ init_write_terminator();
hndl->total_conv_fails = 0;
hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0);
if (!hndl->conv) {