From 6de2cd5b195e6d1493dea896aafb1b7033835517 Mon Sep 17 00:00:00 2001 From: Peter Verthez Date: Sat, 25 Jan 2003 16:08:58 +0000 Subject: [PATCH] Moved encoding state to separate source file. --- gedcom/Makefile.am | 6 +- gedcom/encoding.c | 29 +------- gedcom/encoding.h | 16 ---- gedcom/encoding_state.c | 146 +++++++++++++++++++++++++++++++++++++ gedcom/encoding_state.h | 51 +++++++++++++ gedcom/gedcom_lex_common.c | 3 +- gedcom/multilex.c | 15 ++-- gedcom/write.c | 100 +------------------------ 8 files changed, 216 insertions(+), 150 deletions(-) create mode 100644 gedcom/encoding_state.c create mode 100644 gedcom/encoding_state.h diff --git a/gedcom/Makefile.am b/gedcom/Makefile.am index 9cfc216..abbfe56 100644 --- a/gedcom/Makefile.am +++ b/gedcom/Makefile.am @@ -29,7 +29,8 @@ libgedcom_la_SOURCES = lex.gedcom_1byte_.c \ age.c \ compat.c \ buffer.c \ - write.c + write.c \ + encoding_state.c libgedcom_la_LDFLAGS = -export-dynamic -version-info $(LIBVERSION) libgedcom_la_LIBADD = calendar/libcalendar.la @INTLLIBS@ BUILT_SOURCES = lex.gedcom_1byte_.c \ @@ -53,7 +54,8 @@ noinst_HEADERS = encoding.h \ age.h \ compat.h \ buffer.h \ - tag_data.h + tag_data.h \ + encoding_state.h EXTRA_DIST = gedcom.y \ gedcom_date.y \ gedcom_1byte.lex \ diff --git a/gedcom/encoding.c b/gedcom/encoding.c index 4828c0a..1cae728 100644 --- a/gedcom/encoding.c +++ b/gedcom/encoding.c @@ -28,6 +28,7 @@ #include "gedcom_internal.h" #include "gedcom.h" #include "encoding.h" +#include "encoding_state.h" #include "hash.h" #include "utf8tools.h" @@ -35,8 +36,6 @@ #define GCONV_SEARCH_PATH "GCONV_PATH" #define MAXBUF 255 -struct encoding_state read_encoding; - static hash_t *encodings = NULL; const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" }; @@ -245,21 +244,6 @@ void init_encodings() } } -void set_encoding_width(Encoding enc) -{ - read_encoding.width = enc; -} - -void set_encoding_bom(Enc_bom bom) -{ - read_encoding.bom = bom; -} - -void set_encoding_terminator(char* term) -{ - 
strncpy(read_encoding.terminator, term, MAX_TERMINATOR_LEN); -} - static convert_t to_int = NULL; static char* error_value = ""; @@ -280,16 +264,7 @@ int open_conv_to_internal(const char* fromcode) if (to_int != NULL) cleanup_utf8_conversion(to_int); to_int = new_to_int; - strncpy(read_encoding.charset, fromcode, MAX_CHARSET_LEN); - read_encoding.encoding = encoding; - gedcom_debug_print("Encoding state is now: "); - gedcom_debug_print(" charset : %s", read_encoding.charset); - gedcom_debug_print(" encoding : %s", read_encoding.encoding); - gedcom_debug_print(" width : %d", read_encoding.width); - gedcom_debug_print(" BOM : %d", read_encoding.bom); - gedcom_debug_print(" terminator: 0x%02x 0x%02x", - read_encoding.terminator[0], - read_encoding.terminator[1]); + set_read_encoding(fromcode, encoding); } return (new_to_int != NULL); diff --git a/gedcom/encoding.h b/gedcom/encoding.h index 93f6986..77459c3 100644 --- a/gedcom/encoding.h +++ b/gedcom/encoding.h @@ -28,19 +28,6 @@ #include "gedcom.h" #include "utf8tools.h" -#define MAX_CHARSET_LEN 32 -#define MAX_TERMINATOR_LEN 2 - -struct encoding_state { - char charset[MAX_CHARSET_LEN + 1]; - const char* encoding; - Encoding width; - Enc_bom bom; - char terminator[MAX_TERMINATOR_LEN + 1]; -}; - -struct encoding_state read_encoding; - void init_encodings(); char* get_encoding(const char* gedcom_n, Encoding enc); void update_gconv_search_path(); @@ -48,8 +35,5 @@ void update_gconv_search_path(); int open_conv_to_internal(const char* fromcode); void close_conv_to_internal(); char* to_internal(const char* str, size_t len, struct conv_buffer *output_buf); -void set_encoding_width(Encoding enc); -void set_encoding_bom(Enc_bom bom); -void set_encoding_terminator(char* term); #endif /* __ENCODING_H */ diff --git a/gedcom/encoding_state.c b/gedcom/encoding_state.c new file mode 100644 index 0000000..5fce25e --- /dev/null +++ b/gedcom/encoding_state.c @@ -0,0 +1,146 @@ +/* Encoding state. 
+ Copyright (C) 2001,2002 The Genes Development Team + This file is part of the Gedcom parser library. + Contributed by Peter Verthez , 2001. + + The Gedcom parser library is free software; you can redistribute it + and/or modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The Gedcom parser library is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the Gedcom parser library; if not, write to the + Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include "gedcom_internal.h" +#include "gedcom.h" +#include "encoding.h" +#include "encoding_state.h" +#include <string.h> + +struct encoding_state read_encoding; +/* SYS_NEWLINE is defined in config.h */ +struct encoding_state write_encoding = +{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE }; + +Enc_from write_encoding_from = ENC_FROM_FILE; +Enc_from write_terminator_from = ENC_FROM_SYS; + +const char* terminator[] = { + /* END_CR */ "\x0D", + /* END_LF */ "\x0A", + /* END_CR_LF */ "\x0D\x0A", + /* END_LF_CR */ "\x0A\x0D" +}; + +void set_read_encoding(const char* charset, const char* encoding) +{ + strncpy(read_encoding.charset, charset, MAX_CHARSET_LEN); + read_encoding.encoding = encoding; + gedcom_debug_print("Encoding state is now: "); + gedcom_debug_print(" charset : %s", read_encoding.charset); + gedcom_debug_print(" encoding : %s", read_encoding.encoding); + gedcom_debug_print(" width : %d", read_encoding.width); + gedcom_debug_print(" BOM : %d", read_encoding.bom); + gedcom_debug_print(" terminator: 0x%02x 0x%02x", + read_encoding.terminator[0], +
read_encoding.terminator[1]); +} + +void set_read_encoding_width(Encoding enc) +{ + read_encoding.width = enc; +} + +void set_read_encoding_bom(Enc_bom bom) +{ + read_encoding.bom = bom; +} + +void set_read_encoding_terminator(char* term) +{ + strncpy(read_encoding.terminator, term, MAX_TERMINATOR_LEN); +} + +int gedcom_write_set_encoding(Enc_from from, const char* new_charset, + Encoding width, Enc_bom bom) +{ + char* new_encoding = NULL; + if (from == ENC_FROM_SYS) { + return 1; + } + write_encoding_from = from; + if (from == ENC_MANUAL) { + if (!strcmp(new_charset, "UNICODE")) { + if (width == ONE_BYTE) { + gedcom_error(_("Unicode cannot be encoded into one byte")); + return 1; + } + else { + new_encoding = get_encoding(new_charset, width); + if (new_encoding) { + write_encoding.encoding = new_encoding; + write_encoding.width = width; + write_encoding.bom = bom; + strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN); + } + else + return 1; + } + } + else { + new_encoding = get_encoding(new_charset, ONE_BYTE); + if (new_encoding) { + write_encoding.encoding = new_encoding; + write_encoding.width = ONE_BYTE; + write_encoding.bom = bom; + strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN); + } + else + return 1; + } + } + return 0; +} + +void init_write_encoding() +{ + if (write_encoding_from == ENC_FROM_FILE + && read_encoding.charset[0] != '\0') { + strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN); + write_encoding.encoding = read_encoding.encoding; + write_encoding.width = read_encoding.width; + write_encoding.bom = read_encoding.bom; + } +} + +int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end) +{ + const char* new_term = NULL; + write_terminator_from = from; + if (from == ENC_FROM_SYS) { + new_term = SYS_NEWLINE; + } + else if (from == ENC_MANUAL) { + new_term = terminator[end]; + } + if (new_term) + strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN); + return 0; +} + +void 
init_write_terminator() +{ + if (write_terminator_from == ENC_FROM_FILE + && read_encoding.terminator[0] != '\0') { + strncpy(write_encoding.terminator, read_encoding.terminator, + MAX_TERMINATOR_LEN); + } +} + diff --git a/gedcom/encoding_state.h b/gedcom/encoding_state.h new file mode 100644 index 0000000..caf363c --- /dev/null +++ b/gedcom/encoding_state.h @@ -0,0 +1,51 @@ +/* Header file for encoding_state.c. + Copyright (C) 2001 The Genes Development Team + This file is part of the Gedcom parser library. + Contributed by Peter Verthez , 2001. + + The Gedcom parser library is free software; you can redistribute it + and/or modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The Gedcom parser library is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the Gedcom parser library; if not, write to the + Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA.
*/ + +/* $Id$ */ +/* $Name$ */ + +#ifndef __ENCODING_STATE_H +#define __ENCODING_STATE_H + +#include "gedcom.h" + +#define MAX_CHARSET_LEN 32 +#define MAX_TERMINATOR_LEN 2 + +struct encoding_state { + char charset[MAX_CHARSET_LEN + 1]; + const char* encoding; + Encoding width; + Enc_bom bom; + char terminator[MAX_TERMINATOR_LEN + 1]; +}; + +extern struct encoding_state read_encoding; /* defined in encoding_state.c */ +extern struct encoding_state write_encoding; /* defined in encoding_state.c */ + +void set_read_encoding(const char* charset, const char* encoding); +void set_read_encoding_width(Encoding enc); +void set_read_encoding_bom(Enc_bom bom); +void set_read_encoding_terminator(char* term); + +void init_write_encoding(); +void init_write_terminator(); + +#endif /* __ENCODING_STATE_H */ diff --git a/gedcom/gedcom_lex_common.c b/gedcom/gedcom_lex_common.c index 703a9de..68db840 100644 --- a/gedcom/gedcom_lex_common.c +++ b/gedcom/gedcom_lex_common.c @@ -26,6 +26,7 @@ #include "gedcom_internal.h" #include "multilex.h" #include "encoding.h" +#include "encoding_state.h" #include "gedcom.h" #include "gedcom.tabgen.h" #include "compat.h" @@ -337,7 +338,7 @@ static int dummy_conv = 0; { CHECK_LINE_LEN; \ INIT_LINE_LEN; \ if (line_no == 1) \ - set_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \ + set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \ BEGIN(INITIAL); \ } diff --git a/gedcom/multilex.c b/gedcom/multilex.c index 721702e..e412817 100644 --- a/gedcom/multilex.c +++ b/gedcom/multilex.c @@ -24,6 +24,7 @@ #include "gedcom_internal.h" #include "multilex.h" #include "encoding.h" +#include "encoding_state.h" #include "xref.h" int line_no = 0; @@ -38,19 +39,19 @@ int lexer_init(Encoding enc, FILE* f) if (enc == ONE_BYTE) { lf = &gedcom_1byte_lex; gedcom_1byte_myinit(f); - set_encoding_width(enc); + set_read_encoding_width(enc); return
open_conv_to_internal("UNICODE"); } else if (enc == TWO_BYTE_LOHI) { lf = &gedcom_lohi_lex; gedcom_lohi_myinit(f); - set_encoding_width(enc); + set_read_encoding_width(enc); return open_conv_to_internal("UNICODE"); } else { @@ -79,7 +80,7 @@ int determine_encoding(FILE* f) char first[2]; int read; - set_encoding_bom(WITHOUT_BOM); + set_read_encoding_bom(WITHOUT_BOM); read = fread(first, 1, 2, f); if (read != 2) { gedcom_warning(_("Error reading from input file: %s"), strerror(errno)); @@ -98,7 +99,7 @@ int determine_encoding(FILE* f) } else if ((first[0] == '\xFE') && (first[1] == '\xFF')) { gedcom_debug_print("Two-byte encoding, high-low, with BOM"); - set_encoding_bom(WITH_BOM); + set_read_encoding_bom(WITH_BOM); return TWO_BYTE_HILO; } else if ((first[0] == '0') && (first[1] == '\0')) { @@ -108,7 +109,7 @@ int determine_encoding(FILE* f) } else if ((first[0] == '\xFF') && (first[1] == '\xFE')) { gedcom_debug_print("Two-byte encoding, low-high, with BOM"); - set_encoding_bom(WITH_BOM); + set_read_encoding_bom(WITH_BOM); return TWO_BYTE_LOHI; } else if ((first[0] == '\xEF') && (first[1] == '\xBB')) { @@ -118,7 +119,7 @@ int determine_encoding(FILE* f) rewind_file(f); } else if (first[0] == '\xBF') { - set_encoding_bom(WITH_BOM); + set_read_encoding_bom(WITH_BOM); gedcom_debug_print("UTF-8 encoding, with BOM"); } else { diff --git a/gedcom/write.c b/gedcom/write.c index 7554b56..f880bbb 100644 --- a/gedcom/write.c +++ b/gedcom/write.c @@ -24,6 +24,7 @@ #include "gedcom_internal.h" #include "gedcom.h" #include "encoding.h" +#include "encoding_state.h" #include "tag_data.h" #include "buffer.h" #include "utf8tools.h" @@ -34,12 +35,6 @@ #define MAXWRITELEN MAXGEDCLINELEN -/* SYS_NEWLINE is defined in config.h */ -struct encoding_state write_encoding = -{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE }; -Enc_from write_encoding_from = ENC_FROM_FILE; -Enc_from write_terminator_from = ENC_FROM_SYS; - struct Gedcom_write_struct { int filedesc; convert_t conv; @@ 
-49,19 +44,6 @@ struct Gedcom_write_struct { int ctxt_level; }; -const char* default_encoding[] = { - /* ONE_BYTE */ "ASCII", - /* TWO_BYTE_HILO */ "UCS-2BE", - /* TWO_BYTE_LOHI */ "UCS-2LE" -}; - -const char* terminator[] = { - /* END_CR */ "\x0D", - /* END_LF */ "\x0A", - /* END_CR_LF */ "\x0D\x0A", - /* END_LF_CR */ "\x0A\x0D" -}; - void cleanup_write_buffer(); struct safe_buffer write_buffer = { NULL, 0, NULL, 0, cleanup_write_buffer }; @@ -201,80 +183,6 @@ int write_long(Gedcom_write_hndl hndl, int elt_or_rec, return 0; } -int gedcom_write_set_encoding(Enc_from from, const char* new_charset, - Encoding width, Enc_bom bom) -{ - char* new_encoding = NULL; - if (from == ENC_FROM_SYS) { - return 1; - } - write_encoding_from = from; - if (from == ENC_MANUAL) { - if (!strcmp(new_charset, "UNICODE")) { - if (width == ONE_BYTE) { - gedcom_error(_("Unicode cannot be encoded into one byte")); - return 1; - } - else { - new_encoding = get_encoding(new_charset, width); - if (new_encoding) { - write_encoding.encoding = new_encoding; - write_encoding.width = width; - write_encoding.bom = bom; - strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN); - } - else - return 1; - } - } - else { - new_encoding = get_encoding(new_charset, ONE_BYTE); - if (new_encoding) { - write_encoding.encoding = new_encoding; - write_encoding.width = ONE_BYTE; - write_encoding.bom = bom; - strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN); - } - else - return 1; - } - } - return 0; -} - -void copy_write_encoding_from_file() -{ - if (read_encoding.charset[0] != '\0') { - strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN); - write_encoding.encoding = read_encoding.encoding; - write_encoding.width = read_encoding.width; - write_encoding.bom = read_encoding.bom; - } -} - -int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end) -{ - const char* new_term = NULL; - write_terminator_from = from; - if (from == ENC_FROM_SYS) { - new_term = SYS_NEWLINE; 
- } - else if (from == ENC_MANUAL) { - new_term = terminator[end]; - } - if (new_term) - strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN); - return 0; -} - -void copy_write_terminator_from_file() -{ - if (read_encoding.terminator[0] != '\0') { - strncpy(write_encoding.terminator, read_encoding.terminator, - MAX_TERMINATOR_LEN); - } -} - Gedcom_write_hndl gedcom_write_open(const char *filename) { Gedcom_write_hndl hndl; @@ -284,10 +192,8 @@ Gedcom_write_hndl gedcom_write_open(const char *filename) if (!hndl) MEMORY_ERROR; else { - if (write_encoding_from == ENC_FROM_FILE) - copy_write_encoding_from_file(); - if (write_terminator_from == ENC_FROM_FILE) - copy_write_terminator_from_file(); + init_write_encoding(); + init_write_terminator(); hndl->total_conv_fails = 0; hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0); if (!hndl->conv) { -- 2.30.2