Moved encoding state to separate source file.
authorPeter Verthez <Peter.Verthez@advalvas.be>
Sat, 25 Jan 2003 16:08:58 +0000 (16:08 +0000)
committerPeter Verthez <Peter.Verthez@advalvas.be>
Sat, 25 Jan 2003 16:08:58 +0000 (16:08 +0000)
gedcom/Makefile.am
gedcom/encoding.c
gedcom/encoding.h
gedcom/encoding_state.c [new file with mode: 0644]
gedcom/encoding_state.h [new file with mode: 0644]
gedcom/gedcom_lex_common.c
gedcom/multilex.c
gedcom/write.c

index 9cfc216cc2a62001adb8cd4be7d4a2a1f724d7f3..abbfe56d72ea9de8b76a91fbe74b2a5f3fa341a1 100644 (file)
@@ -29,7 +29,8 @@ libgedcom_la_SOURCES = lex.gedcom_1byte_.c \
                       age.c \
                       compat.c \
                       buffer.c \
-                      write.c
+                      write.c \
+                      encoding_state.c
 libgedcom_la_LDFLAGS = -export-dynamic -version-info $(LIBVERSION)
 libgedcom_la_LIBADD  = calendar/libcalendar.la @INTLLIBS@
 BUILT_SOURCES = lex.gedcom_1byte_.c \
@@ -53,7 +54,8 @@ noinst_HEADERS = encoding.h \
                 age.h \
                 compat.h \
                 buffer.h \
-                tag_data.h
+                tag_data.h \
+                encoding_state.h
 EXTRA_DIST = gedcom.y \
             gedcom_date.y \
             gedcom_1byte.lex \
index 4828c0a6a7d3c7bc9e9ef89537b6f17b1b39efb7..1cae728808f5c8dfd9f9cf6556f4c575a20e78dd 100644 (file)
@@ -28,6 +28,7 @@
 #include "gedcom_internal.h"
 #include "gedcom.h"
 #include "encoding.h"
+#include "encoding_state.h"
 #include "hash.h"
 #include "utf8tools.h"
 
@@ -35,8 +36,6 @@
 #define GCONV_SEARCH_PATH "GCONV_PATH"
 #define MAXBUF 255
 
-struct encoding_state read_encoding;
-
 static hash_t *encodings = NULL;
 
 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
@@ -245,21 +244,6 @@ void init_encodings()
   }
 }
 
-void set_encoding_width(Encoding enc)
-{
-  read_encoding.width = enc;
-}
-
-void set_encoding_bom(Enc_bom bom)
-{
-  read_encoding.bom = bom;
-}
-
-void set_encoding_terminator(char* term)
-{
-  strncpy(read_encoding.terminator, term, MAX_TERMINATOR_LEN);
-}
-
 static convert_t to_int = NULL;
 static char* error_value = "<error>";
 
@@ -280,16 +264,7 @@ int open_conv_to_internal(const char* fromcode)
     if (to_int != NULL)
       cleanup_utf8_conversion(to_int);
     to_int = new_to_int;
-    strncpy(read_encoding.charset, fromcode, MAX_CHARSET_LEN);
-    read_encoding.encoding = encoding;
-    gedcom_debug_print("Encoding state is now: ");
-    gedcom_debug_print("  charset   : %s", read_encoding.charset);
-    gedcom_debug_print("  encoding  : %s", read_encoding.encoding);
-    gedcom_debug_print("  width     : %d", read_encoding.width);
-    gedcom_debug_print("  BOM       : %d", read_encoding.bom);
-    gedcom_debug_print("  terminator: 0x%02x 0x%02x",
-                      read_encoding.terminator[0],
-                      read_encoding.terminator[1]);
+    set_read_encoding(fromcode, encoding);
   }
 
   return (new_to_int != NULL);
index 93f69863562cedba392b94916e5673b0af810e37..77459c3167aea0464e5489e62f7e19f8e290dfed 100644 (file)
 #include "gedcom.h"
 #include "utf8tools.h"
 
-#define MAX_CHARSET_LEN 32
-#define MAX_TERMINATOR_LEN 2
-
-struct encoding_state {
-  char         charset[MAX_CHARSET_LEN + 1];
-  const char*  encoding;
-  Encoding     width;
-  Enc_bom      bom;
-  char         terminator[MAX_TERMINATOR_LEN + 1];
-};
-
-struct encoding_state read_encoding;
-
 void init_encodings();
 char* get_encoding(const char* gedcom_n, Encoding enc);
 void update_gconv_search_path();
@@ -48,8 +35,5 @@ void update_gconv_search_path();
 int open_conv_to_internal(const char* fromcode);
 void close_conv_to_internal();
 char* to_internal(const char* str, size_t len, struct conv_buffer *output_buf);
-void set_encoding_width(Encoding enc);
-void set_encoding_bom(Enc_bom bom);
-void set_encoding_terminator(char* term);
 
 #endif /* __ENCODING_H */
diff --git a/gedcom/encoding_state.c b/gedcom/encoding_state.c
new file mode 100644 (file)
index 0000000..5fce25e
--- /dev/null
@@ -0,0 +1,146 @@
+/* Encoding state.
+   Copyright (C) 2001,2002 The Genes Development Team
+   This file is part of the Gedcom parser library.
+   Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
+
+   The Gedcom parser library is free software; you can redistribute it
+   and/or modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The Gedcom parser library is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the Gedcom parser library; if not, write to the
+   Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "gedcom_internal.h"
+#include "gedcom.h"
+#include "encoding.h"
+#include "encoding_state.h"
+#include <string.h>
+
+/* Encoding state of the input; filled in by the set_read_encoding*()
+   functions below. */
+struct encoding_state read_encoding;
+/* SYS_NEWLINE is defined in config.h */
+/* Encoding state used for writing.  The initializer is positional and
+   follows the field order of struct encoding_state: charset, encoding,
+   width, bom, terminator. */
+struct encoding_state write_encoding =
+{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE };
+
+/* Where the write encoding resp. the write line terminator come from.
+   With ENC_FROM_FILE the values are copied from read_encoding by
+   init_write_encoding() / init_write_terminator(). */
+Enc_from write_encoding_from   = ENC_FROM_FILE;
+Enc_from write_terminator_from = ENC_FROM_SYS;
+
+/* Line terminator byte sequences, indexed by an Enc_line_end value in
+   gedcom_write_set_line_terminator().
+   NOTE(review): assumes the Enc_line_end enumerators have the values 0..3
+   in the order given in the comments below - confirm against gedcom.h. */
+const char* terminator[] = {
+  /* END_CR */     "\x0D",
+  /* END_LF */     "\x0A",
+  /* END_CR_LF */  "\x0D\x0A",
+  /* END_LF_CR */  "\x0A\x0D"
+};
+
+/* Store the charset name and the matching encoding name in the read
+   encoding state, then dump the complete state via gedcom_debug_print().
+   At most MAX_CHARSET_LEN bytes of 'charset' are copied; the 'encoding'
+   pointer is stored as is, not copied. */
+void set_read_encoding(const char* charset, const char* encoding)
+{
+  struct encoding_state* state = &read_encoding;
+
+  state->encoding = encoding;
+  strncpy(state->charset, charset, MAX_CHARSET_LEN);
+
+  gedcom_debug_print("Encoding state is now: ");
+  gedcom_debug_print("  charset   : %s", state->charset);
+  gedcom_debug_print("  encoding  : %s", state->encoding);
+  gedcom_debug_print("  width     : %d", state->width);
+  gedcom_debug_print("  BOM       : %d", state->bom);
+  gedcom_debug_print("  terminator: 0x%02x 0x%02x",
+                     state->terminator[0], state->terminator[1]);
+}
+
+/* Record the character width in the read encoding state. */
+void set_read_encoding_width(Encoding enc)
+{
+  struct encoding_state* state = &read_encoding;
+
+  state->width = enc;
+}
+
+/* Record the byte order mark setting in the read encoding state. */
+void set_read_encoding_bom(Enc_bom bom)
+{
+  struct encoding_state* state = &read_encoding;
+
+  state->bom = bom;
+}
+
+/* Record the line terminator in the read encoding state.  At most
+   MAX_TERMINATOR_LEN bytes of 'term' are copied. */
+void set_read_encoding_terminator(char* term)
+{
+  struct encoding_state* state = &read_encoding;
+
+  strncpy(state->terminator, term, MAX_TERMINATOR_LEN);
+}
+
+/* Select the character encoding used for writing.
+
+   from        - ENC_FROM_SYS is rejected.  With ENC_MANUAL the remaining
+                 arguments are applied to write_encoding.  With any other
+                 value (ENC_FROM_FILE) only the origin is recorded; the
+                 values are then copied from the read encoding by
+                 init_write_encoding().
+   new_charset - charset name, only used with ENC_MANUAL; must not be NULL
+                 in that case.
+   width       - character width; only honoured for the "UNICODE" charset,
+                 every other charset is stored as ONE_BYTE.
+   bom         - byte order mark setting, stored as given.
+
+   Returns 0 on success and 1 on failure.  Nothing is modified on failure:
+   the origin is only switched once the new encoding has been validated. */
+int gedcom_write_set_encoding(Enc_from from, const char* new_charset,
+                              Encoding width, Enc_bom bom)
+{
+  char* new_encoding = NULL;
+
+  if (from == ENC_FROM_SYS)
+    return 1;
+
+  if (from == ENC_MANUAL) {
+    if (new_charset == NULL)
+      return 1;
+
+    if (!strcmp(new_charset, "UNICODE")) {
+      if (width == ONE_BYTE) {
+        gedcom_error(_("Unicode cannot be encoded into one byte"));
+        return 1;
+      }
+    }
+    else {
+      /* All non-Unicode charsets are written one byte per character */
+      width = ONE_BYTE;
+    }
+
+    new_encoding = get_encoding(new_charset, width);
+    if (!new_encoding)
+      return 1;
+
+    write_encoding.encoding = new_encoding;
+    write_encoding.width    = width;
+    write_encoding.bom      = bom;
+    /* charset[] has MAX_CHARSET_LEN + 1 bytes and the last one is never
+       written, so the stored name stays NUL-terminated */
+    strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
+  }
+
+  write_encoding_from = from;
+  return 0;
+}
+
+/* If the write encoding is to be taken from the input file and a read
+   charset has been recorded, copy charset, encoding, width and BOM setting
+   from the read encoding state to the write encoding state.  Otherwise the
+   write encoding state is left as it is. */
+void init_write_encoding()
+{
+  if (write_encoding_from != ENC_FROM_FILE)
+    return;
+  if (read_encoding.charset[0] == '\0')
+    return;
+
+  write_encoding.bom      = read_encoding.bom;
+  write_encoding.width    = read_encoding.width;
+  write_encoding.encoding = read_encoding.encoding;
+  strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN);
+}
+
+/* Select the line terminator used for writing.
+
+   from - ENC_FROM_SYS: use SYS_NEWLINE.
+          ENC_MANUAL: use the terminator[] entry selected by 'end'.
+          Any other value (ENC_FROM_FILE): only the origin is recorded; the
+          terminator is then copied from the read encoding by
+          init_write_terminator().
+   end  - index into terminator[]; only used with ENC_MANUAL.
+
+   Returns 0 on success, 1 if 'end' does not select an entry of
+   terminator[].  Nothing is modified in the failure case. */
+int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end)
+{
+  const char* new_term = NULL;
+
+  if (from == ENC_FROM_SYS) {
+    new_term = SYS_NEWLINE;
+  }
+  else if (from == ENC_MANUAL) {
+    /* The cast also turns a negative value into an out-of-range index */
+    if ((size_t)end >= sizeof(terminator) / sizeof(terminator[0]))
+      return 1;
+    new_term = terminator[end];
+  }
+
+  write_terminator_from = from;
+  if (new_term)
+    strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN);
+  return 0;
+}
+
+/* If the write line terminator is to be taken from the input file and a
+   read terminator has been recorded, copy it to the write encoding state.
+   Otherwise the write terminator is left as it is. */
+void init_write_terminator()
+{
+  if (write_terminator_from != ENC_FROM_FILE)
+    return;
+  if (read_encoding.terminator[0] == '\0')
+    return;
+
+  strncpy(write_encoding.terminator, read_encoding.terminator,
+          MAX_TERMINATOR_LEN);
+}
+
diff --git a/gedcom/encoding_state.h b/gedcom/encoding_state.h
new file mode 100644 (file)
index 0000000..caf363c
--- /dev/null
@@ -0,0 +1,51 @@
+/* Header file for encoding_state.c.
+   Copyright (C) 2001 The Genes Development Team
+   This file is part of the Gedcom parser library.
+   Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
+
+   The Gedcom parser library is free software; you can redistribute it
+   and/or modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The Gedcom parser library is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the Gedcom parser library; if not, write to the
+   Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* $Id$ */
+/* $Name$ */
+
+#ifndef __ENCODING_STATE_H
+#define __ENCODING_STATE_H
+
+#include "gedcom.h"
+
+#define MAX_CHARSET_LEN 32
+#define MAX_TERMINATOR_LEN 2
+
+/* Encoding properties of a file being read or written.  The field order
+   matters: encoding_state.c initializes write_encoding positionally. */
+struct encoding_state {
+  /* charset name; one extra byte beyond MAX_CHARSET_LEN for the NUL */
+  char         charset[MAX_CHARSET_LEN + 1];
+  /* encoding name belonging to the charset; the pointer is stored, the
+     string is not copied */
+  const char*  encoding;
+  /* character width (ONE_BYTE, TWO_BYTE_HILO or TWO_BYTE_LOHI) */
+  Encoding     width;
+  /* byte order mark setting (WITH_BOM or WITHOUT_BOM) */
+  Enc_bom      bom;
+  /* line terminator; one extra byte beyond MAX_TERMINATOR_LEN for the NUL */
+  char         terminator[MAX_TERMINATOR_LEN + 1];
+};
+
+/* Declarations only: both objects are defined in encoding_state.c.
+   Without 'extern' every source file including this header would carry
+   its own tentative definition, which fails to link when the compiler
+   does not merge common symbols (-fno-common). */
+extern struct encoding_state read_encoding;
+extern struct encoding_state write_encoding;
+
+/* Setters for the read encoding state (read_encoding).
+   set_read_encoding() also prints the resulting state through
+   gedcom_debug_print(). */
+void set_read_encoding(const char* charset, const char* encoding);
+void set_read_encoding_width(Encoding enc);
+void set_read_encoding_bom(Enc_bom bom);
+void set_read_encoding_terminator(char* term);
+
+/* Copy the encoding resp. the line terminator from the read state to the
+   write state, but only when the corresponding write setting is
+   ENC_FROM_FILE and the read state has been filled in. */
+void init_write_encoding();
+void init_write_terminator();
+
+#endif /* __ENCODING_STATE_H */
index 703a9de9d3f21987ec3b22f51d267218603b596e..68db840b4f645da43fcbd372fd474111bd95d99c 100644 (file)
@@ -26,6 +26,7 @@
 #include "gedcom_internal.h"
 #include "multilex.h"
 #include "encoding.h"
+#include "encoding_state.h"
 #include "gedcom.h"
 #include "gedcom.tabgen.h"
 #include "compat.h"
@@ -337,7 +338,7 @@ static int dummy_conv = 0;
   { CHECK_LINE_LEN;                                                           \
     INIT_LINE_LEN;                                                            \
     if (line_no == 1)                                                         \
-      set_encoding_terminator(TO_INTERNAL(yytext, str_buffer));               \
+      set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer));          \
     BEGIN(INITIAL);                                                           \
   }
 
index 721702e2541184339e9bbb7079a861ff2e6d1f8d..e4128174d5e6393ea497a8b6faa9f0be079a44c2 100644 (file)
@@ -24,6 +24,7 @@
 #include "gedcom_internal.h"
 #include "multilex.h"
 #include "encoding.h"
+#include "encoding_state.h"
 #include "xref.h"
 
 int line_no = 0;
@@ -38,19 +39,19 @@ int lexer_init(Encoding enc, FILE* f)
   if (enc == ONE_BYTE) {
     lf  = &gedcom_1byte_lex;
     gedcom_1byte_myinit(f);
-    set_encoding_width(enc);
+    set_read_encoding_width(enc);
     return open_conv_to_internal("ASCII");
   }
   else if (enc == TWO_BYTE_HILO) {
     lf  = &gedcom_hilo_lex;
     gedcom_hilo_myinit(f);
-    set_encoding_width(enc);
+    set_read_encoding_width(enc);
     return open_conv_to_internal("UNICODE");
   }
   else if (enc == TWO_BYTE_LOHI) {
     lf  = &gedcom_lohi_lex;
     gedcom_lohi_myinit(f);
-    set_encoding_width(enc);
+    set_read_encoding_width(enc);
     return open_conv_to_internal("UNICODE");
   }
   else {
@@ -79,7 +80,7 @@ int determine_encoding(FILE* f)
   char first[2];
   int read;
 
-  set_encoding_bom(WITHOUT_BOM);
+  set_read_encoding_bom(WITHOUT_BOM);
   read = fread(first, 1, 2, f);
   if (read != 2) {
     gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
@@ -98,7 +99,7 @@ int determine_encoding(FILE* f)
   }
   else if ((first[0] == '\xFE') && (first[1] == '\xFF')) {
     gedcom_debug_print("Two-byte encoding, high-low, with BOM");
-    set_encoding_bom(WITH_BOM);
+    set_read_encoding_bom(WITH_BOM);
     return TWO_BYTE_HILO;
   }
   else if ((first[0] == '0') && (first[1] == '\0')) {
@@ -108,7 +109,7 @@ int determine_encoding(FILE* f)
   }
   else if ((first[0] == '\xFF') && (first[1] == '\xFE')) {
     gedcom_debug_print("Two-byte encoding, low-high, with BOM");
-    set_encoding_bom(WITH_BOM);
+    set_read_encoding_bom(WITH_BOM);
     return TWO_BYTE_LOHI;
   }
   else if ((first[0] == '\xEF') && (first[1] == '\xBB')) {
@@ -118,7 +119,7 @@ int determine_encoding(FILE* f)
       rewind_file(f);
     }
     else if (first[0] == '\xBF') {
-      set_encoding_bom(WITH_BOM);
+      set_read_encoding_bom(WITH_BOM);
       gedcom_debug_print("UTF-8 encoding, with BOM");
     }
     else {
index 7554b560ea7832163748df254fd4926ffec76a9f..f880bbbe90ef0279d4ab3b1cbfcccab7c608b38b 100644 (file)
@@ -24,6 +24,7 @@
 #include "gedcom_internal.h"
 #include "gedcom.h"
 #include "encoding.h"
+#include "encoding_state.h"
 #include "tag_data.h"
 #include "buffer.h"
 #include "utf8tools.h"
 
 #define MAXWRITELEN MAXGEDCLINELEN
 
-/* SYS_NEWLINE is defined in config.h */
-struct encoding_state write_encoding =
-{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE };
-Enc_from write_encoding_from   = ENC_FROM_FILE;
-Enc_from write_terminator_from = ENC_FROM_SYS;
-
 struct Gedcom_write_struct {
   int       filedesc;
   convert_t conv;
@@ -49,19 +44,6 @@ struct Gedcom_write_struct {
   int       ctxt_level;
 };
 
-const char* default_encoding[] = {
-  /* ONE_BYTE */      "ASCII",
-  /* TWO_BYTE_HILO */ "UCS-2BE",
-  /* TWO_BYTE_LOHI */ "UCS-2LE"
-};
-
-const char* terminator[] = {
-  /* END_CR */     "\x0D",
-  /* END_LF */     "\x0A",
-  /* END_CR_LF */  "\x0D\x0A",
-  /* END_LF_CR */  "\x0A\x0D"
-};
-
 void cleanup_write_buffer();
 struct safe_buffer write_buffer = { NULL, 0, NULL, 0, cleanup_write_buffer };
 
@@ -201,80 +183,6 @@ int write_long(Gedcom_write_hndl hndl, int elt_or_rec,
   return 0;
 }
 
-int gedcom_write_set_encoding(Enc_from from, const char* new_charset,
-                             Encoding width, Enc_bom bom)
-{
-  char* new_encoding = NULL;
-  if (from == ENC_FROM_SYS) {
-    return 1;
-  }
-  write_encoding_from = from;
-  if (from == ENC_MANUAL) {
-    if (!strcmp(new_charset, "UNICODE")) {
-      if (width == ONE_BYTE) {
-       gedcom_error(_("Unicode cannot be encoded into one byte"));
-       return 1;
-      }
-      else {
-       new_encoding = get_encoding(new_charset, width);
-       if (new_encoding) {
-         write_encoding.encoding = new_encoding;
-         write_encoding.width = width;
-         write_encoding.bom   = bom;
-         strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
-       }
-       else
-         return 1;
-      }
-    }
-    else {
-      new_encoding = get_encoding(new_charset, ONE_BYTE);
-      if (new_encoding) {
-       write_encoding.encoding = new_encoding;
-       write_encoding.width = ONE_BYTE;
-       write_encoding.bom   = bom;
-       strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
-      }
-      else
-       return 1;
-    }
-  }
-  return 0;
-}
-
-void copy_write_encoding_from_file()
-{
-  if (read_encoding.charset[0] != '\0') {
-    strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN);
-    write_encoding.encoding = read_encoding.encoding;
-    write_encoding.width    = read_encoding.width;
-    write_encoding.bom      = read_encoding.bom;
-  }
-}
-
-int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end)
-{
-  const char* new_term = NULL;
-  write_terminator_from = from;
-  if (from == ENC_FROM_SYS) {
-    new_term = SYS_NEWLINE;
-  }
-  else if (from == ENC_MANUAL) {
-    new_term = terminator[end];
-  }
-  if (new_term)
-    strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN);
-  return 0;
-}
-
-void copy_write_terminator_from_file()
-{
-  if (read_encoding.terminator[0] != '\0') {
-    strncpy(write_encoding.terminator, read_encoding.terminator,
-           MAX_TERMINATOR_LEN);
-  }
-}
-
 Gedcom_write_hndl gedcom_write_open(const char *filename)
 {
   Gedcom_write_hndl hndl;
@@ -284,10 +192,8 @@ Gedcom_write_hndl gedcom_write_open(const char *filename)
   if (!hndl)
     MEMORY_ERROR;
   else {
-    if (write_encoding_from == ENC_FROM_FILE)
-      copy_write_encoding_from_file();
-    if (write_terminator_from == ENC_FROM_FILE)
-      copy_write_terminator_from_file();
+    init_write_encoding();
+    init_write_terminator();
     hndl->total_conv_fails = 0;
     hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0);
     if (!hndl->conv) {