Implemented an encoding state.
author Peter Verthez <Peter.Verthez@advalvas.be>
Sat, 25 Jan 2003 14:13:22 +0000 (14:13 +0000)
committer Peter Verthez <Peter.Verthez@advalvas.be>
Sat, 25 Jan 2003 14:13:22 +0000 (14:13 +0000)
gedcom/encoding.c
gedcom/encoding.h
gedcom/write.c

index a8780515135006b486f95fc89adf143950d14786..4828c0a6a7d3c7bc9e9ef89537b6f17b1b39efb7 100644 (file)
@@ -35,7 +35,8 @@
 #define GCONV_SEARCH_PATH "GCONV_PATH"
 #define MAXBUF 255
 
-static Encoding the_enc = ONE_BYTE;
+struct encoding_state read_encoding;
+
 static hash_t *encodings = NULL;
 
 const char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" };
@@ -246,7 +247,17 @@ void init_encodings()
 
 void set_encoding_width(Encoding enc)
 {
-  the_enc = enc;
+  read_encoding.width = enc; /* later passed to get_encoding() by open_conv_to_internal() */
+}
+
+void set_encoding_bom(Enc_bom bom)
+{
+  read_encoding.bom = bom; /* only stored here; printed by the debug output in open_conv_to_internal() */
+}
+
+void set_encoding_terminator(char* term)
+{
+  strncpy(read_encoding.terminator, term, MAX_TERMINATOR_LEN); /* strncpy adds no NUL if term has MAX_TERMINATOR_LEN or more chars; the buffer's extra last byte is not written by this copy */
 }
 
 static convert_t to_int = NULL;
@@ -255,7 +266,7 @@ static char* error_value = "<error>";
 int open_conv_to_internal(const char* fromcode)
 {
   convert_t new_to_int = NULL;
-  const char *encoding = get_encoding(fromcode, the_enc);
+  const char *encoding = get_encoding(fromcode, read_encoding.width);
   
   if (encoding != NULL) {
     new_to_int = initialize_utf8_conversion(encoding, 1);
@@ -269,6 +280,16 @@ int open_conv_to_internal(const char* fromcode)
     if (to_int != NULL)
       cleanup_utf8_conversion(to_int);
     to_int = new_to_int;
+    strncpy(read_encoding.charset, fromcode, MAX_CHARSET_LEN);
+    read_encoding.encoding = encoding;
+    gedcom_debug_print("Encoding state is now: ");
+    gedcom_debug_print("  charset   : %s", read_encoding.charset);
+    gedcom_debug_print("  encoding  : %s", read_encoding.encoding);
+    gedcom_debug_print("  width     : %d", read_encoding.width);
+    gedcom_debug_print("  BOM       : %d", read_encoding.bom);
+    gedcom_debug_print("  terminator: 0x%02x 0x%02x",
+                      read_encoding.terminator[0],
+                      read_encoding.terminator[1]);
   }
 
   return (new_to_int != NULL);
index d4327cad9a6dd67645ec34cf89f027316f216c5a..93f69863562cedba392b94916e5673b0af810e37 100644 (file)
 #include "gedcom.h"
 #include "utf8tools.h"
 
+#define MAX_CHARSET_LEN 32
+#define MAX_TERMINATOR_LEN 2
+
+struct encoding_state { /* one encoding configuration; used for read_encoding and write_encoding */
+  char         charset[MAX_CHARSET_LEN + 1]; /* charset name as supplied by the caller, e.g. "ASCII" */
+  const char*  encoding; /* conversion name returned by get_encoding() for charset and width */
+  Encoding     width; /* character width, e.g. ONE_BYTE, TWO_BYTE_HILO, TWO_BYTE_LOHI */
+  Enc_bom      bom; /* byte order mark setting, e.g. WITH_BOM, WITHOUT_BOM */
+  char         terminator[MAX_TERMINATOR_LEN + 1]; /* line terminator string, at most MAX_TERMINATOR_LEN chars plus NUL */
+};
+
+extern struct encoding_state read_encoding; /* declaration only: the object is defined once in encoding.c, so including this header no longer creates a definition per file */
+
+void init_encodings();
+char* get_encoding(const char* gedcom_n, Encoding enc);
+void update_gconv_search_path();
+
 int open_conv_to_internal(const char* fromcode);
 void close_conv_to_internal();
 char* to_internal(const char* str, size_t len, struct conv_buffer *output_buf);
-void init_encodings();
-char* get_encoding(const char* gedcom_n, Encoding enc);
 void set_encoding_width(Encoding enc);
-void update_gconv_search_path();
+void set_encoding_bom(Enc_bom bom);
+void set_encoding_terminator(char* term);
 
 #endif /* __ENCODING_H */
index f0b1529ca8ca9271d107dbf1cc69aa98bca241c6..1e44ff46476b5a12c92bde65786b765c8cb56323 100644 (file)
 #include <fcntl.h>
 
 #define MAXWRITELEN MAXGEDCLINELEN
-#define MAXCHARSETLEN 32
 
-char charset[MAXCHARSETLEN+1]  = "ASCII";
-const char* encoding = "ASCII";
-int write_encoding_details = ONE_BYTE;
 /* SYS_NEWLINE is defined in config.h */
-const char* write_terminator = SYS_NEWLINE;
+struct encoding_state write_encoding =
+{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE }; /* field order: charset, encoding, width, bom, terminator */
 
 struct Gedcom_write_struct {
   int       filedesc;
@@ -122,12 +119,13 @@ int write_simple(Gedcom_write_hndl hndl,
   return 0;
 }
 
-int write_encoding(Gedcom_write_hndl hndl,
-                  int level, char* xref, char* tag, char* value)
+int write_encoding_value(Gedcom_write_hndl hndl,
+                        int level, char* xref, char* tag, char* value)
 {
-  if (strcmp(value, charset))
-    gedcom_warning(_("Forcing HEAD.CHAR value to '%s'"), charset);
-  return write_simple(hndl, level, xref, tag, charset);
+  if (strcmp(value, write_encoding.charset)) /* caller's value differs from the configured charset */
+    gedcom_warning(_("Forcing HEAD.CHAR value to '%s'"),
+                  write_encoding.charset);
+  return write_simple(hndl, level, xref, tag, write_encoding.charset); /* the configured charset is written regardless of value */
 }
 
 int supports_continuation(int elt_or_rec, int which_continuation)
@@ -213,9 +211,10 @@ int gedcom_write_set_encoding(const char* new_charset,
     else {
       new_encoding = get_encoding(new_charset, width);
       if (new_encoding) {
-       encoding = new_encoding;
-       write_encoding_details = width | bom;
-       strncpy(charset, new_charset, MAXCHARSETLEN);
+       write_encoding.encoding = new_encoding;
+       write_encoding.width = width;
+       write_encoding.bom   = bom;
+       strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
       }
       else
        return 1;
@@ -224,9 +223,10 @@ int gedcom_write_set_encoding(const char* new_charset,
   else {
     new_encoding = get_encoding(new_charset, ONE_BYTE);
     if (new_encoding) {
-      encoding = new_encoding;
-      write_encoding_details = ONE_BYTE;
-      strncpy(charset, new_charset, MAXCHARSETLEN);
+      write_encoding.encoding = new_encoding;
+      write_encoding.width = ONE_BYTE;
+      write_encoding.bom   = bom;
+      strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
     }
     else
       return 1;
@@ -236,7 +236,7 @@ int gedcom_write_set_encoding(const char* new_charset,
 
 int gedcom_write_set_line_terminator(Enc_line_end end)
 {
-  write_terminator = terminator[end];
+  strncpy(write_encoding.terminator, terminator[end], MAX_TERMINATOR_LEN); /* copies at most MAX_TERMINATOR_LEN chars into the state instead of storing the pointer */
   return 0;
 }
 
@@ -250,10 +250,10 @@ Gedcom_write_hndl gedcom_write_open(const char *filename)
     MEMORY_ERROR;
   else {
     hndl->total_conv_fails = 0;
-    hndl->conv = initialize_utf8_conversion(encoding, 0);
+    hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0);
     if (!hndl->conv) {
       gedcom_error(_("Could not open encoding '%s' for writing: %s"),
-                  encoding, strerror(errno));
+                  write_encoding.encoding, strerror(errno));
       free(hndl);
       hndl = NULL;
     }
@@ -267,15 +267,17 @@ Gedcom_write_hndl gedcom_write_open(const char *filename)
        hndl = NULL;
       }
       else {
-       hndl->term = write_terminator;
+       hndl->term = write_encoding.terminator;
        hndl->ctxt_level = -1;
-       if (write_encoding_details & WITH_BOM) {
-         if (write_encoding_details & TWO_BYTE_HILO)
+       if (write_encoding.bom == WITH_BOM) {
+         if (write_encoding.width == TWO_BYTE_HILO)
            write(hndl->filedesc, "\xFE\xFF", 2);
-         else if (write_encoding_details & TWO_BYTE_LOHI)
+         else if (write_encoding.width == TWO_BYTE_LOHI)
            write(hndl->filedesc, "\xFF\xFE", 2);
+         else if (!strcmp(write_encoding.encoding, "UTF-8"))
+           write(hndl->filedesc, "\xEF\xBB\xBF", 3);
          else
-           gedcom_warning(_("Byte order mark configured, but no Unicode"));
+           gedcom_warning(_("Byte order mark configured, but not relevant"));
        }
       }
     }
@@ -384,7 +386,7 @@ int _gedcom_write_val(Gedcom_write_hndl hndl,
   level   = get_level(hndl, rec_or_elt, parent_rec_or_elt);
   if (tag_str && (level != -1)) {
     if (rec_or_elt == ELT_HEAD_CHAR)
-      result = write_encoding(hndl, level, xrefstr, tag_str, val);
+      result = write_encoding_value(hndl, level, xrefstr, tag_str, val);
     else if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC))
       result = write_long(hndl, rec_or_elt, level, xrefstr, tag_str, val);
     else