Write encoding is by default the read encoding.
[gedcom-parse.git] / gedcom / write.c
index f8721da6cc91bab6fd841a150a586a9a79407831..7554b560ea7832163748df254fd4926ffec76a9f 100644 (file)
@@ -26,7 +26,7 @@
 #include "encoding.h"
 #include "tag_data.h"
 #include "buffer.h"
-#include "utf8.h"
+#include "utf8tools.h"
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #define MAXWRITELEN MAXGEDCLINELEN
 
-const char* encoding = "ASCII";
-int write_encoding_details = ONE_BYTE;
 /* SYS_NEWLINE is defined in config.h */
-const char* write_terminator = SYS_NEWLINE;
+struct encoding_state write_encoding =
+{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE }; /* initial state */
+Enc_from write_encoding_from   = ENC_FROM_FILE; /* gedcom_write_open() then copies read_encoding */
+Enc_from write_terminator_from = ENC_FROM_SYS;  /* only ENC_FROM_FILE triggers a copy at open time */
 
 struct Gedcom_write_struct {
   int       filedesc;
@@ -79,7 +80,8 @@ void cleanup_convert_at_buffer()
 }
 
 int write_simple(Gedcom_write_hndl hndl,
-                int level, char* xref, char* tag, char* value)
+                int level, const char* xref, const char* tag,
+                const char* value)
 {
   int res;
   
@@ -119,31 +121,41 @@ int write_simple(Gedcom_write_hndl hndl,
   return 0;
 }
 
+/* Write the HEAD.CHAR line.  The charset written is always the configured
+   write charset (write_encoding.charset); a warning is issued when the
+   caller supplied something else.  'value' may be NULL (no value supplied):
+   that is treated as a mismatch instead of being passed to strcmp().
+   Returns the result of write_simple(). */
+int write_encoding_value(Gedcom_write_hndl hndl,
+                        int level, char* xref, char* tag, char* value)
+{
+  if (!value || strcmp(value, write_encoding.charset))
+    gedcom_warning(_("Forcing HEAD.CHAR value to '%s'"),
+                  write_encoding.charset);
+  return write_simple(hndl, level, xref, tag, write_encoding.charset);
+}
+
 int supports_continuation(int elt_or_rec, int which_continuation)
 {
   return tag_data[elt_or_rec].options & which_continuation;
 }
 
 int write_long(Gedcom_write_hndl hndl, int elt_or_rec,
-              int level, char* xref, char* tag, char* value)
+              int level, const char* xref, const char* tag, const char* value)
 {
-  int prefix_len, value_len, term_len;
-  char* nl_pos = strchr(value, '\n');
+  int prefix_len, value_len = 0, term_len;
+  char* nl_pos = NULL;
+  if (value) nl_pos = strchr(value, '\n');
 
   prefix_len = utf8_strlen(tag) + 3;  /* for e.g. "0 INDI " */
   if (level > 9) prefix_len++;
   if (xref)      prefix_len += utf8_strlen(xref) + 1;
-  value_len  = utf8_strlen(value);
+  if (value)     value_len  = utf8_strlen(value);
   term_len   = strlen(hndl->term);
 
   if (!nl_pos && prefix_len + value_len + term_len <= MAXWRITELEN)
     write_simple(hndl, level, xref, tag, value);
   else {
-    char* value_ptr = value;
+    const char* value_ptr = value;
     int cont_supported = supports_continuation(elt_or_rec, OPT_CONT);
     int cont_as_conc   = supports_continuation(elt_or_rec, OPT_CONT_AS_CONC);
     if (nl_pos && !cont_supported) {
-      gedcom_error (_("The tag %s doesn't support newlines\n"), tag);
+      gedcom_error (_("The tag %s doesn't support newlines"), tag);
       return 1;
     }
     else {
@@ -189,43 +201,80 @@ int write_long(Gedcom_write_hndl hndl, int elt_or_rec,
   return 0;
 }
 
-int gedcom_write_set_encoding(const char* charset,
+int gedcom_write_set_encoding(Enc_from from, const char* new_charset,
                              Encoding width, Enc_bom bom)
 {  /* Choose where the write encoding comes from; returns 0 on success, 1 on failure. */
   char* new_encoding = NULL;
-  if (!strcmp(charset, "UNICODE")) {
-    if (width == ONE_BYTE) {
-      gedcom_error(_("Unicode cannot be encoded into one byte"));
-      return 1;
+  if (from == ENC_FROM_SYS) {  /* rejected as a source for the charset */
+    return 1;
+  }
+  write_encoding_from = from;  /* NOTE(review): stays changed even if the manual setup below fails */
+  if (from == ENC_MANUAL) {    /* any other source: nothing more is set here */
+    if (!strcmp(new_charset, "UNICODE")) {
+      if (width == ONE_BYTE) {
+       gedcom_error(_("Unicode cannot be encoded into one byte"));
+       return 1;
+      }
+      else {
+       new_encoding = get_encoding(new_charset, width);
+       if (new_encoding) {
+         write_encoding.encoding = new_encoding;
+         write_encoding.width = width;
+         write_encoding.bom   = bom;
+         strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
+       }  /* NOTE(review): strncpy adds no NUL when the source fills the limit - confirm buffer size */
+       else
+         return 1;
+      }
     }
     else {
-      new_encoding = get_encoding(charset, ONE_BYTE);
+      new_encoding = get_encoding(new_charset, ONE_BYTE);
       if (new_encoding) {
-       encoding = new_encoding;
-       write_encoding_details = width | bom;
+       write_encoding.encoding = new_encoding;
+       write_encoding.width = ONE_BYTE;  /* the 'width' argument is ignored on this path */
+       write_encoding.bom   = bom;
+       strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
       }
       else
        return 1;
     }
   }
-  else {
-    new_encoding = get_encoding(charset, ONE_BYTE);
-    if (new_encoding) {
-      encoding = new_encoding;
-      write_encoding_details = ONE_BYTE;
-    }
-    else
-      return 1;
-  }
   return 0;
 }
 
-int gedcom_write_set_line_terminator(Enc_line_end end)
+/* Copy the charset name, encoding, width and BOM setting from read_encoding
+   into write_encoding.  Nothing is changed when the read charset is the
+   empty string. */
+void copy_write_encoding_from_file()
+{
+  if (read_encoding.charset[0] == '\0')
+    return;
+
+  write_encoding.encoding = read_encoding.encoding;
+  write_encoding.width    = read_encoding.width;
+  write_encoding.bom      = read_encoding.bom;
+  strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN);
+}
+
+int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end)
 {  /* Choose the source of the line terminator used for writing; always returns 0. */
-  write_terminator = terminator[end];
+  const char* new_term = NULL;   /* stays NULL unless one of the branches below sets it */
+  write_terminator_from = from;  /* checked against ENC_FROM_FILE in gedcom_write_open() */
+  if (from == ENC_FROM_SYS) {
+    new_term = SYS_NEWLINE;      /* SYS_NEWLINE is defined in config.h */
+  }
+  else if (from == ENC_MANUAL) {
+    new_term = terminator[end];  /* NOTE(review): 'end' is used as an index without a range check */
+  }
+  if (new_term)                  /* other sources leave write_encoding.terminator untouched here */
+    strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN);
   return 0;
 }
 
+/* Copy the line terminator from read_encoding into write_encoding.  An
+   empty read terminator leaves the write terminator as it is. */
+void copy_write_terminator_from_file()
+{
+  const char* read_term = read_encoding.terminator;
+
+  if (read_term[0] == '\0')
+    return;
+  strncpy(write_encoding.terminator, read_term, MAX_TERMINATOR_LEN);
+}
+
 Gedcom_write_hndl gedcom_write_open(const char *filename)
 {
   Gedcom_write_hndl hndl;
@@ -235,11 +284,15 @@ Gedcom_write_hndl gedcom_write_open(const char *filename)
   if (!hndl)
     MEMORY_ERROR;
   else {
+    if (write_encoding_from == ENC_FROM_FILE)
+      copy_write_encoding_from_file();
+    if (write_terminator_from == ENC_FROM_FILE)
+      copy_write_terminator_from_file();
     hndl->total_conv_fails = 0;
-    hndl->conv = initialize_utf8_conversion(encoding, 0);
+    hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0);
     if (!hndl->conv) {
       gedcom_error(_("Could not open encoding '%s' for writing: %s"),
-                  encoding, strerror(errno));
+                  write_encoding.encoding, strerror(errno));
       free(hndl);
       hndl = NULL;
     }
@@ -253,15 +306,17 @@ Gedcom_write_hndl gedcom_write_open(const char *filename)
        hndl = NULL;
       }
       else {
-       hndl->term = write_terminator;
+       hndl->term = write_encoding.terminator;
        hndl->ctxt_level = -1;
-       if (write_encoding_details & WITH_BOM) {
-         if (write_encoding_details & TWO_BYTE_HILO)
+       if (write_encoding.bom == WITH_BOM) {
+         if (write_encoding.width == TWO_BYTE_HILO)
            write(hndl->filedesc, "\xFE\xFF", 2);
-         else if (write_encoding_details & TWO_BYTE_LOHI)
+         else if (write_encoding.width == TWO_BYTE_LOHI)
            write(hndl->filedesc, "\xFF\xFE", 2);
+         else if (!strcmp(write_encoding.encoding, "UTF-8"))
+           write(hndl->filedesc, "\xEF\xBB\xBF", 3);
          else
-           gedcom_warning(_("Byte order mark configured, but no Unicode"));
+           gedcom_warning(_("Byte order mark configured, but not relevant"));
        }
       }
     }
@@ -297,7 +352,7 @@ char* get_tag_string(int elt_or_rec, int tag)
     }
   }
   else {
-    gedcom_error(_("The element or record type '%s' requires a specific tag"
+    gedcom_error(_("The element or record type '%s' requires a specific tag "
                   "for writing"),
                 tag_data[elt_or_rec].elt_name);
     return NULL;
@@ -369,7 +424,9 @@ int _gedcom_write_val(Gedcom_write_hndl hndl,
   tag_str = get_tag_string(rec_or_elt, tag);
   level   = get_level(hndl, rec_or_elt, parent_rec_or_elt);
   if (tag_str && (level != -1)) {
-    if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC|OPT_CONT_AS_CONC))
+    if (rec_or_elt == ELT_HEAD_CHAR)
+      result = write_encoding_value(hndl, level, xrefstr, tag_str, val);
+    else if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC))
       result = write_long(hndl, rec_or_elt, level, xrefstr, tag_str, val);
     else
       result = write_simple(hndl, level, xrefstr, tag_str, val);
@@ -379,12 +436,11 @@ int _gedcom_write_val(Gedcom_write_hndl hndl,
 }
 
 int gedcom_write_record_str(Gedcom_write_hndl hndl,
-                           Gedcom_rec rec, int tag,
-                           char* xrefstr, char* val)
+                           Gedcom_rec rec, char* xrefstr, char* val)
 {  /* Write a record line whose value is a string, or no value when val is NULL. */
   int result = 1;  /* returned as is when check_type() rejects the value type */
   if (check_type(rec, (val ? GV_CHAR_PTR : GV_NULL)))  /* NULL val is checked as GV_NULL */
-    result = _gedcom_write_val(hndl, rec, tag, -1, xrefstr, convert_at(val));
+    result = _gedcom_write_val(hndl, rec, 0, -1, xrefstr, convert_at(val));
   return result;  /* otherwise the result of _gedcom_write_val() with tag 0 and parent -1 */
 }
 
@@ -399,16 +455,6 @@ int gedcom_write_element_str(Gedcom_write_hndl hndl,
   return result;
 }
 
-int gedcom_write_record_xref(Gedcom_write_hndl hndl,
-                            Gedcom_rec rec, int tag,
-                            char* xrefstr, struct xref_value* val)
-{
-  int result = 1;
-  if (check_type(rec, (val ? GV_XREF_PTR : GV_NULL)))
-    result = _gedcom_write_val(hndl, rec, tag, -1, xrefstr, val->string);
-  return result;
-}
-
 int gedcom_write_element_xref(Gedcom_write_hndl hndl,
                              Gedcom_elt elt, int tag, int parent_rec_or_elt,
                              struct xref_value* val)