X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fwrite.c;h=7554b560ea7832163748df254fd4926ffec76a9f;hb=58c7f618a6964a7fa5fc95c67f13f0a52689a29a;hp=f8721da6cc91bab6fd841a150a586a9a79407831;hpb=1568cb8ab16ecc1984d0d00b8ae8189c32508b08;p=gedcom-parse.git diff --git a/gedcom/write.c b/gedcom/write.c index f8721da..7554b56 100644 --- a/gedcom/write.c +++ b/gedcom/write.c @@ -26,7 +26,7 @@ #include "encoding.h" #include "tag_data.h" #include "buffer.h" -#include "utf8.h" +#include "utf8tools.h" #include #include #include @@ -34,10 +34,11 @@ #define MAXWRITELEN MAXGEDCLINELEN -const char* encoding = "ASCII"; -int write_encoding_details = ONE_BYTE; /* SYS_NEWLINE is defined in config.h */ -const char* write_terminator = SYS_NEWLINE; +struct encoding_state write_encoding = +{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE }; +Enc_from write_encoding_from = ENC_FROM_FILE; +Enc_from write_terminator_from = ENC_FROM_SYS; struct Gedcom_write_struct { int filedesc; @@ -79,7 +80,8 @@ void cleanup_convert_at_buffer() } int write_simple(Gedcom_write_hndl hndl, - int level, char* xref, char* tag, char* value) + int level, const char* xref, const char* tag, + const char* value) { int res; @@ -119,31 +121,41 @@ int write_simple(Gedcom_write_hndl hndl, return 0; } +int write_encoding_value(Gedcom_write_hndl hndl, + int level, char* xref, char* tag, char* value) +{ + if (strcmp(value, write_encoding.charset)) + gedcom_warning(_("Forcing HEAD.CHAR value to '%s'"), + write_encoding.charset); + return write_simple(hndl, level, xref, tag, write_encoding.charset); +} + int supports_continuation(int elt_or_rec, int which_continuation) { return tag_data[elt_or_rec].options & which_continuation; } int write_long(Gedcom_write_hndl hndl, int elt_or_rec, - int level, char* xref, char* tag, char* value) + int level, const char* xref, const char* tag, const char* value) { - int prefix_len, value_len, term_len; - char* nl_pos = strchr(value, '\n'); + int prefix_len, value_len = 0, term_len; + char* nl_pos = NULL; + if (value) nl_pos = strchr(value, '\n'); prefix_len = utf8_strlen(tag) + 3; /* for e.g. "0 INDI " */ if (level > 9) prefix_len++; if (xref) prefix_len += utf8_strlen(xref) + 1; - value_len = utf8_strlen(value); + if (value) value_len = utf8_strlen(value); term_len = strlen(hndl->term); if (!nl_pos && prefix_len + value_len + term_len <= MAXWRITELEN) write_simple(hndl, level, xref, tag, value); else { - char* value_ptr = value; + const char* value_ptr = value; int cont_supported = supports_continuation(elt_or_rec, OPT_CONT); int cont_as_conc = supports_continuation(elt_or_rec, OPT_CONT_AS_CONC); if (nl_pos && !cont_supported) { - gedcom_error (_("The tag %s doesn't support newlines\n"), tag); + gedcom_error (_("The tag %s doesn't support newlines"), tag); return 1; } else { @@ -189,43 +201,80 @@ int write_long(Gedcom_write_hndl hndl, int elt_or_rec, return 0; } -int gedcom_write_set_encoding(const char* charset, +int gedcom_write_set_encoding(Enc_from from, const char* new_charset, Encoding width, Enc_bom bom) { char* new_encoding = NULL; - if (!strcmp(charset, "UNICODE")) { - if (width == ONE_BYTE) { - gedcom_error(_("Unicode cannot be encoded into one byte")); - return 1; + if (from == ENC_FROM_SYS) { + return 1; + } + write_encoding_from = from; + if (from == ENC_MANUAL) { + if (!strcmp(new_charset, "UNICODE")) { + if (width == ONE_BYTE) { + gedcom_error(_("Unicode cannot be encoded into one byte")); + return 1; + } + else { + new_encoding = get_encoding(new_charset, width); + if (new_encoding) { + write_encoding.encoding = new_encoding; + write_encoding.width = width; + write_encoding.bom = bom; + strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN); + } + else + return 1; + } } else { - new_encoding = get_encoding(charset, width); + new_encoding = get_encoding(new_charset, ONE_BYTE); if (new_encoding) { - encoding = new_encoding; - write_encoding_details = width | bom; + write_encoding.encoding = new_encoding; + write_encoding.width = ONE_BYTE; + write_encoding.bom = bom; + strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN); } else return 1; } } - else { - new_encoding = get_encoding(charset, ONE_BYTE); - if (new_encoding) { - encoding = new_encoding; - write_encoding_details = ONE_BYTE; - } - else - return 1; - } return 0; } -int gedcom_write_set_line_terminator(Enc_line_end end) +void copy_write_encoding_from_file() +{ + if (read_encoding.charset[0] != '\0') { + strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN); + write_encoding.encoding = read_encoding.encoding; + write_encoding.width = read_encoding.width; + write_encoding.bom = read_encoding.bom; + } +} + +int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end) { - write_terminator = terminator[end]; + const char* new_term = NULL; + write_terminator_from = from; + if (from == ENC_FROM_SYS) { + new_term = SYS_NEWLINE; + } + else if (from == ENC_MANUAL) { + new_term = terminator[end]; + } + if (new_term) + strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN); return 0; } +void copy_write_terminator_from_file() +{ + if (read_encoding.terminator[0] != '\0') { + strncpy(write_encoding.terminator, read_encoding.terminator, + MAX_TERMINATOR_LEN); + } +} + Gedcom_write_hndl gedcom_write_open(const char *filename) { Gedcom_write_hndl hndl; @@ -235,11 +284,15 @@ Gedcom_write_hndl gedcom_write_open(const char *filename) if (!hndl) MEMORY_ERROR; else { + if (write_encoding_from == ENC_FROM_FILE) + copy_write_encoding_from_file(); + if (write_terminator_from == ENC_FROM_FILE) + copy_write_terminator_from_file(); hndl->total_conv_fails = 0; - hndl->conv = initialize_utf8_conversion(encoding, 0); + hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0); if (!hndl->conv) { gedcom_error(_("Could not open encoding '%s' for writing: %s"), - encoding, strerror(errno)); + write_encoding.encoding, strerror(errno)); free(hndl); hndl = NULL; } @@ -253,15 +306,17 @@ Gedcom_write_hndl gedcom_write_open(const char *filename) hndl = NULL; } else { - hndl->term = write_terminator; + hndl->term = write_encoding.terminator; hndl->ctxt_level = -1; - if (write_encoding_details & WITH_BOM) { - if (write_encoding_details & TWO_BYTE_HILO) + if (write_encoding.bom == WITH_BOM) { + if (write_encoding.width == TWO_BYTE_HILO) write(hndl->filedesc, "\xFE\xFF", 2); - else if (write_encoding_details & TWO_BYTE_LOHI) + else if (write_encoding.width == TWO_BYTE_LOHI) write(hndl->filedesc, "\xFF\xFE", 2); + else if (!strcmp(write_encoding.encoding, "UTF-8")) + write(hndl->filedesc, "\xEF\xBB\xBF", 3); else - gedcom_warning(_("Byte order mark configured, but no Unicode")); + gedcom_warning(_("Byte order mark configured, but not relevant")); } } } @@ -297,7 +352,7 @@ char* get_tag_string(int elt_or_rec, int tag) } } else { - gedcom_error(_("The element or record type '%s' requires a specific tag" + gedcom_error(_("The element or record type '%s' requires a specific tag " "for writing"), tag_data[elt_or_rec].elt_name); return NULL; @@ -369,7 +424,9 @@ int _gedcom_write_val(Gedcom_write_hndl hndl, tag_str = get_tag_string(rec_or_elt, tag); level = get_level(hndl, rec_or_elt, parent_rec_or_elt); if (tag_str && (level != -1)) { - if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC|OPT_CONT_AS_CONC)) + if (rec_or_elt == ELT_HEAD_CHAR) + result = write_encoding_value(hndl, level, xrefstr, tag_str, val); + else if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC)) result = write_long(hndl, rec_or_elt, level, xrefstr, tag_str, val); else result = write_simple(hndl, level, xrefstr, tag_str, val); @@ -379,12 +436,11 @@ int _gedcom_write_val(Gedcom_write_hndl hndl, } int gedcom_write_record_str(Gedcom_write_hndl hndl, - Gedcom_rec rec, int tag, - char* xrefstr, char* val) + Gedcom_rec rec, char* xrefstr, char* val) { int result = 1; if (check_type(rec, (val ? GV_CHAR_PTR : GV_NULL))) - result = _gedcom_write_val(hndl, rec, tag, -1, xrefstr, convert_at(val)); + result = _gedcom_write_val(hndl, rec, 0, -1, xrefstr, convert_at(val)); return result; } @@ -399,16 +455,6 @@ int gedcom_write_element_str(Gedcom_write_hndl hndl, return result; } -int gedcom_write_record_xref(Gedcom_write_hndl hndl, - Gedcom_rec rec, int tag, - char* xrefstr, struct xref_value* val) -{ - int result = 1; - if (check_type(rec, (val ? GV_XREF_PTR : GV_NULL))) - result = _gedcom_write_val(hndl, rec, tag, -1, xrefstr, val->string); - return result; -} - int gedcom_write_element_xref(Gedcom_write_hndl hndl, Gedcom_elt elt, int tag, int parent_rec_or_elt, struct xref_value* val)