#include "encoding.h"
#include "tag_data.h"
#include "buffer.h"
-#include "utf8.h"
+#include "utf8tools.h"
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-const char* encoding = "ASCII";
-int write_encoding_details = ONE_BYTE;
+#define MAXWRITELEN MAXGEDCLINELEN
+
/* SYS_NEWLINE is defined in config.h */
-const char* write_terminator = SYS_NEWLINE;
+/* Encoding settings applied when writing.  Initialised with "ASCII",
+ * ONE_BYTE, WITHOUT_BOM and the system newline (SYS_NEWLINE). */
+struct encoding_state write_encoding =
+{ "ASCII", "ASCII", ONE_BYTE, WITHOUT_BOM, SYS_NEWLINE };
+/* Source of the write encoding; gedcom_write_open() copies the read
+ * encoding when this is ENC_FROM_FILE. */
+Enc_from write_encoding_from = ENC_FROM_FILE;
+/* Source of the line terminator; gedcom_write_open() copies the read
+ * terminator when this is ENC_FROM_FILE. */
+Enc_from write_terminator_from = ENC_FROM_SYS;
struct Gedcom_write_struct {
int filedesc;
};
void cleanup_write_buffer();
-
+/* Buffer in which each output line is assembled; its last member is the
+ * cleanup function declared above. */
struct safe_buffer write_buffer = { NULL, 0, NULL, 0, cleanup_write_buffer };
+void cleanup_convert_at_buffer();
+/* Buffer used by convert_at() to hold the value with doubled '@'. */
+struct safe_buffer convert_at_buffer = { NULL, 0, NULL, 0,
+ cleanup_convert_at_buffer };
+
+/* Cleanup function registered in write_buffer: passes that buffer to
+ * cleanup_buffer(). */
void cleanup_write_buffer()
{
cleanup_buffer(&write_buffer);
}
+/* Cleanup function registered in convert_at_buffer: passes that buffer to
+ * cleanup_buffer(). */
+void cleanup_convert_at_buffer()
+{
+ cleanup_buffer(&convert_at_buffer);
+}
+
int write_simple(Gedcom_write_hndl hndl,
- int level, char* xref, char* tag, char* value)
+ int level, const char* xref, const char* tag,
+ const char* value)
{
int res;
res += safe_buf_append(&write_buffer, " %s", value);
res += safe_buf_append(&write_buffer, hndl->term);
- converted = convert_from_utf8(hndl->conv, get_buf_string(&write_buffer),
- &conv_fails, &outlen);
-
- if (converted && (conv_fails == 0))
- write(hndl->filedesc, converted, outlen);
+ if (utf8_strlen(get_buf_string(&write_buffer)) > MAXGEDCLINELEN) {
+ gedcom_error(_("Line too long"));
+ }
else {
- hndl->total_conv_fails += conv_fails;
- gedcom_error
- (_("Error converting output string: %s (%d conversion failures)"),
- strerror(errno), conv_fails);
+ converted = convert_from_utf8(hndl->conv, get_buf_string(&write_buffer),
+ &conv_fails, &outlen);
+
+ if (converted && (conv_fails == 0)) {
+ line_no++;
+ write(hndl->filedesc, converted, outlen);
+ }
+ else {
+ hndl->total_conv_fails += conv_fails;
+ gedcom_error
+ (_("Error converting output string: %s (%d conversion failures)"),
+ strerror(errno), conv_fails);
+ }
}
}
return 0;
}
-int gedcom_write_set_encoding(const char* charset,
- Encoding width, Enc_bom bom)
+/* Writes the HEAD.CHAR line, always using the configured write charset.
+ *
+ * The value supplied by the caller is only compared with
+ * write_encoding.charset: when it is missing or different, a warning is
+ * issued and the configured charset is written instead.
+ *
+ * Returns the result of write_simple().
+ */
+int write_encoding_value(Gedcom_write_hndl hndl,
+                         int level, char* xref, char* tag, char* value)
{
- char* new_encoding = NULL;
- if (!strcmp(charset, "UNICODE")) {
- if (width == ONE_BYTE) {
- gedcom_error(_("Unicode cannot be encoded into one byte"));
+  /* A NULL value must not reach strcmp(); treat it as "differs". */
+  if (!value || strcmp(value, write_encoding.charset))
+    gedcom_warning(_("Forcing HEAD.CHAR value to '%s'"),
+                   write_encoding.charset);
+  return write_simple(hndl, level, xref, tag, write_encoding.charset);
+}
+
+/* Tests the option bits of a record/element type.
+ *
+ * Returns non-zero when any bit of which_continuation is set in
+ * tag_data[elt_or_rec].options, zero otherwise.
+ */
+int supports_continuation(int elt_or_rec, int which_continuation)
+{
+  return (which_continuation & tag_data[elt_or_rec].options);
+}
+
+/* Writes a value that may need more than one output line.
+ *
+ * If the value contains no newline and prefix + value + terminator fits
+ * in MAXWRITELEN, a single write_simple() call is made.  Otherwise the
+ * value is cut into pieces: the first piece is written with the given
+ * level, xref and tag, every following piece at level + 1 without xref
+ * and with the tag "CONT" or "CONC".
+ *
+ * Returns 1 (after reporting an error) when the value contains a newline
+ * but the element type lacks OPT_CONT, and 0 otherwise.
+ *
+ * NOTE(review): the results of the write_simple() calls are ignored, so
+ * a failed write is not reported to the caller.
+ * NOTE(review): the lengths come from utf8_strlen() (presumably character
+ * counts), while strncpy(), the pointer arithmetic and nl_pos - value_ptr
+ * work in bytes -- confirm the behaviour for multi-byte UTF-8 values.
+ */
+int write_long(Gedcom_write_hndl hndl, int elt_or_rec,
+ int level, const char* xref, const char* tag, const char* value)
+{
+ int prefix_len, value_len = 0, term_len;
+ char* nl_pos = NULL;
+ if (value) nl_pos = strchr(value, '\n');
+
+ prefix_len = utf8_strlen(tag) + 3; /* for e.g. "0 INDI " */
+ if (level > 9) prefix_len++; /* one more for a second level digit */
+ if (xref) prefix_len += utf8_strlen(xref) + 1;
+ if (value) value_len = utf8_strlen(value);
+ term_len = strlen(hndl->term);
+
+ if (!nl_pos && prefix_len + value_len + term_len <= MAXWRITELEN)
+ write_simple(hndl, level, xref, tag, value);
+ else {
+ const char* value_ptr = value;
+ int cont_supported = supports_continuation(elt_or_rec, OPT_CONT);
+ int cont_as_conc = supports_continuation(elt_or_rec, OPT_CONT_AS_CONC);
+ if (nl_pos && !cont_supported) {
+ gedcom_error (_("The tag %s doesn't support newlines"), tag);
return 1;
}
else {
- new_encoding = get_encoding(charset, width);
- if (new_encoding) {
- encoding = new_encoding;
- write_encoding_details = width | bom;
+ char value_part[MAXWRITELEN];
+ int cont_prefix_len, write_level = level;
+ cont_prefix_len = utf8_strlen("CONT") + 3;
+ if (level + 1 > 9) cont_prefix_len++;
+
+ /* One iteration per output line; value_ptr becomes NULL after the
+ last piece (and is NULL from the start when value is NULL). */
+ while (value_ptr) {
+ char* cont_tag = "CONT"; /* tag for the line after this one */
+ int line_len = (nl_pos && cont_supported
+ ? nl_pos - value_ptr : value_len);
+
+ /* Piece too long: cut it at the maximum length; the remainder is
+ tagged "CONC" unless OPT_CONT_AS_CONC is set for this type. */
+ if (prefix_len + line_len + term_len > MAXWRITELEN) {
+ line_len = MAXWRITELEN - prefix_len - term_len;
+ if (!cont_as_conc)
+ cont_tag = "CONC";
+ }
+
+ /* Zero-fill first so the copied piece is NUL-terminated. */
+ memset(value_part, 0, sizeof(value_part));
+ strncpy(value_part, value_ptr, line_len);
+ write_simple(hndl, write_level, xref, tag, value_part);
+
+ if (line_len < value_len) {
+ value_ptr = value_ptr + line_len;
+ value_len = value_len - line_len;
+ /* Skip the newline(s) at which the value was cut. */
+ while (*value_ptr == '\n') {
+ value_ptr++;
+ value_len--;
+ }
+ /* All following lines are continuation lines. */
+ prefix_len = cont_prefix_len;
+ write_level = level + 1;
+ xref = NULL;
+ tag = cont_tag;
+ nl_pos = strchr(value_ptr, '\n');
+ }
+ else
+ value_ptr = NULL;
}
}
}
- else {
- new_encoding = get_encoding(charset, ONE_BYTE);
- if (new_encoding) {
- encoding = new_encoding;
- write_encoding_details = ONE_BYTE;
+
+ return 0;
+}
+
+/* Selects the character encoding used for writing.
+ *
+ * from:        ENC_FROM_SYS is rejected.  ENC_MANUAL installs new_charset.
+ *              Any other source is only recorded in write_encoding_from.
+ * new_charset: charset name; only used (and required) for ENC_MANUAL.
+ * width:       only used when new_charset is "UNICODE", where ONE_BYTE is
+ *              an error; every other charset is looked up as ONE_BYTE.
+ * bom:         stored as given (ENC_MANUAL only).
+ *
+ * Returns 0 on success and 1 on failure.  On failure neither
+ * write_encoding nor write_encoding_from is modified, so a rejected
+ * request leaves the previous configuration fully in effect.
+ */
+int gedcom_write_set_encoding(Enc_from from, const char* new_charset,
+                              Encoding width, Enc_bom bom)
+{
+  if (from == ENC_FROM_SYS) {
+    return 1;
+  }
+  if (from == ENC_MANUAL) {
+    char* new_encoding = NULL;
+    Encoding new_width = ONE_BYTE;
+
+    /* strcmp() on a NULL charset would be undefined behaviour. */
+    if (!new_charset)
+      return 1;
+    if (!strcmp(new_charset, "UNICODE")) {
+      if (width == ONE_BYTE) {
+        gedcom_error(_("Unicode cannot be encoded into one byte"));
+        return 1;
+      }
+      new_width = width;
+    }
+    new_encoding = get_encoding(new_charset, new_width);
+    if (!new_encoding)
+      return 1;
+    else {
+      write_encoding.encoding = new_encoding;
+      write_encoding.width = new_width;
+      write_encoding.bom = bom;
+      /* NOTE(review): stays NUL-terminated only if charset holds more
+         than MAX_CHARSET_LEN bytes -- confirm against the struct. */
+      strncpy(write_encoding.charset, new_charset, MAX_CHARSET_LEN);
}
}
+  /* Record the source only once the request is known to be valid. */
+  write_encoding_from = from;
return 0;
}
-int gedcom_write_set_line_terminator(Enc_line_end end)
+/* Takes over the encoding settings of read_encoding (charset, encoding,
+ * width and byte order mark) for writing.  Nothing is changed when the
+ * read charset is empty.
+ */
+void copy_write_encoding_from_file()
+{
+  if (read_encoding.charset[0] == '\0')
+    return;
+
+  write_encoding.bom = read_encoding.bom;
+  write_encoding.width = read_encoding.width;
+  write_encoding.encoding = read_encoding.encoding;
+  strncpy(write_encoding.charset, read_encoding.charset, MAX_CHARSET_LEN);
+}
+
+/* Chooses the line terminator used for writing.
+ *
+ * The source is always recorded in write_terminator_from.  ENC_FROM_SYS
+ * selects SYS_NEWLINE, ENC_MANUAL selects terminator[end]; for any other
+ * source write_encoding.terminator is left untouched here.
+ * Always returns 0.
+ *
+ * NOTE(review): end is not range-checked before indexing terminator[].
+ */
+int gedcom_write_set_line_terminator(Enc_from from, Enc_line_end end)
{
- write_terminator = terminator[end];
+ const char* new_term = NULL;
+ write_terminator_from = from;
+ if (from == ENC_FROM_SYS) {
+ new_term = SYS_NEWLINE;
+ }
+ else if (from == ENC_MANUAL) {
+ new_term = terminator[end];
+ }
+ if (new_term)
+ strncpy(write_encoding.terminator, new_term, MAX_TERMINATOR_LEN);
return 0;
}
+/* Takes over the line terminator of read_encoding for writing.  Nothing
+ * is changed when the read terminator is empty.
+ */
+void copy_write_terminator_from_file()
+{
+  if (read_encoding.terminator[0] == '\0')
+    return;
+
+  strncpy(write_encoding.terminator, read_encoding.terminator,
+          MAX_TERMINATOR_LEN);
+}
+
Gedcom_write_hndl gedcom_write_open(const char *filename)
{
Gedcom_write_hndl hndl;
if (!hndl)
MEMORY_ERROR;
else {
+ if (write_encoding_from == ENC_FROM_FILE)
+ copy_write_encoding_from_file();
+ if (write_terminator_from == ENC_FROM_FILE)
+ copy_write_terminator_from_file();
hndl->total_conv_fails = 0;
- hndl->conv = initialize_utf8_conversion(encoding, 0);
+ hndl->conv = initialize_utf8_conversion(write_encoding.encoding, 0);
if (!hndl->conv) {
gedcom_error(_("Could not open encoding '%s' for writing: %s"),
- encoding, strerror(errno));
+ write_encoding.encoding, strerror(errno));
free(hndl);
hndl = NULL;
}
hndl = NULL;
}
else {
- hndl->term = write_terminator;
+ hndl->term = write_encoding.terminator;
hndl->ctxt_level = -1;
- if (write_encoding_details & WITH_BOM) {
- if (write_encoding_details & TWO_BYTE_HILO)
+ if (write_encoding.bom == WITH_BOM) {
+ if (write_encoding.width == TWO_BYTE_HILO)
write(hndl->filedesc, "\xFE\xFF", 2);
- else if (write_encoding_details & TWO_BYTE_LOHI)
+ else if (write_encoding.width == TWO_BYTE_LOHI)
write(hndl->filedesc, "\xFF\xFE", 2);
+ else if (!strcmp(write_encoding.encoding, "UTF-8"))
+ write(hndl->filedesc, "\xEF\xBB\xBF", 3);
else
- gedcom_warning(_("Byte order mark configured, but no Unicode"));
+ gedcom_warning(_("Byte order mark configured, but not relevant"));
}
}
}
return result;
}
-char* get_tag_string(int elt_or_rec, char* tag)
+char* get_tag_string(int elt_or_rec, int tag)
{
- char* result = tag_data[elt_or_rec].tag_name;
+ int tagnum = tag_data[elt_or_rec].tag;
+ if (!tagnum) tagnum = tag;
- if (result)
- return result;
- else if (tag)
- return tag;
+ if (tagnum) {
+ if (tagnum >= TAG_NUM_START && tagnum <= TAG_NUM_END)
+ return tag_name[tagnum - TAG_NUM_START];
+ else {
+ gedcom_error(_("Not a valid tag: %d"), tagnum);
+ return NULL;
+ }
+ }
else {
- gedcom_error(_("The element or record type '%s' requires a specific tag"
+ gedcom_error(_("The element or record type '%s' requires a specific tag "
"for writing"),
tag_data[elt_or_rec].elt_name);
return NULL;
return hndl->ctxt_level;
}
-int gedcom_write_record_str(Gedcom_write_hndl hndl,
- Gedcom_rec rec, char* tag,
- struct xref_value* xref, char* val)
+/* Returns a copy of input in which every '@' is doubled.
+ *
+ * The copy is built in convert_at_buffer, which is reset on each call
+ * with a non-NULL input, and the returned pointer is that buffer's
+ * string.  A NULL input yields NULL and leaves the buffer untouched.
+ */
+char* convert_at(const char* input)
+{
+  const char* cur;
+
+  if (!input)
+    return NULL;
+
+  reset_buffer(&convert_at_buffer);
+  for (cur = input; *cur; cur++) {
+    if (*cur == '@') {
+      /* extra '@' in front of the one appended below */
+      SAFE_BUF_ADDCHAR(&convert_at_buffer, '@');
+    }
+    SAFE_BUF_ADDCHAR(&convert_at_buffer, *cur);
+  }
+  return get_buf_string(&convert_at_buffer);
+}
+
+/* Common back end of the gedcom_write_record_... and
+ * gedcom_write_element_... functions.
+ *
+ * Resolves the tag string and the level, then writes the line through
+ * write_encoding_value() for ELT_HEAD_CHAR, through write_long() when the
+ * type has OPT_CONT or OPT_CONC set, and through write_simple() otherwise.
+ *
+ * Returns 1 when no tag string was found or the level is -1, otherwise
+ * the result of the selected writer.
+ */
+int _gedcom_write_val(Gedcom_write_hndl hndl,
+ int rec_or_elt, int tag, int parent_rec_or_elt,
+ char* xrefstr, char* val)
{
int result = 1;
int level = 0;
char* tag_str = NULL;
- char* xref_str = NULL;
- tag_str = get_tag_string(rec, tag);
- level = get_level(hndl, rec, -1);
- if (tag_str && check_type(rec, (val ? GV_CHAR_PTR : GV_NULL))) {
- if (xref)
- xref_str = xref->string;
- result = write_simple(hndl, level, xref_str, tag_str, val);
+ tag_str = get_tag_string(rec_or_elt, tag);
+ level = get_level(hndl, rec_or_elt, parent_rec_or_elt);
+ if (tag_str && (level != -1)) {
+ if (rec_or_elt == ELT_HEAD_CHAR)
+ result = write_encoding_value(hndl, level, xrefstr, tag_str, val);
+ else if (supports_continuation(rec_or_elt, OPT_CONT|OPT_CONC))
+ result = write_long(hndl, rec_or_elt, level, xrefstr, tag_str, val);
+ else
+ result = write_simple(hndl, level, xrefstr, tag_str, val);
}
return result;
}
+/* Writes a record line with a string value (or no value when val is
+ * NULL).  Every '@' in the value is doubled by convert_at() first.
+ *
+ * Returns 1 when check_type() rejects the value type for rec, otherwise
+ * the result of _gedcom_write_val().
+ */
+int gedcom_write_record_str(Gedcom_write_hndl hndl,
+                            Gedcom_rec rec, char* xrefstr, char* val)
+{
+  if (!check_type(rec, (val ? GV_CHAR_PTR : GV_NULL)))
+    return 1;
+
+  return _gedcom_write_val(hndl, rec, 0, -1, xrefstr, convert_at(val));
+}
+
+/* Writes an element line with a string value (or no value when val is
+ * NULL).  Every '@' in the value is doubled by convert_at() first.
+ *
+ * Returns 1 when check_type() rejects the value type for elt, otherwise
+ * the result of _gedcom_write_val().
+ */
int gedcom_write_element_str(Gedcom_write_hndl hndl,
- Gedcom_elt elt, char* tag, int parent_rec_or_elt,
+ Gedcom_elt elt, int tag, int parent_rec_or_elt,
char* val)
{
int result = 1;
- int level = -1;
- char* tag_str = NULL;
+ if (check_type(elt, (val ? GV_CHAR_PTR : GV_NULL)))
+ result = _gedcom_write_val(hndl, elt, tag, parent_rec_or_elt, NULL,
+ convert_at(val));
+ return result;
+}
- tag_str = get_tag_string(elt, tag);
- level = get_level(hndl, elt, parent_rec_or_elt);
- if (tag_str && (level != -1)
- && check_type(elt, (val ? GV_CHAR_PTR : GV_NULL))) {
- result = write_simple(hndl, level, NULL, tag_str, val);
- }
+/* Writes an element line whose value is a cross-reference.
+ *
+ * val may be NULL; the element is then checked against GV_NULL and, if
+ * accepted, written without a value, like gedcom_write_element_str()
+ * does for a NULL string.
+ *
+ * Returns 1 when check_type() rejects the value type for elt, otherwise
+ * the result of _gedcom_write_val().
+ */
+int gedcom_write_element_xref(Gedcom_write_hndl hndl,
+                              Gedcom_elt elt, int tag, int parent_rec_or_elt,
+                              struct xref_value* val)
+{
+  int result = 1;
+  if (check_type(elt, (val ? GV_XREF_PTR : GV_NULL))) {
+    /* check_type() can succeed for a NULL val, so never dereference it
+       unconditionally. */
+    result = _gedcom_write_val(hndl, elt, tag, parent_rec_or_elt, NULL,
+                               (val ? val->string : NULL));
+  }
+  return result;
+}
+
+/* Writes an element line whose value is a date, formatted by
+ * gedcom_date_to_string().
+ *
+ * Returns 1 when check_type() rejects the value type for elt, otherwise
+ * the result of _gedcom_write_val().
+ */
+int gedcom_write_element_date(Gedcom_write_hndl hndl,
+                              Gedcom_elt elt, int tag, int parent_rec_or_elt,
+                              struct date_value* val)
+{
+  if (!check_type(elt, (val ? GV_DATE_VALUE : GV_NULL)))
+    return 1;
+
+  return _gedcom_write_val(hndl, elt, tag, parent_rec_or_elt, NULL,
+                           gedcom_date_to_string(val));
+}
+/* Writes an element line whose value is an age, formatted by
+ * gedcom_age_to_string().
+ *
+ * Returns 1 when check_type() rejects the value type for elt, otherwise
+ * the result of _gedcom_write_val().
+ */
+int gedcom_write_element_age(Gedcom_write_hndl hndl,
+                             Gedcom_elt elt, int tag, int parent_rec_or_elt,
+                             struct age_value* val)
+{
+  if (!check_type(elt, (val ? GV_AGE_VALUE : GV_NULL)))
+    return 1;
+
+  return _gedcom_write_val(hndl, elt, tag, parent_rec_or_elt, NULL,
+                           gedcom_age_to_string(val));
+}
+
+/* Writes a user-defined line with a string value.  Every '@' in the
+ * value is doubled by convert_at() first.
+ *
+ * Only tags starting with '_' are accepted.  Returns 1 without writing
+ * for a NULL or differently named tag, otherwise the result of
+ * write_simple().
+ */
+int gedcom_write_user_str(Gedcom_write_hndl hndl, int level, char* tag,
+                          char* xrefstr, char* value)
+{
+  if (!tag || tag[0] != '_')
+    return 1;
+
+  return write_simple(hndl, level, xrefstr, tag, convert_at(value));
+}
+
+/* Writes a user-defined line whose value is a cross-reference.
+ *
+ * Only tags starting with '_' are accepted.  val may be NULL, in which
+ * case write_simple() receives a NULL value, as gedcom_write_user_str()
+ * passes for a NULL string.
+ *
+ * Returns 1 without writing for a NULL or differently named tag,
+ * otherwise the result of write_simple().
+ */
+int gedcom_write_user_xref(Gedcom_write_hndl hndl, int level, char* tag,
+                           char* xrefstr, struct xref_value* val)
+{
+  int result = 1;
+  if (tag && tag[0] == '_')
+    /* Guard the dereference: val is not checked anywhere before this. */
+    result = write_simple(hndl, level, xrefstr, tag,
+                          (val ? val->string : NULL));
return result;
}