dlugolecki.net.pl
Dziennik
Polecane
Software
projects
/
gedcom-parse.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Improved context handling, to allow elements out of context.
[gedcom-parse.git]
/
gedcom
/
multilex.c
diff --git a/gedcom/multilex.c b/gedcom/multilex.c
index 7ff4efe0fb5eccb2651d3e78c8b84db512bfc1ec..e4128174d5e6393ea497a8b6faa9f0be079a44c2 100644
(file)
--- a/gedcom/multilex.c
+++ b/gedcom/multilex.c
@@ -24,6 +24,7 @@
#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
+#include "encoding_state.h"
#include "xref.h"
int line_no = 0;
#include "xref.h"
int line_no = 0;
@@ -38,19 +39,19 @@ int lexer_init(Encoding enc, FILE* f)
if (enc == ONE_BYTE) {
lf = &gedcom_1byte_lex;
gedcom_1byte_myinit(f);
if (enc == ONE_BYTE) {
lf = &gedcom_1byte_lex;
gedcom_1byte_myinit(f);
- set_encoding_width(enc);
+ set_read_encoding_width(enc);
return open_conv_to_internal("ASCII");
}
else if (enc == TWO_BYTE_HILO) {
lf = &gedcom_hilo_lex;
gedcom_hilo_myinit(f);
return open_conv_to_internal("ASCII");
}
else if (enc == TWO_BYTE_HILO) {
lf = &gedcom_hilo_lex;
gedcom_hilo_myinit(f);
- set_encoding_width(enc);
+ set_read_encoding_width(enc);
return open_conv_to_internal("UNICODE");
}
else if (enc == TWO_BYTE_LOHI) {
lf = &gedcom_lohi_lex;
gedcom_lohi_myinit(f);
return open_conv_to_internal("UNICODE");
}
else if (enc == TWO_BYTE_LOHI) {
lf = &gedcom_lohi_lex;
gedcom_lohi_myinit(f);
- set_encoding_width(enc);
+ set_read_encoding_width(enc);
return open_conv_to_internal("UNICODE");
}
else {
return open_conv_to_internal("UNICODE");
}
else {
@@ -79,6 +80,7 @@ int determine_encoding(FILE* f)
char first[2];
int read;
char first[2];
int read;
+ set_read_encoding_bom(WITHOUT_BOM);
read = fread(first, 1, 2, f);
if (read != 2) {
gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
read = fread(first, 1, 2, f);
if (read != 2) {
gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
@@ -86,26 +88,28 @@ int determine_encoding(FILE* f)
return ONE_BYTE;
}
else if ((first[0] == '0') && (first[1] == ' ')) {
return ONE_BYTE;
}
else if ((first[0] == '0') && (first[1] == ' ')) {
- gedcom_debug_print(_("One-byte encoding"));
+ gedcom_debug_print("One-byte encoding");
rewind_file(f);
return ONE_BYTE;
}
else if ((first[0] == '\0') && (first[1] == '0')) {
rewind_file(f);
return ONE_BYTE;
}
else if ((first[0] == '\0') && (first[1] == '0')) {
- gedcom_debug_print(_("Two-byte encoding, high-low"));
+ gedcom_debug_print("Two-byte encoding, high-low");
rewind_file(f);
return TWO_BYTE_HILO;
}
else if ((first[0] == '\xFE') && (first[1] == '\xFF')) {
rewind_file(f);
return TWO_BYTE_HILO;
}
else if ((first[0] == '\xFE') && (first[1] == '\xFF')) {
- gedcom_debug_print(_("Two-byte encoding, high-low, with BOM"));
+ gedcom_debug_print("Two-byte encoding, high-low, with BOM");
+ set_read_encoding_bom(WITH_BOM);
return TWO_BYTE_HILO;
}
else if ((first[0] == '0') && (first[1] == '\0')) {
return TWO_BYTE_HILO;
}
else if ((first[0] == '0') && (first[1] == '\0')) {
- gedcom_debug_print(_("Two-byte encoding, low-high"));
+ gedcom_debug_print("Two-byte encoding, low-high");
rewind_file(f);
return TWO_BYTE_LOHI;
}
else if ((first[0] == '\xFF') && (first[1] == '\xFE')) {
rewind_file(f);
return TWO_BYTE_LOHI;
}
else if ((first[0] == '\xFF') && (first[1] == '\xFE')) {
- gedcom_debug_print(_("Two-byte encoding, low-high, with BOM"));
+ gedcom_debug_print("Two-byte encoding, low-high, with BOM");
+ set_read_encoding_bom(WITH_BOM);
return TWO_BYTE_LOHI;
}
else if ((first[0] == '\xEF') && (first[1] == '\xBB')) {
return TWO_BYTE_LOHI;
}
else if ((first[0] == '\xEF') && (first[1] == '\xBB')) {
@@ -115,7 +119,8 @@ int determine_encoding(FILE* f)
rewind_file(f);
}
else if (first[0] == '\xBF') {
rewind_file(f);
}
else if (first[0] == '\xBF') {
- gedcom_debug_print(_("UTF-8 encoding, with BOM"));
+ set_read_encoding_bom(WITH_BOM);
+ gedcom_debug_print("UTF-8 encoding, with BOM");
}
else {
gedcom_warning(_("Unknown encoding, falling back to one-byte"));
}
else {
gedcom_warning(_("Unknown encoding, falling back to one-byte"));