dlugolecki.net.pl
Dziennik
Polecane
Software
projects
/
gedcom-parse.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (from parent 1:
ae879a2
)
Recognize the UTF-8 byte order mark.
author
Peter Verthez
<Peter.Verthez@advalvas.be>
Sat, 4 Jan 2003 15:50:18 +0000
(15:50 +0000)
committer
Peter Verthez
<Peter.Verthez@advalvas.be>
Sat, 4 Jan 2003 15:50:18 +0000
(15:50 +0000)
gedcom/multilex.c
patch
|
blob
|
history
diff --git
a/gedcom/multilex.c
b/gedcom/multilex.c
index e8d514cb67296e2777548eeb0ebf80c1ed733ade..420bfcb8b1c88b7a684328313f85055601df6af2 100644
(file)
--- a/
gedcom/multilex.c
+++ b/
gedcom/multilex.c
@@
-68,6
+68,12
@@
int gedcom_lex()
return (*lf)();
}
return (*lf)();
}
+void rewind_file(FILE* f)
+{
+ if (fseek(f, 0, 0) != 0)
+ gedcom_warning(_("Error positioning input file: %s"), strerror(errno));
+}
+
int determine_encoding(FILE* f)
{
char first[2];
int determine_encoding(FILE* f)
{
char first[2];
@@
-76,42
+82,46
@@
int determine_encoding(FILE* f)
read = fread(first, 1, 2, f);
if (read != 2) {
gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
read = fread(first, 1, 2, f);
if (read != 2) {
gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
+ rewind_file(f);
return ONE_BYTE;
}
else if ((first[0] == '0') && (first[1] == ' ')) {
gedcom_debug_print(_("One-byte encoding"));
return ONE_BYTE;
}
else if ((first[0] == '0') && (first[1] == ' ')) {
gedcom_debug_print(_("One-byte encoding"));
- if (fseek(f, 0, 0) != 0)
- gedcom_warning(_("Error positioning input file: %s"), strerror(errno));
+ rewind_file(f);
return ONE_BYTE;
}
return ONE_BYTE;
}
- else if ((first[0] == '\0') && (first[1] == '0'))
- {
+ else if ((first[0] == '\0') && (first[1] == '0')) {
gedcom_debug_print(_("Two-byte encoding, high-low"));
gedcom_debug_print(_("Two-byte encoding, high-low"));
- if (fseek(f, 0, 0) != 0)
- gedcom_warning(_("Error positioning input file: %s"), strerror(errno));
+ rewind_file(f);
return TWO_BYTE_HILO;
}
return TWO_BYTE_HILO;
}
- else if ((first[0] == '\xFE') && (first[1] == '\xFF'))
- {
+ else if ((first[0] == '\xFE') && (first[1] == '\xFF')) {
gedcom_debug_print(_("Two-byte encoding, high-low, with BOM"));
return TWO_BYTE_HILO;
}
gedcom_debug_print(_("Two-byte encoding, high-low, with BOM"));
return TWO_BYTE_HILO;
}
- else if ((first[0] == '0') && (first[1] == '\0'))
- {
+ else if ((first[0] == '0') && (first[1] == '\0')) {
gedcom_debug_print(_("Two-byte encoding, low-high"));
gedcom_debug_print(_("Two-byte encoding, low-high"));
- if (fseek(f, 0, 0) != 0)
- gedcom_warning(_("Error positioning input file: %s"), strerror(errno));
+ rewind_file(f);
return TWO_BYTE_LOHI;
}
return TWO_BYTE_LOHI;
}
- else if ((first[0] == '\xFF') && (first[1] == '\xFE'))
- {
+ else if ((first[0] == '\xFF') && (first[1] == '\xFE')) {
gedcom_debug_print(_("Two-byte encoding, low-high, with BOM"));
return TWO_BYTE_LOHI;
}
gedcom_debug_print(_("Two-byte encoding, low-high, with BOM"));
return TWO_BYTE_LOHI;
}
+ else if ((first[0] == '\xEF') && (first[1] == '\xBB')) {
+ read = fread(first, 1, 1, f);
+ if (read != 1) {
+ gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
+ rewind_file(f);
+ }
+ else if (first[0] == '\xBF') {
+ gedcom_debug_print(_("UTF-8 encoding, with BOM"));
+ }
+ return ONE_BYTE;
+ }
else {
gedcom_warning(_("Unknown encoding, falling back to one-byte"));
else {
gedcom_warning(_("Unknown encoding, falling back to one-byte"));
- if (fseek(f, 0, 0) != 0)
- gedcom_warning(_("Error positioning input file: %s"), strerror(errno));
+ rewind_file(f);
return ONE_BYTE;
}
}
return ONE_BYTE;
}
}