X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fmultilex.c;h=393516adc8fdde69a30ad81e005b708cd1dedc33;hb=60282b9f2f1326231ea5019e239d00bdccd6608b;hp=420bfcb8b1c88b7a684328313f85055601df6af2;hpb=32da62601457ba994c6b71d71470ae066fc3969b;p=gedcom-parse.git

diff --git a/gedcom/multilex.c b/gedcom/multilex.c
index 420bfcb..393516a 100644
--- a/gedcom/multilex.c
+++ b/gedcom/multilex.c
@@ -24,6 +24,7 @@
 #include "gedcom_internal.h"
 #include "multilex.h"
 #include "encoding.h"
+#include "encoding_state.h"
 #include "xref.h"
 
 int line_no = 0;
@@ -38,19 +39,19 @@ int lexer_init(Encoding enc, FILE* f)
   if (enc == ONE_BYTE) {
     lf  = &gedcom_1byte_lex;
     gedcom_1byte_myinit(f);
-    set_encoding_width(enc);
+    set_read_encoding_width(enc);
     return open_conv_to_internal("ASCII");
   }
   else if (enc == TWO_BYTE_HILO) {
     lf  = &gedcom_hilo_lex;
     gedcom_hilo_myinit(f);
-    set_encoding_width(enc);
+    set_read_encoding_width(enc);
     return open_conv_to_internal("UNICODE");
   }
   else if (enc == TWO_BYTE_LOHI) {
     lf  = &gedcom_lohi_lex;
     gedcom_lohi_myinit(f);
-    set_encoding_width(enc);
+    set_read_encoding_width(enc);
     return open_conv_to_internal("UNICODE");
   }
   else {
@@ -79,6 +80,7 @@ int determine_encoding(FILE* f)
   char first[2];
   int read;
 
+  set_read_encoding_bom(WITHOUT_BOM);
   read = fread(first, 1, 2, f);
   if (read != 2) {
     gedcom_warning(_("Error reading from input file: %s"), strerror(errno));
@@ -86,26 +88,28 @@ int determine_encoding(FILE* f)
     return ONE_BYTE;
   }
   else if ((first[0] == '0') && (first[1] == ' ')) {
-    gedcom_debug_print(_("One-byte encoding"));
+    gedcom_debug_print("One-byte encoding");
     rewind_file(f);
     return ONE_BYTE;
   }
   else if ((first[0] == '\0') && (first[1] == '0')) {
-    gedcom_debug_print(_("Two-byte encoding, high-low"));
+    gedcom_debug_print("Two-byte encoding, high-low");
     rewind_file(f);
     return TWO_BYTE_HILO;
   }
   else if ((first[0] == '\xFE') && (first[1] == '\xFF')) {
-    gedcom_debug_print(_("Two-byte encoding, high-low, with BOM"));
+    gedcom_debug_print("Two-byte encoding, high-low, with BOM");
+    set_read_encoding_bom(WITH_BOM);
     return TWO_BYTE_HILO;
   }
   else if ((first[0] == '0') && (first[1] == '\0')) {
-    gedcom_debug_print(_("Two-byte encoding, low-high"));
+    gedcom_debug_print("Two-byte encoding, low-high");
     rewind_file(f);
     return TWO_BYTE_LOHI;
   }
   else if ((first[0] == '\xFF') && (first[1] == '\xFE')) {
-    gedcom_debug_print(_("Two-byte encoding, low-high, with BOM"));
+    gedcom_debug_print("Two-byte encoding, low-high, with BOM");
+    set_read_encoding_bom(WITH_BOM);
     return TWO_BYTE_LOHI;
   }
   else if ((first[0] == '\xEF') && (first[1] == '\xBB')) {
@@ -115,7 +119,12 @@ int determine_encoding(FILE* f)
       rewind_file(f);
     }
     else if (first[0] == '\xBF') {
-      gedcom_debug_print(_("UTF-8 encoding, with BOM"));
+      set_read_encoding_bom(WITH_BOM);
+      gedcom_debug_print("UTF-8 encoding, with BOM");
+    }
+    else {
+      gedcom_warning(_("Unknown encoding, falling back to one-byte"));
+      rewind_file(f);
     }
     return ONE_BYTE;
   }
@@ -128,6 +137,25 @@ int determine_encoding(FILE* f)
 
 int init_called = 0;
 
+/** This function initializes the Gedcom parser library and must be called
+    before any other function in this library.
+
+    The function also initializes locale handling by calling
+    <tt> setlocale(LC_ALL, "") </tt>, in case the application does not do this
+    itself (it does no harm if the application also makes this call).
+
+    \attention This function should be called as early as possible.  The
+    requirement
+    is that it comes before the first call to \c iconv_open (part of the
+    generic character set conversion feature) in the program, whether made by
+    your program itself or indirectly by the library calls it makes.
+    \attention In practice,
+    it should come before, for example, any calls to GTK functions, because GTK
+    uses \c iconv_open in its initialization.
+
+    \retval 0 in case of success
+    \retval nonzero in case of failure (e.g. failure to set locale)
+ */
 int gedcom_init()
 {
   init_called = 1;
@@ -141,6 +169,28 @@ int gedcom_init()
     return 0;
 }
 
+/** This function parses the given file.  By itself, it provides no
+    information other than the parse result.
+
+    The function also empties the cross-reference table before parsing, and
+    checks the validity of the
+    cross-references if the parse was successful.
+    The following conditions can occur in the cross-reference table:
+      - An xref was defined, but not used (warning)
+      - An xref was used, but not defined (error)
+      - An xref was used as a different type than the defined type (error)
+
+    \param file_name The name of the Gedcom file to parse
+
+    \retval 0 if the parse was successful and no errors were found in the
+    cross-reference table
+    \retval nonzero on errors, which can include:
+            - \ref gedcom_init() was not called
+            - The given file was not found
+            - The parse of the given file failed
+            - There were errors found in the cross-reference table
+ */
+
 int gedcom_parse_file(const char* file_name)
 {
   Encoding enc;
@@ -176,6 +226,15 @@ int gedcom_parse_file(const char* file_name)
   return result;
 }
 
+/** This function starts a new model.  It does this by parsing the \c new.ged
+    file in the data directory of the library (\c $PREFIX/share/gedcom-parse).
+    This can be used to start from an empty model, and to build up the model
+    by adding new records yourself.
+
+    \retval 0 on success
+    \retval nonzero on errors (mainly the errors from \ref gedcom_parse_file()).
+ */
+
 int gedcom_new_model()
 {
   int result = 1;