Libiconv changes for ANSEL encoding.
author Peter Verthez <Peter.Verthez@advalvas.be>
Fri, 4 Oct 2002 19:18:33 +0000 (19:18 +0000)
committer Peter Verthez <Peter.Verthez@advalvas.be>
Fri, 4 Oct 2002 19:18:33 +0000 (19:18 +0000)
13 files changed:
iconv/libiconv/ChangeLog.gedcom [new file with mode: 0644]
iconv/libiconv/Makefile.in.patch [new file with mode: 0644]
iconv/libiconv/NEWS.gedcom [new file with mode: 0644]
iconv/libiconv/README.patch [new file with mode: 0644]
iconv/libiconv/ansi_z39_47.h [new file with mode: 0644]
iconv/libiconv/configure.in.patch [new file with mode: 0644]
iconv/libiconv/configure.patch [new file with mode: 0644]
iconv/libiconv/converters.h.patch [new file with mode: 0644]
iconv/libiconv/encodings.def.patch [new file with mode: 0644]
iconv/libiconv/get_patches [new file with mode: 0755]
iconv/libiconv/iconv_open.3.html.patch [new file with mode: 0644]
iconv/libiconv/iconv_open.3.patch [new file with mode: 0644]
iconv/libiconv/patch_dist [new file with mode: 0755]

diff --git a/iconv/libiconv/ChangeLog.gedcom b/iconv/libiconv/ChangeLog.gedcom
new file mode 100644 (file)
index 0000000..f16b07d
--- /dev/null
@@ -0,0 +1,5 @@
+2002-10-04  Peter Verthez  <Peter.Verthez@advalvas.be>
+
+       * lib/ansi_z39_47.h, lib/ANSI_Z39.47-tables.h: Conversion from ANSEL
+       to UTF-8, based on libiconv 1.8.
+
diff --git a/iconv/libiconv/Makefile.in.patch b/iconv/libiconv/Makefile.in.patch
new file mode 100644 (file)
index 0000000..49769cb
--- /dev/null
@@ -0,0 +1,11 @@
+--- Makefile.in.orig   Thu May 23 13:48:42 2002
++++ Makefile.in        Fri Oct  4 20:36:35 2002
+@@ -39,7 +39,7 @@
+ # Before making a release, change this according to the libtool documentation,
+ # section "Library interface versions".
+-LIBICONV_VERSION_INFO = 3:0:1
++LIBICONV_VERSION_INFO = 3:1:1
+ # Needed by $(LIBTOOL).
+ top_builddir = ..
diff --git a/iconv/libiconv/NEWS.gedcom b/iconv/libiconv/NEWS.gedcom
new file mode 100644 (file)
index 0000000..4034c3a
--- /dev/null
@@ -0,0 +1,2 @@
+New in 1.8.1:
+* Added ANSEL converter.
diff --git a/iconv/libiconv/README.patch b/iconv/libiconv/README.patch
new file mode 100644 (file)
index 0000000..bebd6fe
--- /dev/null
@@ -0,0 +1,35 @@
+--- README.orig        Wed May 29 16:06:52 2002
++++ README     Fri Oct  4 20:36:35 2002
+@@ -12,6 +12,7 @@
+         Mac{Roman,CentralEurope,Iceland,Croatian,Romania},
+         Mac{Cyrillic,Ukraine,Greek,Turkish},
+         Macintosh
++      ANSEL
+     Semitic languages
+         ISO-8859-{6,8}, CP{1255,1256}, CP862, Mac{Hebrew,Arabic}
+     Japanese
+@@ -132,15 +133,22 @@
+     recompiled. Just set the LD_PRELOAD environment variable, that's it!
+-Distribution:
++Distribution of original package (libiconv):
+     ftp://ftp.gnu.org/pub/gnu/libiconv/libiconv-1.8.tar.gz
+     ftp://ftp.ilog.fr/pub/Users/haible/gnu/libiconv-1.8.tar.gz
+-Homepage:
++Homepage of original package (libiconv):
+     http://www.gnu.org/software/libiconv/
++Distribution of modified package (libiconv-gedcom):
++    http://sourceforge.net/projects/gedcom-parse
++
+ Bug reports to:
+     <bug-gnu-libiconv@gnu.org>
++    For specific ANSEL bugs, please use the web interface at SourceForge
++    (link given above)
+ Bruno Haible <brunoe@clisp.org>
++
++Modifications: Peter Verthez <peter.verthez@advalvas.be>
diff --git a/iconv/libiconv/ansi_z39_47.h b/iconv/libiconv/ansi_z39_47.h
new file mode 100644 (file)
index 0000000..09ab4bb
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 1999-2002 Free Software Foundation, Inc.
+ * This file is part of the GNU LIBICONV Library.
+ *
+ * The GNU LIBICONV Library is free software; you can redistribute it
+ * and/or modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * The GNU LIBICONV Library is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
+ * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
+ * Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * ANSI_Z39.47
+ */
+
+#include "ANSI_Z39.47-tables.h"
+#include <stdio.h>
+
+/* ANSEL byte to Unicode table for the upper half (0x80 to 0xff); it is
+   indexed with "byte - 0x80".  The lower half is omitted because it is
+   assumed to be the identity mapping (ASCII).  An entry of 0x0000 is treated
+   by the decoder as "no mapping".  */
+static const unsigned short ansi_z39_47_2uni[128] = TABLE_TO_UCS4_BASIC;
+
+/* Precomposed Unicode characters for "diacritic + base character" pairs.
+   The outer array range runs from 0xe0 to 0xfe (the diacritic byte, indexed
+   with "byte - 0xe0", hence 31 rows), the inner range from 0x20 to 0x7f
+   (the base character, indexed with "byte - 0x20", hence 96 columns).
+   An entry of 0x0000 means that the pair cannot be combined into a single
+   character.  */
+static const unsigned short ansi_z39_47_2uni_comb[31][96] =
+{
+  /* 0xe0 (hook above) */                           TABLE_TO_UCS4_COMBINING_E0,
+  /* 0xe1 (grave) */                                TABLE_TO_UCS4_COMBINING_E1,
+  /* 0xe2 (acute) */                                TABLE_TO_UCS4_COMBINING_E2,
+  /* 0xe3 (circumflex) */                           TABLE_TO_UCS4_COMBINING_E3,
+  /* 0xe4 (tilde) */                                TABLE_TO_UCS4_COMBINING_E4,
+  /* 0xe5 (macron) */                               TABLE_TO_UCS4_COMBINING_E5,
+  /* 0xe6 (breve) */                                TABLE_TO_UCS4_COMBINING_E6,
+  /* 0xe7 (dot above) */                            TABLE_TO_UCS4_COMBINING_E7,
+  /* 0xe8 (umlaut, diaeresis) */                    TABLE_TO_UCS4_COMBINING_E8,
+  /* 0xe9 (caron, hacek) */                         TABLE_TO_UCS4_COMBINING_E9,
+  /* 0xea (ring above) */                           TABLE_TO_UCS4_COMBINING_EA,
+  /* 0xeb (ligature, left half) */                  TABLE_TO_UCS4_COMBINING_EB,
+  /* 0xec (ligature, right half) */                 TABLE_TO_UCS4_COMBINING_EC,
+  /* 0xed (comma above right) */                    TABLE_TO_UCS4_COMBINING_ED,
+  /* 0xee (double acute) */                         TABLE_TO_UCS4_COMBINING_EE,
+  /* 0xef (candrabindu) */                          TABLE_TO_UCS4_COMBINING_EF,
+  /* 0xf0 (cedilla) */                              TABLE_TO_UCS4_COMBINING_F0,
+  /* 0xf1 (ogonek, right hook) */                   TABLE_TO_UCS4_COMBINING_F1,
+  /* 0xf2 (dot below) */                            TABLE_TO_UCS4_COMBINING_F2,
+  /* 0xf3 (double dot below) */                     TABLE_TO_UCS4_COMBINING_F3,
+  /* 0xf4 (ring below) */                           TABLE_TO_UCS4_COMBINING_F4,
+  /* 0xf5 (double low line) */                      TABLE_TO_UCS4_COMBINING_F5,
+  /* 0xf6 (line below) */                           TABLE_TO_UCS4_COMBINING_F6,
+  /* 0xf7 (comma below, left hook) */               TABLE_TO_UCS4_COMBINING_F7,
+  /* 0xf8 (left half ring below, right cedilla) */  TABLE_TO_UCS4_COMBINING_F8,
+  /* 0xf9 (breve below, half circle below) */       TABLE_TO_UCS4_COMBINING_F9,
+  /* 0xfa (double tilde, left half) */              TABLE_TO_UCS4_COMBINING_FA,
+  /* 0xfb (double tilde, right half) */             TABLE_TO_UCS4_COMBINING_FB,
+  /* 0xfc */                                        TABLE_TO_UCS4_COMBINING_FC,
+  /* 0xfd */                                        TABLE_TO_UCS4_COMBINING_FD,
+  /* 0xfe (comma above, high centered comma) */     TABLE_TO_UCS4_COMBINING_FE,
+};
+
+/* Flag stored in conv->istate next to a buffered diacritic byte: the base
+   character that followed the diacritic has already been handed to the
+   caller, so the diacritic itself is what must be produced next.  */
+#define BASE_PASSED 0x10000
+
+/*
+ * Decodes one ANSEL byte at *s into a Unicode character in *pwc.
+ *
+ * In ANSEL a diacritical mark (0xe0 to 0xfe) precedes its base character,
+ * in Unicode it follows it, so a diacritic is buffered in conv->istate:
+ *   - istate == 0: nothing is buffered.
+ *   - istate == diacritic byte: waiting for the base character.
+ *   - istate == diacritic byte | BASE_PASSED: the base character was
+ *     produced uncombined; the next call produces the diacritic.
+ *
+ * Returns 1 when *pwc was set and the byte at *s is finished with,
+ * 0 when *pwc was set to an uncombined base character while the diacritic
+ * is still owed (NOTE(review): presumably the caller then calls again for
+ * the same byte -- confirm against the conversion loop, which is not
+ * visible here), RET_TOOFEW(1) when only a diacritic was buffered, and
+ * RET_ILSEQ for a byte without mapping.  conv->istate is left untouched
+ * when RET_ILSEQ is returned.
+ *
+ * n is not inspected; at least one byte is assumed to be readable at s.
+ *
+ * NOTE(review): a diacritic that arrives while another one is still
+ * buffered replaces the buffered one, i.e. only the last diacritic of a
+ * sequence survives.
+ */
+static int
+ansi_z39_47_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
+{
+  unsigned char c = *s;
+  ucs4_t last_wc = conv->istate;
+  if (last_wc & BASE_PASSED) {
+    /* base character was already output, reset the state and output the
+       diacritical mark */
+    unsigned char dc = (unsigned char)(last_wc & ~BASE_PASSED);
+    *pwc = ansi_z39_47_2uni[dc-0x80];
+    conv->istate = 0;
+    return 1;
+  }
+  if (c < 0x80) {
+    if (last_wc && c >= 0x20) {
+      /* Check if we can combine the character with the diacritical mark */
+      unsigned char dc = (unsigned char)(last_wc & ~BASE_PASSED);
+      unsigned short wc = ansi_z39_47_2uni_comb[dc-0xe0][c-0x20];
+      if (wc != 0x0000) {
+        *pwc = (ucs4_t) wc;
+        conv->istate = 0;
+        return 1;
+      }
+    }
+    *pwc = (ucs4_t) c;
+  }
+  else if (c < 0xe0) {
+    unsigned short wc = ansi_z39_47_2uni[c-0x80];
+    if (wc == 0x0000) {
+      /* Unmapped byte: fail before the state is modified */
+      return RET_ILSEQ;
+    }
+    *pwc = (ucs4_t) wc;
+  }
+  else if (c <= 0xfe) {
+    /* The range from 0xe0 to 0xfe contains the diacritical marks.
+       Note that in ANSEL they come *before* the base characters, in Unicode,
+       they come *after*, so we have to buffer them ... */
+    conv->istate = (state_t)c;
+    return RET_TOOFEW(1);
+  }
+  else {
+    /* 0xff is not a diacritical mark.  Buffering it would make the next
+       base character index row 31 of ansi_z39_47_2uni_comb, which only
+       has rows 0 to 30.  */
+    return RET_ILSEQ;
+  }
+  if (last_wc) {
+    /* A character was produced that could not be combined with the
+       buffered diacritic: remember that the diacritic is still owed */
+    conv->istate |= BASE_PASSED;
+    return 0;
+  }
+  return 1;
+}
+
+/* Unicode to ANSEL tables, one per covered range of code points; the suffix
+   of each name gives the leading hex digits of the first code point of its
+   range (e.g. page01a starts at 0x01a0).  Every entry holds up to two ANSEL
+   bytes: a first byte of 0 means that the code point cannot be converted,
+   a second byte of 0 means that the result is a single byte.  */
+static const unsigned char ansi_z39_47_page080[][2] = TABLE_FROM_UCS4_BASIC;
+static const unsigned char ansi_z39_47_page01a[][2] = TABLE_FROM_UCS4_PAGE_01A;
+static const unsigned char ansi_z39_47_page022[][2] = TABLE_FROM_UCS4_PAGE_022;
+static const unsigned char ansi_z39_47_page02b[][2] = TABLE_FROM_UCS4_PAGE_02B;
+static const unsigned char ansi_z39_47_page030[][2] = TABLE_FROM_UCS4_PAGE_030;
+static const unsigned char ansi_z39_47_page1ea[][2] = TABLE_FROM_UCS4_PAGE_1EA;
+static const unsigned char ansi_z39_47_page200[][2] = TABLE_FROM_UCS4_PAGE_200;
+static const unsigned char ansi_z39_47_page211[][2] = TABLE_FROM_UCS4_PAGE_211;
+static const unsigned char ansi_z39_47_page266[][2] = TABLE_FROM_UCS4_PAGE_266;
+static const unsigned char ansi_z39_47_pagefe2[][2] = TABLE_FROM_UCS4_PAGE_FE2;
+
+/*
+ * Encodes the Unicode character wc as ANSEL into the buffer r of size n.
+ *
+ * Since in Unicode diacritical marks come after the base character and in
+ * ANSEL before, a possible base character (0x20 to 0x7f) is not written
+ * immediately but buffered in conv->ostate; it is written by the next call,
+ * after the diacritic if that call delivers one.
+ *
+ * Returns the number of bytes written to r.  This is 0 when the character
+ * was only buffered (NOTE(review): confirm that the caller distinguishes
+ * this from the value of RET_ILUNI).  Returns RET_ILUNI when wc has no
+ * ANSEL representation or when a lone diacritic arrives without a buffered
+ * base character, and RET_TOOSMALL when r cannot hold the whole result.
+ *
+ * All bytes are collected first and the buffer size is checked before
+ * anything is written or conv->ostate is changed.  A RET_TOOSMALL return
+ * therefore leaves both untouched and the call can simply be repeated with
+ * a larger buffer without losing the buffered base character.
+ *
+ * NOTE(review): a base character that is still buffered when the input ends
+ * is only written by a further call; no flush routine is defined in this
+ * file.
+ */
+static int
+ansi_z39_47_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
+{
+  const unsigned char* ch = NULL;
+  unsigned char buf[3];   /* at most: buffered base + two table bytes */
+  int count = 0;
+  int i;
+
+  if (wc < 0x0080) {
+    /* Flush the previously buffered base character, if any */
+    if (conv->ostate) {
+      buf[count++] = (unsigned char) conv->ostate;
+    }
+    /* Control characters cannot carry a diacritic: write them directly */
+    if (wc < 0x0020) {
+      buf[count++] = (unsigned char) wc;
+    }
+    if (n < count) {
+      return RET_TOOSMALL;
+    }
+    for (i = 0; i < count; i++) {
+      r[i] = buf[i];
+    }
+    /* Printable ASCII becomes the new buffered base character */
+    conv->ostate = (wc >= 0x0020) ? (state_t) wc : 0;
+    return count;
+  }
+  else if (wc >= 0x0080 && wc < 0x017f)
+    ch = ansi_z39_47_page080[wc-0x0080];
+  else if (wc >= 0x01a0 && wc < 0x01b4)
+    ch = ansi_z39_47_page01a[wc-0x01a0];
+  else if (wc >= 0x0220 && wc < 0x0234)
+    ch = ansi_z39_47_page022[wc-0x0220];
+  else if (wc >= 0x02b0 && wc < 0x02e2)
+    ch = ansi_z39_47_page02b[wc-0x02b0];
+  else if (wc >= 0x0300 && wc < 0x0337)
+    ch = ansi_z39_47_page030[wc-0x0300];
+  else if (wc >= 0x1ea0 && wc < 0x1efa)
+    ch = ansi_z39_47_page1ea[wc-0x1ea0];
+  else if (wc >= 0x2000 && wc < 0x200f)
+    ch = ansi_z39_47_page200[wc-0x2000];
+  else if (wc >= 0x2110 && wc < 0x211a)
+    ch = ansi_z39_47_page211[wc-0x2110];
+  else if (wc >= 0x2660 && wc < 0x2674)
+    ch = ansi_z39_47_page266[wc-0x2660];
+  else if (wc >= 0xfe20 && wc < 0xfe25)
+    ch = ansi_z39_47_pagefe2[wc-0xfe20];
+
+  if (ch == NULL || ch[0] == 0) {
+    return RET_ILUNI;
+  }
+
+  if (ch[1] == 0 && ch[0] >= 0xe0 && ch[0] <= 0xfe) {
+    /* Diacritical mark following a base character, buffered in ostate:
+       output diacritical mark, then base character */
+    if (!conv->ostate) {
+      return RET_ILUNI;
+    }
+    buf[count++] = ch[0];
+    buf[count++] = (unsigned char) conv->ostate;
+  }
+  else {
+    /* Any other character: flush the buffered base character first */
+    if (conv->ostate) {
+      buf[count++] = (unsigned char) conv->ostate;
+    }
+    buf[count++] = ch[0];
+    if (ch[1] != 0) {
+      buf[count++] = ch[1];
+    }
+  }
+
+  if (n < count) {
+    return RET_TOOSMALL;
+  }
+  for (i = 0; i < count; i++) {
+    r[i] = buf[i];
+  }
+  conv->ostate = 0;
+  return count;
+}
diff --git a/iconv/libiconv/configure.in.patch b/iconv/libiconv/configure.in.patch
new file mode 100644 (file)
index 0000000..7bd3b38
--- /dev/null
@@ -0,0 +1,11 @@
+--- configure.in.orig  Fri Oct  4 20:46:54 2002
++++ configure.in       Fri Oct  4 20:47:02 2002
+@@ -18,7 +18,7 @@
+ AC_PREREQ(2.52)
+-AC_INIT(libiconv,1.8)
++AC_INIT(libiconv-gedcom,1.8.1)
+ PACKAGE=$PACKAGE_NAME
+ VERSION=$PACKAGE_VERSION
diff --git a/iconv/libiconv/configure.patch b/iconv/libiconv/configure.patch
new file mode 100644 (file)
index 0000000..71b9833
--- /dev/null
@@ -0,0 +1,17 @@
+--- configure.orig     Fri May 24 21:23:09 2002
++++ configure  Fri Oct  4 20:36:35 2002
+@@ -302,10 +302,10 @@
+ mandir='${prefix}/man'
+ # Identity of this package.
+-PACKAGE_NAME='libiconv'
+-PACKAGE_TARNAME='libiconv'
+-PACKAGE_VERSION='1.8'
+-PACKAGE_STRING='libiconv 1.8'
++PACKAGE_NAME='libiconv-gedcom'
++PACKAGE_TARNAME='libiconv-gedcom'
++PACKAGE_VERSION='1.8.1'
++PACKAGE_STRING='libiconv-gedcom 1.8.1'
+ PACKAGE_BUGREPORT=''
+ ac_prev=
diff --git a/iconv/libiconv/converters.h.patch b/iconv/libiconv/converters.h.patch
new file mode 100644 (file)
index 0000000..9af7a80
--- /dev/null
@@ -0,0 +1,10 @@
+--- converters.h.orig  Wed May 29 16:06:52 2002
++++ converters.h       Fri Oct  4 20:36:35 2002
+@@ -172,6 +172,7 @@
+ #include "mac_thai.h"
+ #include "hp_roman8.h"
+ #include "nextstep.h"
++#include "ansi_z39_47.h"
+ #include "armscii_8.h"
+ #include "georgian_academy.h"
+ #include "georgian_ps.h"
diff --git a/iconv/libiconv/encodings.def.patch b/iconv/libiconv/encodings.def.patch
new file mode 100644 (file)
index 0000000..01726d9
--- /dev/null
@@ -0,0 +1,15 @@
+--- encodings.def.orig Wed May 29 16:06:53 2002
++++ encodings.def      Fri Oct  4 20:36:35 2002
+@@ -527,6 +527,12 @@
+             nextstep,
+             { nextstep_mbtowc, NULL },    { nextstep_wctomb, NULL })
++DEFENCODING(( "ANSI_Z39.47",           /* MARC-21 character set */
++              "ANSEL",
++            ),
++            ansi_z39_47,
++           { ansi_z39_47_mbtowc, NULL },  { ansi_z39_47_wctomb, NULL })
++
+ /* Regional 8-bit encodings used for a single language */
+ DEFENCODING(( "ARMSCII-8",
diff --git a/iconv/libiconv/get_patches b/iconv/libiconv/get_patches
new file mode 100755 (executable)
index 0000000..18d3c03
--- /dev/null
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+tarfile=`ls libiconv-*.tar.gz`
+topdir=`pwd`
+libiconvdir=`basename $tarfile .tar.gz`
+libiconvdir="$topdir/$libiconvdir"
+
+#############################################################################
+echo "Getting patches in top directory..."
+cd $libiconvdir
+for file in configure.in configure README
+do
+  echo "  $file"
+  diff -u $file.orig $file > $topdir/$file.patch
+done
+
+#############################################################################
+echo "Getting patches in lib subdirectory..."
+cd $libiconvdir/lib
+for file in encodings.def converters.h Makefile.in
+do
+  echo "  $file"
+  diff -u $file.orig $file > $topdir/$file.patch
+done
+
+#############################################################################
+echo "Getting patches in man subdirectory..."
+cd $libiconvdir/man
+for file in iconv_open.3 iconv_open.3.html
+do
+  echo "  $file"
+  diff -u $file.orig $file > $topdir/$file.patch
+done
diff --git a/iconv/libiconv/iconv_open.3.html.patch b/iconv/libiconv/iconv_open.3.html.patch
new file mode 100644 (file)
index 0000000..fe79f80
--- /dev/null
@@ -0,0 +1,12 @@
+--- iconv_open.3.html.orig     Wed May 29 16:17:03 2002
++++ iconv_open.3.html  Fri Oct  4 20:36:35 2002
+@@ -71,7 +71,8 @@
+ KOI8-U, KOI8-RU, CP{1250,1251,1252,1253,1254,1257},
+ CP{850,866},
+ Mac{Roman,CentralEurope,Iceland,Croatian,Romania},
+-Mac{Cyrillic,Ukraine,Greek,Turkish}, Macintosh</td></table>
++Mac{Cyrillic,Ukraine,Greek,Turkish}, Macintosh,
++ANSEL</td></table>
+ <table width="100%" border=0 rules="none" frame="void"
+        cols="2" cellspacing="0" cellpadding="0">
diff --git a/iconv/libiconv/iconv_open.3.patch b/iconv/libiconv/iconv_open.3.patch
new file mode 100644 (file)
index 0000000..a4f2c89
--- /dev/null
@@ -0,0 +1,10 @@
+--- iconv_open.3.orig  Wed May 29 16:06:53 2002
++++ iconv_open.3       Fri Oct  4 20:36:35 2002
+@@ -36,6 +36,7 @@
+ Mac{Roman,CentralEurope,Iceland,Croatian,Romania},
+ Mac{Cyrillic,Ukraine,Greek,Turkish},
+ Macintosh
++ANSEL
+ .TP
+ Semitic languages
+ .nf
diff --git a/iconv/libiconv/patch_dist b/iconv/libiconv/patch_dist
new file mode 100755 (executable)
index 0000000..4f9d3fc
--- /dev/null
@@ -0,0 +1,100 @@
+#!/bin/sh
+
+expected_version=1.8
+tarfile="libiconv-$expected_version.tar.gz"
+topdir=`pwd`
+libiconvdir=`basename $tarfile .tar.gz`
+libiconvdir="$topdir/$libiconvdir"
+
+#############################################################################
+echo "Checking tar file..."
+if [ ! -r $tarfile ]
+then
+  echo "Base package '$tarfile' not found"
+  if [ `which wget` ]
+  then
+    echo -n "Press ENTER to automatically download it..."
+    read
+    wget "ftp://ftp.gnu.org/pub/gnu/libiconv/$tarfile"
+  else
+    exit 1
+  fi
+fi
+
+#############################################################################
+echo "Unpacking tar file..."
+if [ -d $libiconvdir ]
+then
+  rm -rf $libiconvdir
+fi
+
+tar xvfz $tarfile > /dev/null
+
+#############################################################################
+echo "Adding top level files..."
+cd $libiconvdir
+cp $topdir/ChangeLog.gedcom .
+cp $topdir/NEWS.gedcom .
+
+#############################################################################
+echo "Creating header file..."
+cd $libiconvdir/lib
+ln -s $topdir/ansi_z39_47.h .
+ln -s $topdir/../ANSI_Z39.47-tables.h .
+
+#############################################################################
+echo "Patching files in top directory..."
+cd $libiconvdir
+for file in configure.in configure README
+do
+  echo "  $file"
+  patch -s -b -p0 < $topdir/$file.patch
+done
+
+#############################################################################
+echo "Patching files in lib subdirectory..."
+cd $libiconvdir/lib
+for file in encodings.def converters.h Makefile.in
+do
+  echo "  $file"
+  patch -s -b -p0 < $topdir/$file.patch
+done
+
+
+#############################################################################
+echo "Patching files in man subdirectory..."
+cd $libiconvdir/man
+for file in iconv_open.3 iconv_open.3.html
+do
+  echo "  $file"
+  patch -s -b -p0 < $topdir/$file.patch
+done
+
+#############################################################################
+echo "Generating flags..."
+cd $libiconvdir/lib
+gcc -o genflags genflags.c
+./genflags > flags.h
+
+#############################################################################
+echo "Generating aliases..."
+gcc -o genaliases genaliases.c
+./genaliases > aliases.gperf
+
+#############################################################################
+echo "Generating hash function..."
+gperf -t -L ANSI-C -H aliases_hash -N aliases_lookup -G -W aliases -7 -C -k '1,3-11,$' -i 1 aliases.gperf > aliases.h
+
+#############################################################################
+echo "Making distribution..."
+
+package_name=`grep 'PACKAGE_NAME=' $libiconvdir/configure`
+package_name=`echo $package_name | sed "s/^PACKAGE_NAME='//" | sed "s/'$//"`
+
+package_ver=`grep 'PACKAGE_VERSION=' $libiconvdir/configure`
+package_ver=`echo $package_ver | sed "s/^PACKAGE_VERSION='//" | sed "s/'$//"`
+
+cd $topdir
+cp -RL $libiconvdir $package_name-$package_ver
+tar cvfz $package_name-$package_ver.tar.gz $package_name-$package_ver >/dev/null
+rm -rf $package_name-$package_ver