From 2cd5c4dfbd08d35c3a87f6d07e1c3e2e7295aac8 Mon Sep 17 00:00:00 2001 From: Peter Verthez Date: Fri, 4 Oct 2002 19:18:33 +0000 Subject: [PATCH] Libiconv changes for ANSEL encoding. --- iconv/libiconv/ChangeLog.gedcom | 5 + iconv/libiconv/Makefile.in.patch | 11 ++ iconv/libiconv/NEWS.gedcom | 2 + iconv/libiconv/README.patch | 35 +++++ iconv/libiconv/ansi_z39_47.h | 199 +++++++++++++++++++++++++ iconv/libiconv/configure.in.patch | 11 ++ iconv/libiconv/configure.patch | 17 +++ iconv/libiconv/converters.h.patch | 10 ++ iconv/libiconv/encodings.def.patch | 15 ++ iconv/libiconv/get_patches | 33 ++++ iconv/libiconv/iconv_open.3.html.patch | 12 ++ iconv/libiconv/iconv_open.3.patch | 10 ++ iconv/libiconv/patch_dist | 100 +++++++++++++ 13 files changed, 460 insertions(+) create mode 100644 iconv/libiconv/ChangeLog.gedcom create mode 100644 iconv/libiconv/Makefile.in.patch create mode 100644 iconv/libiconv/NEWS.gedcom create mode 100644 iconv/libiconv/README.patch create mode 100644 iconv/libiconv/ansi_z39_47.h create mode 100644 iconv/libiconv/configure.in.patch create mode 100644 iconv/libiconv/configure.patch create mode 100644 iconv/libiconv/converters.h.patch create mode 100644 iconv/libiconv/encodings.def.patch create mode 100755 iconv/libiconv/get_patches create mode 100644 iconv/libiconv/iconv_open.3.html.patch create mode 100644 iconv/libiconv/iconv_open.3.patch create mode 100755 iconv/libiconv/patch_dist diff --git a/iconv/libiconv/ChangeLog.gedcom b/iconv/libiconv/ChangeLog.gedcom new file mode 100644 index 0000000..f16b07d --- /dev/null +++ b/iconv/libiconv/ChangeLog.gedcom @@ -0,0 +1,5 @@ +2002-10-04 Peter Verthez + + * lib/ansi_z39_47.h, lib/ANSI_Z39.47-tables.h: Conversion from ANSEL + to UTF-8, based on libiconv 1.8. 
+ diff --git a/iconv/libiconv/Makefile.in.patch b/iconv/libiconv/Makefile.in.patch new file mode 100644 index 0000000..49769cb --- /dev/null +++ b/iconv/libiconv/Makefile.in.patch @@ -0,0 +1,11 @@ +--- Makefile.in.orig Thu May 23 13:48:42 2002 ++++ Makefile.in Fri Oct 4 20:36:35 2002 +@@ -39,7 +39,7 @@ + + # Before making a release, change this according to the libtool documentation, + # section "Library interface versions". +-LIBICONV_VERSION_INFO = 3:0:1 ++LIBICONV_VERSION_INFO = 3:1:1 + + # Needed by $(LIBTOOL). + top_builddir = .. diff --git a/iconv/libiconv/NEWS.gedcom b/iconv/libiconv/NEWS.gedcom new file mode 100644 index 0000000..4034c3a --- /dev/null +++ b/iconv/libiconv/NEWS.gedcom @@ -0,0 +1,2 @@ +New in 1.8.1: +* Added ANSEL converter. diff --git a/iconv/libiconv/README.patch b/iconv/libiconv/README.patch new file mode 100644 index 0000000..bebd6fe --- /dev/null +++ b/iconv/libiconv/README.patch @@ -0,0 +1,35 @@ +--- README.orig Wed May 29 16:06:52 2002 ++++ README Fri Oct 4 20:36:35 2002 +@@ -12,6 +12,7 @@ + Mac{Roman,CentralEurope,Iceland,Croatian,Romania}, + Mac{Cyrillic,Ukraine,Greek,Turkish}, + Macintosh ++ ANSEL + Semitic languages + ISO-8859-{6,8}, CP{1255,1256}, CP862, Mac{Hebrew,Arabic} + Japanese +@@ -132,15 +133,22 @@ + recompiled. Just set the LD_PRELOAD environment variable, that's it! 
+ + +-Distribution: ++Distribution of original package (libiconv): + ftp://ftp.gnu.org/pub/gnu/libiconv/libiconv-1.8.tar.gz + ftp://ftp.ilog.fr/pub/Users/haible/gnu/libiconv-1.8.tar.gz + +-Homepage: ++Homepage of original package (libiconv): + http://www.gnu.org/software/libiconv/ + ++Distribution of modified package (libiconv-gedcom): ++ http://sourceforge.net/projects/gedcom-parse ++ + Bug reports to: + ++ For specific ANSEL bugs, please use the web interface at SourceForge ++ (link given above) + + + Bruno Haible ++ ++Modifications: Peter Verthez diff --git a/iconv/libiconv/ansi_z39_47.h b/iconv/libiconv/ansi_z39_47.h new file mode 100644 index 0000000..09ab4bb --- /dev/null +++ b/iconv/libiconv/ansi_z39_47.h @@ -0,0 +1,199 @@ +/* + * Copyright (C) 1999-2002 Free Software Foundation, Inc. + * This file is part of the GNU LIBICONV Library. + * + * The GNU LIBICONV Library is free software; you can redistribute it + * and/or modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * The GNU LIBICONV Library is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with the GNU LIBICONV Library; see the file COPYING.LIB. + * If not, write to the Free Software Foundation, Inc., 59 Temple Place - + * Suite 330, Boston, MA 02111-1307, USA. + */ + +/* + * ANSI_Z39.47 + */ + +#include "ANSI_Z39.47-tables.h" +#include + +/* Omit first half of table: assume identity mapping (ASCII) */ +static const unsigned short ansi_z39_47_2uni[128] = TABLE_TO_UCS4_BASIC; + +/* The outer array range runs from 0xe0 to 0xfe, the inner range from 0x20 + to 0x7f. 
*/
+static const unsigned short ansi_z39_47_2uni_comb[31][96] =
+{
+  /* 0xe0 (hook above) */ TABLE_TO_UCS4_COMBINING_E0,
+  /* 0xe1 (grave) */ TABLE_TO_UCS4_COMBINING_E1,
+  /* 0xe2 (acute) */ TABLE_TO_UCS4_COMBINING_E2,
+  /* 0xe3 (circumflex) */ TABLE_TO_UCS4_COMBINING_E3,
+  /* 0xe4 (tilde) */ TABLE_TO_UCS4_COMBINING_E4,
+  /* 0xe5 (macron) */ TABLE_TO_UCS4_COMBINING_E5,
+  /* 0xe6 (breve) */ TABLE_TO_UCS4_COMBINING_E6,
+  /* 0xe7 (dot above) */ TABLE_TO_UCS4_COMBINING_E7,
+  /* 0xe8 (umlaut, diaeresis) */ TABLE_TO_UCS4_COMBINING_E8,
+  /* 0xe9 (caron, hacek) */ TABLE_TO_UCS4_COMBINING_E9,
+  /* 0xea (ring above) */ TABLE_TO_UCS4_COMBINING_EA,
+  /* 0xeb (ligature, left half) */ TABLE_TO_UCS4_COMBINING_EB,
+  /* 0xec (ligature, right half) */ TABLE_TO_UCS4_COMBINING_EC,
+  /* 0xed (comma above right) */ TABLE_TO_UCS4_COMBINING_ED,
+  /* 0xee (double acute) */ TABLE_TO_UCS4_COMBINING_EE,
+  /* 0xef (candrabindu) */ TABLE_TO_UCS4_COMBINING_EF,
+  /* 0xf0 (cedilla) */ TABLE_TO_UCS4_COMBINING_F0,
+  /* 0xf1 (ogonek, right hook) */ TABLE_TO_UCS4_COMBINING_F1,
+  /* 0xf2 (dot below) */ TABLE_TO_UCS4_COMBINING_F2,
+  /* 0xf3 (double dot below) */ TABLE_TO_UCS4_COMBINING_F3,
+  /* 0xf4 (ring below) */ TABLE_TO_UCS4_COMBINING_F4,
+  /* 0xf5 (double low line) */ TABLE_TO_UCS4_COMBINING_F5,
+  /* 0xf6 (line below) */ TABLE_TO_UCS4_COMBINING_F6,
+  /* 0xf7 (comma below, left hook) */ TABLE_TO_UCS4_COMBINING_F7,
+  /* 0xf8 (left half ring below, right cedilla) */ TABLE_TO_UCS4_COMBINING_F8,
+  /* 0xf9 (breve below, half circle below) */ TABLE_TO_UCS4_COMBINING_F9,
+  /* 0xfa (double tilde, left half) */ TABLE_TO_UCS4_COMBINING_FA,
+  /* 0xfb (double tilde, right half) */ TABLE_TO_UCS4_COMBINING_FB,
+  /* 0xfc */ TABLE_TO_UCS4_COMBINING_FC,
+  /* 0xfd */ TABLE_TO_UCS4_COMBINING_FD,
+  /* 0xfe (comma above, high centered comma) */ TABLE_TO_UCS4_COMBINING_FE,
+};
+
+#define BASE_PASSED 0x10000 /* istate flag: base character already output */
+/* ANSEL -> UCS-4; returns 0 or 1, RET_TOOFEW(1) or RET_ILSEQ (see below).  */
+static int
+ansi_z39_47_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
+{
+  unsigned char c = *s;
+  ucs4_t last_wc = conv->istate; /* pending diacritical mark byte, or 0 */
+  int retval = 1;
+  if (last_wc & BASE_PASSED) {
+    /* base character was already output by the previous call (which
+       returned 0): reset the state and output the diacritical mark */
+    unsigned char dc = (unsigned char)(last_wc & ~BASE_PASSED);
+    *pwc = ansi_z39_47_2uni[dc-0x80];
+    conv->istate = 0;
+    return 1;
+  }
+  if (last_wc) {
+    conv->istate |= BASE_PASSED;
+    retval = 0; /* c is output now; the mark follows on the next call */
+  }
+  if (c < 0x80) {
+    if (last_wc && c >= 0x20) {
+      /* Check if we can combine the character with the diacritical mark */
+      unsigned char dc = (unsigned char)(last_wc & ~BASE_PASSED);
+      unsigned short wc = ansi_z39_47_2uni_comb[dc-0xe0][c-0x20];
+      if (wc != 0x0000) {
+        *pwc = (ucs4_t) wc;
+        conv->istate = 0;
+        return 1;
+      }
+    }
+    *pwc = (ucs4_t) c;
+    return retval;
+  }
+  else if (c < 0xe0) {
+    unsigned short wc = ansi_z39_47_2uni[c-0x80];
+    if (wc != 0x0000) {
+      *pwc = (ucs4_t) wc;
+      return retval;
+    }
+  }
+  else if (c <= 0xfe) {
+    /* 0xe0..0xfe are diacritical marks.  In ANSEL they come *before* the base
+       character, in Unicode *after* it, so buffer the mark.  0xff must not
+       get here: ansi_z39_47_2uni_comb has rows for 0xe0..0xfe only.  */
+    conv->istate = (state_t)c; /* replaces any mark that was still pending */
+    return RET_TOOFEW(1);
+  }
+  return RET_ILSEQ;
+}
+
+static const unsigned char ansi_z39_47_page080[][2] = TABLE_FROM_UCS4_BASIC;
+static const unsigned char ansi_z39_47_page01a[][2] = TABLE_FROM_UCS4_PAGE_01A;
+static const unsigned char ansi_z39_47_page022[][2] = TABLE_FROM_UCS4_PAGE_022;
+static const unsigned char ansi_z39_47_page02b[][2] = TABLE_FROM_UCS4_PAGE_02B;
+static const unsigned char ansi_z39_47_page030[][2] = TABLE_FROM_UCS4_PAGE_030;
+static const unsigned char ansi_z39_47_page1ea[][2] = TABLE_FROM_UCS4_PAGE_1EA;
+static const unsigned char ansi_z39_47_page200[][2] = TABLE_FROM_UCS4_PAGE_200;
+static const unsigned char ansi_z39_47_page211[][2] = TABLE_FROM_UCS4_PAGE_211;
+static const unsigned char ansi_z39_47_page266[][2] = TABLE_FROM_UCS4_PAGE_266;
+static const unsigned char ansi_z39_47_pagefe2[][2] = TABLE_FROM_UCS4_PAGE_FE2;
+/* UCS-4 -> ANSEL; returns bytes written to r, RET_TOOSMALL or RET_ILUNI.  */
+static int
+ansi_z39_47_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
+{
+  const unsigned char* ch = NULL;
+  int output = 0;
+
+#define OUTPUT(c) \
+  do { ++output; if (n < output) return RET_TOOSMALL; *(r++) = (c); } while (0)
+  /* UTF-8 puts diacritical marks after the base character, ANSEL before it,
+     so a base character (0x20..0x7f) is held in conv->ostate until the next
+     call.  NOTE(review): nothing visible here flushes it at end of input.  */
+  if (wc < 0x0080) {
+    if (conv->ostate) {
+      OUTPUT(conv->ostate);
+      conv->ostate = 0; /* NOTE(review): lost if a later OUTPUT returns */
+    }
+    if (wc >= 0x0020) {
+      conv->ostate = (state_t) wc;
+    }
+    else {
+      OUTPUT(wc);
+    }
+    return output;
+  }
+  else if (wc >= 0x0080 && wc < 0x017f)
+    ch = ansi_z39_47_page080[wc-0x0080];
+  else if (wc >= 0x01a0 && wc < 0x01b4)
+    ch = ansi_z39_47_page01a[wc-0x01a0];
+  else if (wc >= 0x0220 && wc < 0x0234)
+    ch = ansi_z39_47_page022[wc-0x0220];
+  else if (wc >= 0x02b0 && wc < 0x02e2)
+    ch = ansi_z39_47_page02b[wc-0x02b0];
+  else if (wc >= 0x0300 && wc < 0x0337)
+    ch = ansi_z39_47_page030[wc-0x0300];
+  else if (wc >= 0x1ea0 && wc < 0x1efa)
+    ch = ansi_z39_47_page1ea[wc-0x1ea0];
+  else if (wc >= 0x2000 && wc < 0x200f)
+    ch = ansi_z39_47_page200[wc-0x2000];
+  else if (wc >= 0x2110 && wc < 0x211a)
+    ch = ansi_z39_47_page211[wc-0x2110];
+  else if (wc >= 0x2660 && wc < 0x2674)
+    ch = ansi_z39_47_page266[wc-0x2660];
+  else if (wc >= 0xfe20 && wc < 0xfe25)
+    ch = ansi_z39_47_pagefe2[wc-0xfe20];
+  if (ch && ch[0] != 0) {
+    if (ch[1] == 0 && ch[0] >= 0xe0 && ch[0] <= 0xfe) {
+      /* Diacritical mark following a base character, buffered in ostate */
+      /* Output diacritical mark, then base character */
+      if (conv->ostate) {
+        OUTPUT(ch[0]);
+        OUTPUT(conv->ostate);
+        conv->ostate = 0;
+      }
+      else
+        return RET_ILUNI;
+    }
+    else {
+      if (conv->ostate) {
+        OUTPUT(conv->ostate);
+        conv->ostate = 0;
+      }
+      OUTPUT(ch[0]);
+    }
+    if (ch[1] != 0) {
+      OUTPUT(ch[1]);
+    }
+    return output;
+  }
+  return RET_ILUNI;
+}
diff --git a/iconv/libiconv/configure.in.patch b/iconv/libiconv/configure.in.patch
new file mode 100644
index 0000000..7bd3b38
--- /dev/null
+++ b/iconv/libiconv/configure.in.patch
@@ -0,0 +1,11 @@
+--- configure.in.orig Fri Oct 4 20:46:54 2002
++++ configure.in Fri Oct 4 20:47:02 2002
+@@ -18,7 +18,7 @@
+ 
+ AC_PREREQ(2.52)
+ 
+-AC_INIT(libiconv,1.8)
++AC_INIT(libiconv-gedcom,1.8.1)
+ 
+ PACKAGE=$PACKAGE_NAME
+ VERSION=$PACKAGE_VERSION
diff --git a/iconv/libiconv/configure.patch b/iconv/libiconv/configure.patch
new file mode 100644
index 0000000..71b9833
--- /dev/null
+++ b/iconv/libiconv/configure.patch
@@ -0,0 +1,17 @@
+--- configure.orig Fri May 24 21:23:09 2002
++++ configure Fri Oct 4 20:36:35 2002
+@@ -302,10 +302,10 @@
+ mandir='${prefix}/man'
+ 
+ # Identity of this package.
+-PACKAGE_NAME='libiconv'
+-PACKAGE_TARNAME='libiconv'
+-PACKAGE_VERSION='1.8'
+-PACKAGE_STRING='libiconv 1.8'
++PACKAGE_NAME='libiconv-gedcom'
++PACKAGE_TARNAME='libiconv-gedcom'
++PACKAGE_VERSION='1.8.1'
++PACKAGE_STRING='libiconv-gedcom 1.8.1'
+ PACKAGE_BUGREPORT=''
+ 
+ ac_prev=
diff --git a/iconv/libiconv/converters.h.patch b/iconv/libiconv/converters.h.patch
new file mode 100644
index 0000000..9af7a80
--- /dev/null
+++ b/iconv/libiconv/converters.h.patch
@@ -0,0 +1,10 @@
+--- converters.h.orig Wed May 29 16:06:52 2002
++++ converters.h Fri Oct 4 20:36:35 2002
+@@ -172,6 +172,7 @@
+ #include "mac_thai.h"
+ #include "hp_roman8.h"
+ #include "nextstep.h"
++#include "ansi_z39_47.h"
+ #include "armscii_8.h"
+ #include "georgian_academy.h"
+ #include "georgian_ps.h"
diff --git a/iconv/libiconv/encodings.def.patch b/iconv/libiconv/encodings.def.patch
new file mode 100644
index 0000000..01726d9
--- /dev/null
+++ b/iconv/libiconv/encodings.def.patch
@@ -0,0 +1,15 @@
+--- encodings.def.orig Wed May 29 16:06:53 2002
++++ encodings.def Fri Oct 4 20:36:35 2002
+@@ -527,6 +527,12 @@
+ nextstep,
+ { nextstep_mbtowc, NULL }, { nextstep_wctomb, NULL })
+ 
++DEFENCODING(( "ANSI_Z39.47", /* MARC-21 character set */
++ "ANSEL",
++ ),
++ ansi_z39_47,
++ { ansi_z39_47_mbtowc, NULL }, { ansi_z39_47_wctomb, NULL })
++
+ /* Regional 8-bit encodings used for a single language */
+ 
+ DEFENCODING(( "ARMSCII-8",
diff --git a/iconv/libiconv/get_patches b/iconv/libiconv/get_patches
new file mode 100755
index 0000000..18d3c03
--- /dev/null
+++ b/iconv/libiconv/get_patches
@@ -0,0 +1,33 @@
+#!/bin/sh
+# Regenerate the *.patch files by diffing each patched file against its .orig.
+tarfile=`ls libiconv-*.tar.gz`
+topdir=`pwd`
+libiconvdir=`basename "$tarfile" .tar.gz`
+libiconvdir="$topdir/$libiconvdir"
+
+#############################################################################
+echo "Getting patches in top directory..."
+cd "$libiconvdir" || exit 1   # never diff in the wrong directory
+for file in configure.in configure README
+do
+  echo " $file"
+  diff -u "$file.orig" "$file" > "$topdir/$file.patch"
+done
+
+#############################################################################
+echo "Getting patches in lib subdirectory..."
+cd "$libiconvdir/lib" || exit 1
+for file in encodings.def converters.h Makefile.in
+do
+  echo " $file"
+  diff -u "$file.orig" "$file" > "$topdir/$file.patch"
+done
+
+#############################################################################
+echo "Getting patches in man subdirectory..."
+cd "$libiconvdir/man" || exit 1
+for file in iconv_open.3 iconv_open.3.html
+do
+  echo " $file"
+  diff -u "$file.orig" "$file" > "$topdir/$file.patch"
+done
diff --git a/iconv/libiconv/iconv_open.3.html.patch b/iconv/libiconv/iconv_open.3.html.patch
new file mode 100644
index 0000000..fe79f80
--- /dev/null
+++ b/iconv/libiconv/iconv_open.3.html.patch
@@ -0,0 +1,12 @@
+--- iconv_open.3.html.orig Wed May 29 16:17:03 2002
++++ iconv_open.3.html Fri Oct 4 20:36:35 2002
+@@ -71,7 +71,8 @@
+ KOI8-U, KOI8-RU, CP{1250,1251,1252,1253,1254,1257},
+ CP{850,866},
+ Mac{Roman,CentralEurope,Iceland,Croatian,Romania},
+-Mac{Cyrillic,Ukraine,Greek,Turkish}, Macintosh
++Mac{Cyrillic,Ukraine,Greek,Turkish}, Macintosh,
++ANSEL
+ 
+ 
diff --git a/iconv/libiconv/iconv_open.3.patch b/iconv/libiconv/iconv_open.3.patch
new file mode 100644
index 0000000..a4f2c89
--- /dev/null
+++ b/iconv/libiconv/iconv_open.3.patch
@@ -0,0 +1,10 @@
+--- iconv_open.3.orig Wed May 29 16:06:53 2002
++++ iconv_open.3 Fri Oct 4 20:36:35 2002
+@@ -36,6 +36,7 @@
+ Mac{Roman,CentralEurope,Iceland,Croatian,Romania},
+ Mac{Cyrillic,Ukraine,Greek,Turkish},
+ Macintosh
++ANSEL
+ .TP
+ Semitic languages
+ .nf
diff --git a/iconv/libiconv/patch_dist b/iconv/libiconv/patch_dist
new file mode 100755
index 0000000..4f9d3fc
--- /dev/null
+++ b/iconv/libiconv/patch_dist
@@ -0,0 +1,100 @@
+#!/bin/sh
+# Unpack the libiconv tarball, add the ANSEL files, apply the patches, re-pack.
+expected_version=1.8
+tarfile="libiconv-$expected_version.tar.gz"
+topdir=`pwd`
+libiconvdir=`basename "$tarfile" .tar.gz`
+libiconvdir="$topdir/$libiconvdir"
+
+#############################################################################
+echo "Checking tar file..."
+if [ ! -r "$tarfile" ]
+then
+  echo "Base package '$tarfile' not found"
+  if command -v wget > /dev/null 2>&1   # portable replacement for `which`
+  then
+    printf '%s' "Press ENTER to automatically download it..."
+    read dummy   # POSIX read requires a variable name
+    wget "ftp://ftp.gnu.org/pub/gnu/libiconv/$tarfile" || exit 1
+  else
+    exit 1
+  fi
+fi
+
+#############################################################################
+echo "Unpacking tar file..."
+if [ -d "$libiconvdir" ]
+then
+  rm -rf "$libiconvdir"
+fi
+
+tar xvfz "$tarfile" > /dev/null || exit 1
+
+#############################################################################
+echo "Adding top level files..."
+cd "$libiconvdir" || exit 1   # everything below assumes the unpacked tree
+cp "$topdir/ChangeLog.gedcom" .
+cp "$topdir/NEWS.gedcom" .
+
+#############################################################################
+echo "Creating header file..."
+cd "$libiconvdir/lib" || exit 1
+ln -s "$topdir/ansi_z39_47.h" .
+ln -s "$topdir/../ANSI_Z39.47-tables.h" .
+
+#############################################################################
+echo "Patching files in top directory..."
+cd "$libiconvdir" || exit 1
+for file in configure.in configure README
+do
+  echo " $file"
+  patch -s -b -p0 < "$topdir/$file.patch" || exit 1
+done
+
+#############################################################################
+echo "Patching files in lib subdirectory..."
+cd "$libiconvdir/lib" || exit 1
+for file in encodings.def converters.h Makefile.in
+do
+  echo " $file"
+  patch -s -b -p0 < "$topdir/$file.patch" || exit 1
+done
+
+
+#############################################################################
+echo "Patching files in man subdirectory..."
+cd "$libiconvdir/man" || exit 1
+for file in iconv_open.3 iconv_open.3.html
+do
+  echo " $file"
+  patch -s -b -p0 < "$topdir/$file.patch" || exit 1
+done
+
+#############################################################################
+echo "Generating flags..."
+cd "$libiconvdir/lib" || exit 1
+gcc -o genflags genflags.c || exit 1
+./genflags > flags.h
+
+#############################################################################
+echo "Generating aliases..."
+gcc -o genaliases genaliases.c || exit 1
+./genaliases > aliases.gperf
+
+#############################################################################
+echo "Generating hash function..."
+gperf -t -L ANSI-C -H aliases_hash -N aliases_lookup -G -W aliases -7 -C -k '1,3-11,$' -i 1 aliases.gperf > aliases.h
+
+#############################################################################
+echo "Making distribution..."
+
+package_name=`grep 'PACKAGE_NAME=' "$libiconvdir/configure"`
+package_name=`echo $package_name | sed "s/^PACKAGE_NAME='//" | sed "s/'$//"`
+
+package_ver=`grep 'PACKAGE_VERSION=' "$libiconvdir/configure"`
+package_ver=`echo $package_ver | sed "s/^PACKAGE_VERSION='//" | sed "s/'$//"`
+
+cd "$topdir" || exit 1
+cp -RL "$libiconvdir" "$package_name-$package_ver"
+tar cvfz "$package_name-$package_ver.tar.gz" "$package_name-$package_ver" >/dev/null
+rm -rf "$package_name-$package_ver"
-- 
2.30.2