summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarkos Chandras <hwoarang@gentoo.org>2012-05-06 16:26:56 +0000
committerMarkos Chandras <hwoarang@gentoo.org>2012-05-06 16:26:56 +0000
commit28e7eb41c7dd6588fd9f5071b49a58cb03171e44 (patch)
tree18a77eccc95f2fc0d15ac1b6323eaf47db210d9e /dev-libs/icu
parentDrop ia64 keywords (diff)
downloadhistorical-28e7eb41c7dd6588fd9f5071b49a58cb03171e44.tar.gz
historical-28e7eb41c7dd6588fd9f5071b49a58cb03171e44.tar.bz2
historical-28e7eb41c7dd6588fd9f5071b49a58cb03171e44.zip
Fix support for regular expressions with look-behind assertions and case insensitive matching. Ebuild by Arfrever.
Package-Manager: portage-2.1.10.57/cvs/Linux x86_64
Diffstat (limited to 'dev-libs/icu')
-rw-r--r--dev-libs/icu/ChangeLog9
-rw-r--r--dev-libs/icu/files/icu-49.1.1-regex.patch128
-rw-r--r--dev-libs/icu/icu-49.1.1-r1.ebuild91
3 files changed, 227 insertions, 1 deletions
diff --git a/dev-libs/icu/ChangeLog b/dev-libs/icu/ChangeLog
index 278c3de78ee4..0700a1041c3b 100644
--- a/dev-libs/icu/ChangeLog
+++ b/dev-libs/icu/ChangeLog
@@ -1,6 +1,13 @@
# ChangeLog for dev-libs/icu
# Copyright 1999-2012 Gentoo Foundation; Distributed under the GPL v2
-# $Header: /var/cvsroot/gentoo-x86/dev-libs/icu/ChangeLog,v 1.182 2012/05/05 19:38:27 hwoarang Exp $
+# $Header: /var/cvsroot/gentoo-x86/dev-libs/icu/ChangeLog,v 1.183 2012/05/06 16:26:56 hwoarang Exp $
+
+*icu-49.1.1-r1 (06 May 2012)
+
+ 06 May 2012; Markos Chandras <hwoarang@gentoo.org>
+ +files/icu-49.1.1-regex.patch, +icu-49.1.1-r1.ebuild:
+ Fix support for regular expressions with look-behind assertions and case
+ insensitive matching. Ebuild by Arfrever.
05 May 2012; Markos Chandras <hwoarang@gentoo.org> metadata.xml:
remove description from metadata
diff --git a/dev-libs/icu/files/icu-49.1.1-regex.patch b/dev-libs/icu/files/icu-49.1.1-regex.patch
new file mode 100644
index 000000000000..e9d64e5c6f48
--- /dev/null
+++ b/dev-libs/icu/files/icu-49.1.1-regex.patch
@@ -0,0 +1,128 @@
+http://ssl.icu-project.org/trac/ticket/9283
+http://ssl.icu-project.org/trac/changeset/31782
+
+--- i18n/regexcmp.cpp
++++ i18n/regexcmp.cpp
+@@ -3306,10 +3306,31 @@
+ }
+
+ case URX_STRING_I:
+- // TODO: Is the case-folded string the longest?
+- // If so we can optimize this the same as URX_STRING.
+- loc++;
+- currentLen = INT32_MAX;
++ // TODO: This code assumes that any user string that matches will be no longer
++ // than our compiled string, with case insensitive matching.
++ // Our compiled string has been case-folded already.
++ //
++ // Any matching user string will have no more code points than our
++ // compiled (folded) string. Folding may add code points, but
++ // not remove them.
++ //
++ // There is a potential problem if a supplemental code point
++ // case-folds to a BMP code point. In this case our compiled string
++ // could be shorter (in code units) than a matching user string.
++ //
++ // At this time (Unicode 6.1) there are no such characters, and this case
++ // is not being handled. A test, intltest regex/Bug9283, will fail if
++ // any problematic characters are added to Unicode.
++ //
++ // If this happens, we can make a set of the BMP chars that the
++ // troublesome supplementals fold to, scan our string, and bump the
++ // currentLen one extra for each that is found.
++ //
++ {
++ loc++;
++ int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc);
++ currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp));
++ }
+ break;
+
+ case URX_CTR_INIT:
+--- test/intltest/regextst.cpp
++++ test/intltest/regextst.cpp
+@@ -26,6 +26,7 @@
+ #include "unicode/regex.h"
+ #include "unicode/uchar.h"
+ #include "unicode/ucnv.h"
++#include "unicode/uniset.h"
+ #include "unicode/ustring.h"
+ #include "regextst.h"
+ #include "uvector.h"
+@@ -127,6 +128,9 @@
+ case 20: name = "CheckInvBufSize";
+ if (exec) CheckInvBufSize();
+ break;
++ case 21: name = "Bug 9283";
++ if (exec) Bug9283();
++ break;
+
+ default: name = "";
+ break; //needed to end loop
+@@ -5184,6 +5188,34 @@
+ delete pMatcher;
+ }
+
++// Bug 9283
++// This test is checking for the existence of any supplemental characters that case-fold
++// to a BMP character.
++//
++// At the time of this writing there are none. If any should appear in a subsequent release
++// of Unicode, the code in regular expressions compilation that determines the longest
++// possible match for a literal string will need to be enhanced.
++//
++// See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()
++// for details on what to do in case of a failure of this test.
++//
++void RegexTest::Bug9283() {
++ UErrorCode status = U_ZERO_ERROR;
++ UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status);
++ REGEX_CHECK_STATUS;
++ int32_t index;
++ UChar32 c;
++ for (index=0; ; index++) {
++ c = supplementalsWithCaseFolding.charAt(index);
++ if (c == -1) {
++ break;
++ }
++ UnicodeString cf = UnicodeString(c).foldCase();
++ REGEX_ASSERT(cf.length() >= 2);
++ }
++}
++
++
+ void RegexTest::CheckInvBufSize() {
+ if(inv_next>=INV_BUFSIZ) {
+ errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %d )\n",
+--- test/intltest/regextst.h
++++ test/intltest/regextst.h
+@@ -1,6 +1,6 @@
+ /********************************************************************
+ * COPYRIGHT:
+- * Copyright (c) 2002-2011, International Business Machines Corporation and
++ * Copyright (c) 2002-2012, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+@@ -45,6 +45,7 @@
+ virtual void Bug7740();
+ virtual void Bug8479();
+ virtual void Bug7029();
++ virtual void Bug9283();
+ virtual void CheckInvBufSize();
+
+ // The following functions are internal to the regexp tests.
+--- test/testdata/regextst.txt
++++ test/testdata/regextst.txt
+@@ -1141,6 +1141,11 @@
+ "[\w]+" " <0>abc\u200cdef\u200dghi</0> "
+ "[\w]+" i " <0>abc\u200cdef\u200dghi</0> "
+
++# Bug 9283
++# uregex_open fails for look-behind assertion + case-insensitive
++
++"(ab)?(?<=ab)cd|ef" i "<0><1>ab</1>cd</0>"
++
+ # Random debugging, Temporary
+ #
+ #"^(?:a?b?)*$" "a--"
diff --git a/dev-libs/icu/icu-49.1.1-r1.ebuild b/dev-libs/icu/icu-49.1.1-r1.ebuild
new file mode 100644
index 000000000000..212e131d47be
--- /dev/null
+++ b/dev-libs/icu/icu-49.1.1-r1.ebuild
@@ -0,0 +1,91 @@
+# Copyright 1999-2012 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Header: /var/cvsroot/gentoo-x86/dev-libs/icu/icu-49.1.1-r1.ebuild,v 1.1 2012/05/06 16:26:56 hwoarang Exp $
+
+EAPI="4"
+
+inherit eutils versionator
+
+MAJOR_VERSION="$(get_version_component_range 1)"
+if [[ "${PV}" =~ ^[[:digit:]]+_rc[[:digit:]]*$ ]]; then
+ MINOR_VERSION="0"
+else
+ MINOR_VERSION="$(get_version_component_range 2)"
+fi
+
+DESCRIPTION="International Components for Unicode"
+HOMEPAGE="http://www.icu-project.org/"
+
+BASE_URI="http://download.icu-project.org/files/icu4c/${PV/_/}"
+SRC_ARCHIVE="icu4c-${PV//./_}-src.tgz"
+DOCS_ARCHIVE="icu4c-${PV//./_}-docs.zip"
+
+SRC_URI="${BASE_URI}/${SRC_ARCHIVE}
+ doc? ( ${BASE_URI}/${DOCS_ARCHIVE} )"
+
+LICENSE="BSD"
+SLOT="0"
+KEYWORDS="~alpha ~amd64 ~arm ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sh ~sparc ~x86 ~x86-fbsd"
+IUSE="debug doc examples static-libs"
+
+DEPEND="doc? ( app-arch/unzip )"
+RDEPEND=""
+
+S="${WORKDIR}/${PN}/source"
+
+QA_DT_NEEDED="/usr/lib.*/libicudata\.so\.${MAJOR_VERSION}\.${MINOR_VERSION}.*"
+
+src_unpack() {
+ unpack "${SRC_ARCHIVE}"
+ if use doc; then
+ mkdir docs
+ pushd docs > /dev/null
+ unpack "${DOCS_ARCHIVE}"
+ popd > /dev/null
+ fi
+}
+
+src_prepare() {
+ # Do not hardcode flags into icu-config.
+ # https://ssl.icu-project.org/trac/ticket/6102
+ local variable
+ for variable in CFLAGS CPPFLAGS CXXFLAGS FFLAGS LDFLAGS; do
+ sed -i -e "/^${variable} =.*/s:@${variable}@::" config/Makefile.inc.in || die "sed failed"
+ done
+
+ epatch "${FILESDIR}/${PN}-4.8.1-fix_binformat_fonts.patch"
+ epatch "${FILESDIR}/${PN}-4.8.1.1-fix_ltr.patch"
+ epatch "${FILESDIR}/${P}-regex.patch"
+}
+
+src_configure() {
+ econf \
+ $(use_enable debug) \
+ $(use_enable examples samples) \
+ $(use_enable static-libs static)
+}
+
+src_test() {
+ # INTLTEST_OPTS: intltest options
+ # -e: Exhaustive testing
+ # -l: Reporting of memory leaks
+ # -v: Increased verbosity
+ # IOTEST_OPTS: iotest options
+ # -e: Exhaustive testing
+ # -v: Increased verbosity
+ # CINTLTST_OPTS: cintltst options
+ # -e: Exhaustive testing
+ # -v: Increased verbosity
+ emake -j1 check
+}
+
+src_install() {
+ emake DESTDIR="${D}" install
+
+ dohtml ../readme.html
+ dodoc ../unicode-license.txt
+ if use doc; then
+ insinto /usr/share/doc/${PF}/html/api
+ doins -r "${WORKDIR}/docs/"*
+ fi
+}