diff options
author | Markos Chandras <hwoarang@gentoo.org> | 2012-05-06 16:26:56 +0000 |
---|---|---|
committer | Markos Chandras <hwoarang@gentoo.org> | 2012-05-06 16:26:56 +0000 |
commit | 28e7eb41c7dd6588fd9f5071b49a58cb03171e44 (patch) | |
tree | 18a77eccc95f2fc0d15ac1b6323eaf47db210d9e /dev-libs/icu | |
parent | Drop ia64 keywords (diff) | |
download | historical-28e7eb41c7dd6588fd9f5071b49a58cb03171e44.tar.gz historical-28e7eb41c7dd6588fd9f5071b49a58cb03171e44.tar.bz2 historical-28e7eb41c7dd6588fd9f5071b49a58cb03171e44.zip |
Fix support for regular expressions with look-behind assertions and case insensitive matching. Ebuild by Arfrever.
Package-Manager: portage-2.1.10.57/cvs/Linux x86_64
Diffstat (limited to 'dev-libs/icu')
-rw-r--r-- | dev-libs/icu/ChangeLog | 9 | ||||
-rw-r--r-- | dev-libs/icu/files/icu-49.1.1-regex.patch | 128 | ||||
-rw-r--r-- | dev-libs/icu/icu-49.1.1-r1.ebuild | 91 |
3 files changed, 227 insertions, 1 deletions
diff --git a/dev-libs/icu/ChangeLog b/dev-libs/icu/ChangeLog index 278c3de78ee4..0700a1041c3b 100644 --- a/dev-libs/icu/ChangeLog +++ b/dev-libs/icu/ChangeLog @@ -1,6 +1,13 @@ # ChangeLog for dev-libs/icu # Copyright 1999-2012 Gentoo Foundation; Distributed under the GPL v2 -# $Header: /var/cvsroot/gentoo-x86/dev-libs/icu/ChangeLog,v 1.182 2012/05/05 19:38:27 hwoarang Exp $ +# $Header: /var/cvsroot/gentoo-x86/dev-libs/icu/ChangeLog,v 1.183 2012/05/06 16:26:56 hwoarang Exp $ + +*icu-49.1.1-r1 (06 May 2012) + + 06 May 2012; Markos Chandras <hwoarang@gentoo.org> + +files/icu-49.1.1-regex.patch, +icu-49.1.1-r1.ebuild: + Fix support for regular expressions with look-behind assertions and case + insensitive matching. Ebuild by Arfrever. 05 May 2012; Markos Chandras <hwoarang@gentoo.org> metadata.xml: remove description from metadata diff --git a/dev-libs/icu/files/icu-49.1.1-regex.patch b/dev-libs/icu/files/icu-49.1.1-regex.patch new file mode 100644 index 000000000000..e9d64e5c6f48 --- /dev/null +++ b/dev-libs/icu/files/icu-49.1.1-regex.patch @@ -0,0 +1,128 @@ +http://ssl.icu-project.org/trac/ticket/9283 +http://ssl.icu-project.org/trac/changeset/31782 + +--- i18n/regexcmp.cpp ++++ i18n/regexcmp.cpp +@@ -3306,10 +3306,31 @@ + } + + case URX_STRING_I: +- // TODO: Is the case-folded string the longest? +- // If so we can optimize this the same as URX_STRING. +- loc++; +- currentLen = INT32_MAX; ++ // TODO: This code assumes that any user string that matches will be no longer ++ // than our compiled string, with case insensitive matching. ++ // Our compiled string has been case-folded already. ++ // ++ // Any matching user string will have no more code points than our ++ // compiled (folded) string. Folding may add code points, but ++ // not remove them. ++ // ++ // There is a potential problem if a supplemental code point ++ // case-folds to a BMP code point. In this case our compiled string ++ // could be shorter (in code units) than a matching user string. 
++ // ++ // At this time (Unicode 6.1) there are no such characters, and this case ++ // is not being handled. A test, intltest regex/Bug9283, will fail if ++ // any problematic characters are added to Unicode. ++ // ++ // If this happens, we can make a set of the BMP chars that the ++ // troublesome supplementals fold to, scan our string, and bump the ++ // currentLen one extra for each that is found. ++ // ++ { ++ loc++; ++ int32_t stringLenOp = (int32_t)fRXPat->fCompiledPat->elementAti(loc); ++ currentLen = safeIncrement(currentLen, URX_VAL(stringLenOp)); ++ } + break; + + case URX_CTR_INIT: +--- test/intltest/regextst.cpp ++++ test/intltest/regextst.cpp +@@ -26,6 +26,7 @@ + #include "unicode/regex.h" + #include "unicode/uchar.h" + #include "unicode/ucnv.h" ++#include "unicode/uniset.h" + #include "unicode/ustring.h" + #include "regextst.h" + #include "uvector.h" +@@ -127,6 +128,9 @@ + case 20: name = "CheckInvBufSize"; + if (exec) CheckInvBufSize(); + break; ++ case 21: name = "Bug 9283"; ++ if (exec) Bug9283(); ++ break; + + default: name = ""; + break; //needed to end loop +@@ -5184,6 +5188,34 @@ + delete pMatcher; + } + ++// Bug 9283 ++// This test is checking for the existence of any supplemental characters that case-fold ++// to a bmp character. ++// ++// At the time of this writing there are none. If any should appear in a subsequent release ++// of Unicode, the code in regular expressions compilation that determines the longest ++// possible match for a literal string will need to be enhanced. ++// ++// See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() ++// for details on what to do in case of a failure of this test. 
++// ++void RegexTest::Bug9283() { ++ UErrorCode status = U_ZERO_ERROR; ++ UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status); ++ REGEX_CHECK_STATUS; ++ int32_t index; ++ UChar32 c; ++ for (index=0; ; index++) { ++ c = supplementalsWithCaseFolding.charAt(index); ++ if (c == -1) { ++ break; ++ } ++ UnicodeString cf = UnicodeString(c).foldCase(); ++ REGEX_ASSERT(cf.length() >= 2); ++ } ++} ++ ++ + void RegexTest::CheckInvBufSize() { + if(inv_next>=INV_BUFSIZ) { + errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %d )\n", +--- test/intltest/regextst.h ++++ test/intltest/regextst.h +@@ -1,6 +1,6 @@ + /******************************************************************** + * COPYRIGHT: +- * Copyright (c) 2002-2011, International Business Machines Corporation and ++ * Copyright (c) 2002-2012, International Business Machines Corporation and + * others. All Rights Reserved. + ********************************************************************/ + +@@ -45,6 +45,7 @@ + virtual void Bug7740(); + virtual void Bug8479(); + virtual void Bug7029(); ++ virtual void Bug9283(); + virtual void CheckInvBufSize(); + + // The following functions are internal to the regexp tests. 
+--- test/testdata/regextst.txt ++++ test/testdata/regextst.txt +@@ -1141,6 +1141,11 @@ + "[\w]+" " <0>abc\u200cdef\u200dghi</0> " + "[\w]+" i " <0>abc\u200cdef\u200dghi</0> " + ++# Bug 9283 ++# uregex_open fails for look-behind assertion + case-insensitive ++ ++"(ab)?(?<=ab)cd|ef" i "<0><1>ab</1>cd</0>" ++ + # Random debugging, Temporary + # + #"^(?:a?b?)*$" "a--" diff --git a/dev-libs/icu/icu-49.1.1-r1.ebuild b/dev-libs/icu/icu-49.1.1-r1.ebuild new file mode 100644 index 000000000000..212e131d47be --- /dev/null +++ b/dev-libs/icu/icu-49.1.1-r1.ebuild @@ -0,0 +1,91 @@ +# Copyright 1999-2012 Gentoo Foundation +# Distributed under the terms of the GNU General Public License v2 +# $Header: /var/cvsroot/gentoo-x86/dev-libs/icu/icu-49.1.1-r1.ebuild,v 1.1 2012/05/06 16:26:56 hwoarang Exp $ + +EAPI="4" + +inherit eutils versionator + +MAJOR_VERSION="$(get_version_component_range 1)" +if [[ "${PV}" =~ ^[[:digit:]]+_rc[[:digit:]]*$ ]]; then + MINOR_VERSION="0" +else + MINOR_VERSION="$(get_version_component_range 2)" +fi + +DESCRIPTION="International Components for Unicode" +HOMEPAGE="http://www.icu-project.org/" + +BASE_URI="http://download.icu-project.org/files/icu4c/${PV/_/}" +SRC_ARCHIVE="icu4c-${PV//./_}-src.tgz" +DOCS_ARCHIVE="icu4c-${PV//./_}-docs.zip" + +SRC_URI="${BASE_URI}/${SRC_ARCHIVE} + doc? ( ${BASE_URI}/${DOCS_ARCHIVE} )" + +LICENSE="BSD" +SLOT="0" +KEYWORDS="~alpha ~amd64 ~arm ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sh ~sparc ~x86 ~x86-fbsd" +IUSE="debug doc examples static-libs" + +DEPEND="doc? ( app-arch/unzip )" +RDEPEND="" + +S="${WORKDIR}/${PN}/source" + +QA_DT_NEEDED="/usr/lib.*/libicudata\.so\.${MAJOR_VERSION}\.${MINOR_VERSION}.*" + +src_unpack() { + unpack "${SRC_ARCHIVE}" + if use doc; then + mkdir docs + pushd docs > /dev/null + unpack "${DOCS_ARCHIVE}" + popd > /dev/null + fi +} + +src_prepare() { + # Do not hardcode flags into icu-config. 
+ # https://ssl.icu-project.org/trac/ticket/6102 + local variable + for variable in CFLAGS CPPFLAGS CXXFLAGS FFLAGS LDFLAGS; do + sed -i -e "/^${variable} =.*/s:@${variable}@::" config/Makefile.inc.in || die "sed failed" + done + + epatch "${FILESDIR}/${PN}-4.8.1-fix_binformat_fonts.patch" + epatch "${FILESDIR}/${PN}-4.8.1.1-fix_ltr.patch" + epatch "${FILESDIR}/${P}-regex.patch" +} + +src_configure() { + econf \ + $(use_enable debug) \ + $(use_enable examples samples) \ + $(use_enable static-libs static) +} + +src_test() { + # INTLTEST_OPTS: intltest options + # -e: Exhaustive testing + # -l: Reporting of memory leaks + # -v: Increased verbosity + # IOTEST_OPTS: iotest options + # -e: Exhaustive testing + # -v: Increased verbosity + # CINTLTST_OPTS: cintltst options + # -e: Exhaustive testing + # -v: Increased verbosity + emake -j1 check +} + +src_install() { + emake DESTDIR="${D}" install + + dohtml ../readme.html + dodoc ../unicode-license.txt + if use doc; then + insinto /usr/share/doc/${PF}/html/api + doins -r "${WORKDIR}/docs/"* + fi +} |