diff options
author | Michał Górny <mgorny@gentoo.org> | 2017-08-14 08:46:57 +0200 |
---|---|---|
committer | Michał Górny <mgorny@gentoo.org> | 2017-08-14 09:12:33 +0200 |
commit | 9741510885394a808ea58561b23a1711771f1f4a (patch) | |
tree | 65348766c622babca00d7afee1433e3c4d1902ab /dev-python/beautifulsoup | |
parent | dev-vcs/rcsi: Remove cvs-utils@ project, #627602 (diff) | |
download | gentoo-9741510885394a808ea58561b23a1711771f1f4a.tar.gz gentoo-9741510885394a808ea58561b23a1711771f1f4a.tar.bz2 gentoo-9741510885394a808ea58561b23a1711771f1f4a.zip |
dev-python/beautifulsoup: Drop masked ancient version
Diffstat (limited to 'dev-python/beautifulsoup')
4 files changed, 0 insertions, 1029 deletions
diff --git a/dev-python/beautifulsoup/Manifest b/dev-python/beautifulsoup/Manifest index a87f7f747d94..6ba59cf1880c 100644 --- a/dev-python/beautifulsoup/Manifest +++ b/dev-python/beautifulsoup/Manifest @@ -1,4 +1,3 @@ -DIST BeautifulSoup-3.1.0.1.tar.gz 71460 SHA256 820a80f473240d9d30047f36c959d530a699a732500662dd8b03e1d3ccad12a8 SHA512 812969faf454a58d849921836ed07ec9a950f34fb31e29e118cdf1a75a533370e430f417402b5a5016d23b2d3a1c44a1cf5fde5b3bfd1bc98c50036edd51c0d6 WHIRLPOOL a199585817dcabcc6327c3836a66128605ebf92a6663b5c660125061a797485a504d300791bcd43e0e94e4f08ca59c01f65f42481da07b1240350cbfc6ea6b0c DIST BeautifulSoup-3.2.1.tar.gz 31224 SHA256 f5ba85e907e7dfd78e44e4000b3eaef3a650aefc57831e8a645702db2e5b50db SHA512 365b7b045a2069cf437877543577bc0aa99256a6dc4c9743670b46bfceab5494a06628012d6eccecfe99c25d5c9e0c65814964b47026f15ba1a538444cfb7789 WHIRLPOOL c2f84b29421d0153fb1fecc87d63e00a61182e03bc0683132babca5d6c94143b4875a60a19124a36e4e6e78ce80bff9e1e81b37335700efc14084da933307e26 DIST beautifulsoup4-4.5.1.tar.gz 158039 SHA256 3c9474036afda9136aac6463def733f81017bf9ef3510d25634f335b0c87f5e1 SHA512 d560d7f743507084ec546708d29bb3764512f5b2c380004280dde813350bf48d1697fddce3bd3f95186407bf5142941d7adc7d0de8e7962eb5ca1278dbc7e93f WHIRLPOOL bf971596707c2ff69e93528164be01254258aa45601763c543246b67c5d31024b0e4de618382775a3cf313d255d8d1d6268a47542773531aacee9a2643412661 DIST beautifulsoup4-4.5.3.tar.gz 159185 SHA256 b21ca09366fa596043578fd4188b052b46634d22059e68dd0077d9ee77e08a3e SHA512 d31db0e3bb778a78c37882fcd55dc580eb5eeadfd48744eae6e2e0d0ef5983b216a4682af84a4971611b05fb99c45012ce094475f2d7c39a5b90dad99906ec84 WHIRLPOOL f8dbffd8e4a1dbee0a7ad8a4bcbe22a984f524474f0241a4c03ef5c37b291f9834a6ff1d076421c0cf1087588df1e49f5b99cd9afd7e81591c9063d92d4d097d diff --git a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild b/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild deleted file mode 100644 index a69a317f6d96..000000000000 --- a/dev-python/beautifulsoup/beautifulsoup-3.1.0.1-r2.ebuild +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 1999-2017 Gentoo Foundation -# Distributed under the terms of the GNU General Public License v2 - -EAPI="5" -# A few tests fail with python3.3/3.4 :( -PYTHON_COMPAT=( python3_4 pypy3 ) - -inherit distutils-r1 eutils - -MY_PN="BeautifulSoup" -MY_P="${MY_PN}-${PV}" - -DESCRIPTION="HTML/XML parser for quick-turnaround applications like screen-scraping" -HOMEPAGE="http://www.crummy.com/software/BeautifulSoup/ https://pypi.python.org/pypi/BeautifulSoup" -SRC_URI="http://www.crummy.com/software/${MY_PN}/download/${MY_P}.tar.gz" - -LICENSE="BSD" -SLOT="python-3" -KEYWORDS="alpha amd64 arm hppa ia64 ppc ppc64 s390 sh sparc x86 ~amd64-fbsd ~x86-fbsd ~amd64-linux ~x86-linux ~x86-macos ~x86-solaris" -IUSE="" - -DEPEND="" -RDEPEND="!dev-python/beautifulsoup:0" - -S="${WORKDIR}/${MY_P}" - -PATCHES=( - "${FILESDIR}/${P}-python-3.patch" - "${FILESDIR}/${P}-disable-tests.patch" -) - -python_test() { - "${PYTHON}" BeautifulSoupTests.py || die "Tests fail with ${EPYTHON}" -} - -python_install_all() { - distutils-r1_python_install_all - # Delete useless files. - rm -r "${ED%/}/usr/bin" || die -} diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch deleted file mode 100644 index c97cd76ee314..000000000000 --- a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-disable-tests.patch +++ /dev/null @@ -1,39 +0,0 @@ ---- lib/BeautifulSoupTests.py.orig 2015-07-21 08:39:33.077000000 +0000 -+++ lib/BeautifulSoupTests.py 2015-07-21 08:41:19.285000000 +0000 -@@ -538,13 +538,13 @@ - text = "<td nowrap>foo</td>" - self.assertSoupEquals(text, text) - -- def testCData(self): -- xml = "<root>foo<![CDATA[foobar]]>bar</root>" -- self.assertSoupEquals(xml, xml) -- r = re.compile("foo.*bar") -- soup = BeautifulSoup(xml) -- self.assertEquals(soup.find(text=r).string, "foobar") -- self.assertEquals(soup.find(text=r).__class__, CData) -+ #def testCData(self): -+ # xml = "<root>foo<![CDATA[foobar]]>bar</root>" -+ # self.assertSoupEquals(xml, xml) -+ # r = re.compile("foo.*bar") -+ # soup = BeautifulSoup(xml) -+ # self.assertEquals(soup.find(text=r).string, "foobar") -+ # self.assertEquals(soup.find(text=r).__class__, CData) - - def testComments(self): - xml = "foo<!--foobar-->baz" -@@ -607,11 +607,11 @@ - def testWhitespaceInDeclaration(self): - self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>') - -- def testJunkInDeclaration(self): -- self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a') -+ #def testJunkInDeclaration(self): -+ # self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a') - -- def testIncompleteDeclaration(self): -- self.assertSoupEquals('a<!b <p>c') -+ #def testIncompleteDeclaration(self): -+ # self.assertSoupEquals('a<!b <p>c') - - def testEntityReplacement(self): - self.assertSoupEquals('<b>hello there</b>') diff --git a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch b/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch deleted file mode 100644 index adcbb43dd078..000000000000 --- a/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch +++ /dev/null @@ -1,949 +0,0 @@ ---- BeautifulSoup.py -+++ BeautifulSoup.py -@@ -76,7 +76,7 @@ - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. - - """ --from __future__ import generators -+ - - __author__ = "Leonard Richardson (leonardr@segfault.org)" - __version__ = "3.1.0.1" -@@ -84,12 +84,12 @@ - __license__ = "New-style BSD" - - import codecs --import markupbase -+import _markupbase - import types - import re --from HTMLParser import HTMLParser, HTMLParseError -+from html.parser import HTMLParser, HTMLParseError - try: -- from htmlentitydefs import name2codepoint -+ from html.entities import name2codepoint - except ImportError: - name2codepoint = {} - try: -@@ -98,18 +98,18 @@ - from sets import Set as set - - #These hacks make Beautiful Soup able to parse XML with namespaces --markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match -+_markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match - - DEFAULT_OUTPUT_ENCODING = "utf-8" - - # First, the classes that represent markup elements. - --def sob(unicode, encoding): -+def sob(str, encoding): - """Returns either the given Unicode string or its encoding.""" - if encoding is None: -- return unicode -+ return str - else: -- return unicode.encode(encoding) -+ return str.encode(encoding) - - class PageElement: - """Contains the navigational information for some part of the page -@@ -178,8 +178,8 @@ - return lastChild - - def insert(self, position, newChild): -- if (isinstance(newChild, basestring) -- or isinstance(newChild, unicode)) \ -+ if (isinstance(newChild, str) -+ or isinstance(newChild, str)) \ - and not isinstance(newChild, NavigableString): - newChild = NavigableString(newChild) - -@@ -334,7 +334,7 @@ - g = generator() - while True: - try: -- i = g.next() -+ i = g.__next__() - except StopIteration: - break - if i: -@@ -385,22 +385,22 @@ - def toEncoding(self, s, encoding=None): - """Encodes an object to a string in some encoding, or to Unicode. - .""" -- if isinstance(s, unicode): -+ if isinstance(s, str): - if encoding: - s = s.encode(encoding) - elif isinstance(s, str): - if encoding: - s = s.encode(encoding) - else: -- s = unicode(s) -+ s = str(s) - else: - if encoding: - s = self.toEncoding(str(s), encoding) - else: -- s = unicode(s) -+ s = str(s) - return s - --class NavigableString(unicode, PageElement): -+class NavigableString(str, PageElement): - - def __new__(cls, value): - """Create a new NavigableString. -@@ -410,12 +410,12 @@ - passed in to the superclass's __new__ or the superclass won't know - how to handle non-ASCII characters. - """ -- if isinstance(value, unicode): -- return unicode.__new__(cls, value) -- return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) -+ if isinstance(value, str): -+ return str.__new__(cls, value) -+ return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) - - def __getnewargs__(self): -- return (unicode(self),) -+ return (str(self),) - - def __getattr__(self, attr): - """text.string gives you text. This is for backwards -@@ -424,7 +424,7 @@ - if attr == 'string': - return self - else: -- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) -+ raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)) - - def encode(self, encoding=DEFAULT_OUTPUT_ENCODING): - return self.decode().encode(encoding) -@@ -435,23 +435,23 @@ - class CData(NavigableString): - - def decodeGivenEventualEncoding(self, eventualEncoding): -- return u'<![CDATA[' + self + u']]>' -+ return '<![CDATA[' + self + ']]>' - - class ProcessingInstruction(NavigableString): - - def decodeGivenEventualEncoding(self, eventualEncoding): - output = self -- if u'%SOUP-ENCODING%' in output: -+ if '%SOUP-ENCODING%' in output: - output = self.substituteEncoding(output, eventualEncoding) -- return u'<?' + output + u'?>' -+ return '<?' + output + '?>' - - class Comment(NavigableString): - def decodeGivenEventualEncoding(self, eventualEncoding): -- return u'<!--' + self + u'-->' -+ return '<!--' + self + '-->' - - class Declaration(NavigableString): - def decodeGivenEventualEncoding(self, eventualEncoding): -- return u'<!' + self + u'>' -+ return '<!' + self + '>' - - class Tag(PageElement): - -@@ -460,7 +460,7 @@ - def _invert(h): - "Cheap function to invert a hash." - i = {} -- for k,v in h.items(): -+ for k,v in list(h.items()): - i[v] = k - return i - -@@ -479,23 +479,23 @@ - escaped.""" - x = match.group(1) - if self.convertHTMLEntities and x in name2codepoint: -- return unichr(name2codepoint[x]) -+ return chr(name2codepoint[x]) - elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: - if self.convertXMLEntities: - return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] - else: -- return u'&%s;' % x -+ return '&%s;' % x - elif len(x) > 0 and x[0] == '#': - # Handle numeric entities - if len(x) > 1 and x[1] == 'x': -- return unichr(int(x[2:], 16)) -+ return chr(int(x[2:], 16)) - else: -- return unichr(int(x[1:])) -+ return chr(int(x[1:])) - - elif self.escapeUnrecognizedEntities: -- return u'&%s;' % x -+ return '&%s;' % x - else: -- return u'&%s;' % x -+ return '&%s;' % x - - def __init__(self, parser, name, attrs=None, parent=None, - previous=None): -@@ -524,7 +524,7 @@ - return kval - return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", - self._convertEntities, val)) -- self.attrs = map(convert, self.attrs) -+ self.attrs = list(map(convert, self.attrs)) - - def get(self, key, default=None): - """Returns the value of the 'key' attribute for the tag, or -@@ -533,7 +533,7 @@ - return self._getAttrMap().get(key, default) - - def has_key(self, key): -- return self._getAttrMap().has_key(key) -+ return key in self._getAttrMap() - - def __getitem__(self, key): - """tag[key] returns the value of the 'key' attribute for the tag, -@@ -551,7 +551,7 @@ - def __contains__(self, x): - return x in self.contents - -- def __nonzero__(self): -+ def __bool__(self): - "A tag is non-None even if it has no contents." - return True - -@@ -577,14 +577,14 @@ - #We don't break because bad HTML can define the same - #attribute multiple times. - self._getAttrMap() -- if self.attrMap.has_key(key): -+ if key in self.attrMap: - del self.attrMap[key] - - def __call__(self, *args, **kwargs): - """Calling a tag like a function is the same as calling its - findAll() method. Eg. tag('a') returns a list of all the A tags - found within this tag.""" -- return apply(self.findAll, args, kwargs) -+ return self.findAll(*args, **kwargs) - - def __getattr__(self, tag): - #print "Getattr %s.%s" % (self.__class__, tag) -@@ -592,7 +592,7 @@ - return self.find(tag[:-3]) - elif tag.find('__') != 0: - return self.find(tag) -- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) -+ raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__, tag)) - - def __eq__(self, other): - """Returns true iff this tag has the same name, the same attributes, -@@ -868,7 +868,7 @@ - if isinstance(markupName, Tag): - markup = markupName - markupAttrs = markup -- callFunctionWithTagData = callable(self.name) \ -+ callFunctionWithTagData = hasattr(self.name, '__call__') \ - and not isinstance(markupName, Tag) - - if (not self.name) \ -@@ -880,7 +880,7 @@ - else: - match = True - markupAttrMap = None -- for attr, matchAgainst in self.attrs.items(): -+ for attr, matchAgainst in list(self.attrs.items()): - if not markupAttrMap: - if hasattr(markupAttrs, 'get'): - markupAttrMap = markupAttrs -@@ -921,16 +921,16 @@ - if self._matches(markup, self.text): - found = markup - else: -- raise Exception, "I don't know how to match against a %s" \ -- % markup.__class__ -+ raise Exception("I don't know how to match against a %s" \ -+ % markup.__class__) - return found - - def _matches(self, markup, matchAgainst): - #print "Matching %s against %s" % (markup, matchAgainst) - result = False -- if matchAgainst == True and type(matchAgainst) == types.BooleanType: -+ if matchAgainst == True and type(matchAgainst) == bool: - result = markup != None -- elif callable(matchAgainst): -+ elif hasattr(matchAgainst, '__call__'): - result = matchAgainst(markup) - else: - #Custom match methods take the tag as an argument, but all -@@ -938,7 +938,7 @@ - if isinstance(markup, Tag): - markup = markup.name - if markup is not None and not isString(markup): -- markup = unicode(markup) -+ markup = str(markup) - #Now we know that chunk is either a string, or None. - if hasattr(matchAgainst, 'match'): - # It's a regexp object. -@@ -947,10 +947,10 @@ - and (markup is not None or not isString(matchAgainst))): - result = markup in matchAgainst - elif hasattr(matchAgainst, 'items'): -- result = markup.has_key(matchAgainst) -+ result = matchAgainst in markup - elif matchAgainst and isString(markup): -- if isinstance(markup, unicode): -- matchAgainst = unicode(matchAgainst) -+ if isinstance(markup, str): -+ matchAgainst = str(matchAgainst) - else: - matchAgainst = str(matchAgainst) - -@@ -971,13 +971,13 @@ - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is listlike.""" - return ((hasattr(l, '__iter__') and not isString(l)) -- or (type(l) in (types.ListType, types.TupleType))) -+ or (type(l) in (list, tuple))) - - def isString(s): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is stringlike.""" - try: -- return isinstance(s, unicode) or isinstance(s, basestring) -+ return isinstance(s, str) or isinstance(s, str) - except NameError: - return isinstance(s, str) - -@@ -989,7 +989,7 @@ - for portion in args: - if hasattr(portion, 'items'): - #It's a map. Merge it. -- for k,v in portion.items(): -+ for k,v in list(portion.items()): - built[k] = v - elif isList(portion) and not isString(portion): - #It's a list. Map each item to the default. -@@ -1034,7 +1034,7 @@ - object, possibly one with a %SOUP-ENCODING% slot into which an - encoding will be plugged later.""" - if text[:3] == "xml": -- text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" -+ text = "xml version='1.0' encoding='%SOUP-ENCODING%'" - self._toStringSubclass(text, ProcessingInstruction) - - def handle_comment(self, text): -@@ -1044,7 +1044,7 @@ - def handle_charref(self, ref): - "Handle character references as data." - if self.soup.convertEntities: -- data = unichr(int(ref)) -+ data = chr(int(ref)) - else: - data = '&#%s;' % ref - self.handle_data(data) -@@ -1056,7 +1056,7 @@ - data = None - if self.soup.convertHTMLEntities: - try: -- data = unichr(name2codepoint[ref]) -+ data = chr(name2codepoint[ref]) - except KeyError: - pass - -@@ -1147,7 +1147,7 @@ - lambda x: '<!' + x.group(1) + '>') - ] - -- ROOT_TAG_NAME = u'[document]' -+ ROOT_TAG_NAME = '[document]' - - HTML_ENTITIES = "html" - XML_ENTITIES = "xml" -@@ -1236,14 +1236,14 @@ - def _feed(self, inDocumentEncoding=None, isHTML=False): - # Convert the document to Unicode. - markup = self.markup -- if isinstance(markup, unicode): -+ if isinstance(markup, str): - if not hasattr(self, 'originalEncoding'): - self.originalEncoding = None - else: - dammit = UnicodeDammit\ - (markup, [self.fromEncoding, inDocumentEncoding], - smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) -- markup = dammit.unicode -+ markup = dammit.str - self.originalEncoding = dammit.originalEncoding - self.declaredHTMLEncoding = dammit.declaredHTMLEncoding - if markup: -@@ -1269,8 +1269,8 @@ - def isSelfClosingTag(self, name): - """Returns true iff the given string is the name of a - self-closing tag according to this parser.""" -- return self.SELF_CLOSING_TAGS.has_key(name) \ -- or self.instanceSelfClosingTags.has_key(name) -+ return name in self.SELF_CLOSING_TAGS \ -+ or name in self.instanceSelfClosingTags - - def reset(self): - Tag.__init__(self, self, self.ROOT_TAG_NAME) -@@ -1305,7 +1305,7 @@ - - def endData(self, containerClass=NavigableString): - if self.currentData: -- currentData = u''.join(self.currentData) -+ currentData = ''.join(self.currentData) - if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and - not set([tag.name for tag in self.tagStack]).intersection( - self.PRESERVE_WHITESPACE_TAGS)): -@@ -1368,7 +1368,7 @@ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None -- isResetNesting = self.RESET_NESTING_TAGS.has_key(name) -+ isResetNesting = name in self.RESET_NESTING_TAGS - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): -@@ -1381,7 +1381,7 @@ - if (nestingResetTriggers != None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers == None and isResetNesting -- and self.RESET_NESTING_TAGS.has_key(p.name)): -+ and p.name in self.RESET_NESTING_TAGS): - - #If we encounter one of the nesting reset triggers - #peculiar to this tag, or we encounter another tag -@@ -1399,7 +1399,7 @@ - if self.quoteStack: - #This is not a real tag. - #print "<%s> is not real!" % name -- attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) -+ attrs = ''.join([' %s="%s"' % (x_y[0], x_y[1]) for x_y in attrs]) - self.handle_data('<%s%s>' % (name, attrs)) - return - self.endData() -@@ -1493,7 +1493,7 @@ - BeautifulStoneSoup before writing your own subclass.""" - - def __init__(self, *args, **kwargs): -- if not kwargs.has_key('smartQuotesTo'): -+ if 'smartQuotesTo' not in kwargs: - kwargs['smartQuotesTo'] = self.HTML_ENTITIES - kwargs['isHTML'] = True - BeautifulStoneSoup.__init__(self, *args, **kwargs) -@@ -1677,7 +1677,7 @@ - parent._getAttrMap() - if (isinstance(tag, Tag) and len(tag.contents) == 1 and - isinstance(tag.contents[0], NavigableString) and -- not parent.attrMap.has_key(tag.name)): -+ tag.name not in parent.attrMap): - parent[tag.name] = tag.contents[0] - BeautifulStoneSoup.popTag(self) - -@@ -1751,9 +1751,9 @@ - self._detectEncoding(markup, isHTML) - self.smartQuotesTo = smartQuotesTo - self.triedEncodings = [] -- if markup == '' or isinstance(markup, unicode): -+ if markup == '' or isinstance(markup, str): - self.originalEncoding = None -- self.unicode = unicode(markup) -+ self.str = str(markup) - return - - u = None -@@ -1766,7 +1766,7 @@ - if u: break - - # If no luck and we have auto-detection library, try that: -- if not u and chardet and not isinstance(self.markup, unicode): -+ if not u and chardet and not isinstance(self.markup, str): - u = self._convertFrom(chardet.detect(self.markup)['encoding']) - - # As a last resort, try utf-8 and windows-1252: -@@ -1775,7 +1775,7 @@ - u = self._convertFrom(proposed_encoding) - if u: break - -- self.unicode = u -+ self.str = u - if not u: self.originalEncoding = None - - def _subMSChar(self, match): -@@ -1783,7 +1783,7 @@ - entity.""" - orig = match.group(1) - sub = self.MS_CHARS.get(orig) -- if type(sub) == types.TupleType: -+ if type(sub) == tuple: - if self.smartQuotesTo == 'xml': - sub = '&#x'.encode() + sub[1].encode() + ';'.encode() - else: -@@ -1804,7 +1804,7 @@ - if self.smartQuotesTo and proposed.lower() in("windows-1252", - "iso-8859-1", - "iso-8859-2"): -- smart_quotes_re = "([\x80-\x9f])" -+ smart_quotes_re = b"([\x80-\x9f])" - smart_quotes_compiled = re.compile(smart_quotes_re) - markup = smart_quotes_compiled.sub(self._subMSChar, markup) - -@@ -1813,7 +1813,7 @@ - u = self._toUnicode(markup, proposed) - self.markup = u - self.originalEncoding = proposed -- except Exception, e: -+ except Exception as e: - # print "That didn't work!" - # print e - return None -@@ -1842,7 +1842,7 @@ - elif data[:4] == '\xff\xfe\x00\x00': - encoding = 'utf-32le' - data = data[4:] -- newdata = unicode(data, encoding) -+ newdata = str(data, encoding) - return newdata - - def _detectEncoding(self, xml_data, isHTML=False): -@@ -1855,41 +1855,41 @@ - elif xml_data[:4] == '\x00\x3c\x00\x3f': - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' -- xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') -+ xml_data = str(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ - and (xml_data[2:4] != '\x00\x00'): - # UTF-16BE with BOM - sniffed_xml_encoding = 'utf-16be' -- xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') -+ xml_data = str(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == '\x3c\x00\x3f\x00': - # UTF-16LE - sniffed_xml_encoding = 'utf-16le' -- xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') -+ xml_data = str(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ - (xml_data[2:4] != '\x00\x00'): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' -- xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') -+ xml_data = str(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == '\x00\x00\x00\x3c': - # UTF-32BE - sniffed_xml_encoding = 'utf-32be' -- xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') -+ xml_data = str(xml_data, 'utf-32be').encode('utf-8') - elif xml_data[:4] == '\x3c\x00\x00\x00': - # UTF-32LE - sniffed_xml_encoding = 'utf-32le' -- xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') -+ xml_data = str(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == '\x00\x00\xfe\xff': - # UTF-32BE with BOM - sniffed_xml_encoding = 'utf-32be' -- xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') -+ xml_data = str(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == '\xff\xfe\x00\x00': - # UTF-32LE with BOM - sniffed_xml_encoding = 'utf-32le' -- xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') -+ xml_data = str(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == '\xef\xbb\xbf': - # UTF-8 with BOM - sniffed_xml_encoding = 'utf-8' -- xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') -+ xml_data = str(xml_data[3:], 'utf-8').encode('utf-8') - else: - sniffed_xml_encoding = 'ascii' - pass -@@ -1954,41 +1954,41 @@ - 250,251,252,253,254,255) - import string - c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ -- ''.join(map(chr, range(256))), ''.join(map(chr, emap))) -+ ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap))) - return s.translate(c.EBCDIC_TO_ASCII_MAP) - -- MS_CHARS = { '\x80' : ('euro', '20AC'), -- '\x81' : ' ', -- '\x82' : ('sbquo', '201A'), -- '\x83' : ('fnof', '192'), -- '\x84' : ('bdquo', '201E'), -- '\x85' : ('hellip', '2026'), -- '\x86' : ('dagger', '2020'), -- '\x87' : ('Dagger', '2021'), -- '\x88' : ('circ', '2C6'), -- '\x89' : ('permil', '2030'), -- '\x8A' : ('Scaron', '160'), -- '\x8B' : ('lsaquo', '2039'), -- '\x8C' : ('OElig', '152'), -- '\x8D' : '?', -- '\x8E' : ('#x17D', '17D'), -- '\x8F' : '?', -- '\x90' : '?', -- '\x91' : ('lsquo', '2018'), -- '\x92' : ('rsquo', '2019'), -- '\x93' : ('ldquo', '201C'), -- '\x94' : ('rdquo', '201D'), -- '\x95' : ('bull', '2022'), -- '\x96' : ('ndash', '2013'), -- '\x97' : ('mdash', '2014'), -- '\x98' : ('tilde', '2DC'), -- '\x99' : ('trade', '2122'), -- '\x9a' : ('scaron', '161'), -- '\x9b' : ('rsaquo', '203A'), -- '\x9c' : ('oelig', '153'), -- '\x9d' : '?', -- '\x9e' : ('#x17E', '17E'), -- '\x9f' : ('Yuml', ''),} -+ MS_CHARS = { b'\x80' : ('euro', '20AC'), -+ b'\x81' : ' ', -+ b'\x82' : ('sbquo', '201A'), -+ b'\x83' : ('fnof', '192'), -+ b'\x84' : ('bdquo', '201E'), -+ b'\x85' : ('hellip', '2026'), -+ b'\x86' : ('dagger', '2020'), -+ b'\x87' : ('Dagger', '2021'), -+ b'\x88' : ('circ', '2C6'), -+ b'\x89' : ('permil', '2030'), -+ b'\x8A' : ('Scaron', '160'), -+ b'\x8B' : ('lsaquo', '2039'), -+ b'\x8C' : ('OElig', '152'), -+ b'\x8D' : '?', -+ b'\x8E' : ('#x17D', '17D'), -+ b'\x8F' : '?', -+ b'\x90' : '?', -+ b'\x91' : ('lsquo', '2018'), -+ b'\x92' : ('rsquo', '2019'), -+ b'\x93' : ('ldquo', '201C'), -+ b'\x94' : ('rdquo', '201D'), -+ b'\x95' : ('bull', '2022'), -+ b'\x96' : ('ndash', '2013'), -+ b'\x97' : ('mdash', '2014'), -+ b'\x98' : ('tilde', '2DC'), -+ b'\x99' : ('trade', '2122'), -+ b'\x9a' : ('scaron', '161'), -+ b'\x9b' : ('rsaquo', '203A'), -+ b'\x9c' : ('oelig', '153'), -+ b'\x9d' : '?', -+ b'\x9e' : ('#x17E', '17E'), -+ b'\x9f' : ('Yuml', ''),} - - ####################################################################### - -@@ -1997,4 +1997,4 @@ - if __name__ == '__main__': - import sys - soup = BeautifulSoup(sys.stdin) -- print soup.prettify() -+ print(soup.prettify()) ---- BeautifulSoupTests.py -+++ BeautifulSoupTests.py -@@ -82,7 +82,7 @@ - def testFindAllText(self): - soup = BeautifulSoup("<html>\xbb</html>") - self.assertEqual(soup.findAll(text=re.compile('.*')), -- [u'\xbb']) -+ ['\xbb']) - - def testFindAllByRE(self): - import re -@@ -215,7 +215,7 @@ - soup = BeautifulSoup(self.x, parseOnlyThese=strainer) - self.assertEquals(len(soup), 10) - -- strainer = SoupStrainer(text=lambda(x):x[8]=='3') -+ strainer = SoupStrainer(text=lambda x:x[8]=='3') - soup = BeautifulSoup(self.x, parseOnlyThese=strainer) - self.assertEquals(len(soup), 3) - -@@ -256,7 +256,7 @@ - self.assertEqual(copied.decode(), self.soup.decode()) - - def testUnicodePickle(self): -- import cPickle as pickle -+ import pickle as pickle - html = "<b>" + chr(0xc3) + "</b>" - soup = BeautifulSoup(html) - dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) -@@ -586,23 +586,23 @@ - self.assertEquals(soup.decode(), "<<sacré bleu!>>") - - soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) -- self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>") -+ self.assertEquals(soup.decode(), "<<sacr\xe9 bleu!>>") - - # Make sure the "XML", "HTML", and "XHTML" settings work. - text = "<™'" - soup = BeautifulStoneSoup(text, convertEntities=xmlEnt) -- self.assertEquals(soup.decode(), u"<™'") -+ self.assertEquals(soup.decode(), "<™'") - - soup = BeautifulStoneSoup(text, convertEntities=htmlEnt) -- self.assertEquals(soup.decode(), u"<\u2122'") -+ self.assertEquals(soup.decode(), "<\u2122'") - - soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt) -- self.assertEquals(soup.decode(), u"<\u2122'") -+ self.assertEquals(soup.decode(), "<\u2122'") - - def testNonBreakingSpaces(self): - soup = BeautifulSoup("<a> </a>", - convertEntities=BeautifulStoneSoup.HTML_ENTITIES) -- self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>") -+ self.assertEquals(soup.decode(), "<a>\xa0\xa0</a>") - - def testWhitespaceInDeclaration(self): - self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>') -@@ -617,27 +617,27 @@ - self.assertSoupEquals('<b>hello there</b>') - - def testEntitiesInAttributeValues(self): -- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', -+ self.assertSoupEquals('<x t="xñ">', b'<x t="x\xc3\xb1"></x>', - encoding='utf-8') -- self.assertSoupEquals('<x t="xñ">', '<x t="x\xc3\xb1"></x>', -+ self.assertSoupEquals('<x t="xñ">', b'<x t="x\xc3\xb1"></x>', - encoding='utf-8') - - soup = BeautifulSoup('<x t=">™">', - convertEntities=BeautifulStoneSoup.HTML_ENTITIES) -- self.assertEquals(soup.decode(), u'<x t=">\u2122"></x>') -+ self.assertEquals(soup.decode(), '<x t=">\u2122"></x>') - - uri = "http://crummy.com?sacré&bleu" - link = '<a href="%s"></a>' % uri - - soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) - self.assertEquals(soup.decode(), -- link.replace("é", u"\xe9")) -+ link.replace("é", "\xe9")) - - uri = "http://crummy.com?sacré&bleu" - link = '<a href="%s"></a>' % uri - soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES) - self.assertEquals(soup.a['href'], -- uri.replace("é", u"\xe9")) -+ uri.replace("é", "\xe9")) - - def testNakedAmpersands(self): - html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES} -@@ -663,13 +663,13 @@ - smart quote fixes.""" - - def testUnicodeDammitStandalone(self): -- markup = "<foo>\x92</foo>" -+ markup = b"<foo>\x92</foo>" - dammit = UnicodeDammit(markup) -- self.assertEquals(dammit.unicode, "<foo>’</foo>") -+ self.assertEquals(dammit.str, "<foo>’</foo>") - -- hebrew = "\xed\xe5\xec\xf9" -+ hebrew = b"\xed\xe5\xec\xf9" - dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) -- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9') -+ self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9') - self.assertEquals(dammit.originalEncoding, 'iso-8859-8') - - def testGarbageInGarbageOut(self): -@@ -677,13 +677,13 @@ - asciiSoup = BeautifulStoneSoup(ascii) - self.assertEquals(ascii, asciiSoup.decode()) - -- unicodeData = u"<foo>\u00FC</foo>" -+ unicodeData = "<foo>\u00FC</foo>" - utf8 = unicodeData.encode("utf-8") -- self.assertEquals(utf8, '<foo>\xc3\xbc</foo>') -+ self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>') - - unicodeSoup = BeautifulStoneSoup(unicodeData) - self.assertEquals(unicodeData, unicodeSoup.decode()) -- self.assertEquals(unicodeSoup.foo.string, u'\u00FC') -+ self.assertEquals(unicodeSoup.foo.string, '\u00FC') - - utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8') - self.assertEquals(utf8, utf8Soup.encode('utf-8')) -@@ -696,18 +696,18 @@ - - def testHandleInvalidCodec(self): - for bad_encoding in ['.utf8', '...', 'utF---16.!']: -- soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"), -+ soup = BeautifulSoup("Räksmörgås".encode("utf-8"), - fromEncoding=bad_encoding) - self.assertEquals(soup.originalEncoding, 'utf-8') - - def testUnicodeSearch(self): -- html = u'<html><body><h1>Räksmörgås</h1></body></html>' -+ html = '<html><body><h1>Räksmörgås</h1></body></html>' - soup = BeautifulSoup(html) -- self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås') -+ self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås') - - def testRewrittenXMLHeader(self): -- euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' -- utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" -+ euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n' -+ utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n" - soup = BeautifulStoneSoup(euc_jp) - if soup.originalEncoding != "euc-jp": - raise Exception("Test failed when parsing euc-jp document. " -@@ -718,12 +718,12 @@ - self.assertEquals(soup.originalEncoding, "euc-jp") - self.assertEquals(soup.renderContents('utf-8'), utf8) - -- old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>" -+ old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>" - new_text = "<?xml version='1.0' encoding='utf-8'?><foo>’</foo>" - self.assertSoupEquals(old_text, new_text) - - def testRewrittenMetaTag(self): -- no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' -+ no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>''' - soup = BeautifulSoup(no_shift_jis_html) - - # Beautiful Soup used to try to rewrite the meta tag even if the -@@ -733,16 +733,16 @@ - soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer) - self.assertEquals(soup.contents[0].name, 'pre') - -- meta_tag = ('<meta content="text/html; charset=x-sjis" ' -- 'http-equiv="Content-type" />') -+ meta_tag = (b'<meta content="text/html; charset=x-sjis" ' -+ b'http-equiv="Content-type" />') - shift_jis_html = ( -- '<html><head>\n%s\n' -- '<meta http-equiv="Content-language" content="ja" />' -- '</head><body><pre>\n' -- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' -- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' -- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' -- '</pre></body></html>') % meta_tag -+ b'<html><head>\n' + meta_tag + b'\n' -+ b'<meta http-equiv="Content-language" content="ja" />' -+ b'</head><body><pre>\n' -+ b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f' -+ b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c' -+ b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n' -+ b'</pre></body></html>') - soup = BeautifulSoup(shift_jis_html) - if soup.originalEncoding != "shift-jis": - raise Exception("Test failed when parsing shift-jis document " -@@ -755,59 +755,59 @@ - content_type_tag = soup.meta['content'] - self.assertEquals(content_type_tag[content_type_tag.find('charset='):], - 'charset=%SOUP-ENCODING%') -- content_type = str(soup.meta) -+ content_type = soup.meta.decode() - index = content_type.find('charset=') - self.assertEqual(content_type[index:index+len('charset=utf8')+1], - 'charset=utf-8') - content_type = soup.meta.encode('shift-jis') -- index = content_type.find('charset=') -+ index = content_type.find(b'charset=') - self.assertEqual(content_type[index:index+len('charset=shift-jis')], - 'charset=shift-jis'.encode()) - - self.assertEquals(soup.encode('utf-8'), ( -- '<html><head>\n' -- '<meta content="text/html; charset=utf-8" ' -- 'http-equiv="Content-type" />\n' -- '<meta http-equiv="Content-language" content="ja" />' -- '</head><body><pre>\n' -- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' -- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' -- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' -- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' -- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' -- '</pre></body></html>')) -+ b'<html><head>\n' -+ b'<meta content="text/html; charset=utf-8" ' -+ b'http-equiv="Content-type" />\n' -+ b'<meta http-equiv="Content-language" content="ja" />' -+ b'</head><body><pre>\n' -+ b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3' -+ b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3' -+ b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6' -+ b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3' -+ b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n' -+ b'</pre></body></html>')) - self.assertEquals(soup.encode("shift-jis"), - shift_jis_html.replace('x-sjis'.encode(), - 'shift-jis'.encode())) - -- isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" -+ isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>""" - soup = BeautifulSoup(isolatin) - - utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode()) -- utf8 = utf8.replace("\xe9", "\xc3\xa9") -+ utf8 = utf8.replace(b"\xe9", b"\xc3\xa9") - self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8') - - def testHebrew(self): -- iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' -- utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' -+ iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n' -+ utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n' - soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8") - self.assertEquals(soup.encode('utf-8'), utf8) - - def testSmartQuotesNotSoSmartAnymore(self): -- self.assertSoupEquals("\x91Foo\x92 <!--blah-->", -+ self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->", - '‘Foo’ <!--blah-->') - - def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self): -- smartQuotes = "Il a dit, \x8BSacré bleu!\x9b" -+ smartQuotes = b"Il a dit, \x8BSacré bleu!\x9b" - soup = BeautifulSoup(smartQuotes) - self.assertEquals(soup.decode(), - 'Il a dit, ‹Sacré bleu!›') - soup = BeautifulSoup(smartQuotes, convertEntities="html") - self.assertEquals(soup.encode('utf-8'), -- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') -+ b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba') - - def testDontSeeSmartQuotesWhereThereAreNone(self): -- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch" -+ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" - self.assertSoupEquals(utf_8, encoding='utf-8') - - ---- setup.py -+++ setup.py -@@ -19,19 +19,19 @@ - suite = loader.loadTestsFromModule(BeautifulSoupTests) - suite.run(result) - if not result.wasSuccessful(): -- print "Unit tests have failed!" -+ print("Unit tests have failed!") - for l in result.errors, result.failures: - for case, error in l: -- print "-" * 80 -+ print("-" * 80) - desc = case.shortDescription() - if desc: -- print desc -- print error -- print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''' -- print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup." -+ print(desc) -+ print(error) -+ print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''') -+ print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.") - if sys.argv[1] == 'sdist': -- print -- print "I'm not going to make a source distribution since the tests don't pass." -+ print() -+ print("I'm not going to make a source distribution since the tests don't pass.") - sys.exit(1) - - setup(name="BeautifulSoup", |