pypy/objspace/std/stringobject.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978

from pypy.objspace.std.model import registerimplementation, W_Object
from pypy.objspace.std.register_all import register_all
from pypy.objspace.std.multimethod import FailedToImplement
from pypy.interpreter.error import OperationError, operationerrfmt
from pypy.interpreter import gateway
from pypy.rlib.rarithmetic import ovfcheck
from pypy.rlib.objectmodel import we_are_translated, compute_hash
from pypy.objspace.std.inttype import wrapint
from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
from pypy.objspace.std import slicetype
from pypy.objspace.std.listobject import W_ListObject
from pypy.objspace.std.noneobject import W_NoneObject
from pypy.objspace.std.tupleobject import W_TupleObject
from pypy.rlib.rstring import StringBuilder, string_repeat
from pypy.interpreter.buffer import StringBuffer

from pypy.objspace.std.stringtype import sliced, wrapstr, wrapchar, \
     stringendswith, stringstartswith, joined2

from pypy.objspace.std.formatting import mod_format

class W_StringObject(W_Object):
    from pypy.objspace.std.stringtype import str_typedef as typedef
    _immutable_ = True

    def __init__(w_self, str):
        w_self._value = str

    def __repr__(w_self):
        """ representation for debugging purposes """
        return "%s(%r)" % (w_self.__class__.__name__, w_self._value)

    def unwrap(w_self, space):
        return w_self._value

registerimplementation(W_StringObject)

W_StringObject.EMPTY = W_StringObject('')
W_StringObject.PREBUILT = [W_StringObject(chr(i)) for i in range(256)]
del i

def unicode_w__String(space, w_self):
    # XXX should this use the default encoding?
    from pypy.objspace.std.unicodetype import plain_str2unicode
    return plain_str2unicode(space, w_self._value)

def _is_generic(space, w_self, fun): 
    v = w_self._value
    if len(v) == 0:
        return space.w_False
    if len(v) == 1:
        c = v[0]
        return space.newbool(fun(c))
    else:
        for idx in range(len(v)):
            if not fun(v[idx]):
                return space.w_False
        return space.w_True
_is_generic._annspecialcase_ = "specialize:arg(2)"

def _upper(ch):
    if ch.islower():
        o = ord(ch) - 32
        return chr(o)
    else:
        return ch
    
def _lower(ch):
    if ch.isupper():
        o = ord(ch) + 32
        return chr(o)
    else:
        return ch

_isspace = lambda c: c.isspace()
_isdigit = lambda c: c.isdigit()
_isalpha = lambda c: c.isalpha()
_isalnum = lambda c: c.isalnum()

def str_isspace__String(space, w_self):
    return _is_generic(space, w_self, _isspace)

def str_isdigit__String(space, w_self):
    return _is_generic(space, w_self, _isdigit)

def str_isalpha__String(space, w_self):
    return _is_generic(space, w_self, _isalpha)

def str_isalnum__String(space, w_self):
    return _is_generic(space, w_self, _isalnum)

def str_isupper__String(space, w_self):
    """Return True if all cased characters in S are uppercase and there is
at least one cased character in S, False otherwise."""
    v = w_self._value
    if len(v) == 1:
        c = v[0]
        return space.newbool(c.isupper())
    cased = False
    for idx in range(len(v)):
        if v[idx].islower():
            return space.w_False
        elif not cased and v[idx].isupper():
            cased = True
    return space.newbool(cased)

def str_islower__String(space, w_self):
    """Return True if all cased characters in S are lowercase and there is
at least one cased character in S, False otherwise."""
    v = w_self._value
    if len(v) == 1:
        c = v[0]
        return space.newbool(c.islower())
    cased = False
    for idx in range(len(v)):
        if v[idx].isupper():
            return space.w_False
        elif not cased and v[idx].islower():
            cased = True
    return space.newbool(cased)

def str_istitle__String(space, w_self):
    """Return True if S is a titlecased string and there is at least one
character in S, i.e. uppercase characters may only follow uncased
characters and lowercase characters only cased ones. Return False
otherwise."""
    input = w_self._value
    cased = False
    previous_is_cased = False

    for pos in range(0, len(input)):
        ch = input[pos]
        if ch.isupper():
            if previous_is_cased:
                return space.w_False
            previous_is_cased = True
            cased = True
        elif ch.islower():
            if not previous_is_cased:
                return space.w_False
            cased = True
        else:
            previous_is_cased = False

    return space.newbool(cased)

def str_upper__String(space, w_self):
    self = w_self._value
    return space.wrap(self.upper())

def str_lower__String(space, w_self):
    self = w_self._value
    return space.wrap(self.lower())

def str_swapcase__String(space, w_self):
    self = w_self._value
    res = [' '] * len(self)
    for i in range(len(self)):
        ch = self[i]
        if ch.isupper():
            o = ord(ch) + 32
            res[i] = chr(o)
        elif ch.islower():
            o = ord(ch) - 32
            res[i] = chr(o)
        else:
            res[i] = ch

    return space.wrap("".join(res))

    
def str_capitalize__String(space, w_self):
    input = w_self._value
    buffer = [' '] * len(input)
    if len(input) > 0:
        ch = input[0]
        if ch.islower():
            o = ord(ch) - 32
            buffer[0] = chr(o)
        else:
            buffer[0] = ch

        for i in range(1, len(input)):
            ch = input[i]
            if ch.isupper():
                o = ord(ch) + 32
                buffer[i] = chr(o)
            else:
                buffer[i] = ch

    return space.wrap("".join(buffer))
         
def str_title__String(space, w_self):
    input = w_self._value
    buffer = [' '] * len(input)
    prev_letter=' '

    for pos in range(0, len(input)):
        ch = input[pos]
        if not prev_letter.isalpha():
            buffer[pos] = _upper(ch)
        else:
            buffer[pos] = _lower(ch)

        prev_letter = buffer[pos]

    return space.wrap("".join(buffer))

def str_split__String_None_ANY(space, w_self, w_none, w_maxsplit=-1):
    maxsplit = space.int_w(w_maxsplit)
    res_w = []
    value = w_self._value
    length = len(value)
    i = 0
    while True:
        # find the beginning of the next word
        while i < length:
            if not value[i].isspace():
                break   # found
            i += 1
        else:
            break  # end of string, finished

        # find the end of the word
        if maxsplit == 0:
            j = length   # take all the rest of the string
        else:
            j = i + 1
            while j < length and not value[j].isspace():
                j += 1
            maxsplit -= 1   # NB. if it's already < 0, it stays < 0

        # the word is value[i:j]
        res_w.append(sliced(space, value, i, j, w_self))

        # continue to look from the character following the space after the word
        i = j + 1

    return space.newlist(res_w)

def str_split__String_String_ANY(space, w_self, w_by, w_maxsplit=-1):
    maxsplit = space.int_w(w_maxsplit)
    value = w_self._value
    by = w_by._value
    bylen = len(by)
    if bylen == 0:
        raise OperationError(space.w_ValueError, space.wrap("empty separator"))

    res_w = []
    start = 0
    while maxsplit != 0:
        next = value.find(by, start)
        if next < 0:
            break
        res_w.append(sliced(space, value, start, next, w_self))
        start = next + bylen
        maxsplit -= 1   # NB. if it's already < 0, it stays < 0
    
    res_w.append(sliced(space, value, start, len(value), w_self))
    return space.newlist(res_w)

def str_rsplit__String_None_ANY(space, w_self, w_none, w_maxsplit=-1):
    maxsplit = space.int_w(w_maxsplit)
    res_w = []
    value = w_self._value
    i = len(value)-1
    while True:
        # starting from the end, find the end of the next word
        while i >= 0:
            if not value[i].isspace():
                break   # found
            i -= 1
        else:
            break  # end of string, finished

        # find the start of the word
        # (more precisely, 'j' will be the space character before the word)
        if maxsplit == 0:
            j = -1   # take all the rest of the string
        else:
            j = i - 1
            while j >= 0 and not value[j].isspace():
                j -= 1
            maxsplit -= 1   # NB. if it's already < 0, it stays < 0

        # the word is value[j+1:i+1]
        j1 = j + 1
        assert j1 >= 0
        res_w.append(sliced(space, value, j1, i+1, w_self))

        # continue to look from the character before the space before the word
        i = j - 1

    res_w.reverse()
    return space.newlist(res_w)

def make_rsplit_with_delim(funcname, sliced):
    from pypy.tool.sourcetools import func_with_new_name

    def fn(space, w_self, w_by, w_maxsplit=-1):
        maxsplit = space.int_w(w_maxsplit)
        res_w = []
        value = w_self._value
        end = len(value)
        by = w_by._value
        bylen = len(by)
        if bylen == 0:
            raise OperationError(space.w_ValueError, space.wrap("empty separator"))

        while maxsplit != 0:
            next = value.rfind(by, 0, end)
            if next < 0:
                break
            res_w.append(sliced(space, value, next+bylen, end, w_self))
            end = next
            maxsplit -= 1   # NB. if it's already < 0, it stays < 0

        res_w.append(sliced(space, value, 0, end, w_self))
        res_w.reverse()
        return space.newlist(res_w)
    
    return func_with_new_name(fn, funcname)

str_rsplit__String_String_ANY = make_rsplit_with_delim('str_rsplit__String_String_ANY',
                                                       sliced)

def str_join__String_ANY(space, w_self, w_list):
    list_w = space.listview(w_list)
    if list_w:
        self = w_self._value
        reslen = 0
        for i in range(len(list_w)):
            w_s = list_w[i]
            if not space.is_true(space.isinstance(w_s, space.w_str)):
                if space.is_true(space.isinstance(w_s, space.w_unicode)):
                    # we need to rebuild w_list here, because the original
                    # w_list might be an iterable which we already consumed
                    w_list = space.newlist(list_w)
                    w_u = space.call_function(space.w_unicode, w_self)
                    return space.call_method(w_u, "join", w_list)
                raise operationerrfmt(
                    space.w_TypeError,
                    "sequence item %d: expected string, %s "
                    "found", i, space.type(w_s).getname(space, '?'))
            reslen += len(space.str_w(w_s))
        reslen += len(self) * (len(list_w) - 1)
        sb = StringBuilder(reslen)
        for i in range(len(list_w)):
            if self and i != 0:
                sb.append(self)
            sb.append(space.str_w(list_w[i]))
        return space.wrap(sb.build())
    else:
        return W_StringObject.EMPTY

def str_rjust__String_ANY_ANY(space, w_self, w_arg, w_fillchar):
    u_arg = space.int_w(w_arg)
    u_self = w_self._value
    fillchar = space.str_w(w_fillchar)
    if len(fillchar) != 1:
        raise OperationError(space.w_TypeError,
            space.wrap("rjust() argument 2 must be a single character"))
    
    d = u_arg - len(u_self)
    if d>0:
        fillchar = fillchar[0]    # annotator hint: it's a single character
        u_self = d * fillchar + u_self
        
    return space.wrap(u_self)


def str_ljust__String_ANY_ANY(space, w_self, w_arg, w_fillchar):
    u_self = w_self._value
    u_arg = space.int_w(w_arg)
    fillchar = space.str_w(w_fillchar)
    if len(fillchar) != 1:
        raise OperationError(space.w_TypeError,
            space.wrap("ljust() argument 2 must be a single character"))

    d = u_arg - len(u_self)
    if d>0:
        fillchar = fillchar[0]    # annotator hint: it's a single character
        u_self += d * fillchar
        
    return space.wrap(u_self)

def _convert_idx_params(space, w_self, w_sub, w_start, w_end, upper_bound=False):
    self = w_self._value
    sub = w_sub._value
    if upper_bound:
        start = slicetype.adapt_bound(space, len(self), w_start)
        end = slicetype.adapt_bound(space, len(self), w_end)
    else:
        start = slicetype.adapt_lower_bound(space, len(self), w_start)
        end = slicetype.adapt_lower_bound(space, len(self), w_end)
    return (self, sub, start, end)
_convert_idx_params._annspecialcase_ = 'specialize:arg(5)'

def contains__String_String(space, w_self, w_sub):
    self = w_self._value
    sub = w_sub._value
    return space.newbool(self.find(sub) >= 0)

def str_find__String_String_ANY_ANY(space, w_self, w_sub, w_start, w_end):
    (self, sub, start, end) =  _convert_idx_params(space, w_self, w_sub, w_start, w_end)
    res = self.find(sub, start, end)
    return space.wrap(res)

def str_rfind__String_String_ANY_ANY(space, w_self, w_sub, w_start, w_end):
    (self, sub, start, end) =  _convert_idx_params(space, w_self, w_sub, w_start, w_end)
    res = self.rfind(sub, start, end)
    return space.wrap(res)

def str_partition__String_String(space, w_self, w_sub):
    self = w_self._value
    sub = w_sub._value
    if not sub:
        raise OperationError(space.w_ValueError,
                             space.wrap("empty separator"))
    pos = self.find(sub)
    if pos == -1:
        return space.newtuple([w_self, space.wrap(''), space.wrap('')])
    else:
        return space.newtuple([sliced(space, self, 0, pos, w_self),
                               w_sub,
                               sliced(space, self, pos+len(sub), len(self),
                                      w_self)])

def str_rpartition__String_String(space, w_self, w_sub):
    self = w_self._value
    sub = w_sub._value
    if not sub:
        raise OperationError(space.w_ValueError,
                             space.wrap("empty separator"))
    pos = self.rfind(sub)
    if pos == -1:
        return space.newtuple([space.wrap(''), space.wrap(''), w_self])
    else:
        return space.newtuple([sliced(space, self, 0, pos, w_self),
                               w_sub,
                               sliced(space, self, pos+len(sub), len(self), w_self)])


def str_index__String_String_ANY_ANY(space, w_self, w_sub, w_start, w_end):
    (self, sub, start, end) =  _convert_idx_params(space, w_self, w_sub, w_start, w_end)
    res = self.find(sub, start, end)
    if res < 0:
        raise OperationError(space.w_ValueError,
                             space.wrap("substring not found in string.index"))

    return space.wrap(res)


def str_rindex__String_String_ANY_ANY(space, w_self, w_sub, w_start, w_end):
    (self, sub, start, end) =  _convert_idx_params(space, w_self, w_sub, w_start, w_end)
    res = self.rfind(sub, start, end)
    if res < 0:
        raise OperationError(space.w_ValueError,
                             space.wrap("substring not found in string.rindex"))

    return space.wrap(res)

def _string_replace(space, input, sub, by, maxsplit):
    if maxsplit == 0:
        return space.wrap(input)

    #print "from replace, input: %s, sub: %s, by: %s" % (input, sub, by)

    if not sub:
        upper = len(input)
        if maxsplit > 0 and maxsplit < upper + 2:
            upper = maxsplit - 1
            assert upper >= 0
        substrings_w = [""]
        for i in range(upper):
            c = input[i]
            substrings_w.append(c)
        substrings_w.append(input[upper:])
    else:
        start = 0
        sublen = len(sub)
        substrings_w = []

        while maxsplit != 0:
            next = input.find(sub, start)
            if next < 0:
                break
            substrings_w.append(input[start:next])
            start = next + sublen
            maxsplit -= 1   # NB. if it's already < 0, it stays < 0
            
        substrings_w.append(input[start:])

    try:
        # XXX conservative estimate. If your strings are that close
        # to overflowing, bad luck.
        one = ovfcheck(len(substrings_w) * len(by))
        ovfcheck(one + len(input))
    except OverflowError:
        raise OperationError(
            space.w_OverflowError, 
            space.wrap("replace string is too long"))
    
    return space.wrap(by.join(substrings_w))
    

def str_replace__String_ANY_ANY_ANY(space, w_self, w_sub, w_by, w_maxsplit):
    return _string_replace(space, w_self._value, space.buffer_w(w_sub).as_str(),
                           space.buffer_w(w_by).as_str(),
                           space.int_w(w_maxsplit))

def str_replace__String_String_String_ANY(space, w_self, w_sub, w_by, w_maxsplit=-1):
    input = w_self._value
    sub = w_sub._value
    by = w_by._value
    maxsplit = space.int_w(w_maxsplit)
    return _string_replace(space, input, sub, by, maxsplit)

def _strip(space, w_self, w_chars, left, right):
    "internal function called by str_xstrip methods"
    u_self = w_self._value
    u_chars = w_chars._value
    
    lpos = 0
    rpos = len(u_self)
    
    if left:
        #print "while %d < %d and -%s- in -%s-:"%(lpos, rpos, u_self[lpos],w_chars)
        while lpos < rpos and u_self[lpos] in u_chars:
           lpos += 1
       
    if right:
        while rpos > lpos and u_self[rpos - 1] in u_chars:
           rpos -= 1
       
    assert rpos >= lpos    # annotator hint, don't remove
    return sliced(space, u_self, lpos, rpos, w_self)

def _strip_none(space, w_self, left, right):
    "internal function called by str_xstrip methods"
    u_self = w_self._value
    
    lpos = 0
    rpos = len(u_self)
    
    if left:
        #print "while %d < %d and -%s- in -%s-:"%(lpos, rpos, u_self[lpos],w_chars)
        while lpos < rpos and u_self[lpos].isspace():
           lpos += 1
       
    if right:
        while rpos > lpos and u_self[rpos - 1].isspace():
           rpos -= 1
       
    assert rpos >= lpos    # annotator hint, don't remove
    return sliced(space, u_self, lpos, rpos, w_self)

def str_strip__String_String(space, w_self, w_chars):
    return _strip(space, w_self, w_chars, left=1, right=1)

def str_strip__String_None(space, w_self, w_chars):
    return _strip_none(space, w_self, left=1, right=1)
   
def str_rstrip__String_String(space, w_self, w_chars):
    return _strip(space, w_self, w_chars, left=0, right=1)

def str_rstrip__String_None(space, w_self, w_chars):
    return _strip_none(space, w_self, left=0, right=1)

   
def str_lstrip__String_String(space, w_self, w_chars):
    return _strip(space, w_self, w_chars, left=1, right=0)

def str_lstrip__String_None(space, w_self, w_chars):
    return _strip_none(space, w_self, left=1, right=0)


def str_center__String_ANY_ANY(space, w_self, w_arg, w_fillchar):
    u_self = w_self._value
    u_arg  = space.int_w(w_arg)
    fillchar = space.str_w(w_fillchar)
    if len(fillchar) != 1:
        raise OperationError(space.w_TypeError,
            space.wrap("center() argument 2 must be a single character"))

    d = u_arg - len(u_self) 
    if d>0:
        offset = d//2 + (d & u_arg & 1)
        fillchar = fillchar[0]    # annotator hint: it's a single character
        u_centered = offset * fillchar + u_self + (d - offset) * fillchar
    else:
        u_centered = u_self

    return wrapstr(space, u_centered)

def str_count__String_String_ANY_ANY(space, w_self, w_arg, w_start, w_end): 
    u_self, u_arg, u_start, u_end = _convert_idx_params(space, w_self, w_arg,
                                                        w_start, w_end)
    return wrapint(space, u_self.count(u_arg, u_start, u_end))

def str_endswith__String_String_ANY_ANY(space, w_self, w_suffix, w_start, w_end):
    (u_self, suffix, start, end) = _convert_idx_params(space, w_self,
                                                       w_suffix, w_start,
                                                       w_end, True)
    return space.newbool(stringendswith(u_self, suffix, start, end))

def str_endswith__String_Tuple_ANY_ANY(space, w_self, w_suffixes, w_start, w_end):
    (u_self, _, start, end) = _convert_idx_params(space, w_self,
                                                  space.wrap(''), w_start,
                                                  w_end, True)
    for w_suffix in space.fixedview(w_suffixes):
        if space.is_true(space.isinstance(w_suffix, space.w_unicode)):
            w_u = space.call_function(space.w_unicode, w_self)
            return space.call_method(w_u, "endswith", w_suffixes, w_start,
                                     w_end)
        suffix = space.str_w(w_suffix) 
        if stringendswith(u_self, suffix, start, end):
            return space.w_True
    return space.w_False

def str_startswith__String_String_ANY_ANY(space, w_self, w_prefix, w_start, w_end):
    (u_self, prefix, start, end) = _convert_idx_params(space, w_self,
                                                       w_prefix, w_start,
                                                       w_end, True)
    return space.newbool(stringstartswith(u_self, prefix, start, end))

def str_startswith__String_Tuple_ANY_ANY(space, w_self, w_prefixes, w_start, w_end):
    (u_self, _, start, end) = _convert_idx_params(space, w_self, space.wrap(''),
                                                  w_start, w_end, True)
    for w_prefix in space.fixedview(w_prefixes):
        if space.is_true(space.isinstance(w_prefix, space.w_unicode)):
            w_u = space.call_function(space.w_unicode, w_self)
            return space.call_method(w_u, "startswith", w_prefixes, w_start,
                                     w_end)
        prefix = space.str_w(w_prefix)
        if stringstartswith(u_self, prefix, start, end):
            return space.w_True
    return space.w_False
    
def _tabindent(u_token, u_tabsize):
    "calculates distance behind the token to the next tabstop"
    
    distance = u_tabsize
    if u_token:    
        distance = 0
        offset = len(u_token)

        while 1:
            #no sophisticated linebreak support now, '\r' just for passing adapted CPython test
            if u_token[offset-1] == "\n" or u_token[offset-1] == "\r":
                break;
            distance += 1
            offset -= 1
            if offset == 0:
                break
                
        #the same like distance = len(u_token) - (offset + 1)
        #print '<offset:%d distance:%d tabsize:%d token:%s>' % (offset, distance, u_tabsize, u_token)
        distance = (u_tabsize-distance) % u_tabsize
        if distance == 0:
            distance=u_tabsize

    return distance    
    
    
def str_expandtabs__String_ANY(space, w_self, w_tabsize):   
    u_self = w_self._value
    u_tabsize  = space.int_w(w_tabsize)
    
    u_expanded = ""
    if u_self:
        split = u_self.split("\t")
        u_expanded =oldtoken = split.pop(0)

        for token in split:  
            #print  "%d#%d -%s-" % (_tabindent(oldtoken,u_tabsize), u_tabsize, token)
            u_expanded += " " * _tabindent(oldtoken,u_tabsize) + token
            oldtoken = token
            
    return wrapstr(space, u_expanded)        
 
 
def str_splitlines__String_ANY(space, w_self, w_keepends):
    u_keepends  = space.int_w(w_keepends)  # truth value, but type checked
    data = w_self._value
    selflen = len(data)
    strs_w = []
    i = j = 0
    while i < selflen:
        # Find a line and append it
        while i < selflen and data[i] != '\n' and data[i] != '\r':
            i += 1
        # Skip the line break reading CRLF as one line break
        eol = i
        i += 1
        if i < selflen and data[i-1] == '\r' and data[i] == '\n':
            i += 1
        if u_keepends:
            eol = i
        strs_w.append(sliced(space, data, j, eol, w_self))
        j = i

    if j < selflen:
        strs_w.append(sliced(space, data, j, len(data), w_self))
    return space.newlist(strs_w)

def str_zfill__String_ANY(space, w_self, w_width):
    input = w_self._value
    width = space.int_w(w_width)

    if len(input) >= width:
        # cannot return w_self, in case it is a subclass of str
        return space.wrap(input)

    buf = [' '] * width
    if len(input) > 0 and (input[0] == '+' or input[0] == '-'):
        buf[0] = input[0]
        start = 1
        middle = width - len(input) + 1
    else:
        start = 0
        middle = width - len(input)

    for i in range(start, middle):
        buf[i] = '0'

    for i in range(middle, width):
        buf[i] = input[start]
        start = start + 1
    
    return space.wrap("".join(buf))

def str_w__String(space, w_str):
    return w_str._value

def hash__String(space, w_str):
    s = w_str._value
    x = compute_hash(s)
    return wrapint(space, x)

def lt__String_String(space, w_str1, w_str2):
    s1 = w_str1._value
    s2 = w_str2._value
    if s1 < s2:
        return space.w_True
    else:
        return space.w_False    

def le__String_String(space, w_str1, w_str2):
    s1 = w_str1._value
    s2 = w_str2._value
    if s1 <= s2:
        return space.w_True
    else:
        return space.w_False

def eq__String_String(space, w_str1, w_str2):
    s1 = w_str1._value
    s2 = w_str2._value
    if s1 == s2:
        return space.w_True
    else:
        return space.w_False

def ne__String_String(space, w_str1, w_str2):
    s1 = w_str1._value
    s2 = w_str2._value
    if s1 != s2:
        return space.w_True
    else:
        return space.w_False

def gt__String_String(space, w_str1, w_str2):
    s1 = w_str1._value
    s2 = w_str2._value
    if s1 > s2:
        return space.w_True
    else:
        return space.w_False

def ge__String_String(space, w_str1, w_str2):
    s1 = w_str1._value
    s2 = w_str2._value
    if s1 >= s2:
        return space.w_True
    else:
        return space.w_False

def getitem__String_ANY(space, w_str, w_index):
    ival = space.getindex_w(w_index, space.w_IndexError, "string index")
    str = w_str._value
    slen = len(str)
    if ival < 0:
        ival += slen
    if ival < 0 or ival >= slen:
        raise OperationError(space.w_IndexError,
                             space.wrap("string index out of range"))
    return wrapchar(space, str[ival])

def getitem__String_Slice(space, w_str, w_slice):
    w = space.wrap
    s = w_str._value
    length = len(s)
    start, stop, step, sl = w_slice.indices4(space, length)
    if sl == 0:
        return W_StringObject.EMPTY
    elif step == 1:
        assert start >= 0 and stop >= 0
        return sliced(space, s, start, stop, w_str)
    else:
        str = "".join([s[start + i*step] for i in range(sl)])
    return wrapstr(space, str)

def getslice__String_ANY_ANY(space, w_str, w_start, w_stop):
    s = w_str._value
    start, stop = normalize_simple_slice(space, len(s), w_start, w_stop)
    if start == stop:
        return W_StringObject.EMPTY
    else:
        return sliced(space, s, start, stop, w_str)

def mul_string_times(space, w_str, w_times):
    try:
        mul = space.getindex_w(w_times, space.w_OverflowError)
    except OperationError, e:
        if e.match(space, space.w_TypeError):
            raise FailedToImplement
        raise
    if mul <= 0:
        return W_StringObject.EMPTY
    input = w_str._value
    if len(input) == 1:
        s = input[0] * mul
    else:
        s = string_repeat(input, mul)
    # xxx support again space.config.objspace.std.withstrjoin?
    return W_StringObject(s)

def mul__String_ANY(space, w_str, w_times):
    return mul_string_times(space, w_str, w_times)

def mul__ANY_String(space, w_times, w_str):
    return mul_string_times(space, w_str, w_times)

def add__String_String(space, w_left, w_right):
    right = w_right._value
    left = w_left._value
    return joined2(space, left, right)

def len__String(space, w_str):
    return space.wrap(len(w_str._value))

def str__String(space, w_str):
    if type(w_str) is W_StringObject:
        return w_str
    return wrapstr(space, w_str._value)

def ord__String(space, w_str):
    u_str = w_str._value
    if len(u_str) != 1:
        raise operationerrfmt(
            space.w_TypeError,
            "ord() expected a character, but string "
            "of length %d found", len(u_str))
    return space.wrap(ord(u_str[0]))

def getnewargs__String(space, w_str):
    return space.newtuple([wrapstr(space, w_str._value)])

def repr__String(space, w_str):
    s = w_str._value

    buf = StringBuilder(50)

    quote = "'"
    if quote in s and '"' not in s:
        quote = '"'

    buf.append(quote)
    startslice = 0

    for i in range(len(s)):
        c = s[i]
        use_bs_char = False # character quoted by backspace

        if c == '\\' or c == quote:
            bs_char = c
            use_bs_char = True
        elif c == '\t':
            bs_char = 't'
            use_bs_char = True
        elif c == '\r':
            bs_char = 'r'
            use_bs_char = True
        elif c == '\n':
            bs_char = 'n'
            use_bs_char = True
        elif not '\x20' <= c < '\x7f':
            n = ord(c)
            if i != startslice:
                buf.append_slice(s, startslice, i)
            startslice = i + 1
            buf.append('\\x')
            buf.append("0123456789abcdef"[n>>4])
            buf.append("0123456789abcdef"[n&0xF])

        if use_bs_char:
            if i != startslice:
                buf.append_slice(s, startslice, i)
            startslice = i + 1
            buf.append('\\')
            buf.append(bs_char)

    if len(s) != startslice:
        buf.append_slice(s, startslice, len(s))

    buf.append(quote)

    return space.wrap(buf.build())

   
def str_translate__String_ANY_ANY(space, w_string, w_table, w_deletechars=''):
    """charfilter - unicode handling is not implemented
    
    Return a copy of the string where all characters occurring 
    in the optional argument deletechars are removed, and the 
    remaining characters have been mapped through the given translation table, 
    which must be a string of length 256"""

    # XXX CPython accepts buffers, too, not sure what we should do
    table = space.str_w(w_table)
    if len(table) != 256:
        raise OperationError(
            space.w_ValueError,
            space.wrap("translation table must be 256 characters long"))

    string = w_string._value
    chars = []
    deletechars = space.str_w(w_deletechars)
    if len(deletechars) == 0:
        for char in string:
            chars.append(table[ord(char)])
    else:
        deletion_table = [False] * 256
        for c in deletechars:
            deletion_table[ord(c)] = True
        for char in string:
            if not deletion_table[ord(char)]:
                chars.append(table[ord(char)])
    return W_StringObject(''.join(chars))

def str_decode__String_ANY_ANY(space, w_string, w_encoding=None, w_errors=None):
    from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
        unicode_from_string, decode_object
    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
    if encoding is None and errors is None:
        return unicode_from_string(space, w_string)
    return decode_object(space, w_string, encoding, errors)

def str_encode__String_ANY_ANY(space, w_string, w_encoding=None, w_errors=None):
    from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
        encode_object
    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
    return encode_object(space, w_string, encoding, errors)

# CPython's logic for deciding if  ""%values  is
# an error (1 value, 0 %-formatters) or not
# (values is of a mapping type)
def mod__String_ANY(space, w_format, w_values):
    return mod_format(space, w_format, w_values, do_unicode=False)

def buffer__String(space, w_string):
    return space.wrap(StringBuffer(w_string._value))

# register all methods
from pypy.objspace.std import stringtype
register_all(vars(), stringtype)