print arabic characters

Peter Otten __peter__ at web.de
Mon Dec 22 13:53:23 EST 2003


Ahmad wrote:

>    I am a python newbie, I want to print on the console UTF-8 arabic
> characters. They print OK with
> print text.encode("UTF-8")
> 
> BUT, the characters are printed LTR, not RTL (right to left). How can
> I change the printing direction??

Are all Arabic characters 2 bytes long in UTF-16? If so, the following
RTLStream class should work in an otherwise left-to-right environment.
Call the script with a -d or --delay parameter to see it working.

<rtl.py>
import sys, time

def utfReverse(s):
    """ Return the UTF-8 encoded string `s` with its characters reversed.

        The text is reversed cluster by cluster rather than code unit by
        code unit: a combining mark stays behind the base character it
        follows, and a UTF-16 surrogate pair (as produced by decoding on
        builds that store unicode as UTF-16) is kept in its original
        order, so characters outside the 2-byte UTF-16 range survive.

        s -- byte string holding UTF-8 encoded text
        Returns a UTF-8 encoded byte string.
        Raises UnicodeDecodeError if `s` is not valid UTF-8.
    """
    # local import so the function does not depend on the module header
    import unicodedata

    u = s.decode("utf-8")
    clusters = []
    for ch in u:
        attach = False
        if clusters:
            if unicodedata.combining(ch):
                # combining mark: belongs to the preceding base character
                attach = True
            elif 0xDC00 <= ord(ch) <= 0xDFFF:
                # low surrogate: only meaningful right after a high one
                attach = 0xD800 <= ord(clusters[-1][-1]) <= 0xDBFF
        if attach:
            clusters[-1] += ch
        else:
            clusters.append(ch)
    clusters.reverse()
    return "".join(clusters).encode("utf-8")

class RTLStream:
    """ Emulate a right-to-left printing console in a
        left-to-right environment.

        Every chunk passed to write() is reversed with utfReverse() and
        appended to the current line, which is then redrawn right-aligned
        in a field of `wrapwidth` columns after a carriage return.

        out       -- target stream; must provide write()
        wrapwidth -- width of the emulated console, in characters
    """
    def __init__(self, out=sys.stdout, wrapwidth=40):
        self.out = out
        self.wrapwidth = wrapwidth
        # UTF-8 encoded text of the line currently being built
        self.curline = ""

    def _write(self, s):
        """ Append the newline-free UTF-8 chunk `s` to the current line
            and redraw that line on the target stream.
        """
        if len(s) == 0: return
        self.curline += utfReverse(s)
        # Measure, wrap and pad in characters, not bytes: slicing the
        # encoded string could cut a multi-byte UTF-8 sequence in half,
        # and byte counts overstate the width of non-ASCII text.
        # Width is counted in code points; combining or double-width
        # characters are not accounted for.
        text = self.curline.decode("utf-8")
        width = self.wrapwidth
        self.out.write("\r")
        # A single long chunk may overflow the line more than once.
        # The width check avoids looping forever on a non-positive width.
        while width > 0 and len(text) > width:
            self.out.write(text[:width].encode("utf-8"))
            self.out.write("\n")
            text = text[width:]
        self.curline = text.encode("utf-8")
        self.out.write(text.rjust(width).encode("utf-8"))

    def _nl(self):
        """ Finish the current line and start an empty one. """
        self.out.write("\n")
        self.curline = ""

    def write(self, s):
        """ Write the UTF-8 string `s`, which may contain newlines.

            The string is treated as reversed as a whole, so its lines
            are emitted last-to-first, each one reversed by _write().
        """
        lines = s.split("\n")
        lines.reverse()
        # every piece except the final one was followed by a newline
        for line in lines[:-1]:
            self._write(line)
            self._nl()
        self._write(lines[-1])

class SlowStream:
    """ Forward output to a target stream one item at a time,
        pausing before each item.

        out   -- target stream; must provide write() and flush()
        delay -- pause in seconds before each item is written
    """
    def __init__(self, out=sys.stdout, delay=0.01):
        self.out = out
        self.delay = delay

    def _emit(self, piece):
        """ Wait, then write and flush a single item. """
        time.sleep(self.delay)
        self.out.write(piece)
        # flush so the item shows up immediately
        self.out.flush()

    def write(self, s):
        """ Write `s` to the target stream item by item. """
        for piece in s:
            self._emit(piece)

if __name__ == "__main__":
    rtlstream = RTLStream(wrapwidth=36)
    args = sys.argv[1:]
    if "--delay" in  args or "-d" in args:
        rtlstream.out = SlowStream()

    # always save a copy of the original stdout
    orig_stdout = sys.stdout

    # redirect stdout
    sys.stdout = rtlstream

    print "sella ow",
    print "tsieweb kc\xc3\xbclg hcrud hcis"
    print "dnu", "thcsuat",
    print "kcilb ned", "hcuregniew mi\negnir eid thcsuat dnu",
    print "egnid red hcsuar mi"
    print "med kc\xc3\xbclgnegeg med ud tsneid"

    # restore stdout
    sys.stdout = orig_stdout

    # explicit redirection with
    # print >> rtlstream, some_text
    # is usually preferable
    print
    print "back to normal"
    print >> rtlstream, "a saner way to use it"
    print "that's all folks"
</rtl.py>

Disclaimer: As I know nothing about right-to-left printing languages, it's
likely that I have got it at least partially wrong.

Can anybody point me to a way to iterate over characters with a varying
number of bytes? Something like

for c in "Gru\xc3\x9f".characters("utf-8"):
   print repr(c),
#should print 'G' 'r' 'u' '\xc3\x9f' 


Peter




More information about the Python-list mailing list