Attempting to parse free-form ANSI text.

Frederic Rentsch anthra.norell at vtxmail.ch
Mon Oct 23 15:16:44 EDT 2006


Frederic Rentsch wrote:
> Paul McGuire wrote:
>   
>> "Michael B. Trausch" <"mike$#at^&nospam!%trauschus"> wrote in message 
>>     

Sorry about the line-wrap mess in the previous message. I'll try again with 
another setting:

Frederic

######################################################################

The following code does everything Mike needs to do, except interact 
with wx. It is written to run stand-alone. To incorporate it into 
Mike's class, the functions would become methods and the globals would 
become instance attributes. Running it does this:

>>> chunk_1 = """This is a test string containing some ANSI sequences.
Sequence 1 Valid code, invalid numbers: \x1b[10;12mEnd of sequence 1
Sequence 2 Not an 'm'-code: \x1b[30;4;77hEnd of sequence 2
Sequence 3 Color setting code: \x1b[30;45mEnd of sequence 3
Sequence 4 Parameter setting code: \x1b[7mEnd of sequence 4
Sequence 5 Color setting code spanning calls: \x1b[3"""

>>> chunk_2 = """7;42mEnd of sequence 5
Sequence 6 Invalid code: \x1b[End of sequence 6
Sequence 7 A valid code at the end: \x1b[9m
"""

>>> init ()
>>> process_text (chunk_1)
>>> process_text (chunk_2)
>>> print output

This is a test string containing some ANSI sequences.
Sequence 1 Valid code, invalid numbers:  >>!!!Ignoring unknown number 10!!!<<  >>!!!Ignoring unknown number 1!!!<< End of sequence 1
Sequence 2 Not an 'm'-code: End of sequence 2
Sequence 3 Color setting code:  >>setting foreground BLACK<<  >>setting background MAGENTA<< End of sequence 3
Sequence 4 Parameter setting code:  >>Calling parameter setting function 7<< End of sequence 4
Sequence 5 Color setting code spanning calls:  >>setting foreground GREY<<  >>setting background GREEN<< End of sequence 5
Sequence 6 Invalid code: nd of sequence 6
Sequence 7 A valid code at the end:  >>Calling parameter setting function 9<<


#################


def init():              # To add to AnsiTextCtrl.__init__ ()
    """Create the module-level state that process_text() and _set_wx() use.

    Builds the SE pre-processor and the SE color-number translator, compiles
    the digit-isolating regular expression, and resets the carry-over buffer
    for truncated escapes as well as the test output string.

    Must be called once before the first call to process_text().
    """
    # Imported locally: digits_re below needs it and no import of re is
    # visible elsewhere in this listing.
    import re
    # SEL is less import overhead but doesn't have interactive development
    # features (not needed in production versions)
    import SE

    global output  #-> For testing
    # global -> instance attributes
    global Pre_Processor, digits_re, Colors, truncated_escape_hold

    # Screening out all ansi escape sequences except those controlling color
    # Makes 127 fixed expressions plus delete \r
    # Regular expression r'[\x80-\xff\r]' would work fine but is four times
    # slower than 127 fixed expressions
    # NOTE(review): range (128, 255) stops at 254, so code 255 gets no
    # expression although the regex mentioned above includes \xff - confirm.
    grit = '\n'.join(['(%d)=' % i for i in range(128, 255)]) + ' (13)= '
    all_escapes = r'\x1b\[\d*(;\d*)*[A-Za-z]'
    color_escapes = r'\x1b\[\d*(;\d*)*m'
    # SEL.SEL for production
    # 'all_escapes' also matches what 'color_escapes' matches. With identical
    # regular expression matches the last regex definitions applies.
    Pre_Processor = SE.SE('%s ~%s~= ~%s~==' % (grit, all_escapes, color_escapes))

    # Isolating digits.
    digits_re = re.compile(r'\d+')

    # Color numbers to color names
    # (the definition text is kept exactly as written, whitespace included)
    Colors = SE.SE('''
      30=BLACK    40=BLACK
      31=RED      41=RED
      32=GREEN    42=GREEN
      33=YELLOW   43=YELLOW
      34=BLUE     44=BLUE
      35=MAGENTA  45=MAGENTA
      36=CYAN     46=CYAN
      37=GREY     47=GREY
      39=GREY     49=BLACK
      <EAT>
  ''')

    truncated_escape_hold = ''  #-> self.truncated_escape_hold
    output = ''                 #-> For testing only


# What follows replaces all of Mike's other methods

def process_text(text):
    """Process one chunk of incoming text, acting on its ANSI color codes.

    The chunk is joined to any escape fragment held back from the previous
    call, run through Pre_Processor, and split at ESC. Plain text is appended
    to the global 'output' (stand-in for self.AppendText); every number found
    in an 'm' escape is passed to _set_wx().

    An escape that is cut off at the end of the chunk is stored in the global
    'truncated_escape_hold' and completed by the next call.

    init() must have been called first.
    """
    global output  #-> For testing
    global truncated_escape_hold, digits_re, Pre_Processor, Colors

    # Take over the held fragment and clear the hold, so that it is used
    # exactly once instead of being prepended again on every later call.
    pending = truncated_escape_hold
    truncated_escape_hold = ''

    # Pre-process fragment and new text together: an escape that straddles
    # two chunks is only recognisable once both halves are joined.
    purged_text = Pre_Processor(pending + text)
    # Text is now clean except for color codes beginning with ESC

    ansi_controlled_sections = purged_text.split('\x1b')
    # Each ansi_controlled_section starts with a color control, except the
    # first one (leftmost split-off)

    #-> self.AppendText(ansi_controlled_sections [0])   #-> For real
    output += ansi_controlled_sections[0]               #-> For testing

    last_index = len(ansi_controlled_sections) - 1
    for index in range(1, len(ansi_controlled_sections)):
        section = ansi_controlled_sections[index]
        if section == '':
            # An ESC as the very last character is the start of an escape
            # whose remainder arrives with the next chunk: hold it.
            if index == last_index:
                truncated_escape_hold = '\x1b'
            continue
        try:
            escape_part, data = section.split('m', 1)
        except ValueError:   # Truncated escape
            # Restore ESC removed by split ('\x1b')
            truncated_escape_hold = '\x1b' + section
            continue
        for escape in escape_part.split(';'):
            match = digits_re.search(escape)
            if match is None:
                output += ' >>!!!Invalid number %s!!!<<< ' % escape   #-> For testing
                continue
            _set_wx(match.group())
        #-> self.AppendText(data)     #-> For real
        output += data                #-> For testing


def _set_wx(n):
    """Act on one number taken from an ANSI 'm' escape sequence.

    n is the number as a string of digits. Numbers 0 to 9 select a parameter
    setting function; numbers 30 to 49 that Colors translates to a name set
    the foreground (30-39) or background (40-49) color. Anything else is
    reported as unknown. In this test version every action is only recorded
    by appending a message to the global 'output'.
    """
    global output  # For testing only
    global Colors

    int_n = int(n)
    if 0 <= int_n <= 9:
        # NOTE(review): the table shown for the real version has no entries
        # for '5' and '6', so the lookup below would fail for them - confirm.
        #-> self._number_to_method [n](self)                          #-> For real
        output += ' >>Calling parameter setting function %s<< ' % n   #-> For testing
        return

    color = Colors(n)
    # The range check is required in addition to the translation result:
    # only numbers 30 to 49 are treated as color codes.
    if color and 30 <= int_n < 50:
        if int_n >= 40:
            #-> self.AnsiBGColor = color                              #-> For real
            output += ' >>setting background %s<< ' % color           #-> For testing
        else:
            #-> self.AnsiFGColor = color                              #-> For real
            output += ' >>setting foreground %s<< ' % color           #-> For testing
        return

    output += ' >>!!!Ignoring unknown number %s!!!<< ' % n            #-> For testing


#-> For real requires this in addition:
#->
#->   # Methods controlled by 'm' code 0 to 9:  # Presumably 'm'?
#->
#->   def _0 (self):
#->      self.AnsiFGColor = 'GREY'
#->      self.AnsiBGColor = 'BLACK'
#->      self.AnsiFontSize = 9
#->      self.AnsiFontFamily = wx.FONTFAMILY_TELETYPE
#->      self.AnsiFontStyle = wx.FONTSTYLE_NORMAL
#->      self.AnsiFontWeight = wx.FONTWEIGHT_NORMAL
#->      self.AnsiFontUnderline = False
#->
#->   def  _1 (self): self.AnsiFontWeight = wx.FONTWEIGHT_BOLD
#->   def  _2 (self): self.AnsiFontWeight = wx.FONTWEIGHT_LIGHT
#->   def  _3 (self): self.AnsiFontStyle = wx.FONTSTYLE_ITALIC
#->   def  _4 (self): self.AnsiFontUnderline = True
#->   def  _5 (self): pass
#->   def  _7 (self): self.AnsiFGColor, self.AnsiBGColor = self.AnsiBGColor, self.AnsiFGColor
#->   def  _8 (self): self.AnsiFGColor = self.AnsiBGColor
#->   def  _9 (self): pass
#->
#->
#->   _number_to_method = {
#->      '0' : _0,
#->      '1' : _1,
#->      '2' : _2,
#->      '3' : _3,
#->      '4' : _4,
#->      '7' : _7,
#->      '8' : _8,
#->      '9' : _9,
#->   }





More information about the Python-list mailing list