[Mailman-Developers] a translation checker

Simone Piunno pioppo@ferrara.linux.it
Wed, 15 May 2002 22:20:24 +0200


---------------------- multipart/mixed attachment

Hi everyone,

I'm trying to write a translation checker, a script that walks all
files of a given Mailman translation, searching for inconsistencies
such as wrong variable references, missing tags and so on.

I'm doing this because I'm lazy and periodically reviewing and 
validating a translation is a boring process :)

Attached you can find an early prototype; feel free to mail me
complaints, suggestions and requests.  If you think it's worthwhile,
I could try to generalize it and add better exception handling.

At the moment I have 2 problems and I need your advice:

1. What is the best regexp to use instead of /%\(([^)]+)\)[0-9]*[sd]/
   to search for any Python %(variable)s?

2. Do you know of any existing parser I can use to read mailman.po?

Cheers,
   Simone

-- 
Simone Piunno, FerraraLUG - http://members.ferrara.linux.it/pioppo

---------------------- multipart/mixed attachment
#!/usr/bin/python
#
# transcheck - (c) 2002 by Simone Piunno <pioppo@ferrara.linux.it>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the version 2.0 of the GNU General Public License as
# published by the Free Software Foundation.
# 
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
# 
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., 59
# Temple Place - Suite 330, Boston, MA 02111-1307, USA.

"""
Check a given Mailman translation making sure that variables and tags
referenced in translation are the same variables and tags in the 
original templates and catalog.

Usage:

cd $MAILMAN_DIR
/some/where/transcheck <lang>

Where <lang> is your country code (e.g. 'it' for Italy)
"""

import sys
import re
import os

# Patterns for the two kinds of references a translation must preserve.
# Raw strings keep the regex backslashes out of Python's own string-escape
# processing ("\(" is not a valid string escape).

# An HTML template tag such as <MM-List-Name>; group 1 is the text between
# the angle brackets.
MMTag = re.compile(r"<(MM-[^>]+)>")

# A %(name)s or %(name)d interpolation, optionally with a numeric field
# width (e.g. %(count)5d); group 1 is the variable name.  Other conversion
# types are deliberately not matched.
PythonTag = re.compile(r"%\(([^)]+)\)[0-9]*[sd]")

def _find_tags(path, html):
    """Return every tag reference found in the file *path*, in order.

    Each line is searched with ``PythonTag`` and, when *html* is true,
    also with ``MMTag``.  The file is closed even if reading fails.
    """
    tags = []
    handle = open(path)
    try:
        for line in handle.readlines():
            tags.extend(PythonTag.findall(line))
            if html:
                tags.extend(MMTag.findall(line))
    finally:
        handle.close()
    return tags


def check_file(fileIT, fileEN, html=0):
    """Check a translated template against the original one.

    Arguments:
        fileIT -- path of the translated template
        fileEN -- path of the original (English) template
        html   -- when true, <MM-*> tags are compared in addition to
                  %(name)s / %(name)d references

    Returns a list of human-readable error strings; the list is empty
    when both files reference the same tags the same number of times.

    Raises whatever open() raises if either file cannot be opened.
    """
    # balance[tag] = occurrences in the translation minus occurrences in
    # the original, for every tag that appears in the translation.
    balance = {}
    for tag in _find_tags(fileIT, html):
        balance[tag] = balance.get(tag, 0) + 1

    errs = []
    for tag in _find_tags(fileEN, html):
        count = balance.get(tag)
        if count is None:
            # The translation never mentions this tag; one message is
            # emitted per occurrence in the original.
            errs.append(
                "Tag \"%(tag)s\" was not found in %(file)s" % {
                    'tag'  : tag,
                    'file' : fileIT
                }
            )
        else:
            balance[tag] = count - 1

    # Sorted so that the report is the same from one run to the next.
    tags = list(balance.keys())
    tags.sort()
    for tag in tags:
        if balance[tag] < 0:
            errs.append(
                "There are more \"%(tag)s\" tags in %(en)s than in %(it)s" % {
                    'tag'  : tag,
                    'it' : fileIT,
                    'en' : fileEN
                }
            )
        elif balance[tag] > 0:
            errs.append(
                "There are more \"%(tag)s\" tags in %(it)s than in %(en)s" % {
                    'tag'  : tag,
                    'it' : fileIT,
                    'en' : fileEN
                }
            )
    return errs


def check_po(file):
    """Placeholder check for a message catalog (.po file).

    The file is opened and read through, but its entries are not examined
    yet, so no problem is ever reported.

    Arguments:
        file -- path of the catalog to check

    Returns an empty list of error strings.

    Raises whatever open() raises if the file cannot be opened.
    """
    f = open(file)
    try:
        for line in f.readlines():
            # TODO: parse the .po file here.
            pass
    finally:
        f.close()
    return []

# Command-line driver.  It runs at module level and uses paths relative to
# the current directory: templates/<lang>/, templates/en/ and
# messages/<lang>/LC_MESSAGES/mailman.po.

try:
    lang = sys.argv[1]
except IndexError:
    # Only a missing argument should produce the usage message.
    sys.stdout.write("Usage: transcheck <lang>\n")
    sys.exit(1)

isHtml = re.compile(r"\.html$")
isTxt = re.compile(r"\.txt$")


def _run_check(label, path, checker, *args):
    """Announce one check, run it and print its verdict.

    label + path + "... " is written before checker(*args) is called, so
    the file name is visible while it is being processed.  Afterwards
    "OK" is printed when the checker returned nothing true, otherwise
    "FAILED" followed by one " - " line per error.

    An IOError raised by the checker (for instance a file that cannot be
    opened) is reported as a failure of this file instead of aborting
    the whole run.
    """
    sys.stdout.write(label + path + "... ")
    sys.stdout.flush()
    try:
        errlist = checker(*args)
    except IOError:
        # sys.exc_info() is used instead of naming the exception in the
        # except clause so that this parses on old and new interpreters.
        errlist = [str(sys.exc_info()[1])]
    # The leading space before the verdict is the separator that the
    # print statement's trailing comma used to insert.
    if errlist:
        sys.stdout.write(" FAILED\n")
        for err in errlist:
            sys.stdout.write(" - " + err + "\n")
    else:
        sys.stdout.write(" OK\n")


for name in os.listdir("templates/" + lang + "/"):
    fileEN = "templates/en/" + name
    fileIT = "templates/" + lang + "/" + name
    if isHtml.search(name):
        _run_check("HTML checking ", fileIT, check_file, fileIT, fileEN, 1)
    elif isTxt.search(name):
        _run_check("TXT  checking ", fileIT, check_file, fileIT, fileEN, 0)
    # Files that are neither .html nor .txt are skipped.

po_file = "messages/" + lang + "/LC_MESSAGES/mailman.po"
_run_check("PO   checking ", po_file, check_po, po_file)

---------------------- multipart/mixed attachment--