[I18n-sig] pygettext dilemma
Bruno Haible
haible@ilog.fr
Fri, 27 Jul 2001 19:38:52 +0200 (CEST)
Barry A. Warsaw wrote:
> I tried to extract the two classes of files in two separate
> pygettext.py steps
That's most reasonable. It allows you to use different
xgettext/pygettext arguments for the two sets of files.
> but had trouble merging the resulting files. You
> can't merge them with msgmerge because that program seems to just drop
> all the entries from the second file (I'm guessing since there's no
> overlap between the first and second files).
msgcomm is not really made for this task. gettext-0.11 will contain an
'msgcat' command, which works well for these cases.
In the meantime, I can recommend to 'cat' the two pot files and run
'msguniq' on the result. 'msguniq' will also be in gettext-0.11, but
here is an equivalent implementation in a Python like language (<g>).
Bruno
============================ msguniq =============================
#!/usr/local/bin/clisp -C
;;; Remove duplicates in message catalogs.
;;; Bruno Haible 28.3.1997
;; This could roughly be implemented as
;; cp INPUT temp1
;; cp INPUT temp2
;; msgcomm --more-than=1 -w 1000 -o OUTPUT temp1 temp2
;; but this has the drawback that
;; - msgcomm doesn't seem to be made for this.
;; This could also be roughly implemented as
;; xgettext -d - --omit-header -w 1000 INPUT > OUTPUT
;; but this has the drawbacks that
;; - it sometimes reverses the list of lines belonging to the hunk,
;; - it removes the header.
;; When gettext-0.11 is releases, this could also be implemented as
;; msguniq INPUT -w 1000 -o OUTPUT
;; without any drawbacks!
;; Additionally, messages translations in OLD override the ones in INPUT.
(defstruct message
lines ; list of all lines belonging to the hunk
msgid ; nil or a string
msgstr ; nil or a string
occurs ; list of strings "file:nn" where the message occurs
)
(defun main (infilename outfilename &optional oldfilename)
(declare (type string infilename outfilename))
#+UNICODE (setq *default-file-encoding* charset:iso-8859-1)
(let ((hunk-list nil) ; list of all hunks
(hunk-table (make-hash-table :test #'equal))
; (gethash msgid hunk-table) is the hunk who has the given msgid
(eof "EOF")
)
(flet ((read-hunk (istream) ; reads a hunk, returns nil on eof
(let ((line nil) (lines nil) (occurs nil))
(loop
(setq line (read-line istream nil eof))
(when (eql line eof) (return))
(if (equal line "")
(when lines (return))
(progn
(push line lines)
(when (and (>= (length line) 3) (string= line "#: " :end1 3))
(push (subseq line 3) occurs)
) ) )
)
(when lines
(setq lines (nreverse lines))
(setq occurs (nreverse occurs))
(flet ((line-group (id &aux (idlen (length id)))
(let ((l (member-if
#'(lambda (line)
(and (>= (length line) idlen)
(string= line id :end1 idlen)
) )
lines
)) )
(when l
(setq l (cons (subseq (car l) idlen) (cdr l)))
(let ((i (position-if-not
#'(lambda (line)
(and (plusp (length line))
(eql (char line 0) #\")
) )
l
)) )
(subseq l 0 i)
)) ) ) )
(let ((msgid (line-group "msgid "))
(msgstr (line-group "msgstr ")))
(make-message :lines lines
:msgid msgid
:msgstr msgstr
:occurs occurs
) ) ) )
)) )
(with-open-file (istream infilename :direction :input)
(loop
(let ((hunk (read-hunk istream)))
(unless hunk (return))
(if (null (message-msgid hunk))
(push hunk hunk-list)
(let ((other-hunk (gethash (message-msgid hunk) hunk-table)))
(if (not other-hunk)
(progn
(push hunk hunk-list)
(setf (gethash (message-msgid hunk) hunk-table) hunk)
)
(progn
(unless (equal (message-msgstr hunk)
(message-msgstr other-hunk)
)
(warn "Same message, different translations: ~A and ~A"
(message-occurs hunk) (message-occurs other-hunk)
) )
(setf (message-occurs other-hunk)
(append (message-occurs other-hunk)
(message-occurs hunk)
) )
) ) ) ) ) )
(setq hunk-list (nreverse hunk-list))
)
(when oldfilename
(with-open-file (istream oldfilename :direction :input)
(loop
(let ((hunk (read-hunk istream)))
(unless hunk (return))
(unless (null (message-msgid hunk))
(let ((other-hunk (gethash (message-msgid hunk) hunk-table)))
(when other-hunk
(setf (message-msgstr other-hunk) (message-msgstr hunk))
) ) ) ) ) ) )
(with-open-file (ostream outfilename :direction :output)
(flet ((print-hunk (hunklistr)
(let* ((hunk (car hunklistr))
(lines (message-lines hunk))
(msgid (message-msgid hunk))
(msgstr (message-msgstr hunk))
(occurs (message-occurs hunk)))
(dolist (line lines)
(cond ((and (>= (length line) 3) (string= line "#: " :end1 3))
(when occurs
(format ostream "#: ~{~A~^ ~}~%" occurs)
(setq occurs nil)
))
((and (>= (length line) 1) (string= line "#" :end1 1))
(format ostream "~A~%" line)
)
((and (>= (length line) 6) (string= line "msgid " :end1 6))
(format ostream "msgid ~{~A~%~}" msgid)
)
((and (>= (length line) 7) (string= line "msgstr " :end1 7))
(format ostream "msgstr ~{~A~%~}" msgstr)
)
) )
(when (cdr hunklistr) (format ostream "~%"))
)) )
(mapl #'print-hunk hunk-list)
)
) ) ) )
(main (first *args*) (second *args*) (third *args*))