[I18n-sig] pygettext dilemma

Bruno Haible haible@ilog.fr
Fri, 27 Jul 2001 19:38:52 +0200 (CEST)


Barry A. Warsaw wrote:
> I tried to extract the two classes of files in two separate
> pygettext.py steps

That's most reasonable. It allows you to use different
xgettext/pygettext arguments for the two sets of files.

> but had trouble merging the resulting files.  You
> can't merge them with msgmerge because that program seems to just drop
> all the entries from the second file (I'm guessing since there's no
> overlap between the first and second files).

msgmerge is not really made for this task. gettext-0.11 will contain an
'msgcat' command, which works well for these cases.

In the meantime, I recommend that you 'cat' the two pot files and run
'msguniq' on the result. 'msguniq' will also be in gettext-0.11, but
here is an equivalent implementation in a Python-like language (<g>).

Bruno

============================ msguniq =============================
#!/usr/local/bin/clisp -C

;;; Remove duplicates in message catalogs.
;;; Bruno Haible 28.3.1997

;; This could roughly be implemented as
;;   cp INPUT temp1
;;   cp INPUT temp2
;;   msgcomm --more-than=1 -w 1000 -o OUTPUT temp1 temp2
;; but this has the drawback that
;;  - msgcomm doesn't seem to be made for this.

;; This could also be roughly implemented as
;;   xgettext -d - --omit-header -w 1000 INPUT > OUTPUT
;; but this has the drawbacks that
;;  - it sometimes reverses the list of lines belonging to the hunk,
;;  - it removes the header.

;; When gettext-0.11 is released, this could also be implemented as
;;   msguniq INPUT -w 1000 -o OUTPUT
;; without any drawbacks!

;; Additionally, message translations in OLD (the optional third argument)
;; override the ones in INPUT.

(defstruct message
  "One hunk of a message catalog: a run of consecutive non-empty lines."
  (lines  nil)   ; every line of the hunk, in file order
  (msgid  nil)   ; nil, or the list of quoted strings following `msgid '
  (msgstr nil)   ; nil, or the list of quoted strings following `msgstr '
  (occurs nil))  ; texts of the `#: ' lines, i.e. where the message occurs

(defun main (infilename outfilename &optional oldfilename)
  "Copy the message catalog INFILENAME to OUTFILENAME, merging duplicate hunks.

A hunk is a run of non-empty lines; hunks are separated by empty lines.
Hunks that have the same msgid are merged into the first of them: its list
of `#:' references is extended by those of the later hunks, and a warning is
issued when the msgstr of a later hunk differs.  Hunks without a msgid are
kept without merging.  When OLDFILENAME is given, each hunk of that file
that has a msgstr replaces the msgstr of the kept hunk with the same msgid.
An existing OUTFILENAME is overwritten."
  (declare (type string infilename outfilename))
  #+UNICODE (setq *default-file-encoding* charset:iso-8859-1)
  (let ((hunk-list nil)                              ; kept hunks
        (hunk-table (make-hash-table :test #'equal)) ; msgid -> kept hunk
        (eof "EOF"))                                 ; eof marker, compared by identity
    (labels ((prefixp (prefix line)
               ;; True if LINE begins with PREFIX.
               (let ((n (length prefix)))
                 (and (>= (length line) n)
                      (string= line prefix :end1 n))))
             (line-group (id lines)
               ;; Finds the first of LINES that begins with ID and returns
               ;; the rest of that line followed by the directly following
               ;; lines, as long as they begin with a double quote.
               ;; Returns nil if there is no such line or if the text after
               ;; ID does not begin with a double quote.
               (let ((tail (member-if #'(lambda (line) (prefixp id line))
                                      lines)))
                 (when tail
                   (let ((group (cons (subseq (car tail) (length id))
                                      (cdr tail))))
                     (subseq group 0
                             (position-if-not
                               #'(lambda (line)
                                   (and (plusp (length line))
                                        (eql (char line 0) #\")))
                               group))))))
             (read-hunk (istream)
               ;; Reads the next hunk from ISTREAM, skipping leading empty
               ;; lines.  Returns a message, or nil at end of file.
               (let ((lines nil) (occurs nil))
                 (loop
                   (let ((line (read-line istream nil eof)))
                     (when (eql line eof) (return))
                     (if (equal line "")
                       (when lines (return))
                       (progn
                         (push line lines)
                         (when (prefixp "#: " line)
                           (push (subseq line 3) occurs))))))
                 (when lines
                   (setq lines (nreverse lines))
                   (make-message :lines lines
                                 :msgid (line-group "msgid " lines)
                                 :msgstr (line-group "msgstr " lines)
                                 :occurs (nreverse occurs)))))
             (print-hunk (hunk ostream)
               ;; Writes HUNK to OSTREAM.  Comment lines are copied, all
               ;; references are written as a single `#: ' line, and the
               ;; msgid and msgstr groups are written from the structure
               ;; (so that a msgstr taken from the old file is used).
               (let ((occurs (message-occurs hunk)))
                 (flet ((flush-occurs ()
                          ;; Writes the references once, if any are pending.
                          (when occurs
                            (format ostream "#: ~{~A~^ ~}~%" occurs)
                            (setq occurs nil)))
                        (print-group (keyword group line)
                          ;; An empty GROUP would print KEYWORD without a
                          ;; newline and glue the next line onto it, so
                          ;; the original LINE is copied instead.
                          (if group
                            (format ostream "~A~{~A~%~}" keyword group)
                            (format ostream "~A~%" line))))
                   (dolist (line (message-lines hunk))
                     (cond ((prefixp "#: " line)
                            (flush-occurs))
                           ((prefixp "#" line)
                            (format ostream "~A~%" line))
                           ((prefixp "msgid " line)
                            ;; The kept hunk may have no `#: ' line of its
                            ;; own while merged duplicates contributed
                            ;; references; write them here so that they
                            ;; are not lost.
                            (flush-occurs)
                            (print-group "msgid " (message-msgid hunk) line))
                           ((prefixp "msgstr " line)
                            (print-group "msgstr " (message-msgstr hunk) line))))))))
      ;; Pass 1: read the input, keeping the first hunk for every msgid.
      (with-open-file (istream infilename :direction :input)
        (loop
          (let ((hunk (read-hunk istream)))
            (unless hunk (return))
            (let* ((msgid (message-msgid hunk))
                   (first-hunk (and msgid (gethash msgid hunk-table))))
              (cond ((null msgid)
                     (push hunk hunk-list))
                    ((null first-hunk)
                     (push hunk hunk-list)
                     (setf (gethash msgid hunk-table) hunk))
                    (t
                     (unless (equal (message-msgstr hunk)
                                    (message-msgstr first-hunk))
                       (warn "Same message, different translations: ~A and ~A"
                             (message-occurs hunk)
                             (message-occurs first-hunk)))
                     (setf (message-occurs first-hunk)
                           (append (message-occurs first-hunk)
                                   (message-occurs hunk)))))))))
      (setq hunk-list (nreverse hunk-list))
      ;; Pass 2: take over the translations of the old file.
      (when oldfilename
        (with-open-file (istream oldfilename :direction :input)
          (loop
            (let ((hunk (read-hunk istream)))
              (unless hunk (return))
              (let ((target (and (message-msgid hunk)
                                 (gethash (message-msgid hunk) hunk-table))))
                ;; An old hunk without a msgstr must not erase the
                ;; translation that is already there.
                (when (and target (message-msgstr hunk))
                  (setf (message-msgstr target) (message-msgstr hunk))))))))
      ;; Pass 3: write the kept hunks, separated by one empty line.
      ;; :if-exists :supersede makes overwriting an existing output file
      ;; independent of the implementation's default.
      (with-open-file (ostream outfilename :direction :output
                                           :if-exists :supersede)
        (mapl #'(lambda (rest)
                  (print-hunk (car rest) ostream)
                  (when (cdr rest) (format ostream "~%")))
              hunk-list)))))

;; Script entry point: the command-line arguments are, in order, the input
;; file, the output file and optionally the old file.  Missing arguments are
;; passed as nil; arguments after the third are ignored.
(destructuring-bind (&optional infilename outfilename oldfilename &rest extra)
    *args*
  (declare (ignore extra))
  (main infilename outfilename oldfilename))