Viewing file: xml2po (26.42 KB) -rwxr-xr-x Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
#!/usr/bin/python
# Copyright (c) 2004 Danilo Segan .
#
# This file is part of xml2po.
#
# xml2po is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# xml2po is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with xml2po; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# xml2po -- translate XML documents
# Release version string of xml2po; printed by the -v/--version option.
VERSION = "0.8.0"
# Versioning system (I have used this for a long time, so let's explain it to
# those Linux-versioning-scheme addicts):
# 1.0.* are unstable, development versions
# 1.1 will be first stable release (release 1), and 1.1.* bugfix releases
# 2.0.* will be unstable-feature-development stage (milestone 1)
# 2.1.* unstable development betas (milestone 2)
# 2.2 second stable release (release 2), and 2.2.* bugfix releases
# ...
#
import sys
import libxml2
import gettext
import os
import re
class NoneTranslations:
    """Translation catalogue stand-in whose every lookup yields None.

    It exposes the same lookup method names as a gettext translations
    object, so it can be installed as a fallback; a missing message then
    comes back as None instead of the untranslated msgid.
    """

    def _singular(self, message):
        """Lookup of a single message: there is never a translation."""
        return None

    def _plural(self, msgid1, msgid2, n):
        """Lookup of a plural form: there is never a translation."""
        return None

    # All byte-string, locale-encoded and unicode variants behave the same.
    gettext = lgettext = ugettext = _singular
    ngettext = lngettext = ungettext = _plural
class MessageOutput:
    """Collects translatable messages and writes them out in PO format.

    Messages are recorded through outputMessage() and written by
    outputAll().  When constructed with a true `with_translations`, every
    message is kept (duplicates included) and paired, in order, with the
    strings recorded after translationsFollow() has been called.
    """

    def __init__(self, with_translations = 0):
        """Create an empty collector.

        with_translations -- true to pair msgids with collected msgstrs.
        """
        self.messages = []      # escaped msgids, in the order first seen
        self.comments = {}      # msgid -> translator comment
        self.linenos = {}       # msgid -> list of (filename, tag, lineno)
        self.nowrap = {}        # msgid -> 1 when whitespace is significant
        # Always defined, so msgstr mode cannot hit a missing attribute.
        self.translations = []
        # Set through setFilename(); None until then.
        self.filename = None
        self.do_translations = with_translations
        self.output_msgstr = 0 # this is msgid mode for outputMessage; 1 is for msgstr mode

    def translationsFollow(self):
        """Indicate that what follows are translations."""
        self.output_msgstr = 1

    def setFilename(self, filename):
        """Set the file name recorded in the references of later messages."""
        self.filename = filename

    def outputMessage(self, text, lineno = 0, comment = None, spacepreserve = 0, tag = None):
        """Adds a string to the list of messages.

        text          -- message text; whitespace-only text is ignored
        lineno        -- source line number stored in the reference
        comment       -- optional translator comment
        spacepreserve -- true to skip whitespace normalization and flag
                         the message as no-wrap
        tag           -- element name stored in the reference
        """
        if text.strip() == '':
            return

        t = escapePoString(normalizeString(text, not spacepreserve))
        if self.output_msgstr:
            self.translations.append(t)
            return

        # In translation mode every occurrence is kept so that msgids and
        # msgstrs stay aligned by position; otherwise msgids are unique.
        if self.do_translations or t not in self.messages:
            self.messages.append(t)
            if spacepreserve:
                self.nowrap[t] = 1

        self.linenos.setdefault(t, []).append((self.filename, tag, lineno))
        # Only the first comment seen for a msgid is kept, and none at all
        # in translation mode.
        if (not self.do_translations) and comment and t not in self.comments:
            self.comments[t] = comment

    def outputHeader(self, out):
        """Write the PO header entry to `out` (any object with write())."""
        import time
        # The <EMAIL@ADDRESS> and <LL@li.org> placeholders follow the
        # standard gettext header template; the empty line terminates the
        # header entry just like every entry written by outputAll().
        out.write("""msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"

""" % (time.strftime("%Y-%m-%d %H:%M%z")))

    def outputAll(self, out):
        """Write the header followed by one PO entry per collected message.

        In translation mode the collected translations are consumed from
        the front of self.translations, one per message.
        """
        self.outputHeader(out)

        for k in self.messages:
            if k in self.comments:
                out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. ")))
            references = "".join(["%s:%d(%s) " % (ref[0], ref[2], ref[1])
                                  for ref in self.linenos[k]])
            out.write("#: %s\n" % (references))
            if k in self.nowrap and self.nowrap[k]:
                out.write("#, no-wrap\n")
            out.write("msgid \"%s\"\n" % (k))
            translation = ""
            if self.do_translations and self.translations:
                translation = self.translations.pop(0)
            out.write("msgstr \"%s\"\n\n" % (translation))
def normalizeNode(node):
    """Collapse whitespace in `node` and, recursively, in its element children.

    Nothing is done for a false node or for a node that
    isSpacePreserveNode() reports as space-preserving.  A non-blank text
    node has every run of whitespace replaced by one space.  A blank text
    node is emptied, except when entities are not being expanded and it
    sits between two non-blank siblings.

    Reads the module-level `expand_entities` flag (defined outside this
    function).
    """
    if not node or isSpacePreserveNode(node):
        return

    if node.isText():
        if not node.isBlankNode():
            # Raw string: '\s' is not a valid escape in a plain literal.
            node.setContent(re.sub(r'\s+', ' ', node.content))
        elif expand_entities or not (node.prev and not node.prev.isBlankNode()
                                     and node.next and not node.next.isBlankNode()):
            node.setContent('')
    elif node.children and node.type == 'element':
        child = node.children
        while child:
            normalizeNode(child)
            child = child.next
def normalizeString(text, ignorewhitespace = 1):
    """Normalizes string to be used as key for gettext lookup.

    Removes all unnecessary whitespace.

    text             -- XML snippet to normalize
    ignorewhitespace -- when false, `text` is returned untouched

    The snippet is parsed as XML, run through normalizeNode() and
    serialized again with one leading and one trailing space removed.
    If it cannot be parsed, an error is written to stderr and `text` is
    returned unchanged.

    Reads the module-level `doc` and `expand_entities` (defined outside
    this function).
    """
    if not ignorewhitespace:
        return text

    # A fragment is not a well-formed document by itself, so give it a
    # single root element to live in.
    wrapped = '<norm>%s</norm>' % text
    try:
        # Lets add document DTD so entities are resolved
        dtd = doc.intSubset()
        tmp = dtd.serialize('utf-8') + wrapped
    except Exception:
        # No usable DTD: parse the bare snippet.
        tmp = wrapped

    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        if expand_entities:
            ctxt.replaceEntities(1)
        ctxt.parseDocument()
        tree = ctxt.doc()
        newnode = tree.getRootElement()
    except Exception:
        sys.stderr.write("""Error while normalizing string as XML:\n"%s"\n""" % (text) + "\n")
        return text

    normalizeNode(newnode)

    # Serialize only the children, so the wrapper element is dropped again.
    pieces = []
    child = newnode.children
    while child:
        pieces.append(child.serialize('utf-8'))
        child = child.next
    result = ''.join(pieces)

    result = re.sub('^ ','', result)
    result = re.sub(' $','', result)

    return result
def stringForEntity(node):
    """Replaces entities in the node.

    The node is serialized, re-parsed (together with the internal DTD
    subset of its document when one is available) and the re-parsed
    content is returned as a UTF-8 serialized string.  Entities are
    substituted during the re-parse when the module-level
    `expand_entities` flag (defined outside this function) is true.
    """
    text = node.serialize('utf-8')
    # The serialized node is a fragment; wrap it in one root element so
    # that it can be parsed as a document.
    wrapped = '<norm>%s</norm>' % text
    try:
        # Lets add document DTD so entities are resolved
        dtd = node.doc.intSubset()
        tmp = dtd.serialize('utf-8') + wrapped
    except Exception:
        tmp = wrapped

    ctxt = libxml2.createDocParserCtxt(tmp)
    if expand_entities:
        ctxt.replaceEntities(1)
    ctxt.parseDocument()
    tree = ctxt.doc()
    # The wrapper is the root element whether or not a DTD precedes it.
    newnode = tree.getRootElement()

    pieces = []
    child = newnode.children
    while child:
        pieces.append(child.serialize('utf-8'))
        child = child.next

    return ''.join(pieces)
def escapePoString(text):
    """Return `text` escaped for use inside a double-quoted PO string.

    Backslash, double quote, newline and tab are replaced by their
    backslash escapes.
    """
    # Backslash goes first so the backslashes added by the later
    # replacements are not doubled again.
    substitutions = (
        ('\\', '\\\\'),
        ('"', '\\"'),
        ("\n", "\\n"),
        ("\t", "\\t"),
    )
    for raw, escaped in substitutions:
        text = text.replace(raw, escaped)
    return text
def unEscapePoString(text):
    """Undo the quote and backslash escaping of a PO string.

    Escaped double quotes are restored first, then doubled backslashes
    are collapsed.  Newline and tab escapes are left as they are.
    """
    unquoted = text.replace('\\"', '"')
    return unquoted.replace('\\\\', '\\')
def getTranslation(text, spacepreserve = 0):
    """Returns a translation via gettext for specified snippet.

    text should be a string to look for, spacepreserve set to 1
    when spaces should be preserved.

    The snippet is normalized first; if it is empty or whitespace-only
    the normalized text itself is returned.  Otherwise the catalogue in
    the module-level `mofile` (defined outside this function) is
    consulted, with a NoneTranslations instance installed as fallback.
    """
    text = normalizeString(text, not spacepreserve)
    if text.strip() == '':
        return text

    catalog = open(mofile, "rb")
    try:
        # GNUTranslations parses the file in its constructor, so the
        # handle can be released straight away.
        gt = gettext.GNUTranslations(catalog)
    finally:
        catalog.close()
    gt.add_fallback(NoneTranslations())
    return gt.ugettext(text.decode('utf-8'))
def myAttributeSerialize(node):
    """Serialize the content of `node` (used for attribute values).

    A node without children is serialized directly.  Otherwise each child
    contributes in turn: text children are entity-encoded through the
    module-level `doc`, entity references are either kept as "&name;" or
    replaced by their content depending on the module-level
    `expand_entities` flag, and any other child is handled recursively.
    """
    if not node.children:
        return node.serialize('utf-8')

    pieces = ''
    current = node.children
    while current:
        kind = current.type
        if kind == 'text':
            pieces += doc.encodeEntitiesReentrant(current.content)
        elif kind == 'entity_ref':
            if expand_entities:
                pieces += current.content.decode('utf-8')
            else:
                pieces += '&' + current.name + ';'
        else:
            pieces += myAttributeSerialize(current)
        current = current.next
    return pieces
def startTagForNode(node):
    """Return the inside of the start tag for `node`: name plus attributes.

    The result has no angle brackets.  Each property of type 'attribute'
    is appended as ` name="value"`, with the name prefixed by its
    namespace name when one can be obtained.  Returns 0 for a false node.
    """
    if not node:
        return 0

    pieces = [node.name]
    attributes = node.properties
    if attributes:
        for prop in attributes:
            if prop.type != 'attribute':
                continue
            try:
                qualified = prop.ns().name + ":" + prop.name
            except:
                # No namespace could be obtained: use the plain name.
                qualified = prop.name
            pieces.append(" %s=\"%s\"" % (qualified, myAttributeSerialize(prop)))
    return ''.join(pieces)
def endTagForNode(node):
    """Return the element name to use in the end tag, or 0 for a false node."""
    if node:
        return node.name
    return 0
def isFinalNode(node):
    """Tell whether `node` is a "final" node.

    With the module-level `automatic` flag set, a node is final when
    autoNodeIsFinal() accepts it and rejects every one of its ancestors.
    Otherwise a node is final when it is an element listed in the
    module-level `ultimate_tags`, or when it has children and each of
    them is blank, a comment, or final itself.

    Returns a true value for final nodes and a false one otherwise.
    """
    if automatic:
        is_final = autoNodeIsFinal(node)
        # A node stops being final as soon as one of its ancestors
        # would qualify as well.
        ancestor = node.parent
        while ancestor and is_final:
            is_final = not autoNodeIsFinal(ancestor)
            ancestor = ancestor.parent
        return is_final

    if node.type == 'element' and node.name in ultimate_tags:
        return 1
    if not node.children:
        return 0

    child = node.children
    while child:
        if not (child.isBlankNode() or child.type == 'comment' or isFinalNode(child)):
            return 0
        child = child.next
    return 1
def ignoreNode(node):
    """Return 1 when `node` must be skipped during serialization, else 0.

    DTD and comment nodes are always skipped.  Outside automatic mode
    (module-level `automatic` flag) a final node is never skipped, and a
    node whose name is in the module-level `ignored_tags` is skipped.
    """
    skipped_types = ('dtd', 'comment')

    if automatic:
        if node.type in skipped_types:
            return 1
        return 0

    if isFinalNode(node):
        return 0
    if node.name in ignored_tags or node.type in skipped_types:
        return 1
    return 0
def isSpacePreserveNode(node):
    """Return 1 when whitespace inside `node` must be kept as is, else 0.

    That is the case when the node's getSpacePreserve() returns 1, or
    when the module-level `CurrentXmlMode` is set and lists the node's
    name among its space-preserving tags.
    """
    if node.getSpacePreserve() == 1:
        return 1
    if CurrentXmlMode and (node.name in CurrentXmlMode.getSpacePreserveTags()):
        return 1
    return 0
def getCommentForNode(node):
    """Return the comment that directly precedes `node`, if any.

    Previous siblings are skipped for as long as they are whitespace-only
    text.  If the sibling reached is a comment, its stripped content is
    returned; in every other case the result is None.
    """
    candidate = node.prev
    while candidate:
        if candidate.type != 'text' or candidate.content.strip() != '':
            break
        candidate = candidate.prev

    if not candidate or candidate.type != 'comment':
        return None
    return candidate.content.strip()
def replaceNodeContentsWithText(node,text):
    """Replaces all subnodes of a node with contents of text treated as XML.

    node -- node to modify in place
    text -- replacement content (XML markup allowed)

    A node without children simply gets `text` as its content.  Otherwise
    `text` is wrapped in a copy of the node's own start/end tag and
    parsed; on success the node's children are unlinked and the node is
    replaced by the parsed result.  If parsing fails, an error is written
    to stderr and the node is left untouched.

    Reads the module-level `doc` (defined outside this function).
    """
    if not node.children:
        node.setContent(text)
        return

    starttag = startTagForNode(node)
    endtag = endTagForNode(node)

    # Lets add document DTD so entities are resolved
    tmp = ''
    try:
        dtd = doc.intSubset()
        tmp = tmp + dtd.serialize('utf-8')
    except libxml2.treeError:
        pass

    # The end tag needs its "</" prefix for the wrapper to be well-formed.
    content = '<%s>%s</%s>' % (starttag, text, endtag)
    tmp = tmp + content.encode('utf-8')

    newdoc = None
    try:
        ctxt = libxml2.createDocParserCtxt(tmp)
        ctxt.replaceEntities(0)
        ctxt.parseDocument()
        newdoc = ctxt.doc()
    except Exception:
        # Reported just below, where newdoc is still None.
        pass

    if not newdoc:
        sys.stderr.write("""Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8')) + "\n")
        return

    newelem = newdoc.getRootElement()
    if not (newelem and newelem.children):
        # In practice, this happens with tags such as "<para> </para>"
        # (only whitespace in between); leave the node alone.
        return

    stale = node.children
    while stale:
        following = stale.next
        stale.unlinkNode()
        stale = following

    if node:
        following = node.next
        node.replaceNode(newelem.copyNodeList())
        # Restore the link to the sibling that followed the node.
        node.next = following
def autoNodeIsFinal(node):
    """Return 1 when `node` carries visible text of its own, else 0.

    That is the case when the node is a non-blank text node, or when at
    least one of its direct children is a text node with non-whitespace
    content.
    """
    if node.isText() and node.content.strip() != '':
        return 1

    child = node.children
    while child:
        if child.type == 'text' and child.content.strip() != '':
            return 1
        child = child.next
    return 0
def worthOutputting(node):
    """Returns 1 if node is "worth outputting", otherwise 0.

    The node is not worth outputting when, walking up from it until a
    final node has been reached, an ancestor turns up that is final (or
    follows a final one), is not in the module-level `ignored_tags`, and
    is itself worth outputting.  In every other case the answer is
    whatever autoNodeIsFinal() says about the node.
    """
    reached_final = isFinalNode(node) and node.name not in ignored_tags
    ancestor = node.parent
    while not reached_final and ancestor:
        if isFinalNode(ancestor):
            reached_final = 1 # the walk ends after this ancestor
        if reached_final and (ancestor.name not in ignored_tags) and worthOutputting(ancestor):
            # The ancestor's message will already contain this node.
            return 0
        ancestor = ancestor.parent

    return autoNodeIsFinal(node)
def processElementTag(node, replacements, restart = 0):
    """Process node with node.type == 'element'.

    node         -- element node to process
    replacements -- list that collects the results for sub-messages
    restart      -- true to start a fresh list instead of appending to
                    `replacements`

    The children are serialized into one message text.  A child that is
    final, or an element worth outputting by itself, is processed as a
    separate message and represented by a numbered <placeholder-N/> tag.
    In merge mode the translation is looked up and written back into the
    node; otherwise the message is handed to the module-level `msg`
    collector.

    Returns the tuple (starttag, message text, endtag, translation).
    Raises Exception when `node` is not an element.

    Reads the module-level `mode`, `mark_untranslated` and `msg`
    (defined outside this function).
    """
    if node.type != 'element':
        raise Exception("You must pass node with node.type=='element'.")

    outtxt = ''
    if restart:
        myrepl = []
    else:
        myrepl = replacements

    child = node.children
    while child:
        if (isFinalNode(child)) or (child.type == 'element' and worthOutputting(child)):
            myrepl.append(processElementTag(child, myrepl, 1))
            # Placeholders are numbered from 1, in order of appearance.
            outtxt += '<placeholder-%d/>' % (len(myrepl))
        elif child.type == 'element':
            (starttag, content, endtag, translation) = processElementTag(child, myrepl, 0)
            outtxt += '<%s>%s</%s>' % (starttag, content, endtag)
        else:
            outtxt += doSerialize(child)
        child = child.next

    if mode == 'merge':
        translation = getTranslation(outtxt, isSpacePreserveNode(node))
    else:
        translation = outtxt
    starttag = startTagForNode(node)
    endtag = endTagForNode(node)
    worth = worthOutputting(node)
    if not translation:
        # Nothing found: fall back to the original text.
        translation = outtxt.decode('utf-8')
        if worth and mark_untranslated:
            node.setLang('C')

    if restart or worth:
        # Put the sub-messages back in place of their placeholders.
        for index, sub in enumerate(myrepl):
            replacement = '<%s>%s</%s>' % (sub[0], sub[3], sub[2])
            translation = translation.replace('<placeholder-%d/>' % (index + 1), replacement)

    if worth:
        if mode == 'merge':
            replaceNodeContentsWithText(node, translation)
        else:
            msg.outputMessage(outtxt, node.lineNo(), getCommentForNode(node), isSpacePreserveNode(node), tag = node.name)

    return (starttag, outtxt, endtag, translation)
def isExternalGeneralParsedEntity(node):
    """Return 1 when `node` refers to an external general parsed entity.

    Only nodes of type 'entity_ref' are examined; anything else gives 0.
    The answer is read from the node's debug dump, which is written to a
    scratch file because debugDumpNode apparently cannot use StringIO.
    If the dump cannot be produced, 0 is returned, so the caller treats
    the entity like any other one.
    """
    if not (node and node.type == 'entity_ref'):
        return 0

    import tempfile
    try:
        # A uniquely named file in the temporary directory avoids clashes
        # between concurrent runs and does not need a writable current
        # directory.
        fd, path = tempfile.mkstemp(prefix=".xml2po-entitychecking")
        try:
            tmp = os.fdopen(fd, "w+")
            try:
                node.debugDumpNode(tmp, 0)
                tmp.seek(0)
                tmpstr = tmp.read()
            finally:
                tmp.close()
        finally:
            # Remove the scratch file even when dumping failed.
            os.remove(path)
    except Exception:
        # We fail silently, and replace all entities if we cannot
        # produce the dump.
        # !!! This is not very nice thing to do, but I don't know if
        # raising an exception is any better
        return 0

    if 'EXTERNAL_GENERAL_PARSED_ENTITY' in tmpstr:
        return 1
    return 0
def doSerialize(node):
    """Serializes a node and its children, emitting PO messages along the way.

    node is the node to serialize.  Ignored nodes give an empty string
    and childless nodes are serialized directly.  Entity references are
    kept verbatim when they are external general parsed entities and
    passed through stringForEntity() otherwise.  Elements go through
    processElementTag(), which is where messages get emitted.  For any
    other node the serialized children are concatenated.

    Returns the serialized text.
    """
    if ignoreNode(node):
        return ''
    if not node.children:
        return node.serialize("utf-8")

    kind = node.type
    if kind == 'entity_ref':
        if isExternalGeneralParsedEntity(node):
            return node.serialize('utf-8')
        return stringForEntity(node)
    if kind in ('entity_decl', 'text'):
        return node.serialize('utf-8')
    if kind == 'element':
        repl = []
        (starttag, content, endtag, translation) = processElementTag(node, repl, 1)
        # The end tag needs its "</" prefix to close the element.
        return '<%s>%s</%s>' % (starttag, content, endtag)

    pieces = []
    child = node.children
    while child:
        pieces.append(doSerialize(child))
        child = child.next
    return ''.join(pieces)
def read_finaltags(filelist):
    """Return the list of tag names treated as final.

    The list comes from the module-level `CurrentXmlMode` when one is
    set; otherwise a built-in default list is returned.  `filelist` is
    accepted but not used.
    """
    if not CurrentXmlMode:
        return ['para', 'title', 'releaseinfo', 'revnumber',
                'date', 'itemizedlist', 'orderedlist',
                'variablelist', 'varlistentry', 'term' ]
    return CurrentXmlMode.getFinalTags()
def read_ignoredtags(filelist):
    """Return the list of tag names that are ignored.

    The list comes from the module-level `CurrentXmlMode` when one is
    set; otherwise a built-in default list is returned.  `filelist` is
    accepted but not used.
    """
    if not CurrentXmlMode:
        return ['itemizedlist', 'orderedlist', 'variablelist',
                'varlistentry' ]
    return CurrentXmlMode.getIgnoredTags()
def tryToUpdate(allargs, lang):
# Remove "-u" and "--update-translation"
print >>sys.stderr, "OVDI!"
command = allargs[0]
args = allargs[1:]
opts, args = getopt.getopt(args, 'avhm:ket:o:p:u:',
['automatic-tags','version', 'help', 'keep-entities', 'extract-all-entities', 'merge', 'translation=',
'output=', 'po-file=', 'update-translation=' ])
for opt, arg in opts:
if opt in ('-a', '--automatic-tags'):
command += " -a"
elif opt in ('-k', '--keep-entities'):
command += " -k"
elif opt in ('-e', '--extract-all-entities'):
command += " -e"
elif opt in ('-m', '--mode'):
command += " -m %s" % arg
elif opt in ('-o', '--output'):
sys.stderr.write("Error: Option '-o' is not yet supported when updating translations directly.\n")
sys.exit(8)
elif opt in ('-v', '--version'):
print VERSION
sys.exit(0)
elif opt in ('-h', '--help'):
sys.stderr.write("Error: If you want help, please use `%s --help' without '-u' option.\n" % (allargs[0]))
sys.exit(9)
elif opt in ('-u', '--update-translation'):
pass
else:
sys.stderr.write("Error: Option `%s' is not supported with option `-u'.\n" % (opt))
sys.exit(9)
while args:
command += " " + args.pop()
file = lang
sys.stderr.write("Merging translations for %s: " % (lang))
result = os.system("%s bool(false)
|