Archiwum

Posts Tagged ‘translation’

Internationalization of Web DynPro component made easier

2009-12-28 Komentarze wyłączone

Internationalization (i18n) is a very important issue for most web systems, and almost every significant framework provides its own tools for it. SAP’s Web DynPro is no exception. Its tooling uses a modified XLIFF format (known as S2X), in which a single XLF file exists for each language version. The naming convention is similar to that of Java properties files: a file named MyView.wdview.xlf will have a German version MyView.wdview_de.xlf, a Russian version MyView.wdview_ru.xlf, etc.

Editing XML files is nowhere near as simple and human-friendly as editing properties files, so SAP provides its own tool for that job (the S2X editor). However, the tool is buggy and very unergonomic to use. Additionally, the initial job of translating everything, including banal phrases such as month and city names, standard menu items, etc., is boring, repetitive and unchallenging.

This is why I came up with the idea of automating this process. I’ve used the most automatic option, which means translating all values using an online translation service, e.g. Google Translate. However, by swapping the GoogleTranslator class for another implementation, e.g. one that reads text files prepared by a human translator, the same code can be used to make the translation process more convenient and efficient.

Here is the code that automatically translates a given resource into a few language versions.







package pl.linfo.lang.xlf;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.xerces.parsers.DOMParser;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSOutput;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import pl.linfo.lang.translate.GoogleTranslator;

public class SapXlfProcessor {
  
  private String sourceLang = "pl";
  private String destLang = "en";
  
  private Document doc;
  
  public void process(String xlfPaththrows SAXException, IOException, ClassCastException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    File xlfFile = new File(xlfPath);
    DOMParser parser = new DOMParser();
    parser.parse(new InputSource(new FileInputStream(xlfFile)));
    doc = parser.getDocument();
    Node root = doc.getFirstChild();
    processNodes(root);
  }
  
  public void dump(PrintStream psthrows ClassCastException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    DOMImplementation implementation= DOMImplementationRegistry.newInstance()
    .getDOMImplementation("XML 3.0");
    DOMImplementationLS feature = (DOMImplementationLSimplementation.getFeature("LS",
    "3.0");
    LSSerializer serializer = feature.createLSSerializer();
    LSOutput output = feature.createLSOutput();
    output.setByteStream(ps);
    serializer.write(doc, output);
  }

  private void processNode(Node item) {
    if ("body".equals(item.getNodeName())) {
      processBody(item);
    else if ("file".equals(item.getNodeName())) {
      ((Elementitem).setAttribute("source-language", destLang);
      processNodes(item);
    else if ("originalLocale".equals(item.getNodeName())) {
      ((Elementitem).setAttribute("xml:lang", destLang);
    else {// if ("header".equals(item.getNodeName()) || "giltDirectives".equals(item.getNodeName())) {
      processNodes(item);
    }
  }

  private void processNodes(Node item) {
    NodeList nl = item.getChildNodes();
    for (int i=0;i<nl.getLength();i++) {
      processNode(nl.item(i));
    }
  }

  private void processBody(Node item) {
    // process all groups
    List<Element> transUnits = new ArrayList<Element>();
    for (int i=0;i<item.getChildNodes().getLength();i++) {
      Node groupNode = item.getChildNodes().item(i);
      for (int j=0;j<groupNode.getChildNodes().getLength();j++) {
        Node unitNode = groupNode.getChildNodes().item(j);
        if ("trans-unit".equals(unitNode.getNodeName())) {
          transUnits.add((ElementunitNode);
        }
      }
    }
    // now we can do translation
    if (transUnits.size() == 0) {
      System.out.println("Nothing to translate");
      return ;
    }
    String[] values = new String[transUnits.size()];
    for (int i=0;i<transUnits.size();i++) {
      Element transUnit = transUnits.get(i);
      NodeList sources = transUnit.getElementsByTagName("source");
      if (sources.getLength() == 1) {
        values[i((Elementsources.item(0)).getTextContent();
      else 
        values[i""
    }
    String[] translated = new String[values.length];
//    List<String> toTranslate = new ArrayList<String>();
//    for (int i=0;i<values.length;i++) {
//      translated[i] = new GoogleTranslator().translate(text, sourceLang, destLang)
//    }
    try {
      translated = new GoogleTranslator().translate(values, sourceLang, destLang);
    catch (Exception e) {
      e.printStackTrace();
      return;
    }
    for (int i=0;i<transUnits.size();i++) {
      Element transUnit = transUnits.get(i);
      NodeList sources = transUnit.getElementsByTagName("source");
      if (sources.getLength() == 1) {
        ((Elementsources.item(0)).setTextContent(translated[i]);
      }
    }  
  }
  
  public static void main(String[] argsthrows Exception {
    String fileName = "MyView.wdview.xlf";
    String[] targetLangs = new String[]{"en""de""ru""uk"};
    SapXlfProcessor processor = new SapXlfProcessor();
    for (String targetLang : targetLangs) {
      processor.destLang = targetLang;
      processor.process(fileName);
      String dumpFile = new File(fileName).getName();
      dumpFile = "tmp/" + dumpFile.replace(".xlf""_"+targetLang+".xlf");
      processor.dump(new PrintStream(dumpFile));
    }
  }

}

Java2html


As you will notice, the Ukrainian language is referenced as uk, not ua. This can be quite confusing for users who get the error Can’t set language. The full list of language codes used by SAP is given in the following PDF document.

The implementation of GoogleTranslator uses the Google Translate API library and is listed here:






package pl.linfo.lang.translate;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.TextExtractor;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;

import com.google.api.translate.Language;
import com.google.api.translate.Translate;

public class GoogleTranslator {
  
  private HttpClient httpClient;
  
  private Source currentPage;
  
  public GoogleTranslator() throws IllegalStateException, IOException {
    httpClient = new DefaultHttpClient();
    httpClient.getParams().setParameter("http.protocol.expect-continue"false);
    httpClient.getParams().setParameter("http.connection.timeout"180*1000);

    Translate.setHttpReferrer("http://translate.google.pl&#34;);
    HttpGet httpget = new HttpGet("http://translate.google.pl&#34;);
    HttpResponse response = httpClient.execute(httpget);
    HttpEntity entity = response.getEntity();
    currentPage = new Source(entity.getContent());
  }
  
  public String[] translate(String[] text, String sourceLang, String destLangthrows Exception {
    
    Language sl = Language.fromString(sourceLang);
    Language tl = Language.fromString(destLang);
    
    // do grouping
    int ptr = 0;
    int len = 0;
    int max_len = 500;
    String[] result = new String[text.length];
    List<String> tmp = new ArrayList<String>();
    for (int i=0;i<text.length;i++) {
      
      int strlen = text[i].length();
      len += strlen;
      tmp.add(text[i]);
      
      if (i+1==text.length || len >= max_len) {
        System.out.println("Doing request with " + len + " bytes");
        String[] tmp2 = Translate.execute(tmp.toArray(new String[0]), sl, tl);
        for (int j=0;j<tmp2.length;j++)
          result[ptr+j= tmp2[j];
        ptr = i+1;
        len = 0;
        tmp.clear();
      }
    }
      return result;
  }   
  
}

Java2html