Rebuilt collation sequence also failing in tests
Posted by mholmes on 14 Oct 2010 in Activity log
I've rewritten the collator as a subclass of java.text.RuleBasedCollator -- source code is below. However, when invoked in the Moses web application, it appears not to be working at all. The next stage will be to write a test package that exercises it directly in the Java environment, to find out whether the Java itself is broken or whether something in the XSLT or Saxon is failing to use it properly. Source code:
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package ca.uvic.hcmc.moses;
/**
*
* @author mholmes
*/
import java.text.ParseException;
import java.text.RuleBasedCollator;
public class MosesCollation extends RuleBasedCollator{
public MosesCollation() throws ParseException
{
super(mosesRules);
}
private static String glottal = new String("\u0294");
private static String a = new String("a,a\u0301,a\u0300,\u00e1,\u00e0");
private static String aDot = new String("\u1ea1,a\u0323,\u1ea1\u0301,a\u0323\u0300,\u00e1\u0323,\u00e0\u0323");
private static String cDot = new String("c\u0323");
private static String cApos = new String("c\u02bc");
private static String schwa = new String("\u0259,\u0259\u0301,\u0259\u0300");
private static String schwaDot = new String("\u0259\u0323,\u0259\u0323\u0301,\u0259\u0323\u0300");
private static String hDot = new String("\u1e25,h\u0323");
private static String hDotW = new String("\u1e25\u02b7,h\u0323\u02b7");
private static String i = new String("i,i\u0301,i\u0300,\u00ed,\u00ec");
private static String iDot = new String("\u1ecb,\u1ecb\u0301,\u1ecb\u0300,i\u0323\u0301,i\u0323\u0300,\u00ed\u0323,\u00ec\u0323");
private static String kApos = new String("k\u02bc");
private static String kW = new String("k\u02b7");
private static String kAposW = new String("k\u02bc\u027b");
private static String lDot = new String("\u1e37,l\u0323");
private static String lGlot = new String("l\u02c0");
private static String lDotGlot = new String("\u1e37\u02c0,l\u0323\u02c0");
private static String lBelt = new String("\026c");
private static String barLamApos = new String("\u019b\u02bc");
private static String mGlot = new String("m\u02c0");
private static String nGlot = new String("n\u02c0");
private static String pApos = new String("p\u02bc");
private static String qApos = new String("q\u02bc");
private static String qW = new String("q\u02b7");
private static String qAposW = new String("q\u02bc\u027b");
private static String rGlot = new String("r\u02c0");
private static String sDot = new String("\u1e63,s\u0323");
private static String tApos = new String("t\u02bc");
private static String u = new String("u,u\u0301,u\u0300,\u00fa,\u00f9");
private static String uDot = new String("\u1ee5,\u1ee5\u0301,\u1ee5\u0300,u\u0323\u0301,u\u0323\u0300,\u00fa\u0323,\u00f9\u0323");
private static String wGlot = new String("w\u02c0");
private static String xW = new String("x\u02b7");
private static String xDot = new String("x\u0323");
private static String xDotW = new String("x\u0323\u02b7");
private static String yGlot = new String("y\u02c0");
private static String phar = new String("\u0295");
private static String pharGlot = new String("\u0295\u02c0");
private static String pharW = new String("\u0295\u02b7");
private static String pharGlotW = new String("\u0295\u02c0\u02b7");
private static String mosesRules =
("< " + glottal + " < " + a + "a < " + aDot + " < c " +
" < " + cDot + "̣ < " + cApos + " < " + schwa + " < " + schwaDot +
" < h < " + hDot + " < " + hDotW + " < i " +
" < " + iDot + " < k < " + kApos + " < " + kW +
" < " + kAposW + " < l < " + lDot + " < " + lGlot +
" < " + lDotGlot + " < " + lBelt + " < " + barLamApos + " < m " +
" < " + mGlot + " < n < " + nGlot + " < p " +
" < " + pApos + " < q < " + qApos + " < " + qW +
" < " + qAposW + " < r < " + rGlot + " < s " +
" < " + sDot + " < t < " + tApos + " < " + u +
" < " + uDot + " < w < " + wGlot + " < x " +
" < " + xW + " < " + xDot + " < " + xDotW + " < y " +
" < " + yGlot + " < " + phar + " < " + pharGlot + " < " + pharW +
" < " + pharGlotW);
}