「cpdetector, free java code page detection.」,這是另一套編碼偵測的解決方案(Java)~ 同時也包含了Mozilla's chardet (jchardet)~
另外根據「Shared Development: Character encoding detection」所針對「cpdetector」的編碼測試~ 它所顯現的成果的確相當顯著~ 有需要的人用看看吧~
範例程式
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.Charset;

import cpdetector.io.CodepageDetectorProxy;
import cpdetector.io.HTMLCodepageDetector;
import cpdetector.io.JChardetFacade;

/**
 * Small demo that asks cpdetector for the character encoding of a local file
 * and of a remote URL, then prints the reported charset names.
 */
public class CPdetector {

    /**
     * Shared detector proxy. The static initializer below registers an
     * {@code HTMLCodepageDetector} first and the {@code JChardetFacade} second.
     */
    private static CodepageDetectorProxy codepageProxy = CodepageDetectorProxy.getInstance();

    static {
        // NOTE(review): the meaning of the 'false' flag is defined by cpdetector
        // and is not visible here -- confirm against the library documentation.
        codepageProxy.add(new HTMLCodepageDetector(false));
        codepageProxy.add(JChardetFacade.getInstance());
    }

    /**
     * Detects the encoding of a file by converting it to a URL and delegating
     * to {@link #getEncoding(URL)}.
     *
     * @param f the file to inspect
     * @return the charset name, or {@code null} when the detector returns no charset
     * @throws Exception if the URL conversion or the detection fails
     */
    public String getEncoding(File f) throws Exception {
        URL location = f.toURI().toURL();
        return getEncoding(location);
    }

    /**
     * Detects the encoding of the resource behind the given URL.
     *
     * @param url the resource to inspect
     * @return the charset name, or {@code null} when the detector returns no charset
     * @throws IOException if the detector fails while reading the resource
     */
    public String getEncoding(URL url) throws IOException {
        Charset detected = codepageProxy.detectCodepage(url);
        return (detected == null) ? null : detected.name();
    }

    /**
     * Runs the two sample detections (a local file, then a web page) and prints
     * each result; any failure is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        CPdetector demo = new CPdetector();
        try {
            System.out.println("encoding:" + demo.getEncoding(new File("Big5.txt")));
            System.out.println("encoding:" + demo.getEncoding(new URL("http://www.google.com.tw")));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
範例結果:
encoding:UTF-8
encoding:Big5