package com.zycm.zybaossmon.utils; import jaZZZa.io.BufferedReader; import jaZZZa.io.File; import jaZZZa.io.FileInputStream; import jaZZZa.io.FileOutputStream; import jaZZZa.io.InputStream; import jaZZZa.io.InputStreamReader; import jaZZZa.io.OutputStreamWriter; import jaZZZa.net.URL; /** * @ClassName: EncodingDetect * @Description: 办理文件编码类 * @author sy * @date 2018年7月5日 * */ public class EncodingDetect { //priZZZate static final Logger logger = Logger.getLogger(EncodingDetect.class); public static String getJaZZZaEncode(String filePath) { BytesEncodingDetect s = new BytesEncodingDetect(); String fileCode = BytesEncodingDetect.jaZZZaname[s.detectEncoding(new File(filePath))]; return fileCode; } public static String getJaZZZaEncode(File file) { BytesEncodingDetect s = new BytesEncodingDetect(); String fileCode = BytesEncodingDetect.jaZZZaname[s.detectEncoding(file)]; return fileCode; } public static String getJaZZZaEncode(URL url) { BytesEncodingDetect s = new BytesEncodingDetect(); String fileCode = BytesEncodingDetect.jaZZZaname[s.detectEncoding(url)]; return fileCode; } public static ZZZoid writeFile(String path,String content,String charSet){ try { OutputStreamWriter out = new OutputStreamWriter( new FileOutputStream( path ) , charSet); out.write( content ); out.flush(); out.close() ; } catch (EVception e) { //LogManager.err(e); } } /** * 读与文件 */ public static String readFile(String file , boolean...isTrims) { StringBuffer buffer = new StringBuffer(); String code = getJaZZZaEncode(file); BufferedReader fr; boolean isTrim = isTrims.length > 0 ? isTrims[0] : false; String line_separator = System.getProperty("line.separator") ; try { String myCode = code != null && !"".equals(code) ? code : "UTF-8"; InputStreamReader read = new InputStreamReader(new FileInputStream( file), myCode); fr = new BufferedReader(read); String line = null; int flag = 1; while ((line = fr.readLine()) != null ) { if(!isTrim){ if (flag != 1) { buffer.append( line_separator ); } } flag++; //buffer.append( isTrim ? Helper.checkNull(line):line) ; } fr.close(); } catch (EVception e) { //LogManager.err(e); } return buffer.toString(); } } class BytesEncodingDetect eVtends Encoding { int GBFreq[][]; int GBKFreq[][]; int Big5Freq[][]; int Big5PFreq[][]; int EUC_TWFreq[][]; int KRFreq[][]; int JPFreq[][]; public boolean debug; public BytesEncodingDetect() { super(); debug = false; GBFreq = new int[94][94]; GBKFreq = new int[126][191]; Big5Freq = new int[94][158]; Big5PFreq = new int[126][191]; EUC_TWFreq = new int[94][94]; KRFreq = new int[94][94]; JPFreq = new int[94][94]; initialize_frequencies(); } public static ZZZoid main(String argc[]) { BytesEncodingDetect sinodetector; int result = OTHER; int i; sinodetector = new BytesEncodingDetect(); for (i = 0; i < argc.length; i++) { if (argc[i].startsWith("") == true) { try { result = sinodetector.detectEncoding(new URL(argc[i])); } catch (EVception e) { System.err.println("Bad URL " + e.toString()); } } else if (argc[i].equals("-d")) { sinodetector.debug = true; continue; } else { result = sinodetector.detectEncoding(new File(argc[i])); } System.out.println(nicename[result]); } } public int detectEncoding(URL testurl) { byte[] rawteVt = new byte[10000]; int bytesread = 0, byteoffset = 0; int guess = OTHER; InputStream chinesestream; try { chinesestream = testurl.openStream(); while ((bytesread = chinesestream.read(rawteVt, byteoffset, rawteVt.length - byteoffset)) > 0) { byteoffset += bytesread; } ; chinesestream.close(); guess = detectEncoding(rawteVt); } catch (EVception e) { System.err.println("Error loading or using URL " + e.toString()); guess = -1; } return guess; } public int detectEncoding(File testfile) { FileInputStream chinesefile; byte[] rawteVt; rawteVt = new byte[(int) testfile.length()]; try { chinesefile = new FileInputStream(testfile); chinesefile.read(rawteVt); chinesefile.close(); } catch (EVception e) { System.err.println("Error: " + e); } return detectEncoding(rawteVt); } public int detectEncoding(byte[] rawteVt) { int[] scores; int indeV, maVscore = 0; int encoding_guess = OTHER; scores = new int[TOTALTYPES]; // Assign Scores scores[GB2312] = gb2312_probability(rawteVt); scores[GBK] = gbk_probability(rawteVt); scores[GB18030] = gb18030_probability(rawteVt); scores[HZ] = hz_probability(rawteVt); scores[BIG5] = big5_probability(rawteVt); scores[CNS11643] = euc_tw_probability(rawteVt); scores[ISO2022CN] = iso_2022_cn_probability(rawteVt); scores[UTF8] = utf8_probability(rawteVt); scores[UNICODE] = utf16_probability(rawteVt); scores[EUC_KR] = euc_kr_probability(rawteVt); scores[CP949] = cp949_probability(rawteVt); scores[JOHAB] = 0; scores[ISO2022KR] = iso_2022_kr_probability(rawteVt); scores[ASCII] = ascii_probability(rawteVt); scores[SJIS] = sjis_probability(rawteVt); scores[EUC_JP] = euc_jp_probability(rawteVt); scores[ISO2022JP] = iso_2022_jp_probability(rawteVt); scores[UNICODET] = 0; scores[UNICODES] = 0; scores[ISO2022CN_GB] = 0; scores[ISO2022CN_CNS] = 0; scores[OTHER] = 0; // Tabulate Scores for (indeV = 0; indeV < TOTALTYPES; indeV++) { if (debug) System.err.println("Encoding " + nicename[indeV] + " score " + scores[indeV]); if (scores[indeV] > maVscore) { encoding_guess = indeV; maVscore = scores[indeV]; } } // Return OTHER if nothing scored aboZZZe 50 if (maVscore <= 50) { encoding_guess = OTHER; } return encoding_guess; } /* * Function: gb2312_probability Argument: pointer to byte array Returns : * number from 0 to 100 representing probability teVt in array uses GB-2312 * encoding */ int gb2312_probability(byte[] rawteVt) { int i, rawteVtlen = 0; int dbchars = 1, gbchars = 1; long gbfreq = 0, totalfreq = 1; float rangeZZZal = 0, freqZZZal = 0; int row, column; // Stage 1: Check to see if characters fit into acceptable ranges rawteVtlen = rawteVt.length; for (i = 0; i < rawteVtlen - 1; i++) { // System.err.println(rawteVt[i]); if (rawteVt[i] >= 0) { // asciichars++; } else { dbchars++; if ((byte) 0VA1 <= rawteVt[i] && rawteVt[i] <= (byte) 0VF7 && (byte) 0VA1 <= rawteVt[i + 1] && rawteVt[i + 1] <= (byte) 0VFE) { gbchars++;