// Data layout: <word, two-element array>. Each key is a word that occurs in
// either sentence; element [0] of its array is that word's count in sentence
// one and element [1] is its count in sentence two.

/**
 * Computes the cosine similarity of two space-separated sentences using
 * their word-count vectors.
 */
public class CosSimTextSim {
    /** Maps each word to {count in sentence one, count in sentence two}. */
    Map<String, float[]> vectorMap = new HashMap<String, float[]>();

    /** No longer used by this class; retained in case same-package code still reads it. */
    float[] tempArray = null;

    /**
     * Builds the word-count vectors for both sentences.
     *
     * @param string1 first sentence, words separated by single spaces
     * @param string2 second sentence, words separated by single spaces
     * @throws NullPointerException if either argument is {@code null}
     */
    public CosSimTextSim(String string1, String string2) {
        if (string1 == null || string2 == null) {
            throw new NullPointerException("string1 and string2 must not be null");
        }
        // Sentence one fills slot 0 and sentence two fills slot 1. The original
        // code read string2 in both passes, so string1 never affected the result.
        countWords(string1, 0);
        countWords(string2, 1);
    }

    /**
     * Adds the word counts of {@code text} to the given slot of the vectors.
     *
     * @param text  sentence to tokenize on single spaces
     * @param index 0 for sentence one, 1 for sentence two
     */
    private void countWords(String text, int index) {
        for (String word : text.split(" ")) {
            // split(" ") yields "" for blank input and between consecutive
            // spaces; those are not words and must not be counted.
            if (word.isEmpty()) {
                continue;
            }
            float[] counts = vectorMap.get(word);
            if (counts == null) {
                counts = new float[2];
                vectorMap.put(word, counts);
            }
            counts[index]++;
        }
    }

    /**
     * Computes the cosine similarity of the two sentences.
     *
     * @return dot product of the two count vectors divided by the product of
     *         their Euclidean norms, or 0 when either vector is all zeros
     */
    public double sim() {
        double denominator = sqrtMulti(vectorMap);
        if (denominator == 0) {
            // At least one sentence has no words; avoid returning NaN from 0/0.
            return 0;
        }
        return pointMulti(vectorMap) / denominator;
    }

    /** Returns the product of the Euclidean norms of the two vectors. */
    private double sqrtMulti(Map<String, float[]> vectorMap2) {
        // sqrt(a * b) equals sqrt(a) * sqrt(b) for the non-negative sums of squares.
        return Math.sqrt(squares(vectorMap2));
    }

    /** Returns (sum of squares of vector one) * (sum of squares of vector two). */
    private double squares(Map<String, float[]> vectorMap2) {
        double sumSquares1 = 0;
        double sumSquares2 = 0;
        for (float[] counts : vectorMap2.values()) {
            sumSquares1 += counts[0] * counts[0];
            sumSquares2 += counts[1] * counts[1];
        }
        return sumSquares1 * sumSquares2;
    }

    /** Returns the dot product of the two count vectors. */
    private double pointMulti(Map<String, float[]> vectorMap2) {
        double dot = 0;
        for (float[] counts : vectorMap2.values()) {
            dot += counts[0] * counts[1];
        }
        return dot;
    }
}