"""Compute all pairwise cosine similarities between two labelled vector sets.

Each input CSV has no header row; column 0 holds a label and the columns
selected by ``VECTOR_COLUMNS`` hold that label's vector.  Every label of the
first file is compared against every label of the second file and the result
is written to a single CSV.
"""
import random

import numpy as np
import pandas as pd

FNAME = "label_average_cosine_50_cluter_3times_16only.csv"
FNAME2 = "label_average_cosine_50_cluter_3times_31only.csv"
CLUSTER_NUM = 16
CLUSTER_NUM2 = 31

# Positional columns that make up a vector: columns 1..49 (49 values).
# NOTE(review): the file names mention "50" -- confirm that stopping at
# column 49 (rather than 50) is intended.
VECTOR_COLUMNS = slice(1, 50)


def cos_sim(v1, v2):
    """Return the cosine similarity of vectors *v1* and *v2*.

    The value is ``dot(v1, v2) / (||v1|| * ||v2||)``.  No special handling is
    applied when either norm is zero, so the division's result (NaN for NumPy
    floats) is returned as-is.
    """
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))


def load_vectors(fname):
    """Read a header-less CSV of labelled vectors.

    Args:
        fname: Path of the CSV file to read.

    Returns:
        A ``(names, vectors)`` tuple.  ``names`` lists the column-0 label of
        every row in file order (duplicates kept).  ``vectors`` maps each
        label to a NumPy array built from ``VECTOR_COLUMNS`` of its row; if a
        label occurs more than once, the last row wins.
    """
    df = pd.read_csv(fname, delimiter=",", header=None)  # read the CSV
    names = []
    vectors = {}
    for row_idx in range(len(df)):
        label = df.iat[row_idx, 0]
        names.append(label)
        vectors[label] = np.array(df.iloc[row_idx, VECTOR_COLUMNS].tolist())
    return names, vectors


def pairwise_cosine(names1, vectors1, names2, vectors2):
    """Return ``[name1, name2, similarity]`` rows for every label pair.

    Rows are ordered with ``names1`` as the outer loop and ``names2`` as the
    inner loop.
    """
    return [
        [name1, name2, cos_sim(vectors1[name1], vectors2[name2])]
        for name1 in names1
        for name2 in names2
    ]


def main():
    """Load both vector files and write the full similarity table to CSV."""
    print("CSV list reading_16...")
    names1, vectors1 = load_vectors(FNAME)

    print("CSV list reading_31...")
    names2, vectors2 = load_vectors(FNAME2)

    rows = pairwise_cosine(names1, vectors1, names2, vectors2)
    result = pd.DataFrame(rows, columns=['name1', 'name2', 'コサイン類似度'])
    # utf-8-sig writes a BOM at the start of the file.
    result.to_csv(
        f"cos_sim_all_for_{CLUSTER_NUM}_{CLUSTER_NUM2}.csv",
        index=False,
        encoding="utf-8-sig",
    )


if __name__ == "__main__":
    main()