基本信息
源码名称:C# 代码相似度计算 例子源码下载
源码大小:0.07M
文件格式:.rar
开发语言:C#
更新时间:2014-10-19
友情提示:(无需注册或充值,赞助后即可获取资源下载链接)
嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300
本次赞助数额为: 2 元×
微信扫码支付:2 元
×
请留下您的邮箱,我们将在2小时内将文件发到您的邮箱
源码介绍
文本相似度计算
文本相似度计算
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Threading;
//using System.Threading.Tasks;
using System.IO;

namespace WindowsFormsApp
{
    /// <summary>
    /// Main form of the code-similarity tool. The user loads two source files
    /// (saved as .txt); the form counts keyword/operator occurrences in each and
    /// reports a similarity percentage in <c>labelCount</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Tokens whose occurrence counts form the keyword vector of a file.
        /// The position of a token here is its index in <c>a1</c>/<c>a2</c>.
        /// </summary>
        // NOTE(review): "*" appears twice (index 15 and 18); the second entry was
        // probably meant to be a different operator - confirm with the author.
        private static readonly string[] Keywords =
        {
            "class", "include", "define", "int", "char", "float", "double", "string", "if",
            "else", "while", "for", "case", "+", "-", "*", "/", "%", "*", "&",
            "return", "switch", "printf", "scanf", "<", ">", "=", "[", "{", "("
        };

        /// <summary>
        /// Tokens whose occurrence counts form the structure vector of a file.
        /// The position of a token here is its index in <c>d1</c>/<c>d2</c>.
        /// </summary>
        private static readonly string[] StructureTokens =
        {
            "class", "include", "define", "{", "return", "void", "struct"
        };

        /// <summary>Creates the form and its designer-generated controls.</summary>
        public Form1()
        {
            InitializeComponent();
        }

        String str1; // content of code file 1 (null until a file is loaded)
        String str2; // content of code file 2 (null until a file is loaded)

        int[] a1 = new int[Keywords.Length];        // keyword vector of file 1
        int[] a2 = new int[Keywords.Length];        // keyword vector of file 2
        int[] d1 = new int[StructureTokens.Length]; // structure vector of file 1
        int[] d2 = new int[StructureTokens.Length]; // structure vector of file 2

        /// <summary>
        /// "Browse" handler for file 1: lets the user pick a file and stores its content.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            string content = LoadSourceFile(textBox1, richTextBox1);
            if (content != null)
            {
                str1 = content;
            }
        }

        /// <summary>
        /// "Browse" handler for file 2: lets the user pick a file and stores its content.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            string content = LoadSourceFile(textBox2, richTextBox2);
            if (content != null)
            {
                str2 = content;
            }
        }

        /// <summary>
        /// Shows an open-file dialog, writes the chosen path into <paramref name="pathBox"/>,
        /// writes the file name followed by the file content into <paramref name="contentBox"/>,
        /// and returns the content.
        /// </summary>
        /// <param name="pathBox">Text box that receives the full path of the chosen file.</param>
        /// <param name="contentBox">Rich text box that receives the file name and content.</param>
        /// <returns>The file content, or <c>null</c> when the dialog was cancelled.</returns>
        private string LoadSourceFile(TextBox pathBox, RichTextBox contentBox)
        {
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Filter = "文本文件(*.txt)|*.txt";
                openFileDialog.RestoreDirectory = true; // restore the current directory when the dialog closes

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return null;
                }

                string filePath = openFileDialog.FileName;
                pathBox.Text = filePath;
                contentBox.Text = Path.GetFileName(filePath);

                string content;
                // Dispose the reader so the file handle is released right after reading.
                using (StreamReader reader = new StreamReader(filePath, System.Text.Encoding.Default))
                {
                    content = reader.ReadToEnd();
                }

                contentBox.AppendText("\r\n");
                contentBox.AppendText(content);
                return content;
            }
        }

        /// <summary>
        /// Fills <c>a1</c> and <c>d1</c> with the token counts of <paramref name="str"/>.
        /// </summary>
        /// <param name="str">Text of code file 1.</param>
        private void SubstringCount1(string str)
        {
            CountTokens(str, a1, d1);
        }

        /// <summary>
        /// Fills <c>a2</c> and <c>d2</c> with the token counts of <paramref name="str"/>.
        /// </summary>
        /// <param name="str">Text of code file 2.</param>
        private void SubstringCount2(string str)
        {
            CountTokens(str, a2, d2);
        }

        /// <summary>
        /// Counts every entry of <see cref="Keywords"/> and <see cref="StructureTokens"/>
        /// in <paramref name="str"/> and stores the counts in the given vectors.
        /// </summary>
        /// <param name="str">Text to analyse.</param>
        /// <param name="keywordVector">Receives one count per entry of <see cref="Keywords"/>.</param>
        /// <param name="structureVector">Receives one count per entry of <see cref="StructureTokens"/>.</param>
        private void CountTokens(string str, int[] keywordVector, int[] structureVector)
        {
            for (int i = 0; i < Keywords.Length; i++)
            {
                keywordVector[i] = CountBF(str, Keywords[i]);
            }

            for (int i = 0; i < StructureTokens.Length; i++)
            {
                structureVector[i] = CountBF(str, StructureTokens[i]);
            }
        }

        /// <summary>
        /// Compares the structure vectors <c>d1</c> and <c>d2</c>.
        /// Both vectors must already be populated (see <see cref="SubstringCount1"/> and
        /// <see cref="SubstringCount2"/>).
        /// </summary>
        /// <returns>1 when every structure count is equal in both files, otherwise 0.</returns>
        private int SimStruct()
        {
            for (int i = 0; i < StructureTokens.Length; i++)
            {
                if (d1[i] != d2[i])
                {
                    return 0;
                }
            }

            return 1;
        }

        /// <summary>
        /// "Calculate" handler: computes the similarity of the two loaded files and
        /// shows it (two decimals) in <c>labelCount</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            if (str1 == null || str2 == null)
            {
                // Without this guard the counting below dereferences a null string.
                MessageBox.Show("请先选择两个代码文件");
                return;
            }

            // The vectors must be current before SimStruct() reads d1/d2.
            SubstringCount1(str1);
            SubstringCount2(str2);

            double sim;
            if (SimStruct() == 1)
            {
                // Same structure: compare the keyword vectors element by element.
                sim = SimCount1();
            }
            else
            {
                // Different structure: estimate the shared text length instead.
                sim = SimCount2();
            }

            labelCount.Text = sim.ToString("F2");
        }

        /// <summary>
        /// Appends a line break, the vector caption and the comma-terminated values of
        /// <paramref name="vector"/> to <paramref name="box"/>.
        /// </summary>
        /// <param name="box">Rich text box to append to.</param>
        /// <param name="vector">Keyword vector to display.</param>
        private void AppendVector(RichTextBox box, int[] vector)
        {
            box.AppendText("\r\n");
            box.AppendText("关键字符向量:");
            for (int i = 0; i < vector.Length; i++)
            {
                box.AppendText(vector[i].ToString());
                box.AppendText(",");
            }
        }

        /// <summary>
        /// Keyword-vector similarity: the mean over all keywords of
        /// <c>1 - |a1[i] - a2[i]| / ((a1[i] + a2[i]) / 2)</c>, times 100.
        /// Expects <c>a1</c>/<c>a2</c> to be populated; also prints both vectors.
        /// </summary>
        /// <returns>The similarity as a percentage.</returns>
        private double SimCount1()
        {
            AppendVector(richTextBox1, a1);
            AppendVector(richTextBox2, a2);

            double sum = 0;
            for (int i = 0; i < Keywords.Length; i++)
            {
                sum += Main1(a1[i], a2[i]);
            }

            return sum / Keywords.Length * 100;
        }

        /// <summary>
        /// Common-text similarity: <c>2 * common / (len(str1) + len(str2))</c>, times 100,
        /// where <c>common</c> is the sum over all keywords of
        /// <c>keyword length * min(a1[i], a2[i])</c>.
        /// Expects <c>a1</c>/<c>a2</c> to be populated; also prints both vectors and the
        /// intermediate values.
        /// </summary>
        /// <returns>The similarity as a percentage; 0 when both texts are empty.</returns>
        private double SimCount2()
        {
            AppendVector(richTextBox1, a1);
            AppendVector(richTextBox2, a2);

            int common = 0; // estimated length of the text shared by both files
            for (int i = 0; i < Keywords.Length; i++)
            {
                common += Keywords[i].Length * Min(a1[i], a2[i]);
            }

            double N = str1.Length + str2.Length;
            double M = 2 * common;
            // Avoid 0/0 (NaN) when both texts are empty.
            double sim = N == 0 ? 0 : M / N * 100;

            richTextBox1.AppendText("\r\n");
            richTextBox1.AppendText(M.ToString("F2"));
            richTextBox1.AppendText("\r\n");
            richTextBox1.AppendText(N.ToString("F2"));
            richTextBox1.AppendText("\r\n");
            richTextBox1.AppendText(sim.ToString("F2"));
            return sim;
        }

        /// <summary>Returns the smaller of two integers.</summary>
        /// <param name="a">First value.</param>
        /// <param name="b">Second value.</param>
        /// <returns>The minimum of <paramref name="a"/> and <paramref name="b"/>.</returns>
        private int Min(int a, int b)
        {
            return Math.Min(a, b);
        }

        /// <summary>Returns the absolute difference of two integers.</summary>
        /// <param name="a">First value.</param>
        /// <param name="b">Second value.</param>
        /// <returns><c>|a - b|</c>.</returns>
        private int JD(int a, int b)
        {
            if (a > b)
            {
                return a - b;
            }

            return b - a;
        }

        /// <summary>
        /// Similarity of a single keyword count pair:
        /// <c>1 - |a - b| / ((a + b) / 2)</c>, evaluated in floating point.
        /// </summary>
        /// <param name="a">Count in file 1.</param>
        /// <param name="b">Count in file 2.</param>
        /// <returns>
        /// 1 when the counts are equal (including both zero); for non-negative counts
        /// the value falls to -1 when only one of the files contains the keyword.
        /// </returns>
        private double Main1(int a, int b)
        {
            if (a + b == 0)
            {
                // Zero mean: treated as a perfect match, and avoids dividing by zero.
                return 1;
            }

            double difference = JD(a, b);
            double mean = (a + b) / 2.0; // 2.0 keeps the division in floating point
            return 1 - difference / mean;
        }

        /// <summary>
        /// Finds the first occurrence of <paramref name="S2"/> in <paramref name="S1"/>
        /// using an ordinal (character-by-character) comparison.
        /// </summary>
        /// <param name="S1">Text to search in.</param>
        /// <param name="S2">Pattern to search for.</param>
        /// <returns>The zero-based index of the first match, or -1 when there is none.</returns>
        private int FindBF(string S1, string S2)
        {
            return S1.IndexOf(S2, StringComparison.Ordinal);
        }

        /// <summary>
        /// Counts the (possibly overlapping) occurrences of <paramref name="substr"/> in
        /// <paramref name="str"/> using an ordinal comparison.
        /// </summary>
        /// <param name="str">Text to search in.</param>
        /// <param name="substr">Token to count.</param>
        /// <returns>The number of occurrences; 0 when either argument is null or empty.</returns>
        private int CountBF(string str, string substr)
        {
            if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(substr))
            {
                return 0;
            }

            int cnt = 0;
            // Inclusive upper bound so a match ending at the very end of the text is counted.
            int lastStart = str.Length - substr.Length;
            for (int i = 0; i <= lastStart; i++)
            {
                if (string.CompareOrdinal(str, i, substr, 0, substr.Length) == 0)
                {
                    cnt++;
                }
            }

            return cnt;
        }
    }
}