基本信息
源码名称:C# 代码相似度计算 例子源码下载
源码大小:0.07M
文件格式:.rar
开发语言:C#
更新时间:2014-10-19
   友情提示:(无需注册或充值,赞助后即可获取资源下载链接)

     嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300

本次赞助数额为: 2 元 
   源码介绍
文本相似度计算

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Threading;
//using System.Threading.Tasks;
using System.IO;
namespace WindowsFormsApp
{
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the form. All setup is delegated to <c>InitializeComponent</c>, which is
        /// defined outside this part of the partial class (presumably the designer file).
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }
        String str1;     // Content of code file 1 (assigned in button1_Click)
        String str2;     // Content of code file 2 (assigned in button2_Click)
        int[] a1 = new int[30];    // Feature vector of file 1: occurrence count of each of the
        // 30 keyword/operator tokens listed in SubstringCount1 (keywords such as class, include,
        // define, int, ..., followed by operators and brackets). Filled by SubstringCount1.
        int[] a2 = new int[30];    // Same 30-element feature vector for file 2 (filled by SubstringCount2)
        int[] d1 = new int[7];   // Structure vector of file 1: counts of {"class","include","define","{","return","void","struct"}
        int[] d2 = new int[7];   // Structure vector of file 2; compared element-wise with d1 in SimStruct
        /// <summary>
        /// Handles the "browse" button for file 1: lets the user pick a text file, shows its
        /// path in <c>textBox1</c>, stores its content in <c>str1</c> and displays the file
        /// name followed by the content in <c>richTextBox1</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // The dialog is IDisposable; release it as soon as the handler is done with it.
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Filter = "文本文件(*.txt)|*.txt";
                openFileDialog.RestoreDirectory = true;   // restore the current directory when the dialog closes

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return;   // user cancelled: keep whatever was loaded before
                }

                string filePath = openFileDialog.FileName;
                textBox1.Text = filePath;
                richTextBox1.Text = Path.GetFileName(filePath);

                // Dispose the reader so the file handle is closed immediately instead of
                // staying open until the garbage collector finalizes it.
                using (StreamReader reader = new StreamReader(filePath, System.Text.Encoding.Default))
                {
                    str1 = reader.ReadToEnd();
                }

                richTextBox1.AppendText("\r\n");
                richTextBox1.AppendText(str1);
            }
        }

        /// <summary>
        /// Handles the "browse" button for file 2: lets the user pick a text file, shows its
        /// path in <c>textBox2</c>, stores its content in <c>str2</c> and displays the file
        /// name followed by the content in <c>richTextBox2</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            // The dialog is IDisposable; release it as soon as the handler is done with it.
            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                openFileDialog.Filter = "文本文件(*.txt)|*.txt";
                openFileDialog.RestoreDirectory = true;   // restore the current directory when the dialog closes

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    return;   // user cancelled: keep whatever was loaded before
                }

                string filePath = openFileDialog.FileName;
                textBox2.Text = filePath;
                richTextBox2.Text = Path.GetFileName(filePath);

                // Dispose the reader so the file handle is closed immediately instead of
                // staying open until the garbage collector finalizes it.
                using (StreamReader reader = new StreamReader(filePath, System.Text.Encoding.Default))
                {
                    str2 = reader.ReadToEnd();
                }

                richTextBox2.AppendText("\r\n");
                richTextBox2.AppendText(str2);
            }
        }
        // Tokens counted to build the 30-element feature vectors a1 / a2.
        // The position of each token matters: SimCount2 pairs index i with a per-token
        // length weight, so entries must not be reordered. "*" intentionally stays at both
        // index 15 and index 18, as in the original table, to keep those positions aligned.
        private static readonly string[] FeatureTokens =
        {
            "class", "include", "define", "int", "char", "float", "double", "string", "if",
            "else", "while", "for", "case", "+", "-", "*", "/", "%", "*", "&",
            "return", "switch", "printf", "scanf", "<", ">", "=", "[", "{", "("
        };

        // Tokens counted to build the 7-element structure vectors d1 / d2.
        private static readonly string[] StructureTokens =
        {
            "class", "include", "define", "{", "return", "void", "struct"
        };

        /// <summary>
        /// Counts every feature and structure token in <paramref name="str"/> and stores the
        /// results in <c>a1</c> (feature vector) and <c>d1</c> (structure vector).
        /// </summary>
        /// <param name="str">Source text of code file 1.</param>
        private void SubstringCount1(string str)
        {
            FillTokenCounts(str, FeatureTokens, a1);
            FillTokenCounts(str, StructureTokens, d1);
        }

        /// <summary>
        /// Counts every feature and structure token in <paramref name="str"/> and stores the
        /// results in <c>a2</c> (feature vector) and <c>d2</c> (structure vector).
        /// </summary>
        /// <param name="str">Source text of code file 2.</param>
        private void SubstringCount2(string str)
        {
            FillTokenCounts(str, FeatureTokens, a2);
            FillTokenCounts(str, StructureTokens, d2);
        }

        /// <summary>
        /// Writes into <paramref name="counts"/>[i] how often <paramref name="tokens"/>[i]
        /// occurs in <paramref name="str"/>, as reported by <c>CountBF</c>.
        /// </summary>
        /// <param name="str">Text to scan.</param>
        /// <param name="tokens">Tokens to look for.</param>
        /// <param name="counts">Destination array; only the first <c>tokens.Length</c> slots are written.</param>
        private void FillTokenCounts(string str, string[] tokens, int[] counts)
        {
            // Bounded by both lengths so a mismatch between table and vector cannot overrun.
            for (int i = 0; i < tokens.Length && i < counts.Length; i++)
            {
                counts[i] = CountBF(str, tokens[i]);
            }
        }
        /// <summary>
        /// Decides whether the two files have the same structure by comparing the structure
        /// vectors <c>d1</c> and <c>d2</c> element by element.
        /// The vectors are filled by <c>SubstringCount1</c> / <c>SubstringCount2</c>, so those
        /// must have run on the current files before this method is called.
        /// </summary>
        /// <returns>1 when every element of <c>d1</c> equals the matching element of <c>d2</c>; otherwise 0.</returns>
        private int SimStruct()
        {
            for (int i = 0; i < d1.Length; i++)
            {
                // A single differing count is enough to treat the structures as different.
                if (d1[i] != d2[i])
                {
                    return 0;
                }
            }
            return 1;
        }

        /// <summary>
        /// Handles the "calculate" button: computes the similarity of the two loaded files and
        /// shows it (two decimals) in <c>labelCount</c>. When the structure vectors match, the
        /// per-token formula (<c>SimCount1</c>) is used; otherwise the common-length formula
        /// (<c>SimCount2</c>) is used.
        /// </summary>
        /// <param name="sender">The control that raised the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            // Without both files there is nothing to compare; the counting helpers would
            // otherwise fail on a null string.
            if (str1 == null || str2 == null)
            {
                MessageBox.Show("请先选择两个要比较的代码文件。");
                return;
            }

            // SimStruct reads d1/d2, which are only written by SubstringCount1/2. Refresh them
            // here so the structure check sees the files that are currently loaded instead of
            // all-zero or stale vectors from a previous run.
            SubstringCount1(str1);
            SubstringCount2(str2);

            double sim = SimStruct() == 1
                ? SimCount1()    // same structure: average per-token similarity
                : SimCount2();   // different structure: 2 * common length / total length

            labelCount.Text = sim.ToString("F2");
        }
        /// <summary>
        /// Computes the similarity used when both files have the same structure: the average
        /// of <c>Main1(a1[i], a2[i])</c> over all feature tokens, scaled to a percentage.
        /// As a side effect the feature vectors are recomputed from <c>str1</c>/<c>str2</c> and
        /// appended to <c>richTextBox1</c> and <c>richTextBox2</c>.
        /// </summary>
        /// <returns>The average per-token similarity multiplied by 100.</returns>
        private double SimCount1()
        {
            SubstringCount1(str1);
            SubstringCount2(str2);

            // Show the feature vector of each file under its source text.
            richTextBox1.AppendText("\r\n");
            richTextBox1.AppendText("关键字符向量:");
            for (int i = 0; i < a1.Length; i++)
            {
                richTextBox1.AppendText(a1[i].ToString());
                richTextBox1.AppendText(",");
            }

            richTextBox2.AppendText("\r\n");
            richTextBox2.AppendText("关键字符向量:");
            for (int i = 0; i < a2.Length; i++)
            {
                richTextBox2.AppendText(a2[i].ToString());
                richTextBox2.AppendText(",");
            }

            // Sum the per-token similarities; a1 and a2 are declared with the same length.
            double sum = 0;
            for (int i = 0; i < a1.Length; i++)
            {
                sum += Main1(a1[i], a2[i]);
            }

            return sum / a1.Length * 100;
        }
        /// <summary>
        /// Computes the similarity used when the files differ in structure:
        /// (2 * common length) / (length of str1 + length of str2), scaled to a percentage.
        /// The common length is the sum over all feature tokens of
        /// (token length weight) * min(count in file 1, count in file 2).
        /// As a side effect the feature vectors are recomputed and appended to both rich text
        /// boxes, and the numerator, denominator and result are appended to <c>richTextBox1</c>.
        /// </summary>
        /// <returns>The similarity as a percentage; 0 when both files are empty.</returns>
        private double SimCount2()
        {
            SubstringCount1(str1);
            SubstringCount2(str2);

            int n1 = str1.Length;
            int n2 = str2.Length;

            // b[i] is the number of characters credited for one shared occurrence of feature
            // token i (5 for "class", 7 for "include", 1 for single-character operators, ...).
            int[] b = { 5, 7, 6, 3, 4, 5, 6, 6, 2, 4, 5, 3, 4, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 5, 1, 1, 1, 1, 1, 1 };

            // Show the feature vector of each file under its source text.
            richTextBox1.AppendText("\r\n");
            richTextBox1.AppendText("关键字符向量:");
            for (int i = 0; i < a1.Length; i++)
            {
                richTextBox1.AppendText(a1[i].ToString());
                richTextBox1.AppendText(",");
            }

            richTextBox2.AppendText("\r\n");
            richTextBox2.AppendText("关键字符向量:");
            for (int i = 0; i < a2.Length; i++)
            {
                richTextBox2.AppendText(a2[i].ToString());
                richTextBox2.AppendText(",");
            }

            // Length of the "common string": every token occurrence that both files share
            // contributes the token's length.
            int g = 0;
            for (int i = 0; i < b.Length; i++)
            {
                g += b[i] * Min(a1[i], a2[i]);
            }

            double N = n1 + n2;
            double M = 2 * g;

            // Two empty files would give 0 / 0 = NaN; report 0 instead.
            double sim = N > 0 ? M / N * 100 : 0;

            richTextBox1.AppendText("\r\n");
            richTextBox1.AppendText(M.ToString("F2"));
            richTextBox1.AppendText("\r\n");
            richTextBox1.AppendText(N.ToString("F2"));
            richTextBox1.AppendText("\r\n");
            richTextBox1.AppendText(sim.ToString("F2"));
            return sim;
        }
         

        /// <summary>
        /// Returns the smaller of two integers.
        /// </summary>
        /// <param name="a">First value.</param>
        /// <param name="b">Second value.</param>
        /// <returns><paramref name="a"/> or <paramref name="b"/>, whichever is smaller.</returns>
        private int Min(int a, int b)
        {
            return Math.Min(a, b);
        }
        /// <summary>
        /// Returns the absolute difference between two integers.
        /// </summary>
        /// <param name="a">First value.</param>
        /// <param name="b">Second value.</param>
        /// <returns>The larger value minus the smaller one.</returns>
        private int JD(int a, int b)
        {
            // Always subtract the smaller operand from the larger one.
            return a > b ? a - b : b - a;
        }

        /// <summary>
        /// Similarity of two occurrence counts of one token:
        /// 1 - |a - b| / ((a + b) / 2).
        /// The result is 1 when the counts are equal, 0 when the difference equals their mean,
        /// and reaches -1 when exactly one of the counts is zero.
        /// </summary>
        /// <param name="a">Count of the token in the first file.</param>
        /// <param name="b">Count of the token in the second file.</param>
        /// <returns>The similarity value; 1 when <c>a + b</c> is zero (nothing to compare).</returns>
        private double Main1(int a, int b)
        {
            // Widen before adding/subtracting so extreme inputs cannot overflow int.
            long total = (long)a + b;
            if (total == 0)
            {
                // Avoids a division by zero; two absent tokens count as fully similar.
                return 1;
            }

            // Floating-point division on purpose: integer division would truncate both the
            // mean and the ratio, collapsing the result to whole numbers.
            double mean = total / 2.0;
            double difference = Math.Abs((long)a - b);
            return 1 - difference / mean;
        }
        /// <summary>
        /// Brute-force (BF) substring search: finds the first position at which
        /// <paramref name="S2"/> occurs inside <paramref name="S1"/>.
        /// </summary>
        /// <param name="S1">Text to search in.</param>
        /// <param name="S2">Pattern to search for.</param>
        /// <returns>
        /// Zero-based index of the first occurrence, or -1 when there is none.
        /// An empty pattern yields 0.
        /// </returns>
        private int FindBF(string S1, string S2)
        {
            int i = 0;   // current position in S1
            int j = 0;   // number of pattern characters matched so far
            while (i < S1.Length && j < S2.Length)
            {
                if (S1[i] == S2[j])
                {
                    i++;
                    j++;
                }
                else
                {
                    // Mismatch: restart one character after the start of this attempt.
                    i = i - j + 1;
                    j = 0;
                }
            }

            // The whole pattern was matched; i - j is where that match began.
            return j >= S2.Length ? i - j : -1;
        }
        /// <summary>
        /// Counts how many times <paramref name="substr"/> occurs in <paramref name="str"/>.
        /// Every start position is tested, so overlapping occurrences are counted
        /// (e.g. "aa" occurs twice in "aaa"). The comparison is ordinal and case-sensitive.
        /// </summary>
        /// <param name="str">Text to scan.</param>
        /// <param name="substr">Token to count.</param>
        /// <returns>The number of occurrences; 0 when either argument is null or empty.</returns>
        private int CountBF(string str, string substr)
        {
            if (string.IsNullOrEmpty(str) || string.IsNullOrEmpty(substr))
            {
                return 0;
            }

            int cnt = 0;

            // Last index at which substr still fits. It is inclusive, so a match that ends
            // exactly at the end of str is counted as well.
            int lastStart = str.Length - substr.Length;
            for (int i = 0; i <= lastStart; i++)
            {
                // Compare in place instead of allocating a substring for every position.
                if (string.CompareOrdinal(str, i, substr, 0, substr.Length) == 0)
                {
                    cnt++;
                }
            }
            return cnt;
        }
    }
}