基本信息
源码名称:C#实现Apriori算法 实例源码下载
源码大小:3.84M
文件格式:.zip
开发语言:C#
更新时间:2017-04-02
   友情提示:(无需注册或充值,赞助后即可获取资源下载链接)

     嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):78630559

本次赞助数额为: 2 元 
   源码介绍

using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Collections;
using System.IO;
using System.Threading;

namespace Apriori
{
    /// <summary>
    /// Right-hand side of an association rule together with the rule's statistics.
    /// For a rule such as <c>1^2^7 ==> 3^5 [12, 0.50]</c> this holds everything to the
    /// right of the arrow: the consequent items, the support count and the confidence.
    /// </summary>
    public class RuleRightPart
    {
        /// <summary>Textual form of the consequent item set (e.g. "3^5").</summary>
        public string items;

        /// <summary>Support count stored for the rule.</summary>
        public int support;

        /// <summary>Confidence stored for the rule.</summary>
        public double confidence;

        /// <summary>Creates an instance with all fields at their default values.</summary>
        public RuleRightPart()
        {
        }

        /// <summary>Creates an instance from the given values.</summary>
        /// <param name="str">Value stored in <see cref="items"/>.</param>
        /// <param name="i">Value stored in <see cref="support"/>.</param>
        /// <param name="d">Value stored in <see cref="confidence"/>.</param>
        public RuleRightPart(string str, int i, double d)
        {
            items = str;
            support = i;
            confidence = d;
        }
    }

    public class Apriori
    {
        /// <summary>
        /// Program entry point: builds a miner with fixed parameters and prints its rules.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Constructor arguments are (maxId, support, confidence).
            Apriori apriori = new Apriori(200, 0.05, 0.5);
            apriori.GetRules();
        }

        /// <summary>
        /// Creates a miner whose maxId, support and confidence keep their default values.
        /// </summary>
        public Apriori()
        { }

        /// <summary>
        /// Creates a miner with the given parameters.
        /// </summary>
        /// <param name="_maxId">Stored in <c>maxId</c>.</param>
        /// <param name="_support">Stored in <c>support</c>.</param>
        /// <param name="_confidence">Stored in <c>confidence</c>.</param>
        public Apriori(int _maxId, double _support, double _confidence)
        {
            this.maxId = _maxId;
            this.support = _support;
            this.confidence = _confidence;
        }

        public static string sDataFile = @"D:\Projects\Apriori\Data\test.txt";// path of the file that stores the transaction set

        private int maxId;           // largest id accepted; LoadAllItems ignores item ids above it and it sizes the per-id arrays
        private double support;      // minimum support, used as a fraction of the transaction count
        private double confidence;   // minimum confidence a rule must reach to be kept
        
        private List<DataItem> AllTransactions = new List<DataItem>();  // all loaded transactions
        private List<bool> NeedReview = new List<bool>();               // per transaction: whether it still needs to be scanned

        /// <summary>
        /// Loads every transaction from <see cref="sDataFile"/> into <c>AllTransactions</c>.
        /// </summary>
        /// <remarks>
        /// Each line of the file is read as one transaction made of tab-separated integer
        /// item ids. Ids outside 0..maxId are ignored, duplicates within a line are
        /// collapsed, and the remaining ids are stored in ascending order. One
        /// <c>NeedReview</c> flag (initially true) is appended per transaction.
        /// An <see cref="IOException"/> is written to the console and not rethrown; a token
        /// that is not a valid integer still raises the exception thrown by <c>int.Parse</c>.
        /// </remarks>
        private void LoadAllItems()
        {
            char[] separator = { '\t' };

            // seen[id] is set while a line is scanned and cleared again while that line's
            // sorted id list is built, so one array serves every line: it both removes
            // duplicates and yields the ids in ascending order.
            bool[] seen = new bool[maxId + 1];

            try
            {
                // using guarantees the reader is closed even if parsing a line throws.
                using (StreamReader sr = new StreamReader(sDataFile, Encoding.Default, false))
                {
                    string sLine;
                    while ((sLine = sr.ReadLine()) != null)
                    {
                        string[] tokens = sLine.Split(separator, StringSplitOptions.RemoveEmptyEntries);

                        foreach (string token in tokens)
                        {
                            int id = int.Parse(token);

                            // Negative ids are skipped just like ids above maxId; they would
                            // otherwise index outside the array.
                            if (id >= 0 && id <= maxId)
                                seen[id] = true;
                        }

                        List<int> ids = new List<int>();
                        for (int ix = 0; ix <= maxId; ++ix)
                        {
                            if (seen[ix])
                            {
                                ids.Add(ix);
                                seen[ix] = false;   // reset for the next line
                            }
                        }

                        // A line without usable ids still becomes an (empty) transaction.
                        AllTransactions.Add(new DataItem(ids));
                        NeedReview.Add(true);
                    }
                }
            }
            catch (IOException e)
            {
                Console.WriteLine(e.ToString());
            }
        }

        public Dictionary<string, int> ItemsSupport = new Dictionary<string, int>();// frequent item sets (keyed by their string form) and their support counts

        /// <summary>
        /// Builds the frequent single-item sets from the loaded transactions.
        /// </summary>
        /// <remarks>
        /// An id is kept when its occurrence count is at least
        /// <c>support * AllTransactions.Count</c>. Every kept id is also recorded in
        /// <c>ItemsSupport</c> under <c>id.ToString()</c> with its count.
        /// </remarks>
        /// <returns>
        /// One single-item <c>DataItem</c> per frequent id, in ascending id order;
        /// an empty list when no transactions are loaded.
        /// </returns>
        private List<DataItem> GenerateLevel1Set()
        {
            List<DataItem> setLevel1 = new List<DataItem>();

            // Without transactions the threshold below would be 0 and every id in
            // 0..maxId would pass it with a count of 0, so stop early instead.
            if (AllTransactions.Count == 0)
                return setLevel1;

            // A freshly allocated int[] is already zero-filled.
            int[] itemFreq = new int[maxId + 1];

            foreach (DataItem item in AllTransactions)
                foreach (int ix in item.items)
                    ++itemFreq[ix];

            double minCount = support * AllTransactions.Count;
            for (int ix = 0; ix <= maxId; ++ix)
            {
                if (itemFreq[ix] >= minCount)
                {
                    List<int> l = new List<int>();
                    l.Add(ix);
                    setLevel1.Add(new DataItem(l));

                    ItemsSupport.Add(ix.ToString(), itemFreq[ix]);
                }
            }

            return setLevel1;
        }

        private List<DataItem> HighLevelFreqItems = new List<DataItem>();// frequent item sets found by GenerateHighLevelSet - used to generate the association rules

        /// <summary>
        /// Generates the next level of frequent item sets from the current level.
        /// </summary>
        /// <remarks>
        /// Three steps:
        /// 1. Every pair of lower-level sets that <c>DataItem.Combine</c> accepts becomes a
        ///    candidate, and is kept only if each subset returned by <c>GetLowSubSets</c>
        ///    is already a key of <c>ItemsSupport</c> (pruning).
        /// 2. Each transaction still flagged in <c>NeedReview</c> is tested against every
        ///    candidate; a transaction that contains none of them is unflagged so later
        ///    levels skip it.
        /// 3. Candidates whose count is at least <c>support * AllTransactions.Count</c> are
        ///    returned and also appended to <c>HighLevelFreqItems</c> and
        ///    <c>ItemsSupport</c>.
        /// </remarks>
        /// <param name="setLowLevel">Frequent item sets of the lower level.</param>
        /// <returns>Frequent item sets of the next level; empty when there are none.</returns>
        private List<DataItem> GenerateHighLevelSet(List<DataItem> setLowLevel)
        {
            List<DataItem> setHighLevel = new List<DataItem>();

            List<DataItem> setTmp = new List<DataItem>();                   // candidate item sets
            Dictionary<string, int> dic = new Dictionary<string, int>();    // candidate key -> support count

            // Step 1: combine lower-level sets pairwise into candidates.
            for (int ix = 0; ix < setLowLevel.Count; ++ix)
            {
                for (int iy = ix + 1; iy < setLowLevel.Count; ++iy)
                {
                    DataItem itemComb = new DataItem();
                    if (!setLowLevel[ix].Combine(setLowLevel[iy], ref itemComb))
                        continue;

                    // Pruning: every subset one level down must itself be frequent.
                    List<DataItem> subSets = new List<DataItem>();
                    if (!itemComb.GetLowSubSets(ref subSets))
                        continue;

                    bool allFrequent = true;
                    foreach (DataItem sub in subSets)
                    {
                        if (!ItemsSupport.ContainsKey(sub.ToString()))
                        {
                            allFrequent = false;
                            break;
                        }
                    }
                    if (!allFrequent)
                        continue;

                    // Guard against the same candidate being produced by two different
                    // pairs: Dictionary.Add would throw on the repeated key.
                    string key = itemComb.ToString();
                    if (!dic.ContainsKey(key))
                    {
                        setTmp.Add(itemComb);
                        dic.Add(key, 0);
                    }
                }
            }
            if (setTmp.Count == 0)
                return setHighLevel;

            // Step 2: count the support of every candidate.
            for (int ix = 0; ix < AllTransactions.Count; ++ix)
            {
                if (!NeedReview[ix])
                    continue;

                bool flag = false;
                foreach (DataItem item in setTmp)
                {
                    if (AllTransactions[ix].Contains(item))
                    {
                        ++dic[item.ToString()];
                        flag = true;
                    }
                }

                // A transaction holding no candidate of this level is never scanned again.
                NeedReview[ix] = flag;
            }

            // Step 3: keep the candidates that reach the minimum support.
            double minCount = support * AllTransactions.Count;
            foreach (KeyValuePair<string, int> candidate in dic)
            {
                if (candidate.Value >= minCount)
                {
                    DataItem item = new DataItem(candidate.Key);
                    setHighLevel.Add(item);
                    HighLevelFreqItems.Add(item);
                    ItemsSupport.Add(candidate.Key, candidate.Value);
                }
            }

            return setHighLevel;
        }

        // Association rules: key is the rule's left-hand side, value is the list of right-hand sides with their statistics
        public Dictionary<string, List<RuleRightPart>> AssociationRules = new Dictionary<string, List<RuleRightPart>>();
        
        /// <summary>
        /// Generates the association rules from the item sets in <c>HighLevelFreqItems</c>.
        /// </summary>
        /// <remarks>
        /// For every frequent item set, <c>GetAllSubSets</c> supplies two parallel lists:
        /// entry <c>i</c> of the first is used as a rule's left-hand side and entry
        /// <c>i</c> of the second as its right-hand side. The confidence is
        /// support(whole set) / support(left side), both read from <c>ItemsSupport</c>.
        /// Rules whose confidence is at least <c>confidence</c> are added to
        /// <c>AssociationRules</c>, grouped by left-hand side.
        /// </remarks>
        private void GenerateAssociationRules()
        {
            if (HighLevelFreqItems.Count == 0)
                return;

            foreach (DataItem item in HighLevelFreqItems)
            {
                // Fresh lists for every item set, so subsets of a previous item set can
                // never be paired with this item set's support, whether or not
                // GetAllSubSets clears the lists it is given.
                List<DataItem> subSet1 = new List<DataItem>();
                List<DataItem> subSet2 = new List<DataItem>();

                if (!item.GetAllSubSets(ref subSet1, ref subSet2))
                    continue;

                int nSize = subSet1.Count;
                if (nSize == 0)
                    continue;

                // The support of the whole item set is the same for all of its rules.
                int nSupportAll = ItemsSupport[item.ToString()];

                for (int ix = 0; ix < nSize; ++ix)
                {
                    string sLeft = subSet1[ix].ToString();

                    int nSupportLeft = ItemsSupport[sLeft];
                    double dConfidence = (double)nSupportAll / (double)nSupportLeft;

                    if (dConfidence < confidence)
                        continue;

                    RuleRightPart rp = new RuleRightPart(subSet2[ix].ToString(), nSupportAll, dConfidence);

                    List<RuleRightPart> rules;
                    if (!AssociationRules.TryGetValue(sLeft, out rules))
                    {
                        rules = new List<RuleRightPart>();
                        AssociationRules.Add(sLeft, rules);
                    }
                    rules.Add(rp);
                }
            }
        }

        /// <summary>
        /// Writes every association rule to the console, one per line, in the form
        /// <c>left ==> right [support, confidence]</c>. Prints nothing when there are no rules.
        /// </summary>
        private void PrintAssociationRule()
        {
            if (AssociationRules.Count == 0)
                return;

            Console.WriteLine("关联规则:");
            foreach (KeyValuePair<string, List<RuleRightPart>> entry in AssociationRules)
            {
                foreach (RuleRightPart rp in entry.Value)
                {
                    Console.Write(entry.Key + " ==> " + rp.items + " [");
                    Console.Write(rp.support);
                    Console.Write(", ");
                    Console.Write(rp.confidence);
                    Console.Write("]\n");
                }
            }
        }

        /// <summary>
        /// Loads the transactions and computes the frequent item sets level by level,
        /// printing each non-empty level to the console.
        /// </summary>
        /// <remarks>
        /// Starts from the single-item sets and keeps calling
        /// <c>GenerateHighLevelSet</c> until a level comes back empty.
        /// </remarks>
        public void GetFreqItems()
        {
            // To create a random input file first, call GenerateTestFile() here.
            LoadAllItems();
            List<DataItem> setLevel = GenerateLevel1Set();
            Console.WriteLine("单项的频繁项集合:");
            DataItem.PrintListItems(setLevel);
            Console.Write('\n');

            int ix = 2;   // size of the item sets produced by the next iteration
            while (setLevel.Count != 0)
            {
                List<DataItem> setHighLevel = GenerateHighLevelSet(setLevel);
                if (setHighLevel.Count != 0)
                {
                    Console.WriteLine(ix.ToString() + "-项的频繁项集合:");
                    DataItem.PrintListItems(setHighLevel);
                    Console.Write('\n');
                }
                ++ix;

                // The new level becomes the input of the next round; the old list is
                // simply dropped.
                setLevel = setHighLevel;
            }
        }

        /// <summary>
        /// Generates the association rules: computes the frequent item sets,
        /// derives the rules from them and prints the rules.
        /// </summary>
        public void GetRules()
        {
            GetFreqItems();

            GenerateAssociationRules();
            PrintAssociationRule();
        }

        /// <summary>
        /// Writes a randomly generated transaction file to <see cref="sDataFile"/>,
        /// overwriting any existing file.
        /// </summary>
        /// <remarks>
        /// The file has <c>nRow</c> lines. Each line holds between <c>nColumn - 5</c>
        /// (inclusive) and <c>nColumn + 5</c> (exclusive) ids drawn from 1..maxId, each
        /// followed by a tab. Ids may repeat within a line. An <see cref="IOException"/>
        /// is written to the console and not rethrown.
        /// </remarks>
        private void GenerateTestFile()
        {
            int nRow = 100;     // number of lines (transactions)
            int nColumn = 20;   // nominal ids per line; the actual count varies by -5..+4

            // One generator for the whole file. Creating a new Random per row can give
            // identical rows when two instances are seeded from the same clock tick,
            // which is why the earlier version had to sleep between rows.
            Random r = new Random();

            try
            {
                // using guarantees the writer is flushed and closed even on failure.
                using (StreamWriter sw = new StreamWriter(sDataFile, false, Encoding.Default))
                {
                    for (int ix = 0; ix < nRow; ++ix)
                    {
                        int nSize = r.Next(nColumn - 5, nColumn + 5);
                        for (int iy = 0; iy < nSize; ++iy)
                        {
                            int nPaperId = r.Next(1, maxId + 1);
                            sw.Write(nPaperId);
                            sw.Write('\t');
                        }
                        sw.Write('\n');
                    }
                }
            }
            catch (IOException e)
            {
                Console.WriteLine(e.ToString());
            }
        }
    }
}