基本信息
源码名称:C#实现Apriori算法 实例源码下载
源码大小:3.84M
文件格式:.zip
开发语言:C#
更新时间:2017-04-02
友情提示:(无需注册或充值,赞助后即可获取资源下载链接)
嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):78630559
本次赞助数额为:2 元
微信扫码支付:2 元
请留下您的邮箱,我们将在2小时内将文件发到您的邮箱
源码介绍
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Collections;
using System.IO;
using System.Threading;
namespace Apriori
{
/// <summary>
/// Right-hand side of an association rule. For a rule such as
/// 1^2^7 ==> 3^5 [12, 0.50] this holds everything to the right of the arrow:
/// the item string plus the support and confidence values.
/// </summary>
public class RuleRightPart
{
    /// <summary>String form of the items on the right of the arrow.</summary>
    public string items;

    /// <summary>Support count stored for the rule.</summary>
    public int support;

    /// <summary>Confidence value stored for the rule.</summary>
    public double confidence;

    /// <summary>Creates an empty instance; all fields keep their default values.</summary>
    public RuleRightPart()
    {
    }

    /// <summary>Creates an instance with every field set.</summary>
    /// <param name="str">Value for <see cref="items"/>.</param>
    /// <param name="i">Value for <see cref="support"/>.</param>
    /// <param name="d">Value for <see cref="confidence"/>.</param>
    public RuleRightPart(string str, int i, double d)
    {
        items = str;
        support = i;
        confidence = d;
    }
}
public class Apriori
{
/// <summary>
/// Program entry point: runs the rule generation once with fixed settings.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Constructor arguments, in order: maxId, support, confidence.
    new Apriori(200, 0.05, 0.5).GetRules();
}
/// <summary>
/// Creates an instance without setting maxId, support or confidence,
/// so they keep their default values.
/// </summary>
public Apriori()
{
}

/// <summary>
/// Creates an instance with the given limits.
/// </summary>
/// <param name="_maxId">Stored in the maxId field.</param>
/// <param name="_support">Stored in the support field.</param>
/// <param name="_confidence">Stored in the confidence field.</param>
public Apriori(int _maxId, double _support, double _confidence)
{
    maxId = _maxId;
    support = _support;
    confidence = _confidence;
}
public static string sDataFile = @"D:\Projects\Apriori\Data\test.txt";// Path of the file that stores the transaction set
private int maxId; // Upper bound for ids: LoadAllItems ignores ids above it and the counting arrays are sized from it
private double support; // Support threshold; compared against counts as support * AllTransactions.Count
private double confidence; // Confidence threshold a rule must reach to be kept
private List<DataItem> AllTransactions = new List<DataItem>(); // All loaded transactions
private List<bool> NeedReview = new List<bool>(); // Per transaction (same index as AllTransactions): whether it still needs to be scanned
/// <summary>
/// Loads all transactions from <see cref="sDataFile"/>.
/// Each line is one transaction: tab-separated integer ids. Ids outside
/// the range 0..maxId are ignored, duplicates within a line are removed,
/// and the remaining ids are stored in ascending order. Every loaded
/// transaction is appended to AllTransactions with a matching
/// <c>true</c> entry in NeedReview.
/// </summary>
/// <remarks>
/// An <see cref="IOException"/> is caught and printed to the console;
/// transactions read before the failure stay loaded. A token that is not
/// an integer makes <see cref="int.Parse(string)"/> throw, as before.
/// </remarks>
private void LoadAllItems()
{
    char[] separator = { '\t' };

    // Both arrays live across lines. An id is "new on this line" while
    // aCurr[id] == aPrev[id]; the first occurrence bumps aCurr[id], so
    // further occurrences on the same line no longer match and are skipped.
    int[] aPrev = new int[maxId + 1];
    int[] aCurr = new int[maxId + 1];

    try
    {
        // The using statement closes the reader even if parsing throws.
        using (StreamReader sr = new StreamReader(sDataFile, Encoding.Default, false))
        {
            string sLine;
            while ((sLine = sr.ReadLine()) != null)
            {
                string[] strArray = sLine.Split(separator, System.StringSplitOptions.RemoveEmptyEntries);

                // Mark each id seen on this line (array-based de-duplication).
                foreach (string str in strArray)
                {
                    int id = int.Parse(str);
                    // The lower-bound check keeps a negative id from indexing out of range.
                    if ((id >= 0) && (id <= maxId) && (aPrev[id] == aCurr[id]))
                    {
                        aCurr[id]++;
                    }
                }

                // Collect the marked ids in ascending order and re-sync aPrev
                // so the next line starts with aPrev[id] == aCurr[id] again.
                List<int> l = new List<int>();
                for (int ix = 0; ix <= maxId; ix++)
                {
                    if (aCurr[ix] == 1 + aPrev[ix])
                    {
                        l.Add(ix);
                        aPrev[ix] = aCurr[ix];
                    }
                }

                DataItem item = new DataItem(l);
                AllTransactions.Add(item);
                NeedReview.Add(true);
            }
        }
    }
    catch (IOException e)
    {
        Console.WriteLine(e.ToString());
    }
}
public Dictionary<string, int> ItemsSupport = new Dictionary<string, int>();// Frequent itemsets (keyed by their string form) and their support counts
/// <summary>
/// Builds the frequent single-item sets: counts in how many transactions
/// each id 0..maxId occurs and keeps those whose count reaches
/// support * AllTransactions.Count. Every kept id is also recorded in
/// ItemsSupport under its decimal string.
/// </summary>
/// <returns>
/// One single-item <c>DataItem</c> per frequent id, in ascending id order;
/// empty when no transactions are loaded.
/// </returns>
private List<DataItem> GenerateLevel1Set()
{
    List<DataItem> setLevel1 = new List<DataItem>();

    // With no transactions the threshold would be 0 and every id would pass,
    // which makes the later levels explode combinatorially.
    if (AllTransactions.Count == 0)
    {
        return setLevel1;
    }

    // New int arrays are zero-initialized, so no explicit reset is needed.
    int[] itemFreq = new int[maxId + 1];
    foreach (DataItem item in AllTransactions)
    {
        foreach (int ix in item.items)
        {
            itemFreq[ix]++;
        }
    }

    double minCount = support * AllTransactions.Count;
    for (int ix = 0; ix <= maxId; ix++)
    {
        if (itemFreq[ix] >= minCount)
        {
            List<int> l = new List<int>();
            l.Add(ix);
            setLevel1.Add(new DataItem(l));
            ItemsSupport.Add(ix.ToString(), itemFreq[ix]);
        }
    }

    return setLevel1;
}
private List<DataItem> HighLevelFreqItems = new List<DataItem>();// Frequent itemsets found by GenerateHighLevelSet - used to generate the association rules
/// <summary>
/// Builds the frequent itemsets of the next level from the frequent
/// itemsets of the current level: generate candidates by combining pairs,
/// prune candidates that have a non-frequent sub-itemset, count the
/// remaining candidates over the transactions, and keep those whose count
/// reaches support * AllTransactions.Count.
/// </summary>
/// <param name="setLowLevel">Frequent itemsets of the lower level.</param>
/// <returns>
/// Frequent itemsets of the higher level; empty when no candidate is
/// generated or none is frequent. Each returned itemset is also appended to
/// HighLevelFreqItems and recorded in ItemsSupport.
/// </returns>
private List<DataItem> GenerateHighLevelSet(List<DataItem> setLowLevel)
{
    List<DataItem> setHighLevel = new List<DataItem>();
    List<DataItem> setTmp = new List<DataItem>();                 // candidate higher-level itemsets
    Dictionary<string, int> dic = new Dictionary<string, int>();  // candidate (string form) -> support count

    // Candidate generation: try to combine every unordered pair of lower-level itemsets.
    for (int ix = 0; ix < setLowLevel.Count; ix++)
    {
        for (int iy = ix + 1; iy < setLowLevel.Count; iy++)
        {
            DataItem itemComb = new DataItem();
            if (!setLowLevel[ix].Combine(setLowLevel[iy], ref itemComb))
            {
                continue;
            }

            // Pruning: the candidate survives only if every sub-itemset
            // reported by GetLowSubSets is already a known frequent itemset.
            List<DataItem> subSets = new List<DataItem>();
            if (!itemComb.GetLowSubSets(ref subSets))
            {
                continue;
            }

            bool allSubSetsFrequent = true;
            foreach (DataItem subSet in subSets)
            {
                if (!ItemsSupport.ContainsKey(subSet.ToString()))
                {
                    allSubSetsFrequent = false;
                    break;
                }
            }

            if (allSubSetsFrequent)
            {
                setTmp.Add(itemComb);
                dic.Add(itemComb.ToString(), 0);
            }
        }
    }

    if (setTmp.Count == 0)
    {
        return setHighLevel;
    }

    // Support counting. A transaction that contains none of this level's
    // candidates gets NeedReview = false and is skipped on later calls.
    for (int ix = 0; ix < AllTransactions.Count; ix++)
    {
        bool flag = false;
        if (NeedReview[ix])
        {
            foreach (DataItem item in setTmp)
            {
                if (AllTransactions[ix].Contains(item))
                {
                    dic[item.ToString()]++;
                    flag = true;
                }
            }
        }
        NeedReview[ix] = flag;
    }

    // Keep the candidates that reach the minimum support count.
    double minCount = support * AllTransactions.Count;
    foreach (KeyValuePair<string, int> candidate in dic)
    {
        if (candidate.Value >= minCount)
        {
            DataItem item = new DataItem(candidate.Key);
            setHighLevel.Add(item);
            HighLevelFreqItems.Add(item);
            ItemsSupport.Add(candidate.Key, candidate.Value);
        }
    }

    return setHighLevel;
}
// Association rules: maps the left-hand side of a rule (string form) to the right-hand sides found for it
public Dictionary<string, List<RuleRightPart>> AssociationRules = new Dictionary<string, List<RuleRightPart>>();
/// <summary>
/// Generates association rules from the itemsets in HighLevelFreqItems.
/// For each itemset, GetAllSubSets supplies two lists that are read in
/// parallel: entry i of the first list is a rule's left side and entry i
/// of the second list its right side. A rule is stored in
/// AssociationRules when support(itemset) / support(left side) reaches
/// the confidence threshold.
/// </summary>
private void GenerateAssociationRules()
{
    if (HighLevelFreqItems.Count == 0)
    {
        return;
    }

    foreach (DataItem item in HighLevelFreqItems)
    {
        // Fresh lists per itemset, so entries from a previous itemset can
        // never be paired with this one, whether or not GetAllSubSets
        // clears the lists it is given.
        List<DataItem> subSet1 = new List<DataItem>();
        List<DataItem> subSet2 = new List<DataItem>();
        if (!item.GetAllSubSets(ref subSet1, ref subSet2))
        {
            continue;
        }

        int nSize = subSet1.Count;
        for (int ix = 0; ix < nSize; ix++)
        {
            string sLeft = subSet1[ix].ToString();
            int nSupportLeft = ItemsSupport[sLeft];
            int nSupportAll = ItemsSupport[item.ToString()];
            double dConfidence = (double)nSupportAll / (double)nSupportLeft;
            if (dConfidence < confidence)
            {
                continue;
            }

            RuleRightPart rp = new RuleRightPart(subSet2[ix].ToString(), nSupportAll, dConfidence);

            // Single lookup instead of ContainsKey followed by the indexer.
            List<RuleRightPart> rightParts;
            if (!AssociationRules.TryGetValue(sLeft, out rightParts))
            {
                rightParts = new List<RuleRightPart>();
                AssociationRules.Add(sLeft, rightParts);
            }
            rightParts.Add(rp);
        }
    }
}
/// <summary>
/// Writes every stored association rule to the console, one per line, in
/// the form <c>left ==> right [support, confidence]</c>. Prints nothing
/// when AssociationRules is empty.
/// </summary>
private void PrintAssociationRule()
{
    if (AssociationRules.Count == 0)
    {
        return;
    }

    Console.WriteLine("关联规则:");
    foreach (KeyValuePair<string, List<RuleRightPart>> rule in AssociationRules)
    {
        foreach (RuleRightPart rp in rule.Value)
        {
            Console.Write(rule.Key + " ==> " + rp.items + " [");
            Console.Write(rp.support);
            Console.Write(", ");
            Console.Write(rp.confidence);
            Console.Write("]\n");
        }
    }
}
/// <summary>
/// Generates the frequent itemsets level by level and prints each
/// non-empty level: loads the transactions, builds the single-item level,
/// then keeps calling GenerateHighLevelSet on the latest level until a
/// level comes back empty.
/// </summary>
public void GetFreqItems()
{
    // GenerateTestFile() can be called here first to create a random data file.
    LoadAllItems();

    List<DataItem> setLevel = GenerateLevel1Set();
    Console.WriteLine("单项的频繁项集合:");
    DataItem.PrintListItems(setLevel);
    Console.Write('\n');

    // level is the number of items per itemset in the level being built.
    for (int level = 2; setLevel.Count != 0; level++)
    {
        List<DataItem> setHighLevel = GenerateHighLevelSet(setLevel);
        if (setHighLevel.Count != 0)
        {
            Console.WriteLine(level.ToString() + "-项的频繁项集合:");
            DataItem.PrintListItems(setHighLevel);
            Console.Write('\n');
        }
        setLevel = setHighLevel;
    }
}
/// <summary>
/// Runs the whole pipeline: mines the frequent itemsets, derives the
/// association rules from them, then prints the rules.
/// </summary>
public void GetRules()
{
    // Step 1: frequent itemsets (this also loads the transactions).
    GetFreqItems();

    // Step 2: rules from the itemsets collected in step 1.
    GenerateAssociationRules();

    // Step 3: console output.
    PrintAssociationRule();
}
/// <summary>
/// Writes a random transaction file to <see cref="sDataFile"/>,
/// overwriting any existing file. The file has 100 lines; each line holds
/// between nColumn - 5 and nColumn + 4 tab-terminated random ids in the
/// range 1..maxId and ends with '\n'.
/// </summary>
/// <remarks>
/// An <see cref="IOException"/> is caught and printed to the console.
/// </remarks>
private void GenerateTestFile()
{
    int nRow = 100;    // number of lines
    int nColumn = 20;  // centre of the per-line item count range

    // One generator for the whole file. Creating a new Random per line can
    // reuse a time-based seed and repeat the same line, which the old code
    // worked around with Thread.Sleep(20).
    Random r = new Random();

    try
    {
        // The using statement closes the writer even if a write fails.
        using (StreamWriter sw = new StreamWriter(sDataFile, false, Encoding.Default))
        {
            for (int ix = 0; ix < nRow; ix++)
            {
                int nSize = r.Next(nColumn - 5, nColumn + 5); // upper bound is exclusive
                for (int iy = 0; iy < nSize; iy++)
                {
                    int nPaperId = r.Next(1, maxId + 1);
                    sw.Write(nPaperId);
                    sw.Write('\t');
                }
                sw.Write('\n');
            }
        }
    }
    catch (IOException e)
    {
        Console.WriteLine(e.ToString());
    }
}
}
}