using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.Serialization;
namespace Beise
{
    /// <summary>
    /// Serializable snapshot of a <see cref="Core"/> classifier: the per-class word
    /// counts and the number of documents each class was trained on.
    /// </summary>
    [DataContract]
    public class SaveInfo
    {
        /// <summary>Word -> number of occurrences in spam training documents.</summary>
        [DataMember]
        public IDictionary<String, Int32> Spam { get; set; }

        /// <summary>Word -> number of occurrences in non-spam training documents.</summary>
        [DataMember]
        public IDictionary<String, Int32> NotSpam { get; set; }

        /// <summary>Number of spam documents used for training.</summary>
        [DataMember]
        public Int32 SpamCount { get; set; }

        /// <summary>Number of non-spam documents used for training.</summary>
        [DataMember]
        public Int32 NotSpamCount { get; set; }
    }

    /// <summary>
    /// Multinomial naive Bayes classifier with add-one (Laplace) smoothing that scores
    /// a text as "spam" or "not spam" from per-class word counts.
    /// </summary>
    public class Core
    {
        // Token delimiters shared by training and scoring, so both see the same words.
        private static readonly Char[] Separators = { '\r', '\n', '\t', ' ', ',', '.', ':', ';', ')', '(' };

        private Int32 _spamDocCount;
        private Int32 _nSpamDocCount;
        readonly IDictionary<String, Int32> _spam = new Dictionary<String, Int32>();
        readonly IDictionary<String, Int32> _nSpam = new Dictionary<String, Int32>();

        /// <summary>Creates an untrained classifier.</summary>
        public Core() { }

        /// <summary>
        /// Restores a classifier from a saved snapshot. The snapshot's dictionaries are
        /// adopted by reference (not copied); a missing dictionary is treated as empty.
        /// </summary>
        /// <param name="saveInfo">Snapshot previously produced by <see cref="GetSaveObj"/>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="saveInfo"/> is null.</exception>
        public Core(SaveInfo saveInfo)
        {
            if (saveInfo == null)
                throw new ArgumentNullException("saveInfo");

            // A deserialized snapshot may lack a dictionary; fall back to an empty one
            // so later training/scoring does not hit a null reference.
            _spam = saveInfo.Spam ?? new Dictionary<String, Int32>();
            _nSpam = saveInfo.NotSpam ?? new Dictionary<String, Int32>();
            _nSpamDocCount = saveInfo.NotSpamCount;
            _spamDocCount = saveInfo.SpamCount;
        }

        /// <summary>
        /// Creates a classifier and trains it on the given documents.
        /// </summary>
        /// <param name="spamDoc">Documents known to be spam.</param>
        /// <param name="nspamDoc">Documents known not to be spam.</param>
        /// <exception cref="ArgumentNullException">Either sequence (or one of its items) is null.</exception>
        public Core(IEnumerable<String> spamDoc, IEnumerable<String> nspamDoc)
        {
            if (spamDoc == null)
                throw new ArgumentNullException("spamDoc");
            if (nspamDoc == null)
                throw new ArgumentNullException("nspamDoc");

            foreach (var s in spamDoc)
            {
                InitSpam(s);
            }
            foreach (var s in nspamDoc)
            {
                InitNSpam(s);
            }
        }

        /// <summary>
        /// Adds one spam document to the knowledge base.
        /// </summary>
        /// <param name="s">Text of the spam document.</param>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public void InitSpam(String s)
        {
            CountWords(_spam, s);
            _spamDocCount++;
        }

        /// <summary>
        /// Adds one non-spam document to the knowledge base.
        /// </summary>
        /// <param name="s">Text of the non-spam document.</param>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public void InitNSpam(String s)
        {
            CountWords(_nSpam, s);
            _nSpamDocCount++;
        }

        /// <summary>
        /// Computes the (unnormalized) log-score of the text belonging to the spam class.
        /// </summary>
        /// <param name="s">Text to score.</param>
        /// <returns>
        /// Log prior of the class plus the sum of smoothed log word likelihoods.
        /// NaN when no documents of either class have been trained.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public Double TestToSpam(String s)
        {
            return Score(s, _spam, _spamDocCount);
        }

        /// <summary>
        /// Computes the (unnormalized) log-score of the text belonging to the non-spam class.
        /// </summary>
        /// <param name="s">Text to score.</param>
        /// <returns>
        /// Log prior of the class plus the sum of smoothed log word likelihoods.
        /// NaN when no documents of either class have been trained.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public Double TestToNotSpam(String s)
        {
            return Score(s, _nSpam, _nSpamDocCount);
        }

        /// <summary>
        /// Classifies the text and returns a human-readable report with both class probabilities.
        /// </summary>
        /// <param name="s">Text to classify.</param>
        /// <returns>
        /// The formatted report. The two values are fractions in [0, 1] that sum to 1;
        /// the '%' suffix in the text is kept as-is for backward compatibility.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public String Test(String s)
        {
            var nSpam = TestToNotSpam(s);
            var spam = TestToSpam(s);

            // Normalize in log space: exp(a) / (exp(a) + exp(b)) == 1 / (1 + exp(b - a)).
            // Exponentiating the raw scores underflows to 0 for long texts and yields 0/0 = NaN.
            var spamProc = 1.0 / (1.0 + Math.Exp(nSpam - spam));
            var nspamProc = 1.0 / (1.0 + Math.Exp(spam - nSpam));
            return String.Format("Вероятность что это спам = {0}%;\r\nВероятность что это не спам = {1}%;", spamProc, nspamProc);
        }

        /// <summary>
        /// Builds a snapshot of the current state for serialization.
        /// </summary>
        /// <returns>
        /// A <see cref="SaveInfo"/> that references the classifier's live dictionaries (no copy is made).
        /// </returns>
        public SaveInfo GetSaveObj()
        {
            return new SaveInfo
            {
                Spam = _spam,
                NotSpam = _nSpam,
                NotSpamCount = _nSpamDocCount,
                SpamCount = _spamDocCount
            };
        }

        /// <summary>Splits text into lower-cased, non-blank tokens.</summary>
        private static IEnumerable<String> Tokenize(String s)
        {
            if (s == null)
                throw new ArgumentNullException("s");

            // Invariant casing keeps tokens identical across machines/cultures,
            // which matters because the word counts can be saved and reloaded elsewhere.
            return s.ToLowerInvariant()
                    .Split(Separators)
                    .Where(x => !String.IsNullOrWhiteSpace(x));
        }

        /// <summary>Adds every token of the document to the given word-count table.</summary>
        private static void CountWords(IDictionary<String, Int32> counts, String document)
        {
            foreach (var word in Tokenize(document))
            {
                Int32 seen;
                counts.TryGetValue(word, out seen); // seen stays 0 for a new word
                counts[word] = seen + 1;
            }
        }

        /// <summary>Number of distinct words known across both classes.</summary>
        private Int32 VocabularySize()
        {
            return _spam.Count + _nSpam.Keys.Count(word => !_spam.ContainsKey(word));
        }

        /// <summary>
        /// Log prior of the class plus the sum over tokens of
        /// log((occurrences in class + 1) / (vocabulary size + total words in class)).
        /// </summary>
        private Double Score(String s, IDictionary<String, Int32> classCounts, Int32 classDocCount)
        {
            var words = Tokenize(s);

            var totalDocs = (Double)_spamDocCount + _nSpamDocCount;
            var rate = Math.Log(classDocCount / totalDocs);

            // Laplace smoothing denominator: |V| + total number of word occurrences in the class.
            // Using the count of distinct class words here would let a single ratio exceed 1.
            var denominator = VocabularySize() + classCounts.Values.Sum(count => (Double)count);
            if (denominator <= 0)
            {
                // No words have been learned at all, so there is no word evidence to add.
                return rate;
            }

            foreach (var word in words)
            {
                Int32 seen;
                classCounts.TryGetValue(word, out seen);
                rate += Math.Log((seen + 1.0) / denominator);
            }
            return rate;
        }
    }
}