You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

154 lines
4.3 KiB
C#

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.IO;
using System.Runtime.Serialization.Formatters.Binary;
using System.Security.Cryptography;
namespace DataStructures.BloomFilterSpace
{
[Serializable]
public abstract class BloomFilter<T>
{
private readonly BitArray bits;
private readonly int bitsPerElement;
private readonly int numberOfHashFunctions;
private readonly string[] algorithmNames =
{
"SHA",
"MD5",
"SHA1",
"SHA256",
"SHA384",
};
public int BitsPerElement
{
get { return bitsPerElement; }
}
public int Count { get; private set; }
public int NumberOfHashes
{
get { return algorithmNames.Length; }
}
public double FalsePositiveProbability
{
// (1 - e^(-k * n / m)) ^ k
get
{
return Math.Pow((1 - Math.Exp(-NumberOfHashes * (double)Count
/ (double)bits.Length)), NumberOfHashes);
}
}
public abstract object SyncRoot { get; }
[ContractInvariantMethod]
private void ObjectInvariant()
{
Contract.Invariant(Count >= 0);
}
public BloomFilter(int bitsPerElement, int numberOfElements)
{
Contract.Requires<ArgumentOutOfRangeException>(bitsPerElement > 0);
Contract.Requires<ArgumentOutOfRangeException>(numberOfElements > 0);
this.bitsPerElement = bitsPerElement;
bits = new BitArray(bitsPerElement * numberOfElements);
Count = 0;
}
private byte[] ToBytes(T element)
{
Contract.Requires<ArgumentNullException>(element != null);
BinaryFormatter formatter = new BinaryFormatter();
MemoryStream memoryStream = new MemoryStream();
formatter.Serialize(memoryStream, element);
return memoryStream.ToArray();
}
private int[] CreateHashes(byte[] bytes)
{
int[] result = new int[numberOfHashFunctions];
int k = 0;
foreach (string algoName in algorithmNames)
{
byte[] digest;
using (HashAlgorithm algorithm = HashAlgorithm.Create(algoName))
{
digest = algorithm.ComputeHash(bytes);
}
for (int i = 0; i < digest.Length / 4 && k < numberOfHashFunctions; i++)
{
int h = 0;
for (int j = (i * 4); j < (i * 4) + 4; j++)
{
h <<= 8;
h |= ((int)digest[j]) & 0xFF;
}
result[k] = h;
k++;
}
}
return result;
}
public void Add(T element)
{
Contract.Requires<ArgumentNullException>(element != null);
int[] hashes = CreateHashes(ToBytes(element));
foreach (int hash in hashes)
{
bits.Set((int)Math.Abs(hash % bits.Length), true);
}
Count++;
}
public void AddRange(IEnumerable<T> elements)
{
Contract.Requires<ArgumentNullException>(elements != null);
foreach (var element in elements)
{
Add(element);
}
}
private bool Contains(byte[] bytes)
{
Contract.Requires<ArgumentNullException>(bytes != null);
Contract.Requires<ArgumentException>(bytes.Length > 0);
int[] hashes = CreateHashes(bytes);
foreach (int hash in hashes)
{
if (!bits.Get((int)Math.Abs(hash % bits.Length)))
{
return false;
}
}
return true;
}
[Pure]
public bool Contains(T element)
{
Contract.Requires<ArgumentNullException>(element != null);
return Contains(ToBytes(element));
}
public void Clear()
{
bits.SetAll(false);
Count = 0;
}
}
}