refactored hashing task

Signed-off-by: Trial97 <alexandru.tripon97@gmail.com>
This commit is contained in:
Trial97
2024-06-18 16:51:26 +03:00
parent 3f68b68691
commit 766ddc80e3
18 changed files with 178 additions and 225 deletions

View File

@ -8,14 +8,14 @@
#include "MurmurHash2.h"
//-----------------------------------------------------------------------------
namespace Murmur2 {
// Mixing constants 'm' and 'r' used by the hash. They were generated offline;
// there is nothing 'magic' about them, they simply happen to mix well.
constexpr uint32_t m = 0x5bd1e995;
constexpr int r = 24;
uint32_t MurmurHash2(std::ifstream&& file_stream, std::size_t buffer_size, std::function<bool(char)> filter_out)
uint32_t hash(Reader* file_stream, std::size_t buffer_size, std::function<bool(char)> filter_out)
{
auto* buffer = new char[buffer_size];
char data[4];
@ -26,24 +26,21 @@ uint32_t MurmurHash2(std::ifstream&& file_stream, std::size_t buffer_size, std::
// We need the size without the filtered out characters before actually calculating the hash,
// to setup the initial value for the hash.
do {
file_stream.read(buffer, buffer_size);
read = file_stream.gcount();
read = file_stream->read(buffer, buffer_size);
for (int i = 0; i < read; i++) {
if (!filter_out(buffer[i]))
size += 1;
}
} while (!file_stream.eof());
} while (!file_stream->eof());
file_stream.clear();
file_stream.seekg(0, file_stream.beg);
file_stream->goToBegining();
int index = 0;
// This forces a seed of 1.
IncrementalHashInfo info{ (uint32_t)1 ^ size, (uint32_t)size };
do {
file_stream.read(buffer, buffer_size);
read = file_stream.gcount();
read = file_stream->read(buffer, buffer_size);
for (int i = 0; i < read; i++) {
char c = buffer[i];
@ -57,14 +54,13 @@ uint32_t MurmurHash2(std::ifstream&& file_stream, std::size_t buffer_size, std::
if (index == 0)
FourBytes_MurmurHash2(reinterpret_cast<unsigned char*>(&data), info);
}
} while (!file_stream.eof());
} while (!file_stream->eof());
// Do one last bit shuffle in the hash
FourBytes_MurmurHash2(reinterpret_cast<unsigned char*>(&data), info);
delete[] buffer;
file_stream.close();
return info.h;
}
@ -109,4 +105,4 @@ void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev)
}
}
//-----------------------------------------------------------------------------
} // namespace Murmur2

View File

@ -9,17 +9,23 @@
#pragma once
#include <cstdint>
#include <fstream>
#include <functional>
//-----------------------------------------------------------------------------
namespace Murmur2 {
// Byte-size units used for buffer-size defaults.
// The MiB replacement list is parenthesized so the macro always behaves as a
// single operand: without the parentheses, expressions such as `x / MiB` or
// `x % MiB` would expand to `x / 1024 * 1024` / `x % 1024 * 1024` and silently
// compute the wrong value.
#define KiB 1024
#define MiB (1024 * KiB)
uint32_t MurmurHash2(
std::ifstream&& file_stream,
// Abstract byte source consumed by hash(). It lets the hashing routine work on
// any input an implementer can expose through these three operations instead
// of being tied to a std::ifstream.
class Reader {
public:
// Virtual so implementations can be destroyed through a Reader pointer.
virtual ~Reader() = default;
// Fills `s` with up to `n` characters. hash() treats the return value as the
// number of characters of `s` it should process for this call.
virtual int read(char* s, int n) = 0;
// hash() keeps calling read() until this returns true.
virtual bool eof() = 0;
// Called by hash() between its two passes over the input.
// NOTE(review): presumably rewinds the source to its first byte and clears any
// end-of-input state (it replaced clear() + seekg(0)) - confirm in implementers.
virtual void goToBegining() = 0;
};
// Computes a 32-bit hash of the data provided by `file_stream`.
//
// The definition walks the input twice: a first pass counts the characters for
// which `filter_out` returns false (that count initialises the hash state), then
// the reader is sent back to the beginning and a second pass computes the hash.
//
// file_stream  source of the data; it is read in chunks and rewound once.
// buffer_size  size in bytes of the temporary chunk buffer (default 4 MiB).
// filter_out   predicate evaluated per character; characters for which it
//              returns true are not counted in the size. The default keeps
//              every character.
//              NOTE(review): presumably filtered characters are also skipped
//              while hashing - confirm against the definition.
// Returns the final hash value.
uint32_t hash(
Reader* file_stream,
std::size_t buffer_size = 4 * MiB,
std::function<bool(char)> filter_out = [](char) { return false; });
@ -29,5 +35,4 @@ struct IncrementalHashInfo {
};
void FourBytes_MurmurHash2(const unsigned char* data, IncrementalHashInfo& prev);
//-----------------------------------------------------------------------------
} // namespace Murmur2