diff --git a/src/misc.cpp b/src/misc.cpp
index 484d0b21..0bae9f1e 100644
--- a/src/misc.cpp
+++ b/src/misc.cpp
@@ -47,6 +47,11 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
 #include <sstream>
 #include <vector>
 
+#ifdef __linux__
+#include <stdlib.h>
+#include <sys/mman.h>
+#endif
+
 #include "misc.h"
 #include "thread.h"
 
@@ -190,7 +195,7 @@ const std::string compiler_info() {
      compiler += "(unknown version)";
   #endif
 
-  #if defined(__APPLE__) 
+  #if defined(__APPLE__)
      compiler += " on Apple";
   #elif defined(__CYGWIN__)
      compiler += " on Cygwin";
@@ -288,6 +293,35 @@ void prefetch(void* addr) {
 
 #endif
 
+
+/// aligned_ttmem_alloc will return suitably aligned memory, and if possible use large pages.
+/// The returned pointer is the aligned one, while the mem argument is the one that needs to be passed to free.
+/// With c++17 some of this functionality can be simplified.
+#ifdef __linux__
+
+void* aligned_ttmem_alloc(size_t allocSize, void** mem) {
+
+  constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
+  size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
+  *mem = aligned_alloc(alignment, size);
+  madvise(*mem, allocSize, MADV_HUGEPAGE);
+  return *mem;
+}
+
+#else
+
+void* aligned_ttmem_alloc(size_t allocSize, void** mem) {
+
+  constexpr size_t alignment = 64; // assumed cache line size
+  size_t size = allocSize + alignment - 1; // allocate some extra space
+  *mem = malloc(size);
+  void* ret = reinterpret_cast<void*>((uintptr_t(*mem) + alignment - 1) & ~uintptr_t(alignment - 1));
+  return ret;
+}
+
+#endif
+
+
 namespace WinProcGroup {
 
 #ifndef _WIN32
diff --git a/src/misc.h b/src/misc.h
index b11c5aa8..45d9951a 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -33,6 +33,7 @@ const std::string engine_info(bool to_uci = false);
 const std::string compiler_info();
 void prefetch(void* addr);
 void start_logger(const std::string& fname);
+void* aligned_ttmem_alloc(size_t size, void** mem);
 
 void dbg_hit_on(bool b);
 void dbg_hit_on(bool c, bool b);
diff --git a/src/tt.cpp b/src/tt.cpp
index 0b4a59de..080d3a6b 100644
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -63,11 +63,10 @@ void TranspositionTable::resize(size_t mbSize) {
 
   Threads.main()->wait_for_search_finished();
 
-  clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
-
   free(mem);
-  mem = malloc(clusterCount * sizeof(Cluster) + CacheLineSize - 1);
 
+  clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
+  table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), &mem));
   if (!mem)
   {
       std::cerr << "Failed to allocate " << mbSize
@@ -75,7 +74,6 @@ void TranspositionTable::resize(size_t mbSize) {
       exit(EXIT_FAILURE);
   }
 
-  table = (Cluster*)((uintptr_t(mem) + CacheLineSize - 1) & ~(CacheLineSize - 1));
   clear();
 }
 
diff --git a/src/tt.h b/src/tt.h
index 98b054d3..142afd90 100644
--- a/src/tt.h
+++ b/src/tt.h
@@ -57,24 +57,22 @@ private:
 };
 
 
-/// A TranspositionTable consists of a power of 2 number of clusters and each
-/// cluster consists of ClusterSize number of TTEntry. Each non-empty entry
-/// contains information of exactly one position. The size of a cluster should
-/// divide the size of a cache line size, to ensure that clusters never cross
-/// cache lines. This ensures best cache performance, as the cacheline is
-/// prefetched, as soon as possible.
+/// A TranspositionTable is an array of Cluster, of size clusterCount. Each
+/// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry
+/// contains information on exactly one position. The size of a Cluster should
+/// divide the size of a cache line for best performance,
+/// as the cacheline is prefetched when possible.
 
 class TranspositionTable {
 
-  static constexpr int CacheLineSize = 64;
   static constexpr int ClusterSize = 3;
 
   struct Cluster {
     TTEntry entry[ClusterSize];
-    char padding[2]; // Align to a divisor of the cache line size
+    char padding[2]; // Pad to 32 bytes
   };
 
-  static_assert(CacheLineSize % sizeof(Cluster) == 0, "Cluster size incorrect");
+  static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");
 
 public:
  ~TranspositionTable() { free(mem); }