Proof of Concept
Note: This PoC uses internal HNSW APIs that are exercised by Redis modules and may require building against the same Redis source tree. It demonstrates a concurrent insert scenario to illustrate the race, not a remote-exploit chain.
Prerequisites:
- A Redis build with the HNSW-based vector/search path enabled (as in 8.x, with vset/w2v usage).
- A multi-threaded client or test harness that can run two threads performing HNSW inserts against the same index concurrently.
- Shared HNSW index object and enough vectors to insert.
PoC outline (C-like pseudocode):
// Thread function for concurrent CAS-style insert
void *race_insert(void *arg) {
struct thread_params *p = (struct thread_params*)arg;
HNSW *index = p->index;
float vec[DIM] = { /* fill with p->vec */ };
uint64_t id = p->id; // unique per thread
const void *value = p->value; // thread-local value pointer
// Prepare insertion without acquiring global lock
InsertContext *ic = hnsw_prepare_insert(index, vec, NULL, 0, id, /* ef */ p->ef);
// Attempt CAS commit with its own value
hnswNode *node = hnsw_try_commit_insert(index, ic, (void*)value);
if (node == NULL) {
// CAS failed, fall back to full-lock path
hnsw_insert(index, vec, NULL, 0, id, (void*)value, p->ef);
}
return NULL;
}
// Setup and run two threads racing on two different items
// Thread A: id=1, vecA, valueA
// Thread B: id=2, vecB, valueB
// Spawn threads nearly simultaneously and wait for completion
Expected outcome (vulnerability):
- In the pre-fix code path, due to the value being assigned during the nolock prepare phase, a race could allow a thread to observe/modify node->value inconsistently between prepare and commit when two inserts happen concurrently.
- With the fix, value is associated with the node only during the commit path, reducing or eliminating the window where concurrent CAS attempts can race on node->value.
Note: In practice you would instrument after running both threads to verify that each inserted node carries its own value pointer (valueA/valueB) and that no data races can overwrite the intended value. If you can observe two distinct nodes with swapped or corrupted values under heavy concurrency in older builds, this would demonstrate the race that the patch fixes.
Code Diff
diff --git a/hnsw.c b/hnsw.c
index e0ea97cd508..a9a2695ad43 100644
--- a/hnsw.c
+++ b/hnsw.c
@@ -375,7 +375,7 @@ void hnsw_normalize_vector(float *x, float *l2ptr, uint32_t dim) {
}
/* Helper function to generate random level. */
-uint32_t random_level() {
+uint32_t random_level(void) {
static const int threshold = HNSW_P * RAND_MAX;
uint32_t level = 0;
@@ -1656,7 +1656,7 @@ struct InsertContext {
* See hnsw_node_new() for information about 'vector' and 'qvector'
* arguments, and which one to pass. */
InsertContext *hnsw_prepare_insert_nolock(HNSW *index, const float *vector,
- const int8_t *qvector, float qrange, uint64_t id, void *value,
+ const int8_t *qvector, float qrange, uint64_t id,
int slot, int ef)
{
InsertContext *ctx = hmalloc(sizeof(*ctx));
@@ -1673,7 +1673,6 @@ InsertContext *hnsw_prepare_insert_nolock(HNSW *index, const float *vector,
hfree(ctx);
return NULL;
}
- ctx->node->value = value;
hnswNode *curr_ep = index->enter_point;
@@ -1707,12 +1706,12 @@ InsertContext *hnsw_prepare_insert_nolock(HNSW *index, const float *vector,
/* External API for hnsw_prepare_insert_nolock(), handling locking. */
InsertContext *hnsw_prepare_insert(HNSW *index, const float *vector,
- const int8_t *qvector, float qrange, uint64_t id, void *value,
+ const int8_t *qvector, float qrange, uint64_t id,
int ef)
{
InsertContext *ctx;
int slot = hnsw_acquire_read_slot(index);
- ctx = hnsw_prepare_insert_nolock(index,vector,qvector,qrange,id,value,slot,ef);
+ ctx = hnsw_prepare_insert_nolock(index,vector,qvector,qrange,id,slot,ef);
hnsw_release_read_slot(index,slot);
return ctx;
}
@@ -1738,8 +1737,9 @@ void hnsw_free_insert_context(InsertContext *ctx) {
* just inserted node. Out of memory is not possible since no critical
* allocation is never performed in this code path: we populate links
* on already allocated nodes. */
-hnswNode *hnsw_commit_insert_nolock(HNSW *index, InsertContext *ctx) {
+hnswNode *hnsw_commit_insert_nolock(HNSW *index, InsertContext *ctx, void *value) {
hnswNode *node = ctx->node;
+ node->value = value;
/* Handle first node case. */
if (index->enter_point == NULL) {
@@ -1798,7 +1798,7 @@ hnswNode *hnsw_commit_insert_nolock(HNSW *index, InsertContext *ctx) {
* index and return its pointer. Otherwise NULL is returned and the operation
* should be either performed with the blocking API hnsw_insert() or attempted
* again. */
-hnswNode *hnsw_try_commit_insert(HNSW *index, InsertContext *ctx) {
+hnswNode *hnsw_try_commit_insert(HNSW *index, InsertContext *ctx, void *value) {
/* Check if the version changed since preparation. Note that we
* should access index->version under the write lock in order to
* be sure we can safely commit the write: this is just a fast-path
@@ -1824,7 +1824,7 @@ hnswNode *hnsw_try_commit_insert(HNSW *index, InsertContext *ctx) {
/* Commit the change: note that it's up to hnsw_commit_insert_nolock()
* to free the insertion context. */
- hnswNode *node = hnsw_commit_insert_nolock(index, ctx);
+ hnswNode *node = hnsw_commit_insert_nolock(index, ctx, value);
/* Release the write lock. */
pthread_rwlock_unlock(&index->global_lock);
@@ -1848,14 +1848,14 @@ hnswNode *hnsw_insert(HNSW *index, const float *vector, const int8_t *qvector, f
// Prepare the insertion - note we pass slot 0 since we're single threaded.
InsertContext *ctx = hnsw_prepare_insert_nolock(index, vector, qvector,
- qrange, id, value, 0, ef);
+ qrange, id, 0, ef);
if (!ctx) {
pthread_rwlock_unlock(&index->global_lock);
return NULL;
}
// Commit the prepared insertion without version checking.
- hnswNode *node = hnsw_commit_insert_nolock(index, ctx);
+ hnswNode *node = hnsw_commit_insert_nolock(index, ctx, value);
// Release write lock and return our node pointer.
pthread_rwlock_unlock(&index->global_lock);
diff --git a/hnsw.h b/hnsw.h
index deaa67126b8..877302e5067 100644
--- a/hnsw.h
+++ b/hnsw.h
@@ -144,8 +144,8 @@ int hnsw_acquire_read_slot(HNSW *index);
void hnsw_release_read_slot(HNSW *index, int slot);
/* Optimistic insertion API. */
-InsertContext *hnsw_prepare_insert(HNSW *index, const float *vector, const int8_t *qvector, float qrange, uint64_t id, void *value, int ef);
-hnswNode *hnsw_try_commit_insert(HNSW *index, InsertContext *ctx);
+InsertContext *hnsw_prepare_insert(HNSW *index, const float *vector, const int8_t *qvector, float qrange, uint64_t id, int ef);
+hnswNode *hnsw_try_commit_insert(HNSW *index, InsertContext *ctx, void *value);
void hnsw_free_insert_context(InsertContext *ctx);
/* Serialization. */
diff --git a/vset.c b/vset.c
index af1026ae982..1297e809475 100644
--- a/vset.c
+++ b/vset.c
@@ -339,7 +339,7 @@ float *parseVector(RedisModuleString **argv, int argc, int start_idx,
/* Now parse the vector format as before. */
float *vec = NULL;
- char *vec_format = RedisModule_StringPtrLen(argv[start_idx],NULL);
+ const char *vec_format = RedisModule_StringPtrLen(argv[start_idx],NULL);
if (!strcasecmp(vec_format,"FP32")) {
if (argc < start_idx + 2) return NULL; // Need FP32 + vector + value.
@@ -404,7 +404,7 @@ void *VADD_thread(void *arg) {
int ef = (uint64_t)targ[6];
/* Look for candidates... */
- InsertContext *ic = hnsw_prepare_insert(vset->hnsw, vec, NULL, 0, 0, NULL, ef);
+ InsertContext *ic = hnsw_prepare_insert(vset->hnsw, vec, NULL, 0, 0, ef);
targ[5] = ic; // Pass the context to the reply callback.
/* Unblock the client so that our read reply will be invoked. */
@@ -473,15 +473,13 @@ int VADD_CASReply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
* locking failure (likely impossible in practical terms). */
hnswNode *newnode;
if (ic == NULL ||
- (newnode = hnsw_try_commit_insert(vset->hnsw, ic)) == NULL)
+ (newnode = hnsw_try_commit_insert(vset->hnsw, ic, nv)) == NULL)
{
/* If we are here, the CAS insert failed. We need to insert
* again with full locking for neighbors selection and
* actual insertion. This time we can't fail: */
newnode = hnsw_insert(vset->hnsw, vec, NULL, 0, 0, nv, ef);
RedisModule_Assert(newnode != NULL);
- } else {
- newnode->value = nv;
}
RedisModule_DictSet(vset->dict,val,newnode);
val = NULL; // Don't free it later.
diff --git a/w2v.c b/w2v.c
index a8b21dcf489..8d7614d2e22 100644
--- a/w2v.c
+++ b/w2v.c
@@ -344,8 +344,8 @@ void *threaded_insert(void *ctxptr) {
// applies the check if the graph wasn't modified.
InsertContext *ic;
uint64_t next_id = ctx->id++;
- ic = hnsw_prepare_insert(ctx->index, v, NULL, 0, next_id, word, 200);
- if (hnsw_try_commit_insert(ctx->index, ic) == NULL) {
+ ic = hnsw_prepare_insert(ctx->index, v, NULL, 0, next_id, 200);
+ if (hnsw_try_commit_insert(ctx->index, ic, word) == NULL) {
// This time try locking since the start.
hnsw_insert(ctx->index, v, NULL, 0, next_id, word, 200);
}