diff --git a/marklogic-client-api/src/main/java/com/marklogic/client/expression/VecExpr.java b/marklogic-client-api/src/main/java/com/marklogic/client/expression/VecExpr.java index 7210bcc30..87753efb5 100644 --- a/marklogic-client-api/src/main/java/com/marklogic/client/expression/VecExpr.java +++ b/marklogic-client-api/src/main/java/com/marklogic/client/expression/VecExpr.java @@ -57,30 +57,26 @@ public interface VecExpr { */ public ServerExpression base64Encode(ServerExpression vector1); - /** - * Returns the cosine similarity between two vectors. The vectors must be of the same dimension. - * - * - * - *
- * Provides a client interface to the vec:cosine server function. - * - * @param vector1 The vector from which to calculate the cosine similarity with vector2. (of vec:vector) - * @param vector2 The vector from which to calculate the cosine similarity with vector1. (of vec:vector) - * @return a server expression with the xs:double server data type - * @since 7.2.0 - */ - public ServerExpression cosine(ServerExpression vector1, ServerExpression vector2); +/** + * Returns the cosine of the angle between two vectors. The vectors must be of the same dimension. + *
+ * Provides a client interface to the vec:cosine server function. + * @param vector1 The vector from which to calculate the cosine with vector2. (of vec:vector) + * @param vector2 The vector from which to calculate the cosine with vector1. (of vec:vector) + * @return a server expression with the xs:double server data type + * @since 7.2.0 + */ + public ServerExpression cosine(ServerExpression vector1, ServerExpression vector2); - /** - * Return the distance between two vectors. The vectors must be of the same dimension. - * - * @param vector1 The vector from which to calculate the cosine distance with vector2. (of vec:vector) - * @param vector2 The vector from which to calculate the cosine distance with vector1. (of vec:vector) - * @return a server expression with the xs:double server data type - * @since 7.2.0 - */ - public ServerExpression cosineDistance(ServerExpression vector1, ServerExpression vector2); +/** + * Returns the cosine distance between two vectors. The vectors must be of the same dimension. + * + * @param vector1 The vector from which to calculate the cosine distance with vector2. (of vec:vector) + * @param vector2 The vector from which to calculate the cosine distance with vector1. (of vec:vector) + * @return a server expression with the xs:double server data type + * @since 7.2.0 + */ + public ServerExpression cosineDistance(ServerExpression vector1, ServerExpression vector2); /** * Returns the dimension of the vector passed in. @@ -246,44 +242,63 @@ public interface VecExpr { */ public ServerExpression vector(ServerExpression values); /** - * A helper function that returns a hybrid score using a cts score and a vector similarity calculation result. You can tune the effect of the vector similarity on the score using the similarityWeight option. The ideal value for similarityWeight depends on your application. + * A helper function that returns a hybrid score using a cts score and a vector distance calculation result. 
You can tune the effect of the vector distance on the score using the distanceWeight option. The ideal value for distanceWeight depends on your application. The hybrid score is calculated using the formula: score = weight * annScore + (1 - weight) * ctsScore. - annScore is derived from the distance and distanceWeight, where a larger distanceWeight reduces the annScore for the same distance. - weight determines the contribution of the annScore and ctsScore to the final score. A weight of 0.5 balances both equally. This formula allows you to combine traditional cts scoring with vector-based distance scoring, providing a flexible way to rank results. *
* Provides a client interface to the vec:vector-score server function. * @param score The cts:score of the matching document. (of xs:unsignedInt) - * @param similarity The similarity between the vector in the matching document and the query vector. The result of a call to ovec:cosine(). In the case that the vectors are normalized, pass ovec:dot-product(). Note that vec:euclidean-distance() should not be used here. (of xs:double) + * @param distance The distance between the vector in the matching document and the query vector. Examples, the result of a call to ovec:cosine-distance() or ovec:euclidean-distance(). (of xs:double) * @return a server expression with the xs:unsignedLong server data type */ - public ServerExpression vectorScore(ServerExpression score, double similarity); + public ServerExpression vectorScore(ServerExpression score, double distance); /** - * A helper function that returns a hybrid score using a cts score and a vector similarity calculation result. You can tune the effect of the vector similarity on the score using the similarityWeight option. The ideal value for similarityWeight depends on your application. - * - * - + * A helper function that returns a hybrid score using a cts score and a vector distance calculation result. You can tune the effect of the vector distance on the score using the distanceWeight option. The ideal value for distanceWeight depends on your application. The hybrid score is calculated using the formula: score = weight * annScore + (1 - weight) * ctsScore. - annScore is derived from the distance and distanceWeight, where a larger distanceWeight reduces the annScore for the same distance. - weight determines the contribution of the annScore and ctsScore to the final score. A weight of 0.5 balances both equally. This formula allows you to combine traditional cts scoring with vector-based distance scoring, providing a flexible way to rank results. + *
+ * Provides a client interface to the vec:vector-score server function. + * @param score The cts:score of the matching document. (of xs:unsignedInt) + * @param distance The distance between the vector in the matching document and the query vector. Examples, the result of a call to ovec:cosine-distance() or ovec:euclidean-distance(). (of xs:double) + * @return a server expression with the xs:unsignedLong server data type + */ + public ServerExpression vectorScore(ServerExpression score, ServerExpression distance); +/** + * A helper function that returns a hybrid score using a cts score and a vector distance calculation result. You can tune the effect of the vector distance on the score using the distanceWeight option. The ideal value for distanceWeight depends on your application. The hybrid score is calculated using the formula: score = weight * annScore + (1 - weight) * ctsScore. - annScore is derived from the distance and distanceWeight, where a larger distanceWeight reduces the annScore for the same distance. - weight determines the contribution of the annScore and ctsScore to the final score. A weight of 0.5 balances both equally. This formula allows you to combine traditional cts scoring with vector-based distance scoring, providing a flexible way to rank results. + *
+ * Provides a client interface to the vec:vector-score server function. + * @param score The cts:score of the matching document. (of xs:unsignedInt) + * @param distance The distance between the vector in the matching document and the query vector. Examples, the result of a call to ovec:cosine-distance() or ovec:euclidean-distance(). (of xs:double) + * @param distanceWeight The weight of the vector distance on the annScore. This value is a positive coefficient that scales the distance. A larger distanceWeight produces a lower annScore for the same distance. The default value is 1. (of xs:double) + * @return a server expression with the xs:unsignedLong server data type + */ + public ServerExpression vectorScore(ServerExpression score, double distance, double distanceWeight); +/** + * A helper function that returns a hybrid score using a cts score and a vector distance calculation result. You can tune the effect of the vector distance on the score using the distanceWeight option. The ideal value for distanceWeight depends on your application. The hybrid score is calculated using the formula: score = weight * annScore + (1 - weight) * ctsScore. - annScore is derived from the distance and distanceWeight, where a larger distanceWeight reduces the annScore for the same distance. - weight determines the contribution of the annScore and ctsScore to the final score. A weight of 0.5 balances both equally. This formula allows you to combine traditional cts scoring with vector-based distance scoring, providing a flexible way to rank results. *
* Provides a client interface to the vec:vector-score server function. * @param score The cts:score of the matching document. (of xs:unsignedInt) - * @param similarity The similarity between the vector in the matching document and the query vector. The result of a call to ovec:cosine(). In the case that the vectors are normalized, pass ovec:dot-product(). Note that vec:euclidean-distance() should not be used here. (of xs:double) + * @param distance The distance between the vector in the matching document and the query vector. Examples, the result of a call to ovec:cosine-distance() or ovec:euclidean-distance(). (of xs:double) + * @param distanceWeight The weight of the vector distance on the annScore. This value is a positive coefficient that scales the distance. A larger distanceWeight produces a lower annScore for the same distance. The default value is 1. (of xs:double) * @return a server expression with the xs:unsignedLong server data type */ - public ServerExpression vectorScore(ServerExpression score, ServerExpression similarity); + public ServerExpression vectorScore(ServerExpression score, ServerExpression distance, ServerExpression distanceWeight); /** - * A helper function that returns a hybrid score using a cts score and a vector similarity calculation result. You can tune the effect of the vector similarity on the score using the similarityWeight option. The ideal value for similarityWeight depends on your application. + * A helper function that returns a hybrid score using a cts score and a vector distance calculation result. You can tune the effect of the vector distance on the score using the distanceWeight option. The ideal value for distanceWeight depends on your application. The hybrid score is calculated using the formula: score = weight * annScore + (1 - weight) * ctsScore. - annScore is derived from the distance and distanceWeight, where a larger distanceWeight reduces the annScore for the same distance. 
- weight determines the contribution of the annScore and ctsScore to the final score. A weight of 0.5 balances both equally. This formula allows you to combine traditional cts scoring with vector-based distance scoring, providing a flexible way to rank results. *
* Provides a client interface to the vec:vector-score server function. * @param score The cts:score of the matching document. (of xs:unsignedInt) - * @param similarity The similarity between the vector in the matching document and the query vector. The result of a call to ovec:cosine(). In the case that the vectors are normalized, pass ovec:dot-product(). Note that vec:euclidean-distance() should not be used here. (of xs:double) - * @param similarityWeight The weight of the vector similarity on the score. The default value is 0.1. If 0.0 is passed in, vector similarity has no effect. If passed a value less than 0.0 or greater than 1.0, throw VEC-VECTORSCORE. (of xs:double) + * @param distance The distance between the vector in the matching document and the query vector. Examples, the result of a call to ovec:cosine-distance() or ovec:euclidean-distance(). (of xs:double) + * @param distanceWeight The weight of the vector distance on the annScore. This value is a positive coefficient that scales the distance. A larger distanceWeight produces a lower annScore for the same distance. The default value is 1. (of xs:double) + * @param weight The weight of the annScore in the final hybrid score. This value is a coefficient between 0 and 1, where 0 gives full weight to the cts score and 1 gives full weight to the annScore. The default value is 0.5. (of xs:double) * @return a server expression with the xs:unsignedLong server data type */ - public ServerExpression vectorScore(ServerExpression score, double similarity, double similarityWeight); + public ServerExpression vectorScore(ServerExpression score, double distance, double distanceWeight, double weight); /** - * A helper function that returns a hybrid score using a cts score and a vector similarity calculation result. You can tune the effect of the vector similarity on the score using the similarityWeight option. The ideal value for similarityWeight depends on your application. 
+ * A helper function that returns a hybrid score using a cts score and a vector distance calculation result. You can tune the effect of the vector distance on the score using the distanceWeight option. The ideal value for distanceWeight depends on your application. The hybrid score is calculated using the formula: score = weight * annScore + (1 - weight) * ctsScore. - annScore is derived from the distance and distanceWeight, where a larger distanceWeight reduces the annScore for the same distance. - weight determines the contribution of the annScore and ctsScore to the final score. A weight of 0.5 balances both equally. This formula allows you to combine traditional cts scoring with vector-based distance scoring, providing a flexible way to rank results. *
* Provides a client interface to the vec:vector-score server function.
* @param score The cts:score of the matching document. (of xs:unsignedInt)
- * @param similarity The similarity between the vector in the matching document and the query vector. The result of a call to ovec:cosine(). In the case that the vectors are normalized, pass ovec:dot-product(). Note that vec:euclidean-distance() should not be used here. (of xs:double)
- * @param similarityWeight The weight of the vector similarity on the score. The default value is 0.1. If 0.0 is passed in, vector similarity has no effect. If passed a value less than 0.0 or greater than 1.0, throw VEC-VECTORSCORE. (of xs:double)
+ * @param distance The distance between the vector in the matching document and the query vector. For example, the result of a call to ovec:cosine-distance() or ovec:euclidean-distance(). (of xs:double)
+ * @param distanceWeight The weight of the vector distance on the annScore. This value is a positive coefficient that scales the distance. A larger distanceWeight produces a lower annScore for the same distance. The default value is 1. (of xs:double)
+ * @param weight The weight of the annScore in the final hybrid score. This value is a coefficient between 0 and 1, where 0 gives full weight to the cts score and 1 gives full weight to the annScore. The default value is 0.5. (of xs:double)
* @return a server expression with the xs:unsignedLong server data type
*/
- public ServerExpression vectorScore(ServerExpression score, ServerExpression similarity, ServerExpression similarityWeight);
+ public ServerExpression vectorScore(ServerExpression score, ServerExpression distance, ServerExpression distanceWeight, ServerExpression weight);
}
diff --git a/marklogic-client-api/src/main/java/com/marklogic/client/impl/VecExprImpl.java b/marklogic-client-api/src/main/java/com/marklogic/client/impl/VecExprImpl.java
index 5a8ac10d7..79e901872 100644
--- a/marklogic-client-api/src/main/java/com/marklogic/client/impl/VecExprImpl.java
+++ b/marklogic-client-api/src/main/java/com/marklogic/client/impl/VecExprImpl.java
@@ -148,26 +148,38 @@ public ServerExpression vector(ServerExpression values) {
@Override
- public ServerExpression vectorScore(ServerExpression score, double similarity) {
- return vectorScore(score, xs.doubleVal(similarity));
+ public ServerExpression vectorScore(ServerExpression score, double distance) {
+ return vectorScore(score, xs.doubleVal(distance));
}
@Override
- public ServerExpression vectorScore(ServerExpression score, ServerExpression similarity) {
- return new XsExprImpl.UnsignedLongCallImpl("vec", "vector-score", new Object[]{ score, similarity });
+ public ServerExpression vectorScore(ServerExpression score, ServerExpression distance) {
+ return new XsExprImpl.UnsignedLongCallImpl("vec", "vector-score", new Object[]{ score, distance });
}
@Override
- public ServerExpression vectorScore(ServerExpression score, double similarity, double similarityWeight) {
- return vectorScore(score, xs.doubleVal(similarity), xs.doubleVal(similarityWeight));
+ public ServerExpression vectorScore(ServerExpression score, double distance, double distanceWeight) {
+ return vectorScore(score, xs.doubleVal(distance), xs.doubleVal(distanceWeight));
}
@Override
- public ServerExpression vectorScore(ServerExpression score, ServerExpression similarity, ServerExpression similarityWeight) {
- return new XsExprImpl.UnsignedLongCallImpl("vec", "vector-score", new Object[]{ score, similarity, similarityWeight });
+ public ServerExpression vectorScore(ServerExpression score, ServerExpression distance, ServerExpression distanceWeight) {
+ return new XsExprImpl.UnsignedLongCallImpl("vec", "vector-score", new Object[]{ score, distance, distanceWeight });
+ }
+
+
+ @Override
+ public ServerExpression vectorScore(ServerExpression score, double distance, double distanceWeight, double weight) {
+ return vectorScore(score, xs.doubleVal(distance), xs.doubleVal(distanceWeight), xs.doubleVal(weight));
+ }
+
+
+ @Override
+ public ServerExpression vectorScore(ServerExpression score, ServerExpression distance, ServerExpression distanceWeight, ServerExpression weight) {
+ return new XsExprImpl.UnsignedLongCallImpl("vec", "vector-score", new Object[]{ score, distance, distanceWeight, weight });
}
static class VectorSeqCallImpl extends BaseTypeImpl.ServerExpressionCallImpl {
diff --git a/marklogic-client-api/src/test/java/com/marklogic/client/test/rows/VectorTest.java b/marklogic-client-api/src/test/java/com/marklogic/client/test/rows/VectorTest.java
index d41e4bfee..0f609ad04 100644
--- a/marklogic-client-api/src/test/java/com/marklogic/client/test/rows/VectorTest.java
+++ b/marklogic-client-api/src/test/java/com/marklogic/client/test/rows/VectorTest.java
@@ -45,6 +45,7 @@ void vectorFunctionsHappyPath() {
.limit(1)
.bind(op.as("sampleVector", op.vec.vector(sampleVector)))
.bind(op.as("cosine", op.vec.cosine(op.col("embedding"), op.col("sampleVector"))))
+ .bind(op.as("cosineDistanceEmbedding", op.vec.cosineDistance(op.col("embedding"), op.col("sampleVector"))))
.bind(op.as("cosineDistance", op.vec.cosineDistance(op.col("sampleVector"), op.col("sampleVector"))))
.bind(op.as("dotProduct", op.vec.dotProduct(op.col("embedding"), op.col("sampleVector"))))
.bind(op.as("euclideanDistance", op.vec.euclideanDistance(op.col("embedding"), op.col("sampleVector"))))
@@ -67,9 +68,16 @@ void vectorFunctionsHappyPath() {
assertEquals(1, rows.size());
RowRecord row = rows.get(0);
- // Simple sanity checks to verify that the functions ran.
+ // Simple sanity checks to verify that the functions ran and produce reasonable values.
double cosine = row.getDouble("cosine");
- assertTrue((cosine > 0) && (cosine < 1), "Unexpected value: " + cosine);
+ assertTrue((cosine >= -1) && (cosine <= 1), "Cosine must be between -1 and 1, got: " + cosine);
+
+ double cosineDistanceEmbedding = row.getDouble("cosineDistanceEmbedding");
+ assertTrue(cosineDistanceEmbedding >= 0 && cosineDistanceEmbedding <= 2, "Cosine distance must be between 0 and 2, got: " + cosineDistanceEmbedding);
+
+ // The identity (cosine distance = 1 - cosine) should hold to within a small delta; exact equality is not required because floating-point math is inexact.
+ assertEquals(1 - cosine, cosineDistanceEmbedding, 0.0001, "Cosine distance should be 1 - cosine");
+
double dotProduct = row.getDouble("dotProduct");
Assertions.assertTrue(dotProduct > 0, "Unexpected value: " + dotProduct);
double euclideanDistance = row.getDouble("euclideanDistance");
@@ -254,4 +262,75 @@ void trunc() {
assertEquals(2.12, trunc2.get(1).asDouble(), 0.001, "Second element should be truncated to 2.12");
assertEquals(3.12, trunc2.get(2).asDouble(), 0.001, "Third element should be truncated to 3.12");
}
+
+ @Test
+ void vectorScoreWithWeight_primitive() {
+ // Test vec.vectorScore with all 4 parameters using primitive doubles
+ PlanBuilder.ModifyPlan plan = op.fromView("vectors", "persons")
+ .limit(1)
+ .bind(op.as("vectorScore1", op.vec.vectorScore(op.xs.unsignedInt(100), 0.3, 0.5, 0.5)))
+ .bind(op.as("vectorScore2", op.vec.vectorScore(op.xs.unsignedInt(100), 0.3, 0.8, 0.7)))
+ .bind(op.as("vectorScore3", op.vec.vectorScore(op.xs.unsignedInt(100), 0.3, 0.5, 0.3)));
+
+ List