3030
3131#include "rum.h"
3232
33+ #include <float.h>
3334#include <math.h>
3435
3536
36- #define RumOverlapStrategy 1
37- #define RumContainsStrategy 2
38- #define RumContainedStrategy 3
39- #define RumEqualStrategy 4
40- #define RumSimilarStrategy 5
37+ #define RUM_OVERLAP_STRATEGY 1
38+ #define RUM_CONTAINS_STRATEGY 2
39+ #define RUM_CONTAINED_STRATEGY 3
40+ #define RUM_EQUAL_STRATEGY 4
41+ #define RUM_SIMILAR_STRATEGY 5
4142
4243
4344#define LINEAR_LIMIT 5
4445#define NDIM 1
4546
4647
47- #define ARRNELEMS (x ) ArrayGetNItems(ARR_NDIM(x), ARR_DIMS(x))
48- #define ARRISVOID (x ) ((x) == NULL || ARRNELEMS (x) == 0)
/*
 * ARR_NELEMS(x)  - element count of array x, as reported by ArrayGetNItems
 *					for the array's dimension count and dimension vector.
 * ARR_ISVOID(x)  - true when x is a NULL pointer or holds zero elements.
 *
 * Both must be function-like macros: there must be no whitespace between the
 * macro name and the opening parenthesis of the parameter list.  Note that
 * ARR_ISVOID evaluates its argument more than once.
 */
#define ARR_NELEMS(x)	ArrayGetNItems(ARR_NDIM(x), ARR_DIMS(x))
#define ARR_ISVOID(x)	((x) == NULL || ARR_NELEMS(x) == 0)
4950
5051#define CHECKARRVALID (x ) \
5152 do { \
5960 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \
6061 errmsg("array must not contain nulls"))); \
6162 } \
62- } while(0)
63+ } while (0)
64+
/*
 * InitDummySimpleArray(s, len)
 *
 * Fill the structure pointed to by s so that it carries only an element
 * count: nelems is set to len, nHashedElems to -1, and the elems,
 * hashedElems and info pointers are all set to NULL.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement.  The macro
 * name must be immediately followed by '(' to be a function-like macro.
 */
#define InitDummySimpleArray(s, len) \
	do { \
		(s)->elems = NULL; \
		(s)->hashedElems = NULL; \
		(s)->nelems = (len); \
		(s)->nHashedElems = -1; \
		(s)->info = NULL; \
	} while (0)
6373
6474
6575typedef struct AnyArrayTypeInfo
@@ -86,7 +96,8 @@ typedef struct SimpleArray
8696 AnyArrayTypeInfo * info ;
8797} SimpleArray ;
8898
89- typedef enum SimilarityType {
99+ typedef enum SimilarityType
100+ {
90101 AA_Cosine ,
91102 AA_Jaccard ,
92103 AA_Overlap
@@ -100,12 +111,13 @@ PG_FUNCTION_INFO_V1(rum_extract_anyarray_query);
100111
101112PG_FUNCTION_INFO_V1 (rum_anyarray_consistent );
102113
114+ PG_FUNCTION_INFO_V1 (rum_anyarray_ordering );
103115PG_FUNCTION_INFO_V1 (rum_anyarray_similar );
104116PG_FUNCTION_INFO_V1 (rum_anyarray_distance );
105117
106118
107- static SimilarityType SmlType ;
108- static double SmlLimit ;
119+ static SimilarityType SmlType = AA_Cosine ;
120+ static float8 SmlLimit = 0.5 ;
109121
110122
111123static Oid getAMProc (Oid amOid , Oid typid );
@@ -123,8 +135,8 @@ static int cmpDescArrayElem(const void *a, const void *b, void *arg);
123135static void sortSimpleArray (SimpleArray * s , int32 direction );
124136static void uniqSimpleArray (SimpleArray * s , bool onlyDuplicate );
125137
126- static int getNumOfIntersect (SimpleArray * sa , SimpleArray * sb );
127- static double getSimilarity (SimpleArray * sa , SimpleArray * sb );
138+ static int32 getNumOfIntersect (SimpleArray * sa , SimpleArray * sb );
139+ static float8 getSimilarity (SimpleArray * sa , SimpleArray * sb , int32 intersect );
128140
129141
130142
@@ -224,26 +236,30 @@ rum_extract_anyarray_query(PG_FUNCTION_ARGS)
224236
225237 switch (strategy )
226238 {
227- case RumOverlapStrategy :
239+ case RUM_OVERLAP_STRATEGY :
228240 * searchMode = GIN_SEARCH_MODE_DEFAULT ;
229241 break ;
230- case RumContainsStrategy :
242+ case RUM_CONTAINS_STRATEGY :
231243 if (* nentries > 0 )
232244 * searchMode = GIN_SEARCH_MODE_DEFAULT ;
233245 else /* everything contains the empty set */
234246 * searchMode = GIN_SEARCH_MODE_ALL ;
235247 break ;
236- case RumContainedStrategy :
248+ case RUM_CONTAINED_STRATEGY :
237249 /* empty set is contained in everything */
238250 * searchMode = GIN_SEARCH_MODE_INCLUDE_EMPTY ;
239251 break ;
240- case RumEqualStrategy :
252+ case RUM_EQUAL_STRATEGY :
241253 if (* nentries > 0 )
242254 * searchMode = GIN_SEARCH_MODE_DEFAULT ;
243255 else
244256 * searchMode = GIN_SEARCH_MODE_INCLUDE_EMPTY ;
245257 break ;
246- case RumSimilarStrategy :
258+ case RUM_SIMILAR_STRATEGY :
259+ * searchMode = GIN_SEARCH_MODE_DEFAULT ;
260+ break ;
261+ /* Special case for distance */
262+ case RUM_DISTANCE :
247263 * searchMode = GIN_SEARCH_MODE_DEFAULT ;
248264 break ;
249265 default :
@@ -285,7 +301,7 @@ rum_anyarray_consistent(PG_FUNCTION_ARGS)
285301
286302 switch (strategy )
287303 {
288- case RumOverlapStrategy :
304+ case RUM_OVERLAP_STRATEGY :
289305 /* result is not lossy */
290306 * recheck = false;
291307 /* must have a match for at least one non-null element */
@@ -299,7 +315,7 @@ rum_anyarray_consistent(PG_FUNCTION_ARGS)
299315 }
300316 }
301317 break ;
302- case RumContainsStrategy :
318+ case RUM_CONTAINS_STRATEGY :
303319 /* result is not lossy */
304320 * recheck = false;
305321
@@ -314,7 +330,7 @@ rum_anyarray_consistent(PG_FUNCTION_ARGS)
314330 }
315331 }
316332 break ;
317- case RumContainedStrategy :
333+ case RUM_CONTAINED_STRATEGY :
318334 /* we will need recheck */
319335 * recheck = true;
320336
@@ -329,7 +345,7 @@ rum_anyarray_consistent(PG_FUNCTION_ARGS)
329345 }
330346 }
331347 break ;
332- case RumEqualStrategy :
348+ case RUM_EQUAL_STRATEGY :
333349 /* we will need recheck */
334350 * recheck = true;
335351
@@ -356,7 +372,7 @@ rum_anyarray_consistent(PG_FUNCTION_ARGS)
356372 }
357373 }
358374 break ;
359- case RumSimilarStrategy :
375+ case RUM_SIMILAR_STRATEGY :
360376 /* we will need recheck */
361377 * recheck = true;
362378
@@ -377,16 +393,130 @@ rum_anyarray_consistent(PG_FUNCTION_ARGS)
377393 * Similarity and distance
378394 */
379395
396+ Datum
397+ rum_anyarray_ordering (PG_FUNCTION_ARGS )
398+ {
399+ bool * check = (bool * ) PG_GETARG_POINTER (0 );
400+ int nkeys = PG_GETARG_INT32 (3 );
401+ Datum * addInfo = (Datum * ) PG_GETARG_POINTER (8 );
402+ bool * addInfoIsNull = (bool * ) PG_GETARG_POINTER (9 );
403+
404+ float8 dist ,
405+ sml ;
406+ int32 intersect = 0 ,
407+ nentries = -1 ;
408+ int i ;
409+
410+ SimpleArray sa , sb ;
411+
412+ for (i = 0 ; i < nkeys ; i ++ )
413+ if (check [i ])
414+ intersect ++ ;
415+
416+ if (intersect == 0 )
417+ PG_RETURN_FLOAT8 (get_float8_infinity ());
418+
419+ for (i = 0 ; i < nkeys ; i ++ )
420+ if (!addInfoIsNull [0 ])
421+ {
422+ nentries = DatumGetInt32 (addInfo [i ]);
423+ break ;
424+ }
425+
426+ InitDummySimpleArray (& sa , nentries );
427+ InitDummySimpleArray (& sb , nkeys );
428+ sml = getSimilarity (& sa , & sb , intersect );
429+
430+ if (sml == 0.0 )
431+ dist = get_float8_infinity ();
432+ else
433+ dist = 1.0 / sml ;
434+
435+ PG_RETURN_FLOAT8 (dist );
436+ }
437+
380438Datum
381439rum_anyarray_similar (PG_FUNCTION_ARGS )
382440{
383- PG_RETURN_BOOL (true);
441+ ArrayType * a = PG_GETARG_ARRAYTYPE_P (0 );
442+ ArrayType * b = PG_GETARG_ARRAYTYPE_P (1 );
443+ AnyArrayTypeInfo * info ;
444+ SimpleArray * sa ,
445+ * sb ;
446+ float8 result = 0.0 ;
447+
448+ CHECKARRVALID (a );
449+ CHECKARRVALID (b );
450+
451+ if (ARR_ELEMTYPE (a ) != ARR_ELEMTYPE (b ))
452+ ereport (ERROR ,
453+ (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
454+ errmsg ("array types do not match" )));
455+
456+ if (ARR_ISVOID (a ) || ARR_ISVOID (b ))
457+ PG_RETURN_BOOL (false);
458+
459+ if (fcinfo -> flinfo -> fn_extra == NULL )
460+ fcinfo -> flinfo -> fn_extra = getAnyArrayTypeInfo (fcinfo -> flinfo -> fn_mcxt ,
461+ ARR_ELEMTYPE (a ));
462+ info = (AnyArrayTypeInfo * ) fcinfo -> flinfo -> fn_extra ;
463+
464+ sa = Array2SimpleArray (info , a );
465+ sb = Array2SimpleArray (info , b );
466+
467+ result = getSimilarity (sa , sb , getNumOfIntersect (sa , sb ));
468+
469+ freeSimpleArray (sb );
470+ freeSimpleArray (sa );
471+
472+ PG_FREE_IF_COPY (b , 1 );
473+ PG_FREE_IF_COPY (a , 0 );
474+
475+ PG_RETURN_BOOL (result >= SmlLimit );
384476}
385477
386478Datum
387479rum_anyarray_distance (PG_FUNCTION_ARGS )
388480{
389- PG_RETURN_FLOAT8 (0.0 );
481+ ArrayType * a = PG_GETARG_ARRAYTYPE_P (0 );
482+ ArrayType * b = PG_GETARG_ARRAYTYPE_P (1 );
483+ AnyArrayTypeInfo * info ;
484+ SimpleArray * sa ,
485+ * sb ;
486+ float8 result = 0.0 ;
487+
488+ CHECKARRVALID (a );
489+ CHECKARRVALID (b );
490+
491+ if (ARR_ELEMTYPE (a ) != ARR_ELEMTYPE (b ))
492+ ereport (ERROR ,
493+ (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
494+ errmsg ("array types do not match" )));
495+
496+ if (ARR_ISVOID (a ) || ARR_ISVOID (b ))
497+ PG_RETURN_FLOAT8 (0.0 );
498+
499+ if (fcinfo -> flinfo -> fn_extra == NULL )
500+ fcinfo -> flinfo -> fn_extra = getAnyArrayTypeInfo (fcinfo -> flinfo -> fn_mcxt ,
501+ ARR_ELEMTYPE (a ));
502+ info = (AnyArrayTypeInfo * ) fcinfo -> flinfo -> fn_extra ;
503+
504+ sa = Array2SimpleArray (info , a );
505+ sb = Array2SimpleArray (info , b );
506+
507+ result = getSimilarity (sa , sb , getNumOfIntersect (sa , sb ));
508+ if (result == 0.0 )
509+ result = get_float8_infinity ();
510+ else
511+ result = 1.0 / result ;
512+
513+ freeSimpleArray (sb );
514+ freeSimpleArray (sa );
515+
516+ PG_FREE_IF_COPY (b , 1 );
517+ PG_FREE_IF_COPY (a , 0 );
518+
519+ PG_RETURN_FLOAT8 (result );
390520}
391521
392522
@@ -546,7 +676,7 @@ Array2SimpleArray(AnyArrayTypeInfo *info, ArrayType *a)
546676 s -> nHashedElems = 0 ;
547677 s -> hashedElems = NULL ;
548678
549- if (ARRISVOID (a ))
679+ if (ARR_ISVOID (a ))
550680 {
551681 s -> elems = NULL ;
552682 s -> nelems = 0 ;
@@ -676,11 +806,11 @@ uniqSimpleArray(SimpleArray *s, bool onlyDuplicate)
676806 * Similarity calculation
677807 */
678808
679- static int
809+ static int32
680810getNumOfIntersect (SimpleArray * sa , SimpleArray * sb )
681811{
682- int cnt = 0 ,
683- cmp ;
812+ int32 cnt = 0 ;
813+ int cmp ;
684814 Datum * aptr = sa -> elems ,
685815 * bptr = sb -> elems ;
686816 AnyArrayTypeInfo * info = sa -> info ;
@@ -711,24 +841,21 @@ getNumOfIntersect(SimpleArray *sa, SimpleArray *sb)
711841 return cnt ;
712842}
713843
714- static double
715- getSimilarity (SimpleArray * sa , SimpleArray * sb )
844+ static float8
845+ getSimilarity (SimpleArray * sa , SimpleArray * sb , int32 intersect )
716846{
717- int inter ;
718- double result = 0.0 ;
719-
720- inter = getNumOfIntersect (sa , sb );
847+ float8 result = 0.0 ;
721848
722- switch (SmlType )
849+ switch (SmlType )
723850 {
724851 case AA_Cosine :
725- result = ((double ) inter ) / sqrt (((double )sa -> nelems ) * ((double )sb -> nelems ));
852+ result = ((float8 ) intersect ) / sqrt (((float8 )sa -> nelems ) * ((float8 )sb -> nelems ));
726853 break ;
727854 case AA_Jaccard :
728- result = ((double ) inter ) / (((double )sa -> nelems ) + ((double )sb -> nelems ) - ((double )inter ));
855+ result = ((float8 ) intersect ) / (((float8 )sa -> nelems ) + ((float8 )sb -> nelems ) - ((double )intersect ));
729856 break ;
730857 case AA_Overlap :
731- result = inter ;
858+ result = intersect ;
732859 break ;
733860 default :
734861 elog (ERROR , "unknown similarity type" );
0 commit comments