@@ -9,19 +9,22 @@ import { buildFullMemberForMergeSuggestions } from '@crowd/opensearch'
99import {
1010 ILLMConsumableMember ,
1111 IMemberBaseForMergeSuggestions ,
12+ IMemberIdentity ,
1213 IMemberMergeSuggestion ,
1314 MemberIdentityType ,
1415 MemberMergeSuggestionTable ,
1516 OpenSearchIndex ,
16- PlatformType ,
1717} from '@crowd/types'
1818
19+ import { EMAIL_AS_USERNAME_PLATFORMS } from '../enums'
1920import { svc } from '../main'
2021import MemberSimilarityCalculator from '../memberSimilarityCalculator'
21- import { ISimilarMemberOpensearchResult , ISimilarityFilter } from '../types'
22- import { chunkArray } from '../utils'
23-
24- import { EMAIL_AS_USERNAME_PLATFORMS } from './common'
22+ import {
23+ ISimilarMemberOpensearchResult ,
24+ ISimilarityFilter ,
25+ OpenSearchQueryClauseBuilder ,
26+ } from '../types'
27+ import { chunkArray , isEmailAsUsernamePlatform , isNumeric , stripProtocol } from '../utils'
2528
2629/**
2730 * Finds similar members of given member in a tenant
@@ -74,180 +77,163 @@ export async function getMemberMergeSuggestions(
7477 const unverifiedEmailUsernameMatches = [ ]
7578 const unverifiedUsernameEmailMatches = [ ]
7679
77- // Process up to 100 identities
80+ // Process up to 75 identities
7881 // This is a safety limit to prevent OpenSearch max clause errors
79- for ( const identity of identities . slice ( 0 , 100 ) ) {
80- if ( identity . value && identity . value . length > 0 ) {
81- if ( identity . verified ) {
82- // Verified identities: exact match on unverified identities
83- verifiedExactMatches . push ( {
84- value : identity . value ,
85- platform : identity . platform ,
86- } )
87-
88- // Email-as-username: verified email matching unverified username
89- if ( identity . type === MemberIdentityType . EMAIL ) {
90- verifiedEmailUsernameMatches . push ( {
91- value : identity . value ,
92- } )
82+ for ( const { verified, value, platform, type } of identities . slice ( 0 , 75 ) ) {
83+ const isEmail = type === MemberIdentityType . EMAIL
84+ const isUsername = type === MemberIdentityType . USERNAME
85+ const isEmailAsUsername = isUsername && isEmailAsUsernamePlatform ( platform )
86+
87+ const targetLists = verified
88+ ? {
89+ exact : verifiedExactMatches ,
90+ emailUsername : verifiedEmailUsernameMatches ,
91+ usernameEmail : verifiedUsernameEmailMatches ,
92+ fuzzy : verifiedFuzzyMatches ,
9393 }
94-
95- // Email-as-username: verified username matching unverified email
96- if (
97- identity . type === MemberIdentityType . USERNAME &&
98- EMAIL_AS_USERNAME_PLATFORMS . includes ( identity . platform as PlatformType )
99- ) {
100- verifiedUsernameEmailMatches . push ( {
101- value : identity . value ,
102- } )
94+ : {
95+ exact : unverifiedExactMatches ,
96+ emailUsername : unverifiedEmailUsernameMatches ,
97+ usernameEmail : unverifiedUsernameEmailMatches ,
10398 }
10499
105- // Fuzzy search for verified identities (non-numeric only)
106- if ( Number . isNaN ( Number ( identity . value ) ) ) {
107- const cleanedIdentityName = identity . value . replace ( / ^ h t t p s ? : \/ \/ / , '' )
108- verifiedFuzzyMatches . push ( {
109- value : identity . value ,
110- cleanedValue : cleanedIdentityName ,
111- } )
112- }
113- } else {
114- // Unverified identities: exact match on verified identities
115- unverifiedExactMatches . push ( {
116- value : identity . value ,
117- platform : identity . platform ,
118- } )
100+ // Exact matches
101+ targetLists . exact . push ( { value, platform } )
119102
120- // Email-as-username: unverified email matching verified username
121- if ( identity . type === MemberIdentityType . EMAIL ) {
122- unverifiedEmailUsernameMatches . push ( {
123- value : identity . value ,
124- } )
125- }
103+ // Email-as-username cases
104+ if ( isEmail ) {
105+ targetLists . emailUsername . push ( { value } )
106+ } else if ( isEmailAsUsername ) {
107+ targetLists . usernameEmail . push ( { value } )
108+ }
126109
127- // Email-as-username: unverified username matching verified email
128- if (
129- identity . type === MemberIdentityType . USERNAME &&
130- EMAIL_AS_USERNAME_PLATFORMS . includes ( identity . platform as PlatformType )
131- ) {
132- unverifiedUsernameEmailMatches . push ( {
133- value : identity . value ,
134- } )
135- }
136- }
110+ // Fuzzy matches (only for verified & non-numeric)
111+ if ( verified && ! isNumeric ( value ) ) {
112+ targetLists . fuzzy . push ( { value : stripProtocol ( value ) } )
137113 }
138114 }
139115
140116 // Build OpenSearch query clauses
141117 const identitiesShould = [ ]
142- const CHUNK_SIZE = 20 // Split queries into chunks to avoid OpenSearch limits
143-
144- // Query 1: Verified -> Unverified exact matches
145- for ( const { value, platform } of verifiedExactMatches ) {
146- identitiesShould . push ( {
147- bool : {
148- must : [
149- { term : { [ `nested_identities.keyword_value` ] : value } } ,
150- { match : { [ `nested_identities.string_platform` ] : platform } } ,
151- { term : { [ `nested_identities.bool_verified` ] : false } } ,
152- ] ,
153- } ,
154- } )
155- }
156-
157- // Query 2: Verified email -> Unverified username (email-as-username platforms)
158- for ( const { value } of verifiedEmailUsernameMatches ) {
159- identitiesShould . push ( {
160- bool : {
161- must : [
162- { term : { [ `nested_identities.keyword_value` ] : value } } ,
163- { terms : { [ `nested_identities.string_platform` ] : EMAIL_AS_USERNAME_PLATFORMS } } ,
164- { term : { [ `nested_identities.keyword_type` ] : MemberIdentityType . USERNAME } } ,
165- { term : { [ `nested_identities.bool_verified` ] : false } } ,
166- ] ,
167- } ,
168- } )
169- }
170-
171- // Query 3: Verified username -> Unverified email (email-as-username platforms)
172- for ( const { value } of verifiedUsernameEmailMatches ) {
173- identitiesShould . push ( {
174- bool : {
175- must : [
176- { term : { [ `nested_identities.keyword_value` ] : value } } ,
177- { term : { [ `nested_identities.keyword_type` ] : MemberIdentityType . EMAIL } } ,
178- { term : { [ `nested_identities.bool_verified` ] : false } } ,
179- ] ,
180- } ,
181- } )
182- }
183-
184- // Query 4: Verified -> Verified fuzzy matches (chunked)
185- if ( verifiedFuzzyMatches . length > 0 ) {
186- const uniqueFuzzyValues = [
187- ...new Set ( verifiedFuzzyMatches . map ( ( { cleanedValue } ) => cleanedValue ) ) ,
188- ]
189- const fuzzyChunks = chunkArray ( uniqueFuzzyValues , CHUNK_SIZE )
118+ const CHUNK_SIZE = 15 // Split queries into chunks to avoid OpenSearch limits
190119
191- for ( const chunk of fuzzyChunks ) {
192- const fuzzyShouldClauses = chunk . map ( ( cleanedValue ) => ( {
120+ const clauseBuilders : OpenSearchQueryClauseBuilder < Partial < IMemberIdentity > > [ ] = [
121+ {
122+ // Query 1: Verified -> Unverified exact matches
123+ matches : verifiedExactMatches ,
124+ builder : ( { value, platform } ) => ( {
125+ bool : {
126+ must : [
127+ { term : { [ `nested_identities.keyword_value` ] : value } } ,
128+ { match : { [ `nested_identities.string_platform` ] : platform } } ,
129+ { term : { [ `nested_identities.bool_verified` ] : false } } ,
130+ ] ,
131+ } ,
132+ } ) ,
133+ } ,
134+ {
135+ // Query 2: Verified email -> Unverified username (email-as-username platforms)
136+ matches : verifiedEmailUsernameMatches ,
137+ builder : ( { value } ) => ( {
138+ bool : {
139+ must : [
140+ { term : { [ `nested_identities.keyword_value` ] : value } } ,
141+ { terms : { [ `nested_identities.string_platform` ] : EMAIL_AS_USERNAME_PLATFORMS } } ,
142+ { term : { [ `nested_identities.keyword_type` ] : MemberIdentityType . USERNAME } } ,
143+ { term : { [ `nested_identities.bool_verified` ] : false } } ,
144+ ] ,
145+ } ,
146+ } ) ,
147+ } ,
148+ {
149+ // Query 3: Verified username -> Unverified email (email-as-username platforms)
150+ matches : verifiedUsernameEmailMatches ,
151+ builder : ( { value } ) => ( {
152+ bool : {
153+ must : [
154+ { term : { [ `nested_identities.keyword_value` ] : value } } ,
155+ { term : { [ `nested_identities.keyword_type` ] : MemberIdentityType . EMAIL } } ,
156+ { term : { [ `nested_identities.bool_verified` ] : false } } ,
157+ ] ,
158+ } ,
159+ } ) ,
160+ } ,
161+ {
162+ // Query 5: Unverified -> Verified exact matches
163+ matches : unverifiedExactMatches ,
164+ builder : ( { value, platform } ) => ( {
165+ bool : {
166+ must : [
167+ { term : { [ `nested_identities.keyword_value` ] : value } } ,
168+ { match : { [ `nested_identities.string_platform` ] : platform } } ,
169+ { term : { [ `nested_identities.bool_verified` ] : true } } ,
170+ ] ,
171+ } ,
172+ } ) ,
173+ } ,
174+ {
175+ // Query 6: Unverified email -> Verified username (email-as-username platforms)
176+ matches : unverifiedEmailUsernameMatches ,
177+ builder : ( { value } ) => ( {
178+ bool : {
179+ must : [
180+ { term : { [ `nested_identities.keyword_value` ] : value } } ,
181+ { terms : { [ `nested_identities.string_platform` ] : EMAIL_AS_USERNAME_PLATFORMS } } ,
182+ { term : { [ `nested_identities.keyword_type` ] : MemberIdentityType . USERNAME } } ,
183+ { term : { [ `nested_identities.bool_verified` ] : true } } ,
184+ ] ,
185+ } ,
186+ } ) ,
187+ } ,
188+ {
189+ // Query 7: Unverified username -> Verified email (email-as-username platforms)
190+ matches : unverifiedUsernameEmailMatches ,
191+ builder : ( { value } ) => ( {
192+ bool : {
193+ must : [
194+ { term : { [ `nested_identities.keyword_value` ] : value } } ,
195+ { term : { [ `nested_identities.keyword_type` ] : MemberIdentityType . EMAIL } } ,
196+ { term : { [ `nested_identities.bool_verified` ] : true } } ,
197+ ] ,
198+ } ,
199+ } ) ,
200+ } ,
201+ {
202+ // Query 4: Verified -> Verified fuzzy matches
203+ matches : uniqBy ( verifiedFuzzyMatches , 'value' ) ,
204+ builder : ( { value } ) => ( {
193205 match : {
194206 [ `nested_identities.string_value` ] : {
195- query : cleanedValue ,
207+ query : value ,
196208 prefix_length : 1 ,
197209 fuzziness : 'auto' ,
198210 } ,
199211 } ,
200- } ) )
201-
202- identitiesShould . push ( {
203- bool : {
204- should : fuzzyShouldClauses ,
205- minimum_should_match : 1 ,
206- } ,
207- } )
212+ } ) ,
213+ filter : [ { term : { [ `nested_identities.bool_verified` ] : true } } ] ,
214+ } ,
215+ ]
216+
217+ for ( const clauseBuilder of clauseBuilders ) {
218+ const { matches, builder, filter } = clauseBuilder
219+ if ( matches . length > 0 ) {
220+ const chunks = chunkArray ( matches , CHUNK_SIZE )
221+ for ( const chunk of chunks ) {
222+ const shouldClauses = chunk . map ( builder )
223+ const chunkQuery : any = {
224+ bool : {
225+ should : shouldClauses ,
226+ minimum_should_match : 1 ,
227+ } ,
228+ }
229+ if ( filter ) {
230+ chunkQuery . bool . filter = filter
231+ }
232+ identitiesShould . push ( chunkQuery )
233+ }
208234 }
209235 }
210236
211- // Query 5: Unverified -> Verified exact matches
212- for ( const { value, platform } of unverifiedExactMatches ) {
213- identitiesShould . push ( {
214- bool : {
215- must : [
216- { term : { [ `nested_identities.keyword_value` ] : value } } ,
217- { match : { [ `nested_identities.string_platform` ] : platform } } ,
218- { term : { [ `nested_identities.bool_verified` ] : true } } ,
219- ] ,
220- } ,
221- } )
222- }
223-
224- // Query 6: Unverified email -> Verified username (email-as-username platforms)
225- for ( const { value } of unverifiedEmailUsernameMatches ) {
226- identitiesShould . push ( {
227- bool : {
228- must : [
229- { term : { [ `nested_identities.keyword_value` ] : value } } ,
230- { terms : { [ `nested_identities.string_platform` ] : EMAIL_AS_USERNAME_PLATFORMS } } ,
231- { term : { [ `nested_identities.keyword_type` ] : MemberIdentityType . USERNAME } } ,
232- { term : { [ `nested_identities.bool_verified` ] : true } } ,
233- ] ,
234- } ,
235- } )
236- }
237-
238- // Query 7: Unverified username -> Verified email (email-as-username platforms)
239- for ( const { value } of unverifiedUsernameEmailMatches ) {
240- identitiesShould . push ( {
241- bool : {
242- must : [
243- { term : { [ `nested_identities.keyword_value` ] : value } } ,
244- { term : { [ `nested_identities.keyword_type` ] : MemberIdentityType . EMAIL } } ,
245- { term : { [ `nested_identities.bool_verified` ] : true } } ,
246- ] ,
247- } ,
248- } )
249- }
250-
251237 // Wrap all identity queries in a nested query (identities are nested documents)
252238 const nestedIdentityQuery = {
253239 nested : {
0 commit comments