@@ -239,13 +239,12 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
239239 // Sanitizers are currently not modeled in MaD. TODO: check if this has a large negative impact.
240240 predicate isSanitizer ( Endpoint e , EndpointType t ) {
241241 exists ( t ) and
242- (
243- e .asNode ( ) .getType ( ) instanceof BoxedType
244- or
245- e .asNode ( ) .getType ( ) instanceof PrimitiveType
246- or
247- e .asNode ( ) .getType ( ) instanceof NumberType
248- )
242+ AutomodelJavaUtil:: isUnexploitableType ( [
243+ // for most endpoints, we can get the type from the node
244+ e .asNode ( ) .getType ( ) ,
245+ // but not for calls to void methods, where we need to go via the AST
246+ e .asTop ( ) .( Expr ) .getType ( )
247+ ] )
249248 or
250249 t instanceof AutomodelEndpointTypes:: PathInjectionSinkType and
251250 e .asNode ( ) instanceof PathSanitizer:: PathInjectionSanitizer
@@ -372,62 +371,124 @@ class ApplicationModeMetadataExtractor extends string {
372371 }
373372}
374373
374+ /**
375+ * Holds if the given `endpoint` should be considered a candidate for the `extensibleType`: it is a candidate according to `CharacteristicsImpl::isCandidate`, no `UninterestingToModelCharacteristic` applies to it, and `AutomodelJavaUtil::includeAutomodelCandidate` holds for its `package`, `type`, `name` and `signature`.
376+ *
377+ * The other parameters record the properties that `ApplicationModeMetadataExtractor.hasMetadata` binds for the endpoint; `alreadyAiModeled` is additionally required to be either empty or to contain `ai-`.
378+ */
379+ predicate isCandidate (
380+ Endpoint endpoint , string package , string type , string subtypes , string name , string signature ,
381+ string input , string output , string isVarargs , string extensibleType , string alreadyAiModeled
382+ ) {
383+ CharacteristicsImpl:: isCandidate ( endpoint , _) and
384+ not exists ( CharacteristicsImpl:: UninterestingToModelCharacteristic u |
385+ u .appliesToEndpoint ( endpoint )
386+ ) and
387+ any ( ApplicationModeMetadataExtractor meta )
388+ .hasMetadata ( endpoint , package , type , subtypes , name , signature , input , output , isVarargs ,
389+ alreadyAiModeled , extensibleType ) and
390+ // If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
391+ // candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
392+ // already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
393+ // assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
394+ // types, and we don't need to reexamine it. The pattern below admits only an empty `alreadyAiModeled` value or one containing `ai-` (presumably so that AI-generated models stay eligible -- TODO confirm).
395+ alreadyAiModeled .matches ( [ "" , "%ai-%" ] ) and
396+ AutomodelJavaUtil:: includeAutomodelCandidate ( package , type , name , signature )
397+ }
398+
399+ /**
400+ * Holds if the given `endpoint` is a negative example for the `extensibleType`
401+ * because of the `characteristic`.
402+ *
403+ * `confidence` is the minimum confidence with which `characteristic` has a negative implication for any potential type of the endpoint, and must be at least `SharedCharacteristics::highConfidence()`. The other parameters record the properties that `ApplicationModeMetadataExtractor.hasMetadata` binds for the endpoint.
404+ */
405+ predicate isNegativeExample (
406+ Endpoint endpoint , EndpointCharacteristic characteristic , float confidence , string package ,
407+ string type , string subtypes , string name , string signature , string input , string output ,
408+ string isVarargsArray , string extensibleType
409+ ) {
410+ characteristic .appliesToEndpoint ( endpoint ) and
411+ // the node is known not to be an endpoint of any appropriate type
412+ forall ( AutomodelEndpointTypes:: EndpointType tp |
413+ tp = CharacteristicsImpl:: getAPotentialType ( endpoint )
414+ |
415+ characteristic .hasImplications ( tp , false , _)
416+ ) and
417+ // the lowest confidence across all potential endpoint types of this endpoint should be at least highConfidence
418+ confidence =
419+ min ( float c |
420+ characteristic .hasImplications ( CharacteristicsImpl:: getAPotentialType ( endpoint ) , false , c )
421+ ) and
422+ confidence >= SharedCharacteristics:: highConfidence ( ) and
423+ any ( ApplicationModeMetadataExtractor meta )
424+ .hasMetadata ( endpoint , package , type , subtypes , name , signature , input , output ,
425+ isVarargsArray , _, extensibleType ) and
426+ // It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
427+ // as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude any endpoint to which a different characteristic applies that positively implies one of its potential types with at least maximal confidence.
428+ not exists ( EndpointCharacteristic characteristic2 , float confidence2 |
429+ characteristic2 != characteristic
430+ |
431+ characteristic2 .appliesToEndpoint ( endpoint ) and
432+ confidence2 >= SharedCharacteristics:: maximalConfidence ( ) and
433+ characteristic2
434+ .hasImplications ( CharacteristicsImpl:: getAPotentialType ( endpoint ) , true , confidence2 )
435+ )
436+ }
437+
438+ /**
439+ * Holds if the given `endpoint` is a positive example for the `endpointType`: `CharacteristicsImpl::isKnownAs` holds for the pair, and `CharacteristicsImpl::getRelatedLocationOrCandidate` has a result for the endpoint with `CallContext()`.
440+ *
441+ * The other parameters record the properties that `ApplicationModeMetadataExtractor.hasMetadata` binds for the endpoint.
442+ */
443+ predicate isPositiveExample (
444+ Endpoint endpoint , string endpointType , string package , string type , string subtypes , string name ,
445+ string signature , string input , string output , string isVarargsArray , string extensibleType
446+ ) {
447+ any ( ApplicationModeMetadataExtractor meta )
448+ .hasMetadata ( endpoint , package , type , subtypes , name , signature , input , output ,
449+ isVarargsArray , _, extensibleType ) and
450+ CharacteristicsImpl:: isKnownAs ( endpoint , endpointType , _) and
451+ exists ( CharacteristicsImpl:: getRelatedLocationOrCandidate ( endpoint , CallContext ( ) ) )
452+ }
453+
375454/*
376455 * EndpointCharacteristic classes that are specific to Automodel for Java.
377456 */
378457
379458/**
380- * A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks,
381- * and its return value should not be considered a source.
459+ * A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks, unless `ApplicationCandidatesImpl::isSink` already holds for the endpoint.
382460 *
383461 * A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
384462 * type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
385463 * the dangerous/interesting thing, so we want the latter to be modeled as the sink.
386464 *
387465 * TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
388466 */
389- private class UnexploitableIsCharacteristic extends CharacteristicsImpl:: NeitherSourceNorSinkCharacteristic
390- {
467+ private class UnexploitableIsCharacteristic extends CharacteristicsImpl:: NotASinkCharacteristic {
391468 UnexploitableIsCharacteristic ( ) { this = "unexploitable (is-style boolean method)" }
392469
393470 override predicate appliesToEndpoint ( Endpoint e ) {
394471 e .getCallable ( ) .getName ( ) .matches ( "is%" ) and
395472 e .getCallable ( ) .getReturnType ( ) instanceof BooleanType and
396- (
397- e .getExtensibleType ( ) = "sinkModel" and
398- not ApplicationCandidatesImpl:: isSink ( e , _, _)
399- or
400- e .getExtensibleType ( ) = "sourceModel" and
401- not ApplicationCandidatesImpl:: isSource ( e , _, _) and
402- e .getMaDOutput ( ) = "ReturnValue"
403- )
473+ not ApplicationCandidatesImpl:: isSink ( e , _, _)
404474 }
405475}
406476
407477/**
408478 * A negative characteristic that indicates that parameters of an existence-checking boolean method should not be
409- * considered sinks, and its return value should not be considered a source .
479+ * considered sinks. NOTE(review): unlike the is-style characteristic, the added version of `appliesToEndpoint` no longer checks `not ApplicationCandidatesImpl::isSink(e, _, _)` -- confirm that applying this characteristic to already-known sinks is intended.
410480 *
411481 * A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a
412482 * boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
413483 * dangerous/interesting thing, so we want the latter to be modeled as the sink.
414484 */
415- private class UnexploitableExistsCharacteristic extends CharacteristicsImpl:: NeitherSourceNorSinkCharacteristic
416- {
485+ private class UnexploitableExistsCharacteristic extends CharacteristicsImpl:: NotASinkCharacteristic {
417486 UnexploitableExistsCharacteristic ( ) { this = "unexploitable (existence-checking boolean method)" }
418487
419488 override predicate appliesToEndpoint ( Endpoint e ) {
420- exists ( Callable callable |
421- callable = e .getCallable ( ) and
489+ exists ( Callable callable | callable = e .getCallable ( ) |
422490 callable .getName ( ) .toLowerCase ( ) = [ "exists" , "notexists" ] and
423491 callable .getReturnType ( ) instanceof BooleanType
424- |
425- e .getExtensibleType ( ) = "sinkModel" and
426- not ApplicationCandidatesImpl:: isSink ( e , _, _)
427- or
428- e .getExtensibleType ( ) = "sourceModel" and
429- not ApplicationCandidatesImpl:: isSource ( e , _, _) and
430- e .getMaDOutput ( ) = "ReturnValue"
431492 )
432493 }
433494}
0 commit comments