@@ -3928,18 +3928,18 @@ def handle_opencypher_status(self, line, local_ns):
39283928
39293929
39303930
3931- # degreeDistribution
3932- # Shows the degree distribution of vertices in the graph
3931+ # %degreeDistribution line magic. Takes traversalDirection, vertexLabels, and edgeLabels parameters, and visualizes
3932+ # the degree distribution.
39333933 # TODO: Error handling
3934-
3934+
39353935 @line_magic
39363936 @needs_local_scope
39373937 @display_exceptions
39383938 @neptune_graph_only
39393939 def degreeDistribution (self , line , local_ns : dict = None ):
39403940 parser = argparse .ArgumentParser ()
39413941
3942- # Get the vertexLabels and edgeLabels from graph summary
3942+ # Get the vertexLabels and edgeLabels from graph summary, to be shown in the widgets for selection.
39433943 try :
39443944 summary_res = self .client .statistics ("propertygraph" , True , "detailed" , True )
39453945 summary_res .raise_for_status ()
@@ -3967,13 +3967,13 @@ def degreeDistribution(self, line, local_ns: dict = None):
39673967 "we will default to using all the edge labels." )
39683968
39693969
3970- # # Additional parameters for output control
3970+ # TODO: Additional parameter for saving the visualization?
39713971 # parser.add_argument('--export-to', type=str, default='',
39723972 # help='Export the degree distribution results to the provided file path.')
39733973
39743974 args = parser .parse_args (line .split ())
39753975
3976- # put the command line specified option as the value , if any; o.w. 'both'
3976+ # Use the traversal direction specified on the command line, if any; otherwise default to 'both'
39773977 td_val = args .traversalDirection
39783978 td_val = td_val .lower () if td_val else 'both'
39793979
@@ -3985,6 +3985,8 @@ def degreeDistribution(self, line, local_ns: dict = None):
39853985 value = td_val
39863986 )
39873987
3988+ # Use the vertex label(s) specified on the command line, if any; otherwise default to all vertex labels (denoted by [])
3989+ available_vertex_labels = sorted (available_vertex_labels )
39883990 selected_vlabels = args .vertexLabels if args .vertexLabels else []
39893991 vertex_labels_select = widgets .SelectMultiple (
39903992 options = available_vertex_labels ,
@@ -3994,6 +3996,8 @@ def degreeDistribution(self, line, local_ns: dict = None):
39943996 value = selected_vlabels
39953997 )
39963998
3999+ # Use the edge label(s) specified on the command line, if any; otherwise default to all edge labels (denoted by [])
4000+ available_edge_labels = sorted (available_edge_labels )
39974001 selected_elabels = args .edgeLabels if args .edgeLabels else []
39984002 edge_labels_select = widgets .SelectMultiple (
39994003 options = available_edge_labels ,
@@ -4010,7 +4014,7 @@ def degreeDistribution(self, line, local_ns: dict = None):
40104014 display (td_dropdown , vertex_labels_select , edge_labels_select , submit_button , output )
40114015
40124016 def on_button_clicked (b ):
4013- # Get selected traversal direction
4017+ # Get the selected parameters
40144018 td = td_dropdown .value
40154019 vlabels = list (vertex_labels_select .value )
40164020 elabels = list (edge_labels_select .value )
@@ -4022,10 +4026,12 @@ def on_button_clicked(b):
40224026 with output :
40234027 res = self .callDD (td , vlabels , elabels , local_ns )
40244028
4029+ # Retrieve the distribution
40254030 pairs = np .array (res ['results' ][0 ]['output' ]['distribution' ])
40264031 keys = pairs [:,0 ]
40274032 values = pairs [:,1 ]
40284033
4034+ # Retrieve some statistics
40294035 max_deg = res ['results' ][0 ]['output' ]['statistics' ]['maxDeg' ]
40304036 median_deg = res ['results' ][0 ]['output' ]['statistics' ]['p50' ]
40314037 mean_deg = res ['results' ][0 ]['output' ]['statistics' ]['mean' ]
@@ -4046,6 +4052,7 @@ def callDD (self, td, vlabels, elabels, local_ns):
40464052 edge_str = ", " .join ([f'"{ e } "' for e in elabels ])
40474053 query_parts .append (f'edgeLabels: [{ edge_str } ]' )
40484054
4055+ # Construct the query
40494056 line = "CALL neptune.algo.degreeDistribution({" + ", " .join (query_parts ) + "}) YIELD output RETURN output"
40504057
40514058 # oc_rebuild_args = (f"{f'--store-to js --silent'}")
@@ -4064,25 +4071,27 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
40644071 marker_size = 50
40654072 alpha = 0.6
40664073 plt .clf ()
4067-
4068- # Use the provided unique_degrees and counts
4074+
40694075 # Get zero degree count
40704076 zero_idx = np .where (unique_degrees == 0 )[0 ]
40714077 zero_degree_count = counts [zero_idx [0 ]] if len (zero_idx ) > 0 else 0
40724078
4079+ # Get non-zero degrees and counts
40734080 mask = unique_degrees > 0
40744081 filtered_degrees = unique_degrees [mask ]
40754082 filtered_counts = counts [mask ]
40764083
4077- # Handle case when all nodes have zero degree
4084+ # Obtain the minimum non-zero degree, unless it's all zero degrees
40784085 if len (filtered_degrees ) == 0 :
40794086 min_deg = 0
40804087 else :
40814088 min_deg = np .min (filtered_degrees )
40824089
40834090 n_bins = 1
4084- if len (filtered_degrees ) > 0 : # Only create histogram if there are non-zero degree nodes
4091+ # Create histogram only if there are non-zero degree nodes
4092+ if len (filtered_degrees ) > 0 :
40854093 if bin_type != 'Raw' :
4094+ # Arrange the bins for a given bin_width
40864095 if bin_type == 'Linear' :
40874096 n_bins = max (1 , int ((max_deg - min_deg ) / bin_width ))
40884097 bins = np .linspace (min_deg , max_deg , n_bins + 1 )
@@ -4099,9 +4108,9 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
40994108 else :
41004109 # For raw data, create bars at each unique degree
41014110 plt .bar (filtered_degrees , filtered_counts , alpha = alpha ,
4102- label = 'Raw' , color = '#000080 ' )
4111+ label = 'Raw' , color = '#000000 ' )
41034112
4104- # Plot degree 0 separately
4113+ # Plot zero degree node count separately
41054114 if zero_degree_count > 0 :
41064115 plt .bar (0 , zero_degree_count , color = 'red' ,
41074116 label = 'Isolated' , alpha = alpha , width = 0.2 )
@@ -4121,7 +4130,7 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
41214130
41224131 plt .gca ().set_ylim (top = y_max )
41234132
4124- # Add vertical dashed lines for min and max degree if enabled
4133+ # Add vertical dashed lines for min and max degree, if enabled
41254134 if show_mindeg and min_deg > 0 :
41264135 plt .axvline (x = min_deg , color = 'darkgreen' , linestyle = '--' , linewidth = 2 , label = f'Min non-zero degree: { min_deg } ' )
41274136
@@ -4155,19 +4164,22 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
41554164
41564165 max_count = np .max (counts )
41574166
4158- # Create widgets (same as before)
4167+ # Scale widget, four options
41594168 scale_widget = widgets .Dropdown (
41604169 options = ['Linear-Linear' , 'Log-Log' , 'Log(x)-Linear(y)' , 'Linear(x)-Log(y)' ],
41614170 value = 'Linear-Linear' ,
41624171 description = 'Scale:'
41634172 )
41644173
4174+ # Binning widget, three options
41654175 bin_widget = widgets .Dropdown (
41664176 options = ['Raw' , 'Linear' , 'Logarithmic' ],
41674177 value = 'Linear' ,
41684178 description = 'Binning:'
41694179 )
41704180
4181+ # Bin width widget, integer options in [1, 1+(max_deg/2)] interval
4182+ # TODO: if logarithmic binning, a much smaller range makes more sense
41714183 bin_width_widget = widgets .IntSlider (
41724184 value = 1 ,
41734185 min = 1 ,
@@ -4178,28 +4190,27 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
41784190 'For log binning: multiplicative factor' )
41794191 )
41804192
4193+ # Upper limit for y-axis range, enables zooming (lower limit is always zero)
41814194 y_max_widget = widgets .IntSlider (
4182- value = max_count ,
4195+ value = max_count * 1.1 ,
41834196 min = 1 ,
41844197 max = max_count * 1.1 ,
41854198 step = 1 ,
41864199 description = 'y-max:' ,
41874200 )
41884201
4189- # Add x-axis range slider
4190- x_range_widget = widgets .FloatRangeSlider (
4191- value = [min_deg , (max_deg * 1.1 ) + 5 ],
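4202+ # Range slider for x-axis, enables zooming. NOTE(review): `value=[min, max]` below appears to reference the Python builtins rather than the slider bounds (keyword names are not in scope as variables) -- confirm and pass the numeric limits instead.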
4203+ x_range_widget = widgets .FloatRangeSlider (
41924204 min = 0 ,
41934205 max = max_deg * 1.1 + 5 ,
4206+ value = [min , max ],
41944207 step = 1 ,
41954208 description = 'x-axis range:' ,
41964209 disabled = False ,
41974210 continuous_update = True ,
41984211 readout = True ,
41994212 readout_format = '.0f' ,
42004213 )
4201- # Create output widget for statistics
4202- stats_output = widgets .Output ()
42034214
42044215 # Toggle switches for min/max degree lines
42054216 show_mindeg_widget = widgets .Checkbox (
@@ -4214,7 +4225,10 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
42144225 disabled = False
42154226 )
42164227
4217- # Create the interactive plot
4228+ # Output widget for statistics
4229+ stats_output = widgets .Output ()
4230+
4231+ # Interactive plot
42184232 interactive_plot = widgets .interactive (
42194233 update_plot ,
42204234 scale_type = scale_widget ,
@@ -4226,7 +4240,7 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
42264240 show_maxdeg = show_maxdeg_widget
42274241 )
42284242
4229- # Create a vertical box layout
4243+ # Vertical box layout
42304244 vbox = widgets .VBox ([interactive_plot , stats_output ])
42314245
42324246 # Display the interactive plot and stats
0 commit comments