1919import numpy as np
2020import matplotlib .pyplot as plt
2121
22+ import numpy as np
23+ import matplotlib .pyplot as plt
24+
2225from ipyfilechooser import FileChooser
2326from enum import Enum
2427from copy import copy
5760 SPARQL_EXPLAIN_MODES , OPENCYPHER_EXPLAIN_MODES , GREMLIN_EXPLAIN_MODES , \
5861 OPENCYPHER_PLAN_CACHE_MODES , OPENCYPHER_DEFAULT_TIMEOUT , OPENCYPHER_STATUS_STATE_MODES , \
5962 normalize_service_name , NEPTUNE_DB_SERVICE_NAME , NEPTUNE_ANALYTICS_SERVICE_NAME , GRAPH_PG_INFO_METRICS , TRAVERSAL_DIRECTIONS , \
63+ normalize_service_name , NEPTUNE_DB_SERVICE_NAME , NEPTUNE_ANALYTICS_SERVICE_NAME , GRAPH_PG_INFO_METRICS , TRAVERSAL_DIRECTIONS , \
6064 GREMLIN_PROTOCOL_FORMATS , DEFAULT_HTTP_PROTOCOL , DEFAULT_WS_PROTOCOL , GRAPHSONV4_UNTYPED , \
6165 GREMLIN_SERIALIZERS_WS , get_gremlin_serializer_mime , normalize_protocol_name , generate_snapshot_name )
6266from graph_notebook .network import SPARQLNetwork
@@ -3928,15 +3932,29 @@ def handle_opencypher_status(self, line, local_ns):
39283932
39293933
39303934
3931- # %degreeDistribution. Takes traversalDirection, vertexLabels, edgeLabels parameters, and visualizes
3932- # the degree distribution.
3935+ # %degreeDistribution magic command.
3936+ # It shows the degree distribution of a graph as a histogram in the notebook. The histogram
3937+ # shows the number of vertices with a given degree, where degree is shown on the x-axis and the count on the y-axis.
3938+ # It takes traversalDirection [both (default), inbound, outbound], vertexLabels [default is empty list],
3939+ # and edgeLabels [default is empty list] parameters, and then gives the histogram for the specified degree
3940+ # (both/in/out) distribution of the vertices in the graph filtered by the specified vertex labels and edge
3941+ # labels. Parameters can be given as command-line arguments and/or through the dropdown widgets.
3942+ # Example usages:
3943+ # > %degreeDistribution
3944+ # > %degreeDistribution --traversalDirection inbound
3945+ # > %degreeDistribution --traversalDirection inbound --vertexLabels airport country
3946+
39333947 # TODO: Error handling
39343948
39353949 @line_magic
39363950 @needs_local_scope
39373951 @display_exceptions
39383952 @neptune_graph_only
39393953 def degreeDistribution (self , line , local_ns : dict = None ):
3954+ if not self .client .is_analytics_domain ():
3955+ print ("This command is only supported for Neptune Analytics domains." )
3956+ return
3957+
39403958 parser = argparse .ArgumentParser ()
39413959
39423960 # Get the vertexLabels and edgeLabels from graph summary, to be shown in the widgets for selection.
@@ -3950,18 +3968,23 @@ def degreeDistribution(self, line, local_ns: dict = None):
39503968 print (f"Error retrieving graph summary: { e } " )
39513969 return
39523970
3953- # traversalDirection parameter
3971+ # traversalDirection: Type of the degree computed:
3972+ # - inbound: Counts only the incoming edges for each vertex
3973+ # - outbound: Counts only the outgoing edges for each vertex
3974+ # - both [default]: Counts both the incoming and outgoing edges for each vertex.
39543975 parser .add_argument ('--traversalDirection' , nargs = '?' , type = str .lower , default = 'both' ,
39553976 help = f'Type of the degree for which the distribution is shown. Valid inputs: { TRAVERSAL_DIRECTIONS } . '
39563977 f'Default: both.' ,
39573978 choices = TRAVERSAL_DIRECTIONS )
39583979
3959- # vertexLabels parameter
3980+ # vertexLabels: List of the vertex labels, space separated, for which the degrees are computed:
3981+ # - default value is empty list, which means the degrees are computed for any vertex label.
39603982 parser .add_argument ('--vertexLabels' , nargs = '*' , default = [],
39613983 help = "The vertex labels for which the induced graph is considered and the degree distribution is shown. "
39623984 "If not supplied, we will default to using all the vertex labels." )
39633985
3964- # edgeLabels parameter
3986+ # edgeLabels: List of the edge labels, space separated, for which the degrees are computed:
3987+ # - default value is empty list, which means the degrees are computed for any edge label.
39653988 parser .add_argument ('--edgeLabels' , nargs = '*' , default = [],
39663989 help = "The edge labels for which the degree distribution is shown. If not supplied, "
39673990 "we will default to using all the edge labels." )
@@ -3973,7 +3996,8 @@ def degreeDistribution(self, line, local_ns: dict = None):
39733996
39743997 args = parser .parse_args (line .split ())
39753998
3976- # Put the selection specified on the command line, if any; o.w. default is 'both'
3999+ # If the traversalDirection parameter is specified on the command line, it is shown as the default
4000+ # in the dropdown menu. Otherwise, the default in the dropdown is 'both'.
39774001 td_val = args .traversalDirection
39784002 td_val = td_val .lower () if td_val else 'both'
39794003
@@ -3985,7 +4009,9 @@ def degreeDistribution(self, line, local_ns: dict = None):
39854009 value = td_val
39864010 )
39874011
3988- # Put the vertex label(s) specified on the command line, if any; o.w. default is all the vertex labels (denoted by [])
4012+ # Existing vertex labels in the graph are shown in the multi-select widget. If any vertex labels are specified
4013+ # on the command line, they are pre-selected in the widget. Otherwise, no label is selected,
4014+ # which means that all vertex labels are considered in the computation.
39894015 available_vertex_labels = sorted (available_vertex_labels )
39904016 selected_vlabels = args .vertexLabels if args .vertexLabels else []
39914017 vertex_labels_select = widgets .SelectMultiple (
@@ -3996,7 +4022,9 @@ def degreeDistribution(self, line, local_ns: dict = None):
39964022 value = selected_vlabels
39974023 )
39984024
3999- # Put the edge label(s) specified on the command line, if any; o.w. default is all the edge labels (denoted by [])
4025+ # Existing edge labels in the graph are shown in the multi-select widget. If any edge labels are specified
4026+ # on the command line, they are pre-selected in the widget. Otherwise, no label is selected,
4027+ # which means that all edge labels are considered in the computation.
40004028 available_edge_labels = sorted (available_edge_labels )
40014029 selected_elabels = args .edgeLabels if args .edgeLabels else []
40024030 edge_labels_select = widgets .SelectMultiple (
@@ -4024,7 +4052,7 @@ def on_button_clicked(b):
40244052
40254053 # Call the function with the selected parameters
40264054 with output :
4027- res = self .callDD (td , vlabels , elabels , local_ns )
4055+ res = self .execute_degree_distribution_query (td , vlabels , elabels , local_ns )
40284056
40294057 # Retrieve the distribution
40304058 pairs = np .array (res ['results' ][0 ]['output' ]['distribution' ])
@@ -4041,7 +4069,7 @@ def on_button_clicked(b):
40414069
40424070 submit_button .on_click (on_button_clicked )
40434071
4044- def callDD (self , td , vlabels , elabels , local_ns ):
4072+ def execute_degree_distribution_query (self , td , vlabels , elabels , local_ns ):
40454073 query_parts = [f'traversalDirection: "{ td } "' ]
40464074
40474075 if vlabels :
@@ -4055,8 +4083,7 @@ def callDD (self, td, vlabels, elabels, local_ns):
40554083 # Construct the query
40564084 line = "CALL neptune.algo.degreeDistribution({" + ", " .join (query_parts ) + "}) YIELD output RETURN output"
40574085
4058- # oc_rebuild_args = (f"{f'--store-to js --silent'}")
4059- oc_rebuild_args = (f"{ f'--store-to js' } " )
4086+ oc_rebuild_args = (f"{ f'--store-to js --silent' } " )
40604087
40614088 self .handle_opencypher_query (oc_rebuild_args , line , local_ns )
40624089
@@ -4068,14 +4095,17 @@ def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg,
40684095 min_deg = 0
40694096
40704097 def update_plot (scale_type , bin_type , bin_width , y_max , x_range , show_mindeg , show_maxdeg ):
4071- marker_size = 50
4072- alpha = 0.6
4098+ # Start timing
4099+ start_time = time .time ()
4100+
4101+ alpha = 1
40734102 plt .clf ()
40744103
40754104 # Get zero degree count
40764105 zero_idx = np .where (unique_degrees == 0 )[0 ]
40774106 zero_degree_count = counts [zero_idx [0 ]] if len (zero_idx ) > 0 else 0
40784107
4108+ isolateds_exist = zero_degree_count > 0
40794109 # Get non-zero degrees and counts
40804110 mask = unique_degrees > 0
40814111 filtered_degrees = unique_degrees [mask ]
@@ -4085,8 +4115,8 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
40854115 if len (filtered_degrees ) == 0 :
40864116 min_deg = 0
40874117 else :
4088- min_deg = np .min (filtered_degrees )
4089-
4118+ min_deg = np .min (filtered_degrees )
4119+
40904120 n_bins = 1
40914121 # Create histogram only if there are non-zero degree nodes
40924122 if len (filtered_degrees ) > 0 :
@@ -4111,20 +4141,32 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
41114141 label = 'Raw' , color = '#000000' )
41124142
41134143 # Plot zero degree node count separately
4114- if zero_degree_count > 0 :
4115- plt .bar (0 , zero_degree_count , color = 'red' ,
4116- label = 'Isolated' , alpha = alpha , width = 0.2 )
4144+ if isolateds_exist :
4145+ # Use a special x position for zero degree nodes in log scale
4146+ zero_x_pos = 0.1 if scale_type in ['Log-Log' , 'Log(x)-Linear(y)' ] else 0
4147+ plt .bar (zero_x_pos , zero_degree_count , color = 'red' ,
4148+ label = 'Isolated' , alpha = alpha , width = 0.1 if scale_type in ['Log-Log' , 'Log(x)-Linear(y)' ] else 2 )
41174149
41184150 plt .xlim (x_range [0 ], x_range [1 ])
41194151
4152+ if isolateds_exist :
4153+ plt .xlim (x_range [0 ], x_range [1 ])
4154+
41204155 # Set scales based on selection
41214156 if scale_type == 'Log-Log' :
41224157 plt .xscale ('log' )
41234158 plt .yscale ('log' )
4124- plt .xlim (x_range [0 ]+ 1 , x_range [1 ])
4159+ if isolateds_exist :
4160+ plt .xlim (0.05 , x_range [1 ])
4161+ else :
4162+ plt .xlim (x_range [0 ]+ 0.05 , x_range [1 ])
4163+
41254164 elif scale_type == 'Log(x)-Linear(y)' :
41264165 plt .xscale ('log' )
4127- plt .xlim (x_range [0 ]+ 1 , x_range [1 ])
4166+ if isolateds_exist :
4167+ plt .xlim (0.05 , x_range [1 ])
4168+ else :
4169+ plt .xlim (x_range [0 ]+ 0.05 , x_range [1 ])
41284170 elif scale_type == 'Linear(x)-Log(y)' :
41294171 plt .yscale ('log' )
41304172
@@ -4143,13 +4185,21 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
41434185 plt .legend ()
41444186
41454187 plt .title (f'Degree Distribution' )
4146-
4188+
4189+ # End timing and display
4190+ end_time = time .time ()
4191+ runtime = end_time - start_time
4192+
41474193 # Update statistics
41484194 with stats_output :
41494195 stats_output .clear_output (wait = True )
41504196 total_nodes = sum (counts )
41514197 total_edges = sum (d * c for d , c in zip (unique_degrees , counts )) // 2
41524198 avg_degree = sum (d * c for d , c in zip (unique_degrees , counts )) / total_nodes
4199+
4200+ print (f"Render time: { runtime :.3f} seconds" )
4201+ print (f"--------------------" )
4202+
41534203 print (f"Number of nodes: { total_nodes } " )
41544204 print (f"Number of edges: { total_edges } " )
41554205 print (f"Number of isolated nodes: { zero_degree_count } " )
@@ -4178,18 +4228,56 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
41784228 description = 'Binning:'
41794229 )
41804230
4231+ # Define a function to update bin_width_widget based on bin_type
4232+ def update_bin_width_widget (change ):
4233+ if change ['new' ] == 'Logarithmic' :
4234+ # For logarithmic binning, use a FloatSlider with smaller values
4235+ bin_width_widget .min = 1.00
4236+ bin_width_widget .max = 10.00
4237+ bin_width_widget .step = 0.01
4238+ bin_width_widget .value = 1.00
4239+ bin_width_widget .readout_format = '.2f'
4240+ bin_width_widget .disabled = False
4241+ elif change ['new' ] == 'Raw' :
4242+ # For raw binning, disable the widget
4243+ bin_width_widget .value = 1
4244+ bin_width_widget .disabled = True
4245+ else :
4246+ # For linear binning, use integer values
4247+ bin_width_widget .min = 1
4248+ bin_width_widget .max = (max_deg + 2 )/ 10
4249+ bin_width_widget .step = 1
4250+ bin_width_widget .value = 1
4251+ bin_width_widget .readout_format = 'd'
4252+ bin_width_widget .disabled = False
4253+
4254+ def update_y_max_widget (change ):
4255+ if bin_widget .value == 'Raw' :
4256+ # For raw data, use the original max count
4257+ y_max_widget .max = max_count * 1.1
4258+ y_max_widget .value = max_count * 1.1
4259+ elif bin_widget .value == 'Linear' :
4260+ y_max_widget .max = max_count * bin_width_widget .value * 0.5
4261+ y_max_widget .value = max_count * bin_width_widget .value * 0.5
4262+ else : # 'Logarithmic'
4263+ y_max_widget .max = max_count * (10 ** bin_width_widget .value ) * 0.5
4264+ y_max_widget .value = max_count * (10 ** bin_width_widget .value ) * 0.5
4265+
41814266 # Bin width widget: float slider over the interval [1, (max_deg + 2) / 10], with step 1
4182- # TODO: if logarithmic binning, a much smaller range makes more sense
4183- bin_width_widget = widgets .IntSlider (
4267+ bin_width_widget = widgets .FloatSlider (
41844268 value = 1 ,
41854269 min = 1 ,
4186- max = (max_deg + 2 )/ 2 ,
4270+ max = (max_deg + 2 )/ 10 ,
41874271 step = 1 ,
41884272 description = 'Bin width:' ,
41894273 tooltip = ('For linear binning: actual width\n '
41904274 'For log binning: multiplicative factor' )
41914275 )
41924276
4277+ # Observe changes to bin_width_widget and bin_widget
4278+ bin_width_widget .observe (update_y_max_widget , names = 'value' )
4279+ bin_widget .observe (update_y_max_widget , names = 'value' )
4280+
41934281 # Upper limit for y-axis range, enables zooming (lower limit is always zero)
41944282 y_max_widget = widgets .IntSlider (
41954283 value = max_count * 1.1 ,
@@ -4203,7 +4291,7 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
42034291 x_range_widget = widgets .FloatRangeSlider (
42044292 min = 0 ,
42054293 max = max_deg * 1.1 + 5 ,
4206- value = [min , max ],
4294+ value = [0 , max_deg * 1.1 + 5 ],
42074295 step = 1 ,
42084296 description = 'x-axis range:' ,
42094297 disabled = False ,
0 commit comments