|
16 | 16 | import ast |
17 | 17 | import re |
18 | 18 |
|
| 19 | +import numpy as np |
| 20 | +import matplotlib.pyplot as plt |
| 21 | + |
19 | 22 | from ipyfilechooser import FileChooser |
20 | 23 | from enum import Enum |
21 | 24 | from copy import copy |
|
53 | 56 | STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \ |
54 | 57 | SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \ |
55 | 58 | OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \ |
56 | | - normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \ |
| 59 | + normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, TRAVERSAL_DIRECTIONS, \ |
57 | 60 | GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, GRAPHSONV4_UNTYPED, \ |
58 | 61 | GREMLIN_SERIALIZERS_WS, get_gremlin_serializer_mime, normalize_protocol_name, generate_snapshot_name) |
59 | 62 | from graph_notebook.network import SPARQLNetwork |
@@ -3926,39 +3929,305 @@ def handle_opencypher_status(self, line, local_ns): |
3926 | 3929 |
|
3927 | 3930 |
|
# degreeDistribution
# Shows the degree distribution of vertices in the graph.

@line_magic
@needs_local_scope
@display_exceptions
@neptune_graph_only
def degreeDistribution(self, line, local_ns: dict = None):
    """Interactively plot the degree distribution of the graph.

    Parses the magic's arguments, reads the available vertex and edge labels
    from the property-graph summary, and displays a traversal-direction
    dropdown, two label multi-selects and a Submit button. Each click on
    Submit runs the degree distribution query through ``callDD`` and hands
    the result to ``plot_interactive_degree_distribution``.

    Args:
        line: Argument string of the line magic. Supports
            ``--traversalDirection``, ``--vertexLabels`` and ``--edgeLabels``;
            the values pre-populate the widgets.
        local_ns: The notebook's local namespace, forwarded to ``callDD``.

    Returns:
        None. All output is rendered through widgets.
    """
    parser = argparse.ArgumentParser()

    # traversalDirection parameter
    parser.add_argument('--traversalDirection', nargs='?', type=str.lower, default='both',
                        help=f'Type of the degree for which the distribution is shown. Valid inputs: {TRAVERSAL_DIRECTIONS}. '
                             f'Default: both.',
                        choices=TRAVERSAL_DIRECTIONS)

    # vertexLabels parameter
    parser.add_argument('--vertexLabels', nargs='*', default=[],
                        help="The vertex labels for which the induced graph is considered and the degree distribution is shown. "
                             "If not supplied, we will default to using all the vertex labels.")

    # edgeLabels parameter
    parser.add_argument('--edgeLabels', nargs='*', default=[],
                        help="The edge labels for which the degree distribution is shown. If not supplied, "
                             "we will default to using all the edge labels.")

    # Parse before touching the network so that invalid arguments fail fast.
    args = parser.parse_args(line.split())

    # Get the vertex and edge labels from the graph summary. This is a
    # user-facing boundary, so any failure is reported instead of raised.
    try:
        summary_res = self.client.statistics("propertygraph", True, "detailed", True)
        summary_res.raise_for_status()
        graph_summary = summary_res.json()['graphSummary']
        available_vertex_labels = graph_summary['nodeLabels']
        available_edge_labels = graph_summary['edgeLabels']
    except Exception as e:
        print(f"Error retrieving graph summary: {e}")
        return

    def _known_labels(requested, available, kind):
        # A selection widget rejects values that are not among its options,
        # so drop (and report) requested labels the graph does not have.
        requested = requested or []
        unknown = [label for label in requested if label not in available]
        if unknown:
            print(f"Ignoring unknown {kind} labels: {unknown}")
        return tuple(label for label in requested if label in available)

    # `--traversalDirection` given without a value parses to None.
    td_val = args.traversalDirection or 'both'

    td_dropdown = widgets.Dropdown(
        options=TRAVERSAL_DIRECTIONS,
        description='Traversal direction:',
        disabled=False,
        style=SEED_WIDGET_STYLE,
        value=td_val
    )

    vertex_labels_select = widgets.SelectMultiple(
        options=available_vertex_labels,
        description='Vertex labels:',
        disabled=False,
        style=SEED_WIDGET_STYLE,
        value=_known_labels(args.vertexLabels, available_vertex_labels, 'vertex')
    )

    edge_labels_select = widgets.SelectMultiple(
        options=available_edge_labels,
        description='Edge labels:',
        disabled=False,
        style=SEED_WIDGET_STYLE,
        value=_known_labels(args.edgeLabels, available_edge_labels, 'edge')
    )

    submit_button = widgets.Button(description="Submit")
    output = widgets.Output()

    # Display widgets
    display(td_dropdown, vertex_labels_select, edge_labels_select, submit_button, output)

    def on_button_clicked(b):
        # Snapshot the current widget selections.
        td = td_dropdown.value
        vlabels = list(vertex_labels_select.value)
        elabels = list(edge_labels_select.value)

        # Clear the output widget before displaying new content
        output.clear_output(wait=True)

        with output:
            # A failed query can yield no result or a result without the
            # expected keys; report that instead of raising in the callback.
            try:
                res = self.callDD(td, vlabels, elabels, local_ns)
                dd_output = res['results'][0]['output']
                pairs = np.array(dd_output['distribution'])
                statistics = dd_output['statistics']
                max_deg = statistics['maxDeg']
                median_deg = statistics['p50']
                mean_deg = statistics['mean']
            except (KeyError, IndexError, TypeError):
                print("The degree distribution query returned no usable result.")
                return

            # The distribution is consumed as rows of (degree, count).
            if pairs.ndim != 2 or pairs.shape[0] == 0 or pairs.shape[1] < 2:
                print("The degree distribution is empty; nothing to plot.")
                return

            keys = pairs[:, 0]
            values = pairs[:, 1]

            # Create the interactive visualization
            self.plot_interactive_degree_distribution(keys, values, max_deg, median_deg, mean_deg)

    submit_button.on_click(on_button_clicked)
| 4037 | + |
def callDD(self, td, vlabels, elabels, local_ns):
    """Run ``neptune.algo.degreeDistribution`` and return the stored result.

    Builds the openCypher ``CALL`` statement from the given options, executes
    it through ``self.handle_opencypher_query`` with ``--store-to js`` and
    returns whatever that call stored under ``'js'`` in ``local_ns``.

    Args:
        td: Traversal direction placed in the ``traversalDirection`` option.
        vlabels: Vertex labels to restrict to; omitted from the query when
            empty.
        elabels: Edge labels to restrict to; omitted from the query when
            empty.
        local_ns: Namespace dict the query result is stored into. ``None``
            is treated as an empty, throwaway namespace.

    Returns:
        The object stored under ``'js'`` by the query handler, or ``None``
        when the handler stored nothing (for example because the query
        failed).
    """
    def quote(value):
        # Escape backslashes and double quotes so a label cannot terminate
        # the string literal early and alter the query.
        escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    query_parts = [f'traversalDirection: {quote(td)}']

    if vlabels:
        vertex_str = ", ".join(quote(v) for v in vlabels)
        query_parts.append(f'vertexLabels: [{vertex_str}]')

    if elabels:
        edge_str = ", ".join(quote(e) for e in elabels)
        query_parts.append(f'edgeLabels: [{edge_str}]')

    query = "CALL neptune.algo.degreeDistribution({" + ", ".join(query_parts) + "}) YIELD output RETURN output"

    if local_ns is None:
        local_ns = {}

    # Remove any earlier value first; otherwise a failed query would hand
    # back the stale result of a previous run.
    local_ns.pop('js', None)

    self.handle_opencypher_query('--store-to js', query, local_ns)

    return local_ns.get('js')
| 4057 | + |
| 4058 | + |
def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg):
    """Display an interactive degree distribution plot with summary statistics.

    Renders a matplotlib chart driven by widgets (axis scale, binning mode,
    bin width, y-axis maximum, x-axis range, min/max degree marker lines)
    together with a text panel of statistics.

    Args:
        unique_degrees: Array-like of degree values; entry ``i`` pairs with
            ``counts[i]``.
        counts: Array-like with the number of nodes having each degree.
        max_deg: Maximum degree; used for bin edges, slider limits, the
            marker line and the statistics panel.
        median_deg: Median degree, shown in the statistics panel.
        mean_deg: Mean degree, shown in the statistics panel.

    Returns:
        None. Prints a message and returns early when there is no data.
    """
    unique_degrees = np.asarray(unique_degrees)
    counts = np.asarray(counts)

    if unique_degrees.size == 0 or counts.size == 0:
        print("No degree distribution data to plot.")
        return

    # Everything below depends only on the data, not on any widget value,
    # so it is computed once instead of on every redraw.
    zero_mask = unique_degrees == 0
    zero_degree_count = counts[zero_mask][0] if zero_mask.any() else 0

    nonzero_mask = unique_degrees > 0
    nonzero_degrees = unique_degrees[nonzero_mask]
    nonzero_counts = counts[nonzero_mask]
    has_nonzero = nonzero_degrees.size > 0

    # Smallest non-zero degree; 0 when every node is isolated.
    min_deg = np.min(nonzero_degrees) if has_nonzero else 0

    total_nodes = counts.sum()
    # NOTE(review): halving the degree sum counts each edge once only if
    # every edge contributes to two degrees ('both' direction) - confirm
    # for the 'in' and 'out' traversal directions.
    total_edges = (unique_degrees * counts).sum() // 2
    max_count = np.max(counts)

    def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, show_maxdeg):
        alpha = 0.6
        plt.clf()

        n_bins = 1
        if has_nonzero:  # Only draw bars if there are non-zero degree nodes
            if bin_type == 'Raw':
                # For raw data, create bars at each unique degree
                plt.bar(nonzero_degrees, nonzero_counts, alpha=alpha,
                        label='Raw', color='#000080')
            else:
                if bin_type == 'Linear':
                    n_bins = max(1, int((max_deg - min_deg) / bin_width))
                    bins = np.linspace(min_deg, max_deg, n_bins + 1)
                else:  # Logarithmic
                    min_deg_log = np.log10(min_deg) if min_deg > 0 else 0
                    max_deg_log = np.log10(max_deg) if max_deg > 0 else 1
                    n_bins = max(1, int((max_deg_log - min_deg_log) / np.log10(bin_width + 0.01)))
                    bins = np.logspace(min_deg_log, max_deg_log, n_bins + 1)

                # Weight each unique degree by its node count. This gives
                # the same histogram as expanding to one sample per node,
                # without allocating an array as large as the graph.
                plt.hist(nonzero_degrees, bins=bins, weights=nonzero_counts,
                         density=False, alpha=alpha,
                         histtype='bar', color='#000080')

        # Plot degree 0 separately
        if zero_degree_count > 0:
            plt.bar(0, zero_degree_count, color='red',
                    label='Isolated', alpha=alpha, width=0.2)

        plt.xlim(x_range[0], x_range[1])

        # Set scales based on selection. A log x-axis cannot start at 0,
        # so its lower limit is shifted by one.
        if scale_type == 'Log-Log':
            plt.xscale('log')
            plt.yscale('log')
            plt.xlim(x_range[0] + 1, x_range[1])
        elif scale_type == 'Log(x)-Linear(y)':
            plt.xscale('log')
            plt.xlim(x_range[0] + 1, x_range[1])
        elif scale_type == 'Linear(x)-Log(y)':
            plt.yscale('log')

        axes = plt.gca()
        axes.set_ylim(top=y_max)

        # Add vertical dashed lines for min and max degree if enabled
        if show_mindeg and min_deg > 0:
            plt.axvline(x=min_deg, color='darkgreen', linestyle='--', linewidth=2,
                        label=f'Min non-zero degree: {min_deg}')

        if show_maxdeg:
            plt.axvline(x=max_deg, color='darkred', linestyle='--', linewidth=2,
                        label=f'Max degree: {max_deg}')

        plt.grid(True, which="both", ls="-", alpha=0.2)
        plt.xlabel('Degree')
        plt.ylabel('Number of nodes')

        # Only build a legend when something is labelled; an empty legend
        # triggers a matplotlib warning.
        handles, _ = axes.get_legend_handles_labels()
        if handles:
            axes.legend()

        plt.title('Degree Distribution')

        # Update statistics
        with stats_output:
            stats_output.clear_output(wait=True)
            print(f"Number of nodes: {total_nodes}")
            print(f"Number of edges: {total_edges}")
            print(f"Number of isolated nodes: {zero_degree_count}")
            print(f"Average degree: {mean_deg:.2f}")
            print(f"Median degree: {median_deg:.2f}")
            print(f"Max degree: {max_deg}")
            if min_deg > 0:
                print(f"Min non-zero degree: {min_deg}")
            if bin_type != 'Raw':
                print(f"Number of bins: {n_bins}")

    scale_widget = widgets.Dropdown(
        options=['Linear-Linear', 'Log-Log', 'Log(x)-Linear(y)', 'Linear(x)-Log(y)'],
        value='Linear-Linear',
        description='Scale:'
    )

    bin_widget = widgets.Dropdown(
        options=['Raw', 'Linear', 'Logarithmic'],
        value='Linear',
        description='Binning:'
    )

    # Integer sliders get integer bounds that never fall below their minimum.
    bin_width_widget = widgets.IntSlider(
        value=1,
        min=1,
        max=max(1, (int(max_deg) + 2) // 2),
        step=1,
        description='Bin width:',
        tooltip=('For linear binning: actual width\n'
                 'For log binning: multiplicative factor')
    )

    # A bin can aggregate several degrees and so be taller than the largest
    # single count; allow the y limit to go up to the total node count.
    y_max_widget = widgets.IntSlider(
        value=max(1, int(max_count)),
        min=1,
        max=max(1, int(max_count * 1.1), int(total_nodes)),
        step=1,
        description='y-max:',
    )

    # Add x-axis range slider; it starts at 0 so the isolated-node bar is
    # inside the initial view.
    x_upper = max_deg * 1.1 + 5
    x_range_widget = widgets.FloatRangeSlider(
        value=[0, x_upper],
        min=0,
        max=x_upper,
        step=1,
        description='x-axis range:',
        disabled=False,
        continuous_update=True,
        readout=True,
        readout_format='.0f',
    )

    # Create output widget for statistics
    stats_output = widgets.Output()

    # Toggle switches for min/max degree lines
    show_mindeg_widget = widgets.Checkbox(
        value=True,
        description='Show Min Degree Line',
        disabled=False
    )

    show_maxdeg_widget = widgets.Checkbox(
        value=True,
        description='Show Max Degree Line',
        disabled=False
    )

    # Create the interactive plot
    interactive_plot = widgets.interactive(
        update_plot,
        scale_type=scale_widget,
        bin_type=bin_widget,
        bin_width=bin_width_widget,
        y_max=y_max_widget,
        x_range=x_range_widget,
        show_mindeg=show_mindeg_widget,
        show_maxdeg=show_maxdeg_widget
    )

    # Stack the plot controls above the statistics panel and show them.
    vbox = widgets.VBox([interactive_plot, stats_output])
    display(vbox)
0 commit comments