Skip to content

Commit da25051

Browse files
author
Erdem Sariyuce
committed
good working version
1 parent 34aa478 commit da25051

File tree

2 files changed

+292
-23
lines changed

2 files changed

+292
-23
lines changed

src/graph_notebook/magics/graph_magic.py

Lines changed: 291 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
import ast
1717
import re
1818

19+
import numpy as np
20+
import matplotlib.pyplot as plt
21+
1922
from ipyfilechooser import FileChooser
2023
from enum import Enum
2124
from copy import copy
@@ -53,7 +56,7 @@
5356
STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
5457
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
5558
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
56-
normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \
59+
normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, TRAVERSAL_DIRECTIONS, \
5760
GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, GRAPHSONV4_UNTYPED, \
5861
GREMLIN_SERIALIZERS_WS, get_gremlin_serializer_mime, normalize_protocol_name, generate_snapshot_name)
5962
from graph_notebook.network import SPARQLNetwork
@@ -3926,39 +3929,305 @@ def handle_opencypher_status(self, line, local_ns):
39263929

39273930

39283931
# degreeDistribution
3929-
# No error handling, see the calls in other magics for appropriate error handling
3930-
3932+
# Shows the degree distribution of vertices in the graph
3933+
# TODO: Error handling
3934+
39313935
@line_magic
@needs_local_scope
@display_exceptions
@neptune_graph_only
def degreeDistribution(self, line, local_ns: dict = None):
    """Line magic that shows an interactive degree-distribution view.

    Parses the magic's options, reads the available vertex and edge labels
    from the property-graph summary, and displays a traversal-direction
    dropdown, two label multi-selects and a Submit button. Each click on
    Submit runs the query through ``callDD`` and hands the returned
    distribution and statistics to ``plot_interactive_degree_distribution``.

    Args:
        line: Raw option string of the magic. Recognised options are
            ``--traversalDirection``, ``--vertexLabels`` and ``--edgeLabels``.
        local_ns: Namespace dict passed on to ``callDD``; an empty dict is
            used when it is ``None``.

    Returns:
        None. All output goes to the displayed widgets.
    """
    parser = argparse.ArgumentParser()

    # traversalDirection parameter
    parser.add_argument('--traversalDirection', nargs='?', type=str.lower, default='both',
                        help=f'Type of the degree for which the distribution is shown. Valid inputs: {TRAVERSAL_DIRECTIONS}. '
                             f'Default: both.',
                        choices=TRAVERSAL_DIRECTIONS)

    # vertexLabels parameter
    parser.add_argument('--vertexLabels', nargs='*', default=[],
                        help="The vertex labels for which the induced graph is considered and the degree distribution is shown. "
                             "If not supplied, we will default to using all the vertex labels.")

    # edgeLabels parameter
    parser.add_argument('--edgeLabels', nargs='*', default=[],
                        help="The edge labels for which the degree distribution is shown. If not supplied, "
                             "we will default to using all the edge labels.")

    # Parse before touching the service so that bad options fail without a network call.
    args = parser.parse_args(line.split())

    if local_ns is None:
        local_ns = {}

    # Get the vertexLabels and edgeLabels from graph summary
    try:
        summary_res = self.client.statistics("propertygraph", True, "detailed", True)
        summary_res.raise_for_status()
        summary_res_json = summary_res.json()
        available_vertex_labels = summary_res_json['graphSummary']['nodeLabels']
        available_edge_labels = summary_res_json['graphSummary']['edgeLabels']
    except Exception as e:
        print(f"Error retrieving graph summary: {e}")
        return

    def preselect(requested, available, kind):
        # A selection widget rejects initial values that are not among its options,
        # so unknown labels from the command line are reported and dropped.
        unknown = [label for label in requested if label not in available]
        if unknown:
            print(f"Ignoring unknown {kind} labels: {unknown}")
        return tuple(label for label in requested if label in available)

    # `--traversalDirection` given without a value parses to None; fall back to 'both'.
    td_val = args.traversalDirection or 'both'

    td_dropdown = widgets.Dropdown(
        options=TRAVERSAL_DIRECTIONS,
        description='Traversal direction:',
        disabled=False,
        style=SEED_WIDGET_STYLE,
        value=td_val
    )

    vertex_labels_select = widgets.SelectMultiple(
        options=available_vertex_labels,
        description='Vertex labels:',
        disabled=False,
        style=SEED_WIDGET_STYLE,
        value=preselect(args.vertexLabels or [], available_vertex_labels, 'vertex')
    )

    edge_labels_select = widgets.SelectMultiple(
        options=available_edge_labels,
        description='Edge labels:',
        disabled=False,
        style=SEED_WIDGET_STYLE,
        value=preselect(args.edgeLabels or [], available_edge_labels, 'edge')
    )

    submit_button = widgets.Button(description="Submit")
    output = widgets.Output()

    # Display widgets
    display(td_dropdown, vertex_labels_select, edge_labels_select, submit_button, output)

    def on_button_clicked(b):
        # Read the current widget selections.
        td = td_dropdown.value
        vlabels = list(vertex_labels_select.value)
        elabels = list(edge_labels_select.value)

        # Clear the output widget before displaying new content
        output.clear_output(wait=True)

        with output:
            try:
                res = self.callDD(td, vlabels, elabels, local_ns)
                result = res['results'][0]['output']
                pairs = np.array(result['distribution'])
                stats = result['statistics']
                max_deg = stats['maxDeg']
                median_deg = stats['p50']
                mean_deg = stats['mean']
            except (KeyError, IndexError, TypeError) as e:
                # The query failed or its result does not have the expected layout.
                print(f"Could not read the degree distribution result: {e!r}")
                return

            # Expect a non-empty table of (degree, count) rows.
            if pairs.ndim != 2 or pairs.shape[0] == 0 or pairs.shape[1] < 2:
                print("No degree distribution data was returned for the selected labels.")
                return

            keys = pairs[:, 0]
            values = pairs[:, 1]

            # Create the interactive visualization
            self.plot_interactive_degree_distribution(keys, values, max_deg, median_deg, mean_deg)

    submit_button.on_click(on_button_clicked)
4037+
4038+
def callDD(self, td, vlabels, elabels, local_ns):
    """Run ``neptune.algo.degreeDistribution`` and return the stored result.

    Builds the openCypher ``CALL`` statement from the given options, executes
    it through ``handle_opencypher_query`` with ``--store-to js`` and returns
    whatever that call stored in ``local_ns['js']``.

    Args:
        td: Traversal direction written into the ``traversalDirection`` field.
        vlabels: Vertex labels; the ``vertexLabels`` field is omitted when empty.
        elabels: Edge labels; the ``edgeLabels`` field is omitted when empty.
        local_ns: Namespace dict the query result is stored into.

    Returns:
        The value found under ``local_ns['js']`` after the query ran.

    Raises:
        KeyError: If the query handler stored nothing under ``'js'``.
    """
    import json

    def quote(value):
        # JSON string quoting escapes embedded quotes and backslashes, so a
        # label cannot terminate the literal early and alter the query.
        return json.dumps(str(value), ensure_ascii=False)

    query_parts = [f'traversalDirection: {quote(td)}']

    if vlabels:
        vertex_str = ", ".join(quote(v) for v in vlabels)
        query_parts.append(f'vertexLabels: [{vertex_str}]')

    if elabels:
        edge_str = ", ".join(quote(e) for e in elabels)
        query_parts.append(f'edgeLabels: [{edge_str}]')

    line = "CALL neptune.algo.degreeDistribution({" + ", ".join(query_parts) + "}) YIELD output RETURN output"

    oc_rebuild_args = '--store-to js'

    # Drop any result left over from an earlier run so that a failed query
    # cannot be mistaken for a fresh one.
    local_ns.pop('js', None)

    self.handle_opencypher_query(oc_rebuild_args, line, local_ns)

    return local_ns['js']
4057+
4058+
4059+
def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg):
    """Display an interactive degree-distribution plot with summary statistics.

    Builds controls for axis scale, binning mode, bin width, y-axis maximum,
    x-axis range and the min/max degree marker lines, wires them to a
    matplotlib plot through ``widgets.interactive`` and displays the plot
    together with a statistics panel.

    Args:
        unique_degrees: Degree values, one per distribution row.
        counts: Number of vertices for each entry of ``unique_degrees``.
        max_deg: Maximum degree; used for bin edges, slider limits and the
            max-degree marker line.
        median_deg: Median degree, printed in the statistics panel.
        mean_deg: Mean degree, printed in the statistics panel.

    Returns:
        None.
    """
    unique_degrees = np.asarray(unique_degrees)
    counts = np.asarray(counts)

    if counts.size == 0:
        print("No degree data to plot.")
        return

    # Everything below depends only on the inputs, so it is computed once
    # instead of on every redraw.
    zero_idx = np.where(unique_degrees == 0)[0]
    zero_degree_count = counts[zero_idx[0]] if len(zero_idx) > 0 else 0

    nonzero_mask = unique_degrees > 0
    nonzero_degrees = unique_degrees[nonzero_mask]
    nonzero_counts = counts[nonzero_mask]
    has_nonzero = len(nonzero_degrees) > 0

    # Smallest non-zero degree, or 0 when every vertex has degree zero.
    min_nonzero_deg = np.min(nonzero_degrees) if has_nonzero else 0

    total_nodes = np.sum(counts)
    # Halves the degree sum. NOTE(review): that equals the edge count only if
    # each edge contributes to two vertices' degrees - confirm this also holds
    # for the inbound/outbound traversal directions.
    total_edges = np.sum(unique_degrees * counts) // 2
    max_count = np.max(counts)

    # Create output widget for statistics
    stats_output = widgets.Output()

    def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, show_maxdeg):
        alpha = 0.6
        plt.clf()

        n_bins = 1
        if has_nonzero:
            if bin_type == 'Raw':
                # For raw data, create bars at each unique degree
                plt.bar(nonzero_degrees, nonzero_counts, alpha=alpha,
                        label='Raw', color='#000080')
            else:
                if bin_type == 'Linear':
                    n_bins = max(1, int((max_deg - min_nonzero_deg) / bin_width))
                    bins = np.linspace(min_nonzero_deg, max_deg, n_bins + 1)
                else:  # Logarithmic
                    min_deg_log = np.log10(min_nonzero_deg) if min_nonzero_deg > 0 else 0
                    max_deg_log = np.log10(max_deg) if max_deg > 0 else 1
                    # The +0.01 keeps the divisor non-zero when bin_width is 1.
                    n_bins = max(1, int((max_deg_log - min_deg_log) / np.log10(bin_width + 0.01)))
                    bins = np.logspace(min_deg_log, max_deg_log, n_bins + 1)

                # Weighting each degree by its count gives the same histogram as
                # expanding to one sample per vertex, without allocating an
                # array as large as the vertex set.
                plt.hist(nonzero_degrees, bins=bins, weights=nonzero_counts, density=False,
                         alpha=alpha, histtype='bar', color='#000080')

        # Plot degree 0 separately
        if zero_degree_count > 0:
            plt.bar(0, zero_degree_count, color='red',
                    label='Isolated', alpha=alpha, width=0.2)

        plt.xlim(x_range[0], x_range[1])

        # Set scales based on selection; the lower x limit is shifted by one
        # on a logarithmic x axis so that it is never zero.
        if scale_type == 'Log-Log':
            plt.xscale('log')
            plt.yscale('log')
            plt.xlim(x_range[0] + 1, x_range[1])
        elif scale_type == 'Log(x)-Linear(y)':
            plt.xscale('log')
            plt.xlim(x_range[0] + 1, x_range[1])
        elif scale_type == 'Linear(x)-Log(y)':
            plt.yscale('log')

        plt.gca().set_ylim(top=y_max)

        # Add vertical dashed lines for min and max degree if enabled
        if show_mindeg and min_nonzero_deg > 0:
            plt.axvline(x=min_nonzero_deg, color='darkgreen', linestyle='--', linewidth=2,
                        label=f'Min non-zero degree: {min_nonzero_deg}')

        if show_maxdeg:
            plt.axvline(x=max_deg, color='darkred', linestyle='--', linewidth=2,
                        label=f'Max degree: {max_deg}')

        plt.grid(True, which="both", ls="-", alpha=0.2)
        plt.xlabel('Degree')
        plt.ylabel('Number of nodes')

        # Only draw a legend when something is labelled; an empty legend
        # makes matplotlib emit a warning.
        handles, _ = plt.gca().get_legend_handles_labels()
        if handles:
            plt.legend()

        plt.title('Degree Distribution')

        # Update statistics
        with stats_output:
            stats_output.clear_output(wait=True)
            print(f"Number of nodes: {total_nodes}")
            print(f"Number of edges: {total_edges}")
            print(f"Number of isolated nodes: {zero_degree_count}")
            print(f"Average degree: {mean_deg:.2f}")
            print(f"Median degree: {median_deg:.2f}")
            print(f"Max degree: {max_deg}")
            if min_nonzero_deg > 0:
                print(f"Min non-zero degree: {min_nonzero_deg}")
            if bin_type != 'Raw':
                print(f"Number of bins: {n_bins}")

    scale_widget = widgets.Dropdown(
        options=['Linear-Linear', 'Log-Log', 'Log(x)-Linear(y)', 'Linear(x)-Log(y)'],
        value='Linear-Linear',
        description='Scale:'
    )

    bin_widget = widgets.Dropdown(
        options=['Raw', 'Linear', 'Logarithmic'],
        value='Linear',
        description='Binning:'
    )

    # Integer sliders get integer bounds, never below their minimum of 1.
    bin_width_widget = widgets.IntSlider(
        value=1,
        min=1,
        max=max(1, int((max_deg + 2) / 2)),
        step=1,
        description='Bin width:',
        tooltip=('For linear binning: actual width\n'
                 'For log binning: multiplicative factor')
    )

    y_max_widget = widgets.IntSlider(
        value=int(max_count),
        min=1,
        max=max(1, int(max_count * 1.1)),
        step=1,
        description='y-max:',
    )

    # Add x-axis range slider
    x_upper = max_deg * 1.1 + 5
    x_range_widget = widgets.FloatRangeSlider(
        value=[0, x_upper],
        min=0,
        max=x_upper,
        step=1,
        description='x-axis range:',
        disabled=False,
        continuous_update=True,
        readout=True,
        readout_format='.0f',
    )

    # Toggle switches for min/max degree lines
    show_mindeg_widget = widgets.Checkbox(
        value=True,
        description='Show Min Degree Line',
        disabled=False
    )

    show_maxdeg_widget = widgets.Checkbox(
        value=True,
        description='Show Max Degree Line',
        disabled=False
    )

    # Create the interactive plot
    interactive_plot = widgets.interactive(
        update_plot,
        scale_type=scale_widget,
        bin_type=bin_widget,
        bin_width=bin_width_widget,
        y_max=y_max_widget,
        x_range=x_range_widget,
        show_mindeg=show_mindeg_widget,
        show_maxdeg=show_maxdeg_widget
    )

    # Display the interactive plot with the statistics panel below it.
    vbox = widgets.VBox([interactive_plot, stats_output])
    display(vbox)

src/graph_notebook/neptune/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@
174174

175175
GRAPH_PG_INFO_METRICS = {'numVertices', 'numEdges', 'numVertexProperties', 'numEdgeProperties'}
176176

177-
TRAVERSAL_DIRECTIONS = {'both', 'inbound', 'outbound'}
177+
# Accepted --traversalDirection values. graph_magic.py passes this both as argparse
# `choices` and as the options of a Dropdown widget; presumably a list rather than a
# set so that the option order is stable - TODO confirm.
TRAVERSAL_DIRECTIONS = ['both', 'inbound', 'outbound']
178178

179179
def is_allowed_neptune_host(hostname: str, host_allowlist: list):
180180
for host_snippet in host_allowlist:

0 commit comments

Comments
 (0)