|
640 | 640 | { |
641 | 641 | "cell_type": "code", |
642 | 642 | "execution_count": null, |
643 | | - "id": "ed900c59", |
| 643 | + "id": "d7b587c9", |
644 | 644 | "metadata": {}, |
645 | 645 | "outputs": [], |
646 | 646 | "source": [ |
647 | | - "def plot_clustering_coefficient_distribution(clustering_coefficients: pd.Series, title_prefix: str) -> None:\n", |
| 647 | + "def plot_feature_distribution(feature_values: pd.Series, feature_name: str, title_prefix: str) -> None:\n", |
648 | 648 | " \"\"\"\n", |
649 | | - " Plots the distribution of clustering coefficients.\n", |
 | 649 | + " Plots the distribution of a feature's values.\n", |
650 | 650 | " \n", |
651 | 651 | " Parameters\n", |
652 | 652 | " ----------\n", |
653 | | - " clustering_coefficients : pd.Series\n", |
654 | | - " Series containing clustering coefficient values.\n", |
| 653 | + " feature_values : pd.Series\n", |
| 654 | + " Series containing feature values.\n", |
655 | 655 | " title_prefix : str\n", |
656 | 656 | " Text at the beginning of the title\n", |
657 | 657 | " \"\"\"\n", |
658 | | - " if clustering_coefficients.empty:\n", |
| 658 | + " if feature_values.empty:\n", |
659 | 659 | " print(\"No data available to plot.\")\n", |
660 | 660 | " return\n", |
661 | 661 | "\n", |
662 | 662 | " plot.figure(figsize=(10, 6))\n", |
663 | 663 | " plot.figure(figsize=(10, 6))\n", |
664 | | - " plot.hist(clustering_coefficients, bins=40, color='blue', alpha=0.7, edgecolor='black')\n", |
665 | | - " plot.title(f\"{title_prefix} Distribution of Clustering Coefficients\", pad=20)\n", |
666 | | - " plot.xlabel('Clustering Coefficient')\n", |
| 664 | + " plot.hist(feature_values, bins=40, color='blue', alpha=0.7, edgecolor='black')\n", |
| 665 | + " plot.title(f\"{title_prefix} Distribution of the feature '{feature_name}'\", pad=20)\n", |
| 666 | + " plot.xlabel(feature_name)\n", |
667 | 667 | " plot.ylabel('Frequency')\n", |
668 | | - " plot.xlim(left=clustering_coefficients.min(), right=clustering_coefficients.max())\n", |
| 668 | + " plot.xlim(left=feature_values.min(), right=feature_values.max())\n", |
669 | 669 | " # plot.yscale('log') # Use logarithmic scale for better visibility of differences\n", |
670 | 670 | " plot.grid(True)\n", |
671 | 671 | " plot.tight_layout()\n", |
672 | 672 | "\n", |
673 | | - " mean = clustering_coefficients.mean()\n", |
674 | | - " standard_deviation = clustering_coefficients.std()\n", |
| 673 | + " mean = feature_values.mean()\n", |
| 674 | + " standard_deviation = feature_values.std()\n", |
675 | 675 | "\n", |
676 | 676 | " # Vertical line for the mean\n", |
677 | 677 | " plot_standard_deviation_lines('red', mean, standard_deviation, standard_deviation_factor=0)\n", |
678 | 678 | " # Vertical line for 1 x standard deviations + mean (=z-score of 1)\n", |
679 | | - " plot_standard_deviation_lines('green', mean, standard_deviation, standard_deviation_factor=1)\n", |
| 679 | + " plot_standard_deviation_lines('orange', mean, standard_deviation, standard_deviation_factor=1)\n", |
| 680 | + " # Vertical line for 2 x standard deviations + mean (=z-score of 2)\n", |
| 681 | + " plot_standard_deviation_lines('green', mean, standard_deviation, standard_deviation_factor=2)\n", |
680 | 682 | "\n", |
681 | 683 | " plot.show()" |
682 | 684 | ] |
683 | 685 | }, |
| 686 | + { |
| 687 | + "cell_type": "code", |
| 688 | + "execution_count": null, |
| 689 | + "id": "ed900c59", |
| 690 | + "metadata": {}, |
| 691 | + "outputs": [], |
| 692 | + "source": [ |
| 693 | + "def plot_clustering_coefficient_distribution(clustering_coefficients: pd.Series, title_prefix: str) -> None:\n", |
| 694 | + " \"\"\"\n", |
| 695 | + " Plots the distribution of clustering coefficients.\n", |
| 696 | + " \n", |
| 697 | + " Parameters\n", |
| 698 | + " ----------\n", |
| 699 | + " clustering_coefficients : pd.Series\n", |
| 700 | + " Series containing clustering coefficient values.\n", |
 | 701 | + " title_prefix : str\n", |
| 702 | + " Text at the beginning of the title\n", |
| 703 | + " \"\"\"\n", |
| 704 | + " plot_feature_distribution(clustering_coefficients, 'Clustering Coefficient', title_prefix)" |
| 705 | + ] |
| 706 | + }, |
684 | 707 | { |
685 | 708 | "cell_type": "code", |
686 | 709 | "execution_count": null, |
687 | 710 | "id": "92aff8d9", |
688 | 711 | "metadata": {}, |
689 | 712 | "outputs": [], |
690 | 713 | "source": [ |
691 | | - "plot_clustering_coefficient_distribution(java_package_features['clusteringCoefficient'], title_prefix=\"Java Package\")" |
| 714 | + "plot_feature_distribution(java_package_features['clusteringCoefficient'], 'Clustering Coefficient', title_prefix=\"Java Package\")" |
692 | 715 | ] |
693 | 716 | }, |
694 | 717 | { |
|
798 | 821 | ")" |
799 | 822 | ] |
800 | 823 | }, |
| 824 | + { |
| 825 | + "cell_type": "markdown", |
| 826 | + "id": "38cad9cb", |
| 827 | + "metadata": {}, |
| 828 | + "source": [ |
| 829 | + "### 1.2b Betweenness Distribution" |
| 830 | + ] |
| 831 | + }, |
| 832 | + { |
| 833 | + "cell_type": "code", |
| 834 | + "execution_count": null, |
| 835 | + "id": "01c0ea0d", |
| 836 | + "metadata": {}, |
| 837 | + "outputs": [], |
| 838 | + "source": [ |
| 839 | + "plot_feature_distribution(java_package_features['betweenness'], 'Betweenness', title_prefix=\"Java Package\")" |
| 840 | + ] |
| 841 | + }, |
801 | 842 | { |
802 | 843 | "cell_type": "markdown", |
803 | 844 | "id": "630f5e4b", |
|
1402 | 1443 | "metadata": {}, |
1403 | 1444 | "outputs": [], |
1404 | 1445 | "source": [ |
1405 | | - "plot_clustering_coefficient_distribution(java_type_features['clusteringCoefficient'], title_prefix=\"Java Package\")" |
| 1446 | + "plot_feature_distribution(java_type_features['clusteringCoefficient'], 'Clustering Coefficient', title_prefix=\"Java Type\")" |
1406 | 1447 | ] |
1407 | 1448 | }, |
1408 | 1449 | { |
|
1421 | 1462 | ")" |
1422 | 1463 | ] |
1423 | 1464 | }, |
| 1465 | + { |
| 1466 | + "cell_type": "markdown", |
| 1467 | + "id": "dfb7560d", |
| 1468 | + "metadata": {}, |
| 1469 | + "source": [ |
| 1470 | + "### 2.2b Betweenness Distribution" |
| 1471 | + ] |
| 1472 | + }, |
| 1473 | + { |
| 1474 | + "cell_type": "code", |
| 1475 | + "execution_count": null, |
| 1476 | + "id": "1082ef81", |
| 1477 | + "metadata": {}, |
| 1478 | + "outputs": [], |
| 1479 | + "source": [ |
| 1480 | + "plot_feature_distribution(java_type_features['betweenness'], 'Betweenness', title_prefix=\"Java Type\")" |
| 1481 | + ] |
| 1482 | + }, |
1424 | 1483 | { |
1425 | 1484 | "cell_type": "markdown", |
1426 | 1485 | "id": "69256999", |
|
0 commit comments