1+ from structlog import get_logger
12from functools import cached_property
23from itertools import groupby , chain
34from operator import attrgetter
910from django .db .models import F , Q , Count , OuterRef , Sum , When , Value , Case
1011from django .forms import Form , ModelChoiceField , ModelForm , BooleanField
1112
13+ from algoliasearch .analytics .client import AnalyticsClientSync
14+
15+ from config import settings
1216from core .models import RenderedContent
1317from reports .generation import (
1418 generate_wordcloud ,
1519 get_mailing_list_post_stats ,
1620 get_new_subscribers_stats ,
21+ generate_mailinglist_words ,
22+ generate_algolia_words ,
23+ get_algolia_search_stats ,
1724)
1825from slack .models import Channel , SlackActivityBucket , SlackUser
26+ from versions .exceptions import BoostImportedDataException
1927from versions .models import Version , ReportConfiguration
2028from .models import (
2129 Commit ,
2937from mailing_list .models import EmailData
3038from .utils import batched , conditional_batched
3139
40+ logger = get_logger (__name__ )
41+
3242
3343class LibraryForm (ModelForm ):
3444 class Meta :
@@ -661,9 +671,13 @@ def _get_slack_stats_for_channels(
661671 }
662672
663673 def _get_dependency_data (self , library_order , version ):
664- diffs_by_id = {
665- x ["library_id" ]: x for x in version .get_dependency_diffs ().values ()
666- }
674+ try :
675+ dependency_diff_values = version .get_dependency_diffs ().values ()
676+ except BoostImportedDataException as e :
677+ logger .warning (f"Could not get dependency diffs for version { version } : { e } " )
678+ dependency_diff_values = {}
679+
680+ diffs_by_id = {x ["library_id" ]: x for x in dependency_diff_values }
667681 diffs = []
668682 for lib_id in library_order :
669683 diffs .append (diffs_by_id .get (lib_id , {}))
@@ -697,9 +711,13 @@ def get_library_data(self, libraries, library_order, prior_version, version):
697711 def get_stats (self ):
698712 report_configuration = self .cleaned_data ["report_configuration" ]
699713 version = Version .objects .filter (name = report_configuration .version ).first ()
714+ # NOTE TO FUTURE DEVS: a report configuration frequently does not match a
715+ # real version, because reports are often generated before the release
716+ # version has been created. Remember to account for that case.
717+ report_before_release = False if version else True
700718
701719 prior_version = None
702- if not version :
720+ if report_before_release :
703721 # if the version is not set then the user has chosen a report configuration
704722 # that's not matching a live version, so we use the most recent version
705723 version = Version .objects .filter (name = "master" ).first ()
@@ -808,10 +826,25 @@ def get_stats(self):
808826 library in [lib ["library" ] for lib in library_data ],
809827 )
810828 )
811- wordcloud_base64 , wordcloud_top_words = generate_wordcloud (
812- version , prior_version
829+ # mailinglist word cloud generation
830+ mailinglist_words = generate_mailinglist_words (prior_version , version )
831+ mailinglist_wordcloud_base64 , mailinglist_wordcloud_top_words = (
832+ generate_wordcloud (mailinglist_words , width = 1400 , height = 700 )
833+ )
834+
835+ # algolia search word cloud generation
836+ client = AnalyticsClientSync (** settings .ALGOLIA )
837+ # if the report is based on a live version, look for stats for that
838+ # version, otherwise use the stats for the prior (live) version
839+ search_version = prior_version if report_before_release else version
840+ search_list_words = generate_algolia_words (client , search_version )
841+ search_wordcloud_base64 , search_wordcloud_top_words = generate_wordcloud (
842+ search_list_words , width = 800 , height = 250
813843 )
814844
845+ search_stats = get_algolia_search_stats (client , search_version )
846+ logger .info (f"{ search_stats = } " )
847+
815848 opened_issues_count = (
816849 Issue .objects .filter (library__in = self .library_queryset )
817850 .opened_during_release (version , prior_version )
@@ -827,20 +860,23 @@ def get_stats(self):
827860 "committee_members" : committee_members ,
828861 "lines_added" : lines_added ,
829862 "lines_removed" : lines_removed ,
830- "wordcloud_base64" : wordcloud_base64 ,
831- "wordcloud_frequencies" : wordcloud_top_words ,
832863 "version" : version ,
833864 "report_configuration" : report_configuration ,
834865 "prior_version" : prior_version ,
835866 "opened_issues_count" : opened_issues_count ,
836867 "closed_issues_count" : closed_issues_count ,
868+ "mailinglist_wordcloud_base64" : mailinglist_wordcloud_base64 ,
869+ "mailinglist_wordcloud_frequencies" : mailinglist_wordcloud_top_words ,
837870 "mailinglist_counts" : mailinglist_counts ,
838871 "mailinglist_total" : total_mailinglist_count or 0 ,
839872 "mailinglist_contributor_release_count" : mailinglist_contributor_release_count , # noqa: E501
840873 "mailinglist_contributor_new_count" : mailinglist_contributor_new_count ,
841874 "mailinglist_post_stats" : mailinglist_post_stats ,
842875 "mailinglist_new_subscribers_stats" : new_subscribers_stats ,
843876 "mailinglist_charts_start_year" : prior_version .release_date .year ,
877+ "search_wordcloud_base64" : search_wordcloud_base64 ,
878+ "search_wordcloud_frequencies" : search_wordcloud_top_words ,
879+ "search_stats" : search_stats ,
844880 "commit_contributors_release_count" : commit_contributors_release_count ,
845881 "commit_contributors_new_count" : commit_contributors_new_count ,
846882 "global_contributors_new_count" : len (
0 commit comments