55# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66
77import os
8+ import tempfile
89import time
910from abc import ABC , abstractmethod
11+ from typing import Tuple , Dict
1012
13+ import fsspec
1114import pandas as pd
15+ import report_creator as rc
1216
1317from ads .common .object_storage_details import ObjectStorageDetails
1418from ads .opctl import logger
1519from ads .opctl .operator .lowcode .common .utils import default_signer
1620from ads .opctl .operator .lowcode .common .utils import (
21+ human_time_friendly ,
22+ enable_print ,
23+ disable_print ,
1724 write_data ,
1825)
26+ from .factory import SupportedModels
1927from .recommender_dataset import RecommenderDatasets
2028from ..operator_config import RecommenderOperatorConfig
29+ from plotly import graph_objects as go
30+ import matplotlib .pyplot as plt
2131
2232
2333class RecommenderOperatorBaseModel (ABC ):
2434 """The base class for the recommender detection operator models."""
2535
2636 def __init__ (self , config : RecommenderOperatorConfig , datasets : RecommenderDatasets ):
27- self .spec = config .spec
37+ self .config = config
38+ self .spec = self .config .spec
2839 self .datasets = datasets
2940
def generate_report(self):
    """Build the model, optionally assemble the report, and save the outputs.

    The call to ``self._build_model()`` is timed and the elapsed time is
    logged. When ``self.spec.generate_report`` is truthy the report sections
    are assembled in this order: summary header, user and item interaction
    histograms, a sample recommendation heatmap, the metrics table, the
    sections returned by ``self._generate_report()`` and a YAML appendix of
    the configuration. The sections and the recommendations frame are then
    passed to ``self._save_report``.
    """
    item_col = self.spec.item_column
    user_col = self.spec.user_column
    interaction_col = self.spec.interaction_column

    start_time = time.time()
    result_df, metrics = self._build_model()
    elapsed_time = time.time() - start_time
    logger.info("Building the models completed in %s seconds", elapsed_time)

    # Start from an empty report so the unconditional ``_save_report`` call
    # below never reads an unbound name when report generation is disabled.
    report_sections = []

    if self.spec.generate_report:
        # build the report
        model_description, other_sections = self._generate_report()

        header_section = rc.Block(
            rc.Heading("Recommender Report", level=1),
            rc.Text(
                f"The recommendations was generated using { SupportedModels .SVD .upper ()} . { model_description } "
            ),
            rc.Group(
                rc.Metric(
                    heading="Recommendations was generated in ",
                    value=human_time_friendly(elapsed_time),
                ),
                rc.Metric(
                    heading="Num users",
                    value=len(self.datasets.users),
                ),
                rc.Metric(
                    heading="Num items",
                    value=len(self.datasets.items),
                ),
            ),
        )
        summary = rc.Block(header_section)

        # user and item distributions in interactions
        user_title = rc.Heading("User Statistics", level=2)
        user_rating_counts = self.datasets.interactions[user_col].value_counts()
        fig_user = go.Figure(data=[go.Histogram(x=user_rating_counts, nbinsx=100)])
        fig_user.update_layout(
            title=f'Distribution of the number of interactions by { user_col } ',
            xaxis_title=f'Number of { interaction_col } ',
            yaxis_title=f'Number of { user_col } ',
            bargap=0.2,
        )

        item_title = rc.Heading("Item Statistics", level=2)
        item_rating_counts = self.datasets.interactions[item_col].value_counts()
        fig_item = go.Figure(data=[go.Histogram(x=item_rating_counts, nbinsx=100)])
        fig_item.update_layout(
            title=f'Distribution of the number of interactions by { item_col } ',
            xaxis_title=f'Number of { interaction_col } ',
            yaxis_title=f'Number of { item_col } ',
            bargap=0.2,
        )

        result_heatmap_title = rc.Heading("Sample Recommendations", level=2)
        # Sample by item *values* taken from the first 100 result rows. The
        # row labels of those rows are unrelated to item identifiers, so
        # filtering the item column against them selects the wrong rows
        # (or none at all).
        sample_items = result_df[item_col].head(100).unique()
        filtered_df = result_df[result_df[item_col].isin(sample_items)]
        data = filtered_df.pivot(index=user_col, columns=item_col, values=interaction_col)
        fig = go.Figure(
            data=go.Heatmap(
                z=data.values,
                x=data.columns,
                y=data.index,
                colorscale='Viridis',
            )
        )
        fig.update_layout(
            title='Recommendation heatmap of User-Item Interactions (sample)',
            width=1500,
            height=800,
            xaxis_title=item_col,
            yaxis_title=user_col,
            coloraxis_colorbar=dict(title=interaction_col),
        )
        plots = [
            user_title, rc.Widget(fig_user),
            item_title, rc.Widget(fig_item),
            result_heatmap_title, rc.Widget(fig),
        ]

        # ``metrics`` is rendered as a single-row table (index=[0]).
        test_metrics_sections = [rc.DataTable(pd.DataFrame(metrics, index=[0]))]
        yaml_appendix_title = rc.Heading("Reference: YAML File", level=2)
        yaml_appendix = rc.Yaml(self.config.to_dict())
        report_sections = (
            [summary]
            + plots
            + test_metrics_sections
            + other_sections
            + [yaml_appendix_title, yaml_appendix]
        )

    # save the report and result CSV
    self._save_report(
        report_sections=report_sections,
        result_df=result_df,
    )
39138
40- def _save_report (self , result_df ):
139+ def _evaluation_metrics (self ):
140+ pass
141+
142+ def _test_data_evaluate_metrics (self ):
143+ pass
144+
145+ def _save_report (self , report_sections : Tuple , result_df : pd .DataFrame ):
41146 """Saves resulting reports to the given folder."""
42147
43148 unique_output_dir = self .spec .output_directory .url
@@ -47,7 +152,25 @@ def _save_report(self, result_df):
47152 else :
48153 storage_options = dict ()
49154
50- # forecast csv report
155+ # report-creator html report
156+ if self .spec .generate_report :
157+ with tempfile .TemporaryDirectory () as temp_dir :
158+ report_local_path = os .path .join (temp_dir , "___report.html" )
159+ disable_print ()
160+ with rc .ReportCreator ("My Report" ) as report :
161+ report .save (rc .Block (* report_sections ), report_local_path )
162+ enable_print ()
163+
164+ report_path = os .path .join (unique_output_dir , self .spec .report_filename )
165+ with open (report_local_path ) as f1 :
166+ with fsspec .open (
167+ report_path ,
168+ "w" ,
169+ ** storage_options ,
170+ ) as f2 :
171+ f2 .write (f1 .read ())
172+
173+ # recommender csv report
51174 write_data (
52175 data = result_df ,
53176 filename = os .path .join (unique_output_dir , self .spec .recommendations_filename ),
@@ -68,7 +191,7 @@ def _generate_report(self):
68191 """
69192
70193 @abstractmethod
71- def _build_model (self ) -> pd .DataFrame :
194+ def _build_model (self ) -> [ pd .DataFrame , Dict ] :
72195 """
73196 Build the model.
74197 The method that needs to be implemented on the particular model level.
0 commit comments