|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +# Copyright (c) 2023, 2024 Oracle and/or its affiliates. |
| 5 | +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ |
| 6 | + |
| 7 | +import pandas as pd |
| 8 | +from .recommender_dataset import RecommenderDatasets |
| 9 | +from ..operator_config import RecommenderOperatorConfig |
| 10 | +from .factory import RecommenderOperatorBaseModel |
| 11 | +from surprise import Dataset, Reader |
| 12 | +from surprise.model_selection import train_test_split |
| 13 | +from surprise import SVD |
| 14 | +from surprise import accuracy |
| 15 | + |
| 16 | + |
class SVDOperatorModel(RecommenderOperatorBaseModel):
    """Class representing scikit surprise SVD operator model.

    Fits a ``surprise.SVD`` model on the interactions data and produces, for
    every user in the users dataset, the highest-estimated items that the
    user has no recorded interaction with.
    """

    def __init__(self, config: RecommenderOperatorConfig, datasets: RecommenderDatasets):
        """Store the datasets and the column names taken from ``config.spec``.

        Args:
            config: Operator configuration; ``config.spec`` supplies the user,
                item and ratings column names.
            datasets: Container exposing the ``interactions``, ``users`` and
                ``items`` data.
        """
        super().__init__(config, datasets)
        self.interactions = datasets.interactions
        self.users = datasets.users
        self.items = datasets.items
        self.user_id = config.spec.user_column_name
        self.item_id = config.spec.item_column_name
        self.rating_col = config.spec.ratings_column_name
        # Fraction of the interactions held out by train_test_split for evaluation.
        self.test_size = 0.2

    def _get_recommendations(self, user_id, algo, items, n=10):
        """Return the top ``n`` items the user has not interacted with.

        Args:
            user_id: Value identifying the user in the ``self.user_id`` column.
            algo: Fitted model exposing ``predict(user_id, item_id)`` whose
                result has ``iid`` and ``est`` attributes.
            items: Data containing the ``self.item_id`` column listing the
                candidate items.
            n: Maximum number of recommendations to return.

        Returns:
            A list of ``(item_id, estimated_rating)`` tuples ordered by
            estimated rating, highest first. It holds fewer than ``n`` entries
            when fewer unrated items exist.
        """
        all_item_ids = items[self.item_id].unique()
        user_rows = self.interactions[self.interactions[self.user_id] == user_id]
        # A set gives O(1) membership checks; testing against the underlying
        # array would rescan every rated item for each candidate.
        rated_items = set(user_rows[self.item_id])
        unrated_items = [item_id for item_id in all_item_ids if item_id not in rated_items]
        predictions = [algo.predict(user_id, item_id) for item_id in unrated_items]
        top_n_recommendations = sorted(predictions, key=lambda pred: pred.est, reverse=True)[:n]
        return [(pred.iid, pred.est) for pred in top_n_recommendations]

    def _build_model(self) -> pd.DataFrame:
        """Train the SVD model and build recommendations for every user.

        Returns:
            A DataFrame with one row per recommended (user, item) pair and the
            columns ``self.user_id``, ``self.item_id`` and ``self.rating_col``
            (holding the estimated rating). The columns are present even when
            no recommendation could be produced.
        """
        # The rating scale is derived from the observed ratings.
        min_rating = self.interactions[self.rating_col].min()
        max_rating = self.interactions[self.rating_col].max()
        reader = Reader(rating_scale=(min_rating, max_rating))
        data = Dataset.load_from_df(
            self.interactions[[self.user_id, self.item_id, self.rating_col]], reader
        )
        trainset, testset = train_test_split(data, test_size=self.test_size)
        algo = SVD()
        algo.fit(trainset)
        predictions = algo.test(testset)
        # The return value is intentionally unused: with its default verbosity
        # surprise's rmse() reports the score itself.
        accuracy.rmse(predictions)

        # NOTE(review): self.spec is not assigned in this class; presumably the
        # base class sets it from config.spec - confirm.
        top_k = self.spec.top_k
        all_recommendations = []
        for user_id in self.users[self.user_id]:
            recommendations = self._get_recommendations(user_id, algo, self.items, n=top_k)
            all_recommendations.extend(
                {
                    self.user_id: user_id,
                    self.item_id: item_id,
                    self.rating_col: est_rating,
                }
                for item_id, est_rating in recommendations
            )
        # Explicit columns keep the output schema stable when the list is empty.
        return pd.DataFrame(
            all_recommendations,
            columns=[self.user_id, self.item_id, self.rating_col],
        )
0 commit comments