recommender v1 added

prasankh · prasankh · commit 52ece37866e8 · 2024-06-06T19:25:51.000+05:30
diff --git a/ads/opctl/operator/lowcode/recommender/README.md b/ads/opctl/operator/lowcode/recommender/README.md
diff --git a/ads/opctl/operator/lowcode/recommender/__init__.py b/ads/opctl/operator/lowcode/recommender/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
diff --git a/ads/opctl/operator/lowcode/recommender/__main__.py b/ads/opctl/operator/lowcode/recommender/__main__.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import json
+import os
+import sys
+from typing import Dict, List
+
+import yaml
+
+from ads.opctl import logger
+from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
+from ads.opctl.operator.common.utils import _parse_input_args
+
+from .model.recommender_dataset import RecommenderDatasets
+from .operator_config import RecommenderOperatorConfig
+from .model.factory import RecommenderOperatorModelFactory
+
+def operate(operator_config: RecommenderOperatorConfig) -> None:
+    """Runs the recommender operator."""
+
+    datasets = RecommenderDatasets(operator_config)
+    RecommenderOperatorModelFactory.get_model(
+        operator_config, datasets
+    ).generate_report()
+
+
+def verify(spec: Dict, **kwargs: Dict) -> bool:
+    """Verifies the recommender detection operator config."""
+    operator = RecommenderOperatorConfig.from_dict(spec)
+    msg_header = (
+        f"{'*' * 50} The operator config has been successfully verified {'*' * 50}"
+    )
+    print(msg_header)
+    print(operator.to_yaml())
+    print("*" * len(msg_header))
+
+
+def main(raw_args: List[str]):
+    """The entry point of the recommender the operator."""
+    args, _ = _parse_input_args(raw_args)
+    if not args.file and not args.spec and not os.environ.get(ENV_OPERATOR_ARGS):
+        logger.info(
+            "Please specify -f[--file] or -s[--spec] or "
+            f"pass operator's arguments via {ENV_OPERATOR_ARGS} environment variable."
+        )
+        return
+
+    logger.info("-" * 100)
+    logger.info(
+        f"{'Running' if not args.verify else 'Verifying'} the recommender detection operator."
+    )
+
+    yaml_string = ""
+    if args.spec or os.environ.get(ENV_OPERATOR_ARGS):
+        operator_spec_str = args.spec or os.environ.get(ENV_OPERATOR_ARGS)
+        try:
+            yaml_string = yaml.safe_dump(json.loads(operator_spec_str))
+        except json.JSONDecodeError:
+            yaml_string = yaml.safe_dump(yaml.safe_load(operator_spec_str))
+        except:
+            yaml_string = operator_spec_str
+
+    operator_config = RecommenderOperatorConfig.from_yaml(
+        uri=args.file,
+        yaml_string=yaml_string,
+    )
+
+    logger.info(operator_config.to_yaml())
+
+    # run operator
+    if args.verify:
+        verify(operator_config)
+    else:
+        operate(operator_config)
+
+
+# Script entry: run the operator with the command line arguments
+# (program name excluded).
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/ads/opctl/operator/lowcode/recommender/cmd.py b/ads/opctl/operator/lowcode/recommender/cmd.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from typing import Dict
+
+from ads.opctl.operator.common.operator_yaml_generator import YamlGenerator
+from ads.opctl.operator.common.utils import _load_yaml_from_uri
+
+
+def init(**kwargs: Dict) -> str:
+    """
+    Generates operator config by the schema.
+
+    Properties
+    ----------
+    kwargs: (Dict, optional).
+        Additional key value arguments.
+
+        - type: str
+            The type of the operator.
+
+    Returns
+    -------
+    str
+        The YAML specification generated based on the schema.
+    """
+
+    default_detector = [{"name": "<type>.<entity>", "action": "mask"}]
+
+    return YamlGenerator(
+        schema=_load_yaml_from_uri(__file__.replace("cmd.py", "schema.yaml"))
+    ).generate_example_dict(
+        values={"type": kwargs.get("type"), "detectors": default_detector}
+    )
diff --git a/ads/opctl/operator/lowcode/recommender/constant.py b/ads/opctl/operator/lowcode/recommender/constant.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from ads.common.extended_enum import ExtendedEnumMeta
+
+# NOTE(review): presumably the default number of rows shown in a report table - confirm against usage.
+DEFAULT_SHOW_ROWS = 25
+# NOTE(review): presumably the default file name of the generated HTML report - confirm against usage.
+DEFAULT_REPORT_FILENAME = "report.html"
+
+class OutputColumns(str, metaclass=ExtendedEnumMeta):
+    """Names of the output columns for the recommender operator."""
+    USER_COL = "user"
+    ITEM_COL = "item"
+    SCORE = "score"
+
+class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
+    """Supported recommender metrics."""
+    RMSE = "RMSE"
+
+class SupportedModels(str, metaclass=ExtendedEnumMeta):
+    """Supported recommender models."""
+    # Mapped to SVDOperatorModel by RecommenderOperatorModelFactory.
+    SVD = "svd"
diff --git a/ads/opctl/operator/lowcode/recommender/environment.yaml b/ads/opctl/operator/lowcode/recommender/environment.yaml
@@ -0,0 +1,11 @@
+# Conda environment for the recommender operator.
+name: recommender
+channels:
+  - conda-forge
+dependencies:
+  - python=3.9
+  - pip
+  - pip:
+      - report-creator
+      - oracle_ads[opctl]
+      - plotly
+      - scikit-surprise
diff --git a/ads/opctl/operator/lowcode/recommender/model/base_model.py b/ads/opctl/operator/lowcode/recommender/model/base_model.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+import time
+from abc import ABC, abstractmethod
+
+import pandas as pd
+
+from ads.common.object_storage_details import ObjectStorageDetails
+from ads.opctl import logger
+from ads.opctl.operator.lowcode.common.utils import default_signer
+from ads.opctl.operator.lowcode.common.utils import (
+    write_data,
+)
+from .recommender_dataset import RecommenderDatasets
+from ..operator_config import RecommenderOperatorConfig
+
+
+class RecommenderOperatorBaseModel(ABC):
+    """The base class for the recommender detection operator models."""
+
+    def __init__(self, config: RecommenderOperatorConfig, datasets: RecommenderDatasets):
+        self.spec = config.spec
+        self.datasets = datasets
+
+    def generate_report(self):
+        start_time = time.time()
+        result_df = self._build_model()
+        elapsed_time = time.time() - start_time
+        logger.info("Building the models completed in %s seconds", elapsed_time)
+        # save the report and result CSV
+        self._save_report(
+            result_df=result_df
+        )
+
+    def _save_report(self, result_df):
+        """Saves resulting reports to the given folder."""
+
+        unique_output_dir = self.spec.output_directory.url
+
+        if ObjectStorageDetails.is_oci_path(unique_output_dir):
+            storage_options = default_signer()
+        else:
+            storage_options = dict()
+
+        # forecast csv report
+        write_data(
+            data=result_df,
+            filename=os.path.join(unique_output_dir, self.spec.recommendations_filename),
+            format="csv",
+            storage_options=storage_options,
+        )
+
+        logger.info(
+            f"The outputs have been successfully "
+            f"generated and placed into the directory: {unique_output_dir}."
+        )
+
+    @abstractmethod
+    def _generate_report(self):
+        """
+        Generates the report for the particular model.
+        The method that needs to be implemented on the particular model level.
+        """
+
+    @abstractmethod
+    def _build_model(self) -> pd.DataFrame:
+        """
+        Build the model.
+        The method that needs to be implemented on the particular model level.
+        """
diff --git a/ads/opctl/operator/lowcode/recommender/model/factory.py b/ads/opctl/operator/lowcode/recommender/model/factory.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from ..constant import SupportedModels
+from ..operator_config import RecommenderOperatorConfig
+from .base_model import RecommenderOperatorBaseModel
+from .recommender_dataset import RecommenderDatasets
+from .svd import SVDOperatorModel
+
+class UnSupportedModelError(Exception):
+    def __init__(self, model_type: str):
+        super().__init__(
+            f"Model: `{model_type}` "
+            f"is not supported. Supported models: {SupportedModels.values}"
+        )
+
+
+class RecommenderOperatorModelFactory:
+    """
+    The factory class helps to instantiate proper model operator based on the model type.
+    """
+
+    _MAP = {
+        SupportedModels.SVD: SVDOperatorModel
+    }
+
+    @classmethod
+    def get_model(
+        cls, operator_config: RecommenderOperatorConfig, datasets: RecommenderDatasets
+    ) -> RecommenderOperatorBaseModel:
+        """
+        Gets the operator model based on the model type.
+
+        Parameters
+        ----------
+        operator_config: RecommenderOperatorConfig
+            The recommender detection operator config.
+
+        datasets: RecommenderDatasets
+            Datasets for finding recommender
+
+        Returns
+        -------
+        RecommenderOperatorBaseModel
+            The recommender detection operator model.
+
+        Raises
+        ------
+        UnSupportedModelError
+            In case of not supported model.
+        """
+        model_type = SupportedModels.SVD
+        if model_type not in cls._MAP:
+            raise UnSupportedModelError(model_type)
+        return cls._MAP[model_type](config=operator_config, datasets=datasets)
diff --git a/ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py b/ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import pandas as pd
+
+from ..operator_config import RecommenderOperatorConfig
+
+
+class RecommenderDatasets:
+    def __init__(self, config: RecommenderOperatorConfig):
+        """Instantiates the DataIO instance.
+
+        Properties
+        ----------
+        spec: RecommenderOperatorSpec
+            The recommender operator spec.
+        """
+        spec = config.spec
+        self.interactions: pd.DataFrame = pd.read_csv(spec.interactions_data.url)
+        self.users: pd.DataFrame = pd.read_csv(spec.user_data.url)
+        self.items: pd.DataFrame = pd.read_csv(spec.item_data.url)
diff --git a/ads/opctl/operator/lowcode/recommender/model/svd.py b/ads/opctl/operator/lowcode/recommender/model/svd.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import pandas as pd
+from .recommender_dataset import RecommenderDatasets
+from ..operator_config import RecommenderOperatorConfig
+from .factory import RecommenderOperatorBaseModel
+from surprise import Dataset, Reader
+from surprise.model_selection import train_test_split
+from surprise import SVD
+from surprise import accuracy
+
+
+class SVDOperatorModel(RecommenderOperatorBaseModel):
+    """Class representing scikit surprise SVD operator model."""
+
+    def __init__(self, config: RecommenderOperatorConfig, datasets: RecommenderDatasets):
+        super().__init__(config, datasets)
+        self.interactions = datasets.interactions
+        self.users = datasets.users
+        self.items = datasets.items
+        self.user_id = config.spec.user_column_name
+        self.item_id = config.spec.item_column_name
+        self.rating_col = config.spec.ratings_column_name
+        self.test_size = 0.2
+
+    def _get_recommendations(self, user_id, algo, items, n=10):
+        all_item_ids = items[self.item_id].unique()
+        rated_items = self.interactions[self.interactions[self.user_id] == user_id][self.item_id]
+        unrated_items = [item_id for item_id in all_item_ids if item_id not in rated_items.values]
+        predictions = [algo.predict(user_id, item_id) for item_id in unrated_items]
+        predictions.sort(key=lambda x: x.est, reverse=True)
+        top_n_recommendations = predictions[:n]
+        return [(pred.iid, pred.est) for pred in top_n_recommendations]
+
+    def _build_model(self) -> pd.DataFrame:
+        min_rating = self.interactions[self.rating_col].min()
+        max_rating = self.interactions[self.rating_col].max()
+        reader = Reader(rating_scale=(min_rating, max_rating))
+        data = Dataset.load_from_df(self.interactions[[self.user_id, self.item_id, self.rating_col]], reader)
+        trainset, testset = train_test_split(data, test_size=self.test_size)
+        algo = SVD()
+        algo.fit(trainset)
+        predictions = algo.test(testset)
+        accuracy.rmse(predictions)
+        all_recommendations = []
+        for user_id in self.users[self.user_id]:
+            recommendations = self._get_recommendations(user_id, algo, self.items, n=self.spec.top_k)
+            for item_id, est_rating in recommendations:
+                all_recommendations.append({
+                    self.user_id: user_id,
+                    self.item_id: item_id,
+                    self.rating_col: est_rating
+                })
+        recommendations_df = pd.DataFrame(all_recommendations)
+        return recommendations_df
diff --git a/ads/opctl/operator/lowcode/recommender/operator_config.py b/ads/opctl/operator/lowcode/recommender/operator_config.py
diff --git a/ads/opctl/operator/lowcode/recommender/schema.yaml b/ads/opctl/operator/lowcode/recommender/schema.yaml
diff --git a/ads/opctl/operator/lowcode/recommender/utils.py b/ads/opctl/operator/lowcode/recommender/utils.py