|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "markdown", |
| 5 | + "id": "4ab94990-19bf-4d3d-abfe-f445f3b26905", |
| 6 | + "metadata": {}, |
| 7 | + "source": [ |
| 8 | + "# 필수 패키지 설정, OpenSearch 클러스터 생성, Nori 플러그인 설치 (약 50분 소요)\n", |
| 9 | + "> 이 노트북은 SageMaker Studio의 **`Data Science 3.0`** kernel 및 ml.t3.medium 인스턴스에서 테스트되었습니다." |
| 10 | + ] |
| 11 | + }, |
| 12 | + { |
| 13 | + "cell_type": "markdown", |
| 14 | + "id": "0ae1ea41-92c7-4dc2-8dfa-36585c36d1a5", |
| 15 | + "metadata": {}, |
| 16 | + "source": [ |
| 17 | + "## 0. 필수 사항\n", |
| 18 | + "- 실습을 위해서 노트북을 실행하는 역할(Role) 에 아래 권한이 추가 되어 있어야 합니다.\n", |
| 19 | + " - AmazonOpenSearchServiceFullAccess\n", |
| 20 | + " - AmazonSSMFullAccess" |
| 21 | + ] |
| 22 | + }, |
| 23 | + { |
| 24 | + "cell_type": "markdown", |
| 25 | + "id": "2b5787ac-54cc-4a5e-8f99-86f7a5e3a066", |
| 26 | + "metadata": {}, |
| 27 | + "source": [ |
| 28 | + "## OpenSearch Client, 인증정보 SSM에 저장, 한국어 분석을 위한 노리(Nori) 플러그인 설치 스크립트" |
| 29 | + ] |
| 30 | + }, |
| 31 | + { |
| 32 | + "cell_type": "code", |
| 33 | + "execution_count": null, |
| 34 | + "id": "b7ccaebc-1f3b-4da4-92bd-565bc0d4f7b8", |
| 35 | + "metadata": { |
| 36 | + "tags": [] |
| 37 | + }, |
| 38 | + "outputs": [], |
| 39 | + "source": [ |
| 40 | + "%load_ext autoreload\n", |
| 41 | + "%autoreload 2\n", |
| 42 | + "\n", |
| 43 | + "import sys, os\n", |
| 44 | + "\n", |
| 45 | + "def add_python_path(module_path):  # Append the absolute form of module_path to sys.path unless it is already listed; prints what happened.\n", |
| 46 | + " if os.path.abspath(module_path) not in sys.path:\n", |
| 47 | + " sys.path.append(os.path.abspath(module_path))\n", |
| 48 | + " print(f\"python path: {os.path.abspath(module_path)} is added\")\n", |
| 49 | + " else:\n", |
| 50 | + " print(f\"python path: {os.path.abspath(module_path)} already exists\")\n", |
| 51 | + " print(\"sys.path: \", sys.path)\n", |
| 52 | + "\n", |
| 53 | + "module_path = \"..\"\n", |
| 54 | + "add_python_path(module_path)\n" |
| 55 | + ] |
| 56 | + }, |
| 57 | + { |
| 58 | + "cell_type": "code", |
| 59 | + "execution_count": null, |
| 60 | + "id": "689e69ed-57fc-44ae-a65d-001930e6e2bb", |
| 61 | + "metadata": { |
| 62 | + "tags": [] |
| 63 | + }, |
| 64 | + "outputs": [], |
| 65 | + "source": [ |
| 66 | + "import boto3\n", |
| 67 | + "import uuid\n", |
| 68 | + "import botocore\n", |
| 69 | + "import time" |
| 70 | + ] |
| 71 | + }, |
| 72 | + { |
| 73 | + "cell_type": "code", |
| 74 | + "execution_count": null, |
| 75 | + "id": "7107e2ff-6369-4a13-abfb-b56c3955db9c", |
| 76 | + "metadata": { |
| 77 | + "tags": [] |
| 78 | + }, |
| 79 | + "outputs": [], |
| 80 | + "source": [ |
| 81 | + "from search_utils.ssm import parameter_store" |
| 82 | + ] |
| 83 | + }, |
| 84 | + { |
| 85 | + "cell_type": "code", |
| 86 | + "execution_count": null, |
| 87 | + "id": "69f8f100-d4a2-4bb0-9b27-24bd0009a2f1", |
| 88 | + "metadata": { |
| 89 | + "tags": [] |
| 90 | + }, |
| 91 | + "outputs": [], |
| 92 | + "source": [ |
| 93 | + "DEV = True # True일 경우 1-AZ without standby로 생성, False일 경우 3-AZ with standby. 워크샵 목적일 때는 지나친 과금/리소스 방지를 위해 True로 설정하는 것을 권장\n", |
| 94 | + "VERSION = \"2.11\" # OpenSearch Version (예: 2.7 / 2.9 / 2.11)" |
| 95 | + ] |
| 96 | + }, |
| 97 | + { |
| 98 | + "cell_type": "code", |
| 99 | + "execution_count": null, |
| 100 | + "id": "599f6eff-f129-4e3d-ba90-c2afc340f49d", |
| 101 | + "metadata": { |
| 102 | + "tags": [] |
| 103 | + }, |
| 104 | + "outputs": [], |
| 105 | + "source": [ |
| 106 | + "opensearch_user_id = \"<your id>\" # ex) 'raguser'\n", |
| 107 | + "opensearch_user_password = \"<your password>\" # ex) 'MarsEarth1!'\n", |
| 108 | + "\n" |
| 109 | + ] |
| 110 | + }, |
| 111 | + { |
| 112 | + "cell_type": "code", |
| 113 | + "execution_count": null, |
| 114 | + "id": "84141435-57a9-4a60-82b1-c93d4e415e1e", |
| 115 | + "metadata": { |
| 116 | + "tags": [] |
| 117 | + }, |
| 118 | + "outputs": [], |
| 119 | + "source": [ |
| 120 | + "# 0. Create the OpenSearch domain (the credentials are written to SSM in a later step)\n", |
| 121 | + "\n", |
| 122 | + "region = boto3.Session().region_name\n", |
| 123 | + "account_id = boto3.client(\"sts\").get_caller_identity()[\"Account\"]\n", |
| 124 | + "opensearch = boto3.client('opensearch', region)\n", |
| 125 | + "rand_str = uuid.uuid4().hex[:8]  # random 8-hex-character suffix for the domain name\n", |
| 126 | + "domain_name = f'image-search-{rand_str}'\n", |
| 127 | + "\n", |
| 128 | + "cluster_config_prod = {\n", |
| 129 | + " 'InstanceCount': 3,\n", |
| 130 | + " 'InstanceType': 'r6g.large.search',\n", |
| 131 | + " 'ZoneAwarenessEnabled': True,\n", |
| 132 | + " 'DedicatedMasterEnabled': True,\n", |
| 133 | + " 'MultiAZWithStandbyEnabled': True,\n", |
| 134 | + " 'DedicatedMasterType': 'r6g.large.search',\n", |
| 135 | + " 'DedicatedMasterCount': 3\n", |
| 136 | + "}\n", |
| 137 | + "\n", |
| 138 | + "cluster_config_dev = {\n", |
| 139 | + " 'InstanceCount': 1,\n", |
| 140 | + " 'InstanceType': 'r6g.large.search',\n", |
| 141 | + " 'ZoneAwarenessEnabled': False,\n", |
| 142 | + " 'DedicatedMasterEnabled': False,\n", |
| 143 | + "}\n", |
| 144 | + "\n", |
| 145 | + "\n", |
| 146 | + "ebs_options = {\n", |
| 147 | + " 'EBSEnabled': True,\n", |
| 148 | + " 'VolumeType': 'gp3',\n", |
| 149 | + " 'VolumeSize': 100,\n", |
| 150 | + "}\n", |
| 151 | + "\n", |
| 152 | + "advanced_security_options = {\n", |
| 153 | + " 'Enabled': True,\n", |
| 154 | + " 'InternalUserDatabaseEnabled': True,\n", |
| 155 | + " 'MasterUserOptions': {\n", |
| 156 | + " 'MasterUserName': opensearch_user_id,\n", |
| 157 | + " 'MasterUserPassword': opensearch_user_password\n", |
| 158 | + " }\n", |
| 159 | + "}\n", |
| 160 | + "\n", |
| 161 | + "# Resource-based access policy for this domain; a forward slash needs no backslash escape in Python or JSON.\n", |
| 162 | + "ap = f'{{\\\"Version\\\":\\\"2012-10-17\\\",\\\"Statement\\\":[{{\\\"Effect\\\":\\\"Allow\\\",\\\"Principal\\\":{{\\\"AWS\\\":\\\"*\\\"}},\\\"Action\\\":\\\"es:*\\\",\\\"Resource\\\":\\\"arn:aws:es:{region}:{account_id}:domain/{domain_name}/*\\\"}}]}}'\n", |
| 163 | + "\n", |
| 164 | + "# DEV selects the single-instance configuration; otherwise the 3-instance configuration with standby is used.\n", |
| 165 | + "cluster_config = cluster_config_dev if DEV else cluster_config_prod\n", |
| 166 | + "\n", |
| 167 | + "\n", |
| 168 | + "response = opensearch.create_domain(\n", |
| 169 | + " DomainName=domain_name,\n", |
| 170 | + " EngineVersion=f'OpenSearch_{VERSION}',\n", |
| 171 | + " ClusterConfig=cluster_config,\n", |
| 172 | + " AccessPolicies=ap,\n", |
| 173 | + " EBSOptions=ebs_options,\n", |
| 174 | + " AdvancedSecurityOptions=advanced_security_options,\n", |
| 175 | + " NodeToNodeEncryptionOptions={'Enabled': True},\n", |
| 176 | + " EncryptionAtRestOptions={'Enabled': True},\n", |
| 177 | + " DomainEndpointOptions={'EnforceHTTPS': True}\n", |
| 178 | + ")" |
| 179 | + ] |
| 180 | + }, |
| 181 | + { |
| 182 | + "cell_type": "code", |
| 183 | + "execution_count": null, |
| 184 | + "id": "d0f85455-908b-415c-92c7-c2e4cebde6fb", |
| 185 | + "metadata": { |
| 186 | + "tags": [] |
| 187 | + }, |
| 188 | + "outputs": [], |
| 189 | + "source": [ |
| 190 | + "%%time\n", |
| 191 | + "\n", |
| 192 | + "# 1. OpenSearch 설치\n", |
| 193 | + "\n", |
| 194 | + "def wait_for_domain_creation(domain_name):\n", |
| 195 | + "    \"\"\"Block until describe_domain reports an 'Endpoint' for the domain.\n", |
| 196 | + "\n", |
| 197 | + "    The status is re-read every 60 seconds. A ResourceNotFoundException is\n", |
| 198 | + "    reported with a printed message; any other ClientError is re-raised.\n", |
| 199 | + "    \"\"\"\n", |
| 200 | + "    try:\n", |
| 201 | + "        status = opensearch.describe_domain(DomainName=domain_name)['DomainStatus']\n", |
| 202 | + "        while 'Endpoint' not in status:\n", |
| 203 | + "            print('Creating Opensearch domain...')\n", |
| 204 | + "            time.sleep(60)\n", |
| 205 | + "            status = opensearch.describe_domain(DomainName=domain_name)['DomainStatus']\n", |
| 206 | + "\n", |
| 207 | + "        # The loop only ends once the status carries an endpoint.\n", |
| 208 | + "        print('Domain endpoint ready to receive data: ' + status['Endpoint'])\n", |
| 209 | + "    except botocore.exceptions.ClientError as error:\n", |
| 210 | + "        # Only a missing domain is tolerated here.\n", |
| 211 | + "        if error.response['Error']['Code'] != 'ResourceNotFoundException':\n", |
| 212 | + "            raise error\n", |
| 213 | + "        print('Domain not found.')\n", |
| 214 | + "\n", |
| 215 | + "wait_for_domain_creation(domain_name)\n", |
| 216 | + "\n", |
| 217 | + "response = opensearch.describe_domain(DomainName=domain_name)\n", |
| 218 | + "opensearch_domain_endpoint = f\"https://{response['DomainStatus']['Endpoint']}\"\n", |
| 219 | + "\n", |
| 220 | + "# 2. OpenSearch 인증정보 ssm에 저장하기\n", |
| 221 | + "\n", |
| 222 | + "region=boto3.Session().region_name\n", |
| 223 | + "pm = parameter_store(region)\n", |
| 224 | + "\n", |
| 225 | + "pm.put_params(\n", |
| 226 | + " key=\"opensearch_domain_endpoint\",\n", |
| 227 | + " value=f'{opensearch_domain_endpoint}',\n", |
| 228 | + " overwrite=True,\n", |
| 229 | + " enc=False\n", |
| 230 | + ")\n", |
| 231 | + "\n", |
| 232 | + "pm.put_params(\n", |
| 233 | + " key=\"opensearch_user_id\",\n", |
| 234 | + " value=f'{opensearch_user_id}',\n", |
| 235 | + " overwrite=True,\n", |
| 236 | + " enc=False\n", |
| 237 | + ")\n", |
| 238 | + "\n", |
| 239 | + "pm.put_params(\n", |
| 240 | + " key=\"opensearch_user_password\",\n", |
| 241 | + " value=f'{opensearch_user_password}',\n", |
| 242 | + " overwrite=True,\n", |
| 243 | + " enc=True\n", |
| 244 | + ")\n", |
| 245 | + "\n", |
| 246 | + "# 3. 한국어 분석을 위한 노리(Nori) 플러그인 설치\n", |
| 247 | + "\n", |
| 248 | + "nori_pkg_id = {\n", |
| 249 | + "    'us-east-1': {\n", |
| 250 | + "        '2.3': 'G196105221',\n", |
| 251 | + "        '2.5': 'G240285063',\n", |
| 252 | + "        '2.7': 'G16029449',\n", |
| 253 | + "        '2.9': 'G60209291',\n", |
| 254 | + "        '2.11': 'G181660338',\n", |
| 255 | + "    },\n", |
| 256 | + "    'us-west-2': {\n", |
| 257 | + "        '2.3': 'G94047474',\n", |
| 258 | + "        '2.5': 'G138227316',\n", |
| 259 | + "        '2.7': 'G182407158',\n", |
| 260 | + "        '2.9': 'G226587000',\n", |
| 261 | + "        '2.11': 'G79602591',\n", |
| 262 | + "    },\n", |
| 263 | + "}\n", |
| 264 | + "\n", |
| 265 | + "# Fail with a readable message instead of a bare KeyError when the region/version is not mapped above.\n", |
| 266 | + "if VERSION not in nori_pkg_id.get(region, {}):\n", |
| 267 | + "    raise ValueError(f'No Nori package ID is registered for region {region!r} and OpenSearch {VERSION!r}')\n", |
| 268 | + "pkg_response = opensearch.associate_package(PackageID=nori_pkg_id[region][VERSION], DomainName=domain_name)\n", |
| 269 | + "\n", |
| 270 | + "def wait_for_associate_package(domain_name, max_results=1):\n", |
| 271 | + "    \"\"\"Block until no listed package is ASSOCIATING, polling every 60 seconds.\n", |
| 272 | + "\n", |
| 273 | + "    Raises RuntimeError if nothing is listed or a listed package is not ACTIVE.\"\"\"\n", |
| 274 | + "    while True:\n", |
| 275 | + "        listing = opensearch.list_packages_for_domain(DomainName=domain_name, MaxResults=max_results)\n", |
| 276 | + "        statuses = [pkg['DomainPackageStatus'] for pkg in listing['DomainPackageDetailsList']]\n", |
| 277 | + "        if not statuses:\n", |
| 278 | + "            raise RuntimeError(f'No package association found for domain {domain_name}')\n", |
| 279 | + "        if 'ASSOCIATING' not in statuses:\n", |
| 280 | + "            break\n", |
| 281 | + "        print('Associating packages...')\n", |
| 282 | + "        time.sleep(60)\n", |
| 283 | + "    if any(status != 'ACTIVE' for status in statuses):\n", |
| 284 | + "        raise RuntimeError(f'Package association did not succeed: {statuses}')\n", |
| 285 | + "    print('Nori Plugin Associated!')\n", |
| 286 | + "\n", |
| 287 | + "wait_for_associate_package(domain_name)" |
| 288 | + ] |
| 289 | + }, |
| 290 | + { |
| 291 | + "cell_type": "code", |
| 292 | + "execution_count": null, |
| 293 | + "id": "f6beecf3-d556-4fcd-af9c-ec327e0a63e9", |
| 294 | + "metadata": { |
| 295 | + "tags": [] |
| 296 | + }, |
| 297 | + "outputs": [], |
| 298 | + "source": [ |
| 299 | + "print(pm.get_params(key=\"opensearch_domain_endpoint\", enc=False))\n", |
| 300 | + "print(pm.get_params(key=\"opensearch_user_id\", enc=False))\n", |
| 301 | + "print(\"opensearch_user_password:\", \"(stored)\" if pm.get_params(key=\"opensearch_user_password\", enc=True) else \"(missing)\")  # confirm presence without echoing the secret into saved output" |
| 302 | + ] |
| 303 | + }, |
| 304 | + { |
| 305 | + "cell_type": "code", |
| 306 | + "execution_count": null, |
| 307 | + "id": "fc7aba1f-2446-4a01-8546-01ba65f91c2c", |
| 308 | + "metadata": {}, |
| 309 | + "outputs": [], |
| 310 | + "source": [] |
| 311 | + }, |
| 312 | + { |
| 313 | + "cell_type": "code", |
| 314 | + "execution_count": null, |
| 315 | + "id": "73f37ef4-2038-4cb2-b976-59d91a47937f", |
| 316 | + "metadata": {}, |
| 317 | + "outputs": [], |
| 318 | + "source": [] |
| 319 | + } |
| 320 | + ], |
| 321 | + "metadata": { |
| 322 | + "kernelspec": { |
| 323 | + "display_name": "base", |
| 324 | + "language": "python", |
| 325 | + "name": "python3" |
| 326 | + }, |
| 327 | + "language_info": { |
| 328 | + "codemirror_mode": { |
| 329 | + "name": "ipython", |
| 330 | + "version": 3 |
| 331 | + }, |
| 332 | + "file_extension": ".py", |
| 333 | + "mimetype": "text/x-python", |
| 334 | + "name": "python", |
| 335 | + "nbconvert_exporter": "python", |
| 336 | + "pygments_lexer": "ipython3", |
| 337 | + "version": "3.10.13" |
| 338 | + } |
| 339 | + }, |
| 340 | + "nbformat": 4, |
| 341 | + "nbformat_minor": 5 |
| 342 | +} |
0 commit comments