Skip to content

Commit 9004a64

Browse files
author
EC2 Default User
committed
[ADD] New feature (table as image)
1 parent 8def937 commit 9004a64

File tree

94 files changed

+144
-14
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+144
-14
lines changed

genai/aws-gen-ai-kr/20_applications/02_qa_chatbot/01_preprocess_docs/05_0_load_complex_pdf_kr_opensearch.ipynb

Lines changed: 144 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@
433433
},
434434
{
435435
"cell_type": "code",
436-
"execution_count": null,
436+
"execution_count": 11,
437437
"id": "f320a808-c49a-47d3-bb45-4e96ae690606",
438438
"metadata": {
439439
"tags": []
@@ -443,7 +443,18 @@
443443
"name": "stderr",
444444
"output_type": "stream",
445445
"text": [
446-
"Conflict between variables skip_infer_table_types: ['pdf', 'jpg', 'png', 'xls', 'xlsx', 'heic'] and pdf_infer_table_structure: True, please reset skip_infer_table_types to turn on table extraction for PDFs.\n"
446+
"Conflict between variables skip_infer_table_types: ['pdf', 'jpg', 'png', 'xls', 'xlsx', 'heic'] and pdf_infer_table_structure: True, please reset skip_infer_table_types to turn on table extraction for PDFs.\n",
447+
"Some weights of the model checkpoint at microsoft/table-transformer-structure-recognition were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']\n",
448+
"- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
449+
"- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
450+
]
451+
},
452+
{
453+
"name": "stdout",
454+
"output_type": "stream",
455+
"text": [
456+
"CPU times: user 3min 49s, sys: 4.72 s, total: 3min 53s\n",
457+
"Wall time: 10min 40s\n"
447458
]
448459
}
449460
],
@@ -454,30 +465,149 @@
454465
},
455466
{
456467
"cell_type": "code",
457-
"execution_count": null,
468+
"execution_count": 12,
458469
"id": "d429ce28-57e6-4386-9d7d-dc4b3b5272d4",
459470
"metadata": {
471+
"collapsed": true,
472+
"jupyter": {
473+
"outputs_hidden": true
474+
},
460475
"tags": []
461476
},
462-
"outputs": [],
477+
"outputs": [
478+
{
479+
"data": {
480+
"text/plain": [
481+
"['./fig-origin/figure-3-12.jpg',\n",
482+
" './fig-origin/figure-18-42.jpg',\n",
483+
" './fig-origin/figure-37-88.jpg',\n",
484+
" './fig-origin/figure-3-15.jpg',\n",
485+
" './fig-origin/figure-18-40.jpg',\n",
486+
" './fig-origin/figure-18-44.jpg',\n",
487+
" './fig-origin/figure-15-25.jpg',\n",
488+
" './fig-origin/figure-35-85.jpg',\n",
489+
" './fig-origin/figure-32-71.jpg',\n",
490+
" './fig-origin/figure-20-50.jpg',\n",
491+
" './fig-origin/figure-1-2.jpg',\n",
492+
" './fig-origin/figure-3-21.jpg',\n",
493+
" './fig-origin/figure-18-46.jpg',\n",
494+
" './fig-origin/figure-28-58.jpg',\n",
495+
" './fig-origin/figure-28-59.jpg',\n",
496+
" './fig-origin/figure-30-63.jpg',\n",
497+
" './fig-origin/figure-30-65.jpg',\n",
498+
" './fig-origin/figure-3-10.jpg',\n",
499+
" './fig-origin/figure-34-75.jpg',\n",
500+
" './fig-origin/figure-3-8.jpg',\n",
501+
" './fig-origin/figure-17-29.jpg',\n",
502+
" './fig-origin/figure-19-49.jpg',\n",
503+
" './fig-origin/figure-3-9.jpg',\n",
504+
" './fig-origin/figure-35-83.jpg',\n",
505+
" './fig-origin/figure-28-56.jpg',\n",
506+
" './fig-origin/figure-29-61.jpg',\n",
507+
" './fig-origin/figure-35-80.jpg',\n",
508+
" './fig-origin/figure-20-53.jpg',\n",
509+
" './fig-origin/figure-18-32.jpg',\n",
510+
" './fig-origin/figure-32-72.jpg',\n",
511+
" './fig-origin/figure-28-55.jpg',\n",
512+
" './fig-origin/figure-2-5.jpg',\n",
513+
" './fig-origin/figure-3-23.jpg',\n",
514+
" './fig-origin/figure-38-89.jpg',\n",
515+
" './fig-origin/figure-18-33.jpg',\n",
516+
" './fig-origin/figure-42-93.jpg',\n",
517+
" './fig-origin/figure-18-47.jpg',\n",
518+
" './fig-origin/figure-28-57.jpg',\n",
519+
" './fig-origin/figure-18-35.jpg',\n",
520+
" './fig-origin/figure-30-62.jpg',\n",
521+
" './fig-origin/figure-3-14.jpg',\n",
522+
" './fig-origin/figure-29-60.jpg',\n",
523+
" './fig-origin/figure-3-17.jpg',\n",
524+
" './fig-origin/figure-35-78.jpg',\n",
525+
" './fig-origin/figure-35-84.jpg',\n",
526+
" './fig-origin/figure-30-66.jpg',\n",
527+
" './fig-origin/figure-3-22.jpg',\n",
528+
" './fig-origin/figure-3-19.jpg',\n",
529+
" './fig-origin/figure-18-38.jpg',\n",
530+
" './fig-origin/figure-20-51.jpg',\n",
531+
" './fig-origin/figure-32-69.jpg',\n",
532+
" './fig-origin/figure-30-64.jpg',\n",
533+
" './fig-origin/figure-16-27.jpg',\n",
534+
" './fig-origin/figure-17-30.jpg',\n",
535+
" './fig-origin/figure-3-20.jpg',\n",
536+
" './fig-origin/figure-34-77.jpg',\n",
537+
" './fig-origin/figure-33-73.jpg',\n",
538+
" './fig-origin/figure-40-92.jpg',\n",
539+
" './fig-origin/figure-15-24.jpg',\n",
540+
" './fig-origin/figure-3-16.jpg',\n",
541+
" './fig-origin/figure-31-68.jpg',\n",
542+
" './fig-origin/figure-3-6.jpg',\n",
543+
" './fig-origin/figure-35-79.jpg',\n",
544+
" './fig-origin/figure-3-11.jpg',\n",
545+
" './fig-origin/figure-18-31.jpg',\n",
546+
" './fig-origin/figure-18-34.jpg',\n",
547+
" './fig-origin/figure-1-4.jpg',\n",
548+
" './fig-origin/figure-16-28.jpg',\n",
549+
" './fig-origin/figure-18-37.jpg',\n",
550+
" './fig-origin/figure-18-43.jpg',\n",
551+
" './fig-origin/figure-32-70.jpg',\n",
552+
" './fig-origin/figure-18-41.jpg',\n",
553+
" './fig-origin/figure-20-52.jpg',\n",
554+
" './fig-origin/figure-35-86.jpg',\n",
555+
" './fig-origin/figure-18-36.jpg',\n",
556+
" './fig-origin/figure-23-54.jpg',\n",
557+
" './fig-origin/figure-3-7.jpg',\n",
558+
" './fig-origin/figure-39-91.jpg',\n",
559+
" './fig-origin/figure-18-39.jpg',\n",
560+
" './fig-origin/figure-35-81.jpg',\n",
561+
" './fig-origin/figure-1-1.jpg',\n",
562+
" './fig-origin/figure-31-67.jpg',\n",
563+
" './fig-origin/figure-37-87.jpg',\n",
564+
" './fig-origin/figure-18-48.jpg',\n",
565+
" './fig-origin/figure-16-26.jpg',\n",
566+
" './fig-origin/figure-1-3.jpg',\n",
567+
" './fig-origin/figure-39-90.jpg',\n",
568+
" './fig-origin/figure-3-18.jpg',\n",
569+
" './fig-origin/figure-34-76.jpg',\n",
570+
" './fig-origin/figure-34-74.jpg',\n",
571+
" './fig-origin/figure-3-13.jpg',\n",
572+
" './fig-origin/figure-35-82.jpg',\n",
573+
" './fig-origin/figure-18-45.jpg']"
574+
]
575+
},
576+
"execution_count": 12,
577+
"metadata": {},
578+
"output_type": "execute_result"
579+
}
580+
],
463581
"source": [
464-
"from distutils.dir_util import copy_tree\n",
465-
"from_file_path = './fig' # 복사할 폴더\n",
466-
"to_file_path = './fig-origin' # 복사 위치\n",
467-
"copy_tree(from_file_path, to_file_path)"
582+
"# from distutils.dir_util import copy_tree\n",
583+
"# from_file_path = './fig' # 복사할 폴더\n",
584+
"# to_file_path = './fig-origin' # 복사 위치\n",
585+
"# copy_tree(from_file_path, to_file_path)"
468586
]
469587
},
470588
{
471589
"cell_type": "code",
472-
"execution_count": null,
590+
"execution_count": 13,
473591
"id": "98d98c6e-868a-4aa3-9adc-34fd2e465491",
474592
"metadata": {},
475-
"outputs": [],
593+
"outputs": [
594+
{
595+
"ename": "OSError",
596+
"evalue": "[Errno 39] Directory not empty: './fig'",
597+
"output_type": "error",
598+
"traceback": [
599+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
600+
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
601+
"Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrmdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfrom_file_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m from_file_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./fig-origin\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 3\u001b[0m to_file_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./fig\u001b[39m\u001b[38;5;124m'\u001b[39m\n",
602+
"\u001b[0;31mOSError\u001b[0m: [Errno 39] Directory not empty: './fig'"
603+
]
604+
}
605+
],
476606
"source": [
477-
"os.rmdir(from_file_path)\n",
478-
"from_file_path = './fig-origin'\n",
479-
"to_file_path = './fig'\n",
480-
"copy_tree(from_file_path, to_file_path)"
607+
"# os.rmdir(from_file_path)\n",
608+
"# from_file_path = './fig-origin'\n",
609+
"# to_file_path = './fig'\n",
610+
"# copy_tree(from_file_path, to_file_path)"
481611
]
482612
},
483613
{
103 KB
16.1 KB
36.7 KB
57 KB
45.2 KB
26.5 KB
67.2 KB
36.7 KB
32.4 KB

0 commit comments

Comments
 (0)