Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,10 @@ django-tags-input = "~=4.4.2"
greenlet = "~=0.4.15"
celery= "~=4.2.2"
redis = "~=3.2.1"
pdfkit = {ref = "master",git = "https://github.com/JazzCore/python-pdfkit.git",editable = true}
pdfkit = {editable = true,git = "https://github.com/JazzCore/python-pdfkit.git",ref = "master"}
favicon = ">=0.7.0"
django-hashid-field = ">=2.1.6"
python-magic-bin = {version = "*",sys_platform = "== 'win32'"}
django-currentuser = "==0.3.4"
django-rest-multiple-models = ">=2.1.3"
pypdf2 = "*"
9 changes: 8 additions & 1 deletion Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file added tests/fixtures/five_page.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/fixtures/text.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
texto
111 changes: 111 additions & 0 deletions tests/tests_artifact_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,114 @@ def test_if_document_saves_clear_thumbnails_caches(self):
document.search_thumbnail, search_thumbnail)
self.assertNotEqual(document.icon, icon)
self.assertNotEqual(document.thumbnails, thumbnails)

def test_pdf_document_num_pages(self):
    """
    Check the page count reported for PDF documents, including after the
    underlying file is replaced by another PDF.
    """
    tests_dir = os.path.dirname(__file__)
    three_page_pdf = Path(tests_dir, './fixtures/pdf.pdf')
    five_page_pdf = Path(tests_dir, './fixtures/five_page.pdf')

    # A document without a file reports no page count.
    self.assertIsNone(Document().num_pages)

    with self.open_as_django_file(three_page_pdf) as pdf_file:
        document = Document(file=pdf_file)
        document.save()
        self.assertEqual(document.num_pages, 3)

    with self.open_as_django_file(five_page_pdf) as pdf_file:
        document.file = pdf_file
        # Until save() is called the previous count is still reported.
        self.assertEqual(document.num_pages, 3)
        document.save()
        self.assertEqual(document.num_pages, 5)

def test_image_document_num_pages(self):
    """
    Check the page count reported for an image document.
    """
    # A document without a file reports no page count.
    self.assertIsNone(Document().num_pages)

    image_path = Path(os.path.dirname(__file__), './fixtures/image.jpg')
    with self.open_as_django_file(image_path) as image_file:
        document = Document(file=image_file)
        document.save()
        self.assertEqual(document.num_pages, 1)

def test_unknown_document_num_pages(self):
    """
    Check the page count reported for a document that is neither a PDF nor
    an image (a plain-text fixture is used here).
    """
    # A document without a file reports no page count.
    self.assertIsNone(Document().num_pages)

    text_path = Path(os.path.dirname(__file__), './fixtures/text.txt')
    with self.open_as_django_file(text_path) as text_file:
        document = Document(file=text_file)
        document.save()
        self.assertEqual(document.num_pages, 1)

def test_pdf_file_page_previews(self):
    """
    Check that one page image is produced for every page of a PDF file,
    both on first save and after the file is replaced.
    """
    tests_dir = os.path.dirname(__file__)
    five_page_pdf = Path(tests_dir, './fixtures/five_page.pdf')
    three_page_pdf = Path(tests_dir, './fixtures/pdf.pdf')

    # A document without a file has no pages.
    self.assertIsNone(Document().pages)

    with self.open_as_django_file(five_page_pdf) as pdf_file:
        document = Document(file=pdf_file)
        document.save()
        self.assertIsNotNone(document.pages)
        self.assertEqual(len(document.pages), document.num_pages)

    with self.open_as_django_file(three_page_pdf) as pdf_file:
        document.file = pdf_file
        document.save()
        self.assertIsNotNone(document.pages)
        self.assertEqual(len(document.pages), document.num_pages)

def test_unknown_file_pages(self):
    """
    A document whose file is not a PDF must report None for its pages;
    once the file is replaced by a PDF, pages become available.
    """
    tests_dir = os.path.dirname(__file__)
    text_path = Path(tests_dir, './fixtures/text.txt')
    three_page_pdf = Path(tests_dir, './fixtures/pdf.pdf')

    # A document without a file has no pages.
    self.assertIsNone(Document().pages)

    with self.open_as_django_file(text_path) as text_file:
        document = Document(file=text_file)
        document.save()
        self.assertIsNone(document.pages)

    with self.open_as_django_file(three_page_pdf) as pdf_file:
        document.file = pdf_file
        document.save()
        self.assertIsNotNone(document.pages)
        self.assertEqual(len(document.pages), document.num_pages)

def test_pages_file_mime_type(self):
    """
    Every page document generated from a PDF must have the JPEG image
    mime type.
    """
    pdf_path = Path(os.path.dirname(__file__), './fixtures/pdf.pdf')
    with self.open_as_django_file(pdf_path) as pdf_file:
        document = Document(file=pdf_file)
        document.save()
        for page_document in document.pages:
            self.assertEqual(page_document.mime_type, 'image/jpeg')

def test_document_delete_pages(self):
    """
    The page image files must be deleted from disk when the document is
    saved again.
    """
    pdf_path = Path(os.path.dirname(__file__), './fixtures/pdf.pdf')
    page_file_paths = []
    with self.open_as_django_file(pdf_path) as pdf_file:
        document = Document(file=pdf_file)
        document.save()

        # Record where each generated page image lives while it exists.
        for page_document in document.pages:
            page_path = page_document.file.path
            self.assertTrue(os.path.exists(page_path))
            page_file_paths.append(page_path)

        # Saving again must remove the previously generated files.
        document.save()
        for page_path in page_file_paths:
            self.assertFalse(os.path.exists(page_path))
24 changes: 22 additions & 2 deletions xram_memory/artifact/admin/models/document.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from easy_thumbnails.widgets import ImageClearableFileInput
from easy_thumbnails.fields import ThumbnailerField
from tags_input import admin as tags_input_admin
from xram_memory.artifact.models import Document
from xram_memory.artifact.models import Document, DocumentPage
from filer.admin.fileadmin import FileAdmin
from django.contrib import admin

Expand Down Expand Up @@ -30,12 +30,32 @@ class DocumentAdmin(FileAdmin, tags_input_admin.TagsInputAdmin):
date_hierarchy = 'uploaded_at'
search_fields = ('name',)

def num_pages(self, object):
    """Return the ``num_pages`` value of the given object for display in the admin."""
    page_count = object.num_pages
    return page_count
num_pages.short_description = "Número de páginas"


# Extend the read-only fields already declared on DocumentAdmin with extra
# model attributes.
# NOTE(review): the two tuple lines that follow look like the removed and the
# added side of a diff hunk; only the second one lists 'num_pages'. Confirm
# which one the real file contains.
DocumentAdmin.readonly_fields = DocumentAdmin.readonly_fields + \
('mime_type', 'document_id',)
('mime_type', 'document_id', 'num_pages')
# Rebuild the fieldsets through FileAdmin.build_fieldsets, passing the extra
# fields for the main section and the extra field for the advanced section.
DocumentAdmin.fieldsets = FileAdmin.build_fieldsets(extra_main_fields=('document_id', 'keywords',
'subjects', 'published_date',
'num_pages',
),
extra_advanced_fields=(
"mime_type",)
)


@admin.register(DocumentPage)
class DocumentPageAdmin(DocumentAdmin):
    """
    Admin registration for DocumentPage in which every permission check
    (view, add, change and delete) is answered with False.
    """

    def has_view_permission(self, request, obj=None):
        """Deny viewing, regardless of request or object."""
        return False

    def has_add_permission(self, request, obj=None):
        """Deny adding, regardless of request or object."""
        return False

    def has_change_permission(self, request, obj=None):
        """Deny changing, regardless of request or object."""
        return False

    def has_delete_permission(self, request, obj=None):
        """Deny deleting, regardless of request or object."""
        return False
27 changes: 27 additions & 0 deletions xram_memory/artifact/migrations/0012_documentpage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Generated by Django 2.1.14 on 2019-12-06 20:49

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Creates the DocumentPage model, declared with 'artifact.document' as
    # its base model.

    # Must be applied after the previous migration of the 'artifact' app.
    dependencies = [
        ('artifact', '0011_auto_20190529_1034'),
    ]

    operations = [
        migrations.CreateModel(
            name='DocumentPage',
            fields=[
                # Parent link to the Document row; also the primary key.
                ('document_ptr', models.OneToOneField(auto_created=True, on_delete=django.db.models.deletion.CASCADE, parent_link=True, primary_key=True, serialize=False, to='artifact.Document')),
                # Position of the page, stored as a non-negative integer.
                ('page_index', models.PositiveIntegerField(verbose_name='Página')),
                # Document this page belongs to; deleted together with it
                # (CASCADE) and reachable from it as 'document_pages'.
                ('parent_document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='document_pages', to='artifact.Document')),
            ],
            options={
                'abstract': False,
                'base_manager_name': 'objects',
            },
            bases=('artifact.document',),
        ),
    ]
2 changes: 1 addition & 1 deletion xram_memory/artifact/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .artifact import Artifact
from .documents import Document
from .newspaper import Newspaper
from .documents import Document, DocumentPage
from .news import News, NewsImageCapture, NewsPDFCapture
106 changes: 103 additions & 3 deletions xram_memory/artifact/models/documents.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,25 @@
import os
import magic
from pathlib import Path
from django.db import models
from filer.models import File
from .artifact import Artifact
from PyPDF2 import PdfFileReader
from django.conf import settings
from django.db import transaction
from hashid_field import HashidField
from django.utils.timezone import now
from django.utils.text import slugify
from xram_memory.utils import FileValidator
from filer import settings as filer_settings
from boltons.cacheutils import cachedproperty
from django.core.files.base import ContentFile
from django.core.files import File as DjangoFile
from easy_thumbnails.files import get_thumbnailer
from easy_thumbnails.fields import ThumbnailerField
from xram_memory.taxonomy.models import Keyword, Subject
from xram_memory.lib.file_previews.pdf import generate_pdf_page_thumbnails
from django.urls import reverse


class Document(File):
Expand Down Expand Up @@ -129,10 +136,17 @@ def search_thumbnail(self):

@cachedproperty
def thumbnails(self):
    """Return the result of ``get_thumbnails()`` called with its default arguments."""
    generated = self.get_thumbnails()
    return generated

def get_thumbnails(self, custom_aliases=[]):
"""
Retorna uma lista de thumbnails geradas
Retorna uma lista de thumbnails geradas.
As visualizações são geradas sob demanda.
"""
thumbnails_aliases = settings.THUMBNAIL_ALIASES[''].keys()
if len(custom_aliases):
thumbnails_aliases = custom_aliases
else:
thumbnails_aliases = settings.THUMBNAIL_ALIASES[''].keys()
generated_thumbnails = {}
try:
for alias in thumbnails_aliases:
Expand Down Expand Up @@ -163,13 +177,85 @@ def matches_file_type(cls, iname, ifile, request):
# Este será o modelo genérico para todos os tipos de arquivo, em substituição ao do Filer
return True

@cachedproperty
def num_pages(self):
    """
    Return the number of pages of this document.

    Returns None when the document has no file or when anything fails
    while computing the count. PDF documents report the number of entries
    in ``self.pages``; every other kind of file reports 1.
    """
    try:
        if not self.file:
            return None
        is_pdf = self.mime_type == 'application/pdf'
        # Only PDF documents can have more than one page.
        return len(self.pages) if is_pdf else 1
    except Exception:
        # Best effort: any failure is reported as "unknown" (None).
        return None

@cachedproperty
def pages(self):
    """
    Return the page documents of this document, ordered by page index.

    Only PDF documents have pages; for any other mime type None is
    returned. Pages are lazy-loaded: when none exist yet they are
    generated once and then queried again.

    Returns:
        An evaluated queryset of DocumentPage objects, or None when the
        document is not a PDF, when generation produced no pages, or when
        querying/generating failed.
    """
    if self.mime_type != 'application/pdf':
        return None

    def load_pages():
        # len() forces evaluation, so callers get an already-fetched queryset.
        found = DocumentPage.objects.filter(
            parent_document=self).order_by('page_index')
        return found if len(found) else None

    try:
        existing = load_pages()
        if existing is not None:
            return existing
        # Generate exactly once and re-query, instead of re-entering this
        # property: re-entry would regenerate on every call and recurse
        # without bound when generation yields no pages.
        self.generate_pages()
        return load_pages()
    except Exception:
        # Best effort: failures are reported as "no pages", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None

@transaction.atomic
def delete_pages(self):
    """
    Delete the page documents that belong to this document.
    """
    own_pages = DocumentPage.objects.filter(parent_document=self)
    # Delete instance by instance so that each object's own delete()
    # method is invoked.
    for page_document in own_pages.order_by('page_index'):
        page_document.delete()

@transaction.atomic
def generate_pages(self):
    """
    Generate the page documents of this document.

    Does nothing unless the document's mime type is PDF. For each image
    produced by ``generate_pdf_page_thumbnails`` a DocumentPage is created,
    pointing back at this document and numbered from zero.
    """
    if self.mime_type != 'application/pdf':
        return

    # presumably yields one JPEG image per PDF page, each exposing the path
    # of the generated file as ``.filename`` -- verify against the helper.
    with generate_pdf_page_thumbnails(self.file, last_page=None, fmt='jpeg') as images:
        for page_index, image in enumerate(images):
            with open(image.filename, 'rb') as image_fd:
                # The page is created while the image file is still open.
                page_fields = {
                    'file': DjangoFile(image_fd, name=image.filename),
                    'is_public': True,
                    'is_user_object': False,
                    'name': "{}_{}".format(
                        Path(self.file.name).name, page_index),
                    'original_filename': image.filename,
                    'page_index': page_index,
                    'parent_document': self,
                    'published_date': now(),
                }
                DocumentPage.objects.create(**page_fields)

def save(self, *args, **kwargs):
# Se o documento não tiver nome, use o nome do arquivo
if not self.name:
self.name = self.label
super().save(*args, **kwargs)
# Não tente gerar imagens de páginas para documentos que são páginas eles mesmos
if not isinstance(self, DocumentPage):
self.delete_pages()
# limpe o cache das flags/campos, pois o arquivo pode ter mudado
for attr_name in ['thumbnail', 'search_thumbnail', 'icon', 'thumbnails', 'related_news']:
for attr_name in ['thumbnail', 'search_thumbnail', 'icon', 'thumbnails', 'related_news', 'num_pages', 'pages']:
try:
delattr(self, attr_name)
except AttributeError:
Expand Down Expand Up @@ -199,3 +285,17 @@ def related_news(self):
return []
else:
return news_items


class DocumentPage(Document):
    """
    A single page of a document, stored as a document of its own and linked
    to the document it was generated from.
    """
    # Document this page belongs to; pages are deleted along with it.
    parent_document = models.ForeignKey(
        Document, on_delete=models.CASCADE, related_name="document_pages")
    # Position of the page within the parent document.
    page_index = models.PositiveIntegerField("Página")

    class Meta:
        verbose_name = "Página de documento"
        verbose_name_plural = "Páginas de documentos"

    def delete(self, *args, **kwargs):
        """
        Remove the page's file from disk, then delete the database record.

        Positional and keyword arguments are forwarded unchanged to the
        parent ``delete()``.
        """
        os.remove(self.file.path)
        # Keyword arguments must be forwarded with ``**``; unpacking the
        # dict with a single ``*`` would pass its keys as positional values.
        return super().delete(*args, **kwargs)
Loading