diff --git a/later42/libs/content.py b/later42/libs/content.py
index 29cde73..6b16c1f 100644
--- a/later42/libs/content.py
+++ b/later42/libs/content.py
@@ -1,7 +1,5 @@
-import requests
-
 from bs4 import BeautifulSoup
-from django.conf import settings
+from newspaper import Article, Config
 
 
 def sanitize_img_size(html: str):
@@ -13,10 +11,11 @@ def sanitize_img_size(html: str):
 
 
 def get_content(url: str):
-    if settings.READABILITY_HOST:
-        url = settings.READABILITY_HOST.rstrip(
-            '/') + '/api/content/v1/parser?url=' + url
-    try:
-        return requests.get(url).json()
-    except KeyError:
-        return None
+    """Download and parse ``url`` with newspaper, keeping the article HTML."""
+    config = Config()
+    config.keep_article_html = True
+    article = Article(url, config=config)
+    article.download()
+    article.parse()
+
+    return article
diff --git a/later42/migrations/0007_article_img.py b/later42/migrations/0007_article_img.py
new file mode 100644
index 0000000..edc8703
--- /dev/null
+++ b/later42/migrations/0007_article_img.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.1.3 on 2022-11-30 06:10
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('later42', '0006_remove_url_content_remove_url_title_article_short_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='article',
+            name='img',
+            field=models.URLField(blank=True, null=True),
+        ),
+    ]
diff --git a/later42/models/article.py b/later42/models/article.py
index 895eaf2..8b4cedd 100644
--- a/later42/models/article.py
+++ b/later42/models/article.py
@@ -8,3 +8,4 @@ class Article(models.Model):
     content = models.TextField(blank=True, null=True)
     title = models.CharField(max_length=2000, blank=True, null=True)
     short = models.TextField(blank=True, null=True)
+    img = models.URLField(blank=True, null=True)
diff --git a/later42/tasks.py b/later42/tasks.py
index 12b1c86..243b666 100644
--- a/later42/tasks.py
+++ b/later42/tasks.py
@@ -21,8 +21,6 @@ if AIRBRAKE_PROJECT_ID is not None and AIRBRAKE_PROJECT_KEY is not None:
 
 @shared_task()
 def get_url_content_task(url, user_id):
-    print(url)
-    print(user_id)
     user = User.objects.get(pk=int(user_id))
     url_object = URL(url=url, user=user)
     url_object.save()
@@ -30,7 +28,10 @@ def get_url_content_task(url, user_id):
 
     data = get_content(url)
     article = Article.objects.create(url=url_object)
-    article.content = data['rich_content']
-    article.title = data['title']
-    article.short = data['excerpt']
+    article.content = data.article_html
+    article.title = data.title
+    article.short = data.text[:150]
+    if data.has_top_image():
+        article.img = data.top_image
+
     article.save()
diff --git a/later42/templates/index.html b/later42/templates/index.html
index 5a033ee..fcc1116 100644
--- a/later42/templates/index.html
+++ b/later42/templates/index.html
@@ -15,6 +15,13 @@
+                {% if d.img %}
+

+ + + +

+ {% endif %} {% if d.short %}

{{ d.short }}
diff --git a/later42/views/reader.py b/later42/views/reader.py
index 1920413..250b35a 100644
--- a/later42/views/reader.py
+++ b/later42/views/reader.py
@@ -1,8 +1,5 @@
-from multiprocessing import context
 from django.contrib.auth.decorators import login_required
-from django.shortcuts import render, redirect
-from django.core.paginator import Paginator
-from django.conf import settings
+from django.shortcuts import render
 from later42.libs.content import get_content, sanitize_img_size
 from later42.models.article import Article
 from later42.models.urls import URL
@@ -15,11 +12,16 @@ def get(request, url_id=None):
     content = {}
     try:
         article = Article.objects.get(url=url)
-        content['title'] = url.title
+        content['title'] = article.title
         content['url'] = url.url
         content['rich_content'] = sanitize_img_size(article.content)
-    except:
-        content = get_content(url.url)
-        content['rich_content'] = sanitize_img_size(content['rich_content'])
+    except Exception:
+        # No usable stored article: parse the page live. get_content()
+        # returns a newspaper Article object, not a dict, so copy its
+        # fields into the template dict instead of item-assigning on it.
+        parsed = get_content(url.url)
+        content['title'] = parsed.title
+        content['url'] = url.url
+        content['rich_content'] = sanitize_img_size(parsed.article_html)
     context = {'url': url, 'content': content}
     return render(request, 'reader.html', context)
diff --git a/requirements.txt b/requirements.txt
index cee8400..b4e04f3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,4 @@ six==1.16.0
 celery[redis]==5.2.7
 pybrake==1.10.0
 sentry-sdk==1.11.0
+newspaper3k==0.2.8