diff --git a/later42/libs/content.py b/later42/libs/content.py index 29cde73..6b16c1f 100644 --- a/later42/libs/content.py +++ b/later42/libs/content.py @@ -1,7 +1,5 @@ -import requests - from bs4 import BeautifulSoup -from django.conf import settings +from newspaper import Article, Config def sanitize_img_size(html: str): @@ -13,10 +11,10 @@ def sanitize_img_size(html: str): def get_content(url: str): - if settings.READABILITY_HOST: - url = settings.READABILITY_HOST.rstrip( - '/') + '/api/content/v1/parser?url=' + url - try: - return requests.get(url).json() - except KeyError: - return None + config = Config() + config.keep_article_html = True + article = Article(url, config=config) + article.download() + article.parse() + + return article diff --git a/later42/migrations/0007_article_img.py b/later42/migrations/0007_article_img.py new file mode 100644 index 0000000..edc8703 --- /dev/null +++ b/later42/migrations/0007_article_img.py @@ -0,0 +1,18 @@ +# Generated by Django 4.1.3 on 2022-11-30 06:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('later42', '0006_remove_url_content_remove_url_title_article_short_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='article', + name='img', + field=models.URLField(blank=True, null=True), + ), + ] diff --git a/later42/models/article.py b/later42/models/article.py index 895eaf2..8b4cedd 100644 --- a/later42/models/article.py +++ b/later42/models/article.py @@ -8,3 +8,4 @@ class Article(models.Model): content = models.TextField(blank=True, null=True) title = models.CharField(max_length=2000, blank=True, null=True) short = models.TextField(blank=True, null=True) + img = models.URLField(blank=True, null=True) diff --git a/later42/tasks.py b/later42/tasks.py index 12b1c86..243b666 100644 --- a/later42/tasks.py +++ b/later42/tasks.py @@ -21,8 +21,6 @@ if AIRBRAKE_PROJECT_ID is not None and AIRBRAKE_PROJECT_KEY is not None: @shared_task() def get_url_content_task(url, user_id): - print(url) - print(user_id) user = User.objects.get(pk=int(user_id)) url_object = URL(url=url, user=user) url_object.save() @@ -30,7 +28,10 @@ def get_url_content_task(url, user_id): data = get_content(url) article = Article.objects.create(url=url_object) - article.content = data['rich_content'] - article.title = data['title'] - article.short = data['excerpt'] + article.content = data.article_html + article.title = data.title + article.short = data.text[:150] + if data.has_top_image(): + article.img = data.top_image + article.save() diff --git a/later42/templates/index.html b/later42/templates/index.html index 5a033ee..fcc1116 100644 --- a/later42/templates/index.html +++ b/later42/templates/index.html @@ -15,6 +15,13 @@ + {% if d.img %} +
+
+
+
+