# This is a very simple recommender system that recommends posts based on the
# post the user is currently reading.

import numpy as np
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from .context_processors import add_excerpt, add_num_comments
from .models import Post


def _plain_text(html):
    """Return the text content of *html* with all markup removed."""
    return BeautifulSoup(html, "html.parser").text


def next_read(post):
    """Recommend the public post whose body is most similar to *post*.

    The bodies of all other public posts are converted to plain text and
    vectorised with TF-IDF (English stop words removed). The current post is
    projected into the same vector space and the candidate with the highest
    cosine similarity is returned.

    Args:
        post: The post currently being read. Only its ``id`` is used; the
            post is re-fetched from the database.

    Returns:
        The most similar public post, with ``excerpt`` and ``num_comments``
        attributes set from ``add_excerpt`` and ``add_num_comments``, or
        ``None`` when fewer than two other public posts exist or when the
        candidate bodies contain no usable terms.
    """
    current_post = Post.objects.get(id=post.id)

    # Evaluate the queryset exactly once so that the row order of the TF-IDF
    # matrix is guaranteed to line up with the index used to pick the result.
    candidates = list(
        Post.objects.filter(is_public=True).exclude(id=current_post.id)
    )

    if len(candidates) < 2:
        return None

    # Our method is very simple. First we compare the bodies of the posts to
    # find the similarity between them. Then we return the post with the
    # highest similarity.
    #
    # NOTE: the originally planned fallback (when no post has similarity
    # > 0.5, return the most viewed post, preferably in the same category) is
    # not implemented here. The top-scoring candidate is returned regardless
    # of its score; if every score is equal, that is the first candidate.

    vectorizer = TfidfVectorizer(stop_words="english")
    try:
        vectors = vectorizer.fit_transform(
            [_plain_text(candidate.body) for candidate in candidates]
        )
    except ValueError:
        # scikit-learn raises ValueError when the vocabulary is empty, i.e.
        # every candidate body is blank or consists only of stop words. There
        # is nothing to compare against, so there is no recommendation.
        return None

    # Strip markup from the current post as well, so that tag and attribute
    # names do not count as terms when matching against plain-text candidates.
    current_vector = vectorizer.transform([_plain_text(current_post.body)])
    similarity = cosine_similarity(current_vector, vectors).flatten()
    similarity = np.nan_to_num(similarity)
    best_index = int(np.argmax(similarity))

    recommended = candidates[best_index]
    recommended.excerpt = add_excerpt(recommended)
    recommended.num_comments = add_num_comments(recommended)
    return recommended