# dtam-api/app/routers/animeguess.py

import os
import requests

from ..dependencies import return_animeguess_anime_list
from fastapi import APIRouter
from pydantic import BaseModel

from rapidfuzz import fuzz
from operator import itemgetter

import boto3
import json

router = APIRouter()

# Helper functions
def return_bad():
    """
    Builds the payload used to signal a bad request

    Note: this only builds and returns a plain dict; it does not itself
    set the status code of the HTTP response.

    Returns:
        JSON:
            statusCode: 400
            headers: Content-Type text/plain
            body: 400: Bad Request
    """
    # Assemble a fresh payload on every call so callers never share state
    response = dict(statusCode=400)
    response['headers'] = {'Content-Type': 'text/plain'}
    response['body'] = '400: Bad Request'
    return response

# Models
# Request body for the /ag-metadata route: carries the anime ID as a string
# (ag_metadata rejects values that are not made up entirely of digits).
class Metadata(BaseModel):
    id: str

# Request body for the /ag-search route: carries the free-text search query
# (ag_search rejects queries shorter than 3 characters).
class Search(BaseModel):
    query: str

# Routes
@router.post("/ag-metadata")
async def ag_metadata(metadata: Metadata):
    """
    Gets the metadata from MyAnimeList for the given anime ID

    Requires:
        POST:
            'id': MyAnimeList ID of the anime (ASCII digits only)

    Returns:
        JSON:
            metadata from MyAnimeList (the decoded JSON body of the MAL
            response), or the bad request payload if 'id' is not numeric

    Raises:
        requests.exceptions.RequestException: if MAL cannot be reached or
            does not answer within the timeout
    """
    # Imported locally so this route stays self-contained
    from fastapi.concurrency import run_in_threadpool

    anime_id = metadata.id

    # str.isdigit() alone also accepts non-ASCII digits (e.g. superscripts or
    # Arabic-Indic numerals), which are not valid MAL IDs and would end up in
    # the request URL, so require plain ASCII digits
    if not (anime_id.isascii() and anime_id.isdigit()):
        return return_bad()

    # MAL API key
    client_id = os.getenv('MAL_CLIENT_ID')

    # Get data from MAL
    url = f'https://api.myanimelist.net/v2/anime/{anime_id}?fields=title,studios,genres,mean,start_season,alternative_titles'

    # requests is blocking: run it in the threadpool so the event loop keeps
    # serving other requests, and bound the wait so a stalled upstream cannot
    # hang this request forever
    resp = await run_in_threadpool(
        requests.get,
        url,
        headers={'X-MAL-CLIENT-ID': client_id},
        timeout=10,
    )

    return resp.json()

def _match_score(query, candidate, fuzzy_threshold=65):
    """
    Scores how well a lower-cased query matches a lower-cased candidate

    Returns:
        200 if the query equals the candidate,
        100 if the query is contained in the candidate,
        the fuzzy ratio if it is above fuzzy_threshold,
        None if the candidate does not match at all
    """
    if query in candidate:
        return 200 if query == candidate else 100

    ratio = fuzz.ratio(query, candidate)
    return ratio if ratio > fuzzy_threshold else None


@router.post("/ag-search")
async def ag_search(search: Search):
    """
    Perform a fuzzy search for an anime title

    Each anime is matched against its title first; its synonyms are only
    consulted when the title does not match. Among the synonyms the best
    scoring one is used (the first one wins on ties), so an exact synonym
    match is never hidden by an earlier, weaker fuzzy match.

    Requires:
        POST - 'query': search query (at least 3 characters)

    Returns:
        JSON - possible anime titles ('title' and 'mal_id'), best match
        first, or the bad request payload if the query is too short
    """

    # Ensure search is at least 3 characters
    if len(search.query) < 3:
        return return_bad()

    # Lower-case the query once instead of on every comparison
    query = search.query.lower()

    # Keep track of titles, ids, and scores that are possible
    matches = []

    for item in return_animeguess_anime_list():
        title = item['title']

        # A match on the main title takes priority over any synonym
        title_score = _match_score(query, title.lower())
        if title_score is not None:
            matches.append({'title': title, 'score': title_score, 'mal_id': item['mal_id']})
            continue

        # Otherwise pick the best scoring synonym, if any matches
        best_score = None
        best_synonym = None
        for synonym in item['synonyms']:
            synonym_score = _match_score(query, synonym.lower())
            if synonym_score is None:
                continue
            if best_score is None or synonym_score > best_score:
                best_score = synonym_score
                best_synonym = synonym
                # An exact match cannot be beaten
                if best_score == 200:
                    break

        if best_score is not None:
            matches.append({'title': f'{title} [{best_synonym}]', 'score': best_score, 'mal_id': item['mal_id']})

    # Sort possible anime titles by score (stable, so ties keep list order)
    matches.sort(key=itemgetter('score'), reverse=True)

    # Drop the score since it is un-needed after the sort
    return [{'title': match['title'], 'mal_id': match['mal_id']} for match in matches]

def _load_anime_titles():
    """
    Reads every metadata.json under the days/ prefix of the source bucket
    and builds the list of display titles (blocking S3 calls)
    """
    source_name = os.getenv('SOURCE_BUCKET_NAME')

    # Log into AWS
    session = boto3.Session(
        aws_access_key_id=os.getenv('ACCESS_KEY'),
        aws_secret_access_key=os.getenv('SECRET_ACCESS_KEY'),
    )
    source_bucket = session.resource('s3').Bucket(source_name)

    # list of anime titles
    anime_titles = []

    # get titles through S3
    for obj in source_bucket.objects.filter(Prefix="days/"):
        if not obj.key.endswith("metadata.json"):
            continue
        metadata = json.loads(obj.get()['Body'].read())
        anime_titles.append({'title': f'{metadata["answer"]} [{metadata["eng_title"]}]'})

    return anime_titles


@router.get("/ag-list")
async def ag_list():
    """
    Lists the anime titles found in the source S3 bucket

    Reads the bucket name from SOURCE_BUCKET_NAME and the AWS credentials
    from ACCESS_KEY / SECRET_ACCESS_KEY.

    Returns:
        JSON - list of {'title': '<answer> [<eng_title>]'} entries, one per
        metadata.json object under the days/ prefix
    """
    # Imported locally so this route stays self-contained
    from fastapi.concurrency import run_in_threadpool

    # boto3 is blocking (one listing plus one GET per object): run it in the
    # threadpool so the event loop keeps serving other requests meanwhile
    return await run_in_threadpool(_load_anime_titles)