initial upload
This commit is contained in:
164
.gitignore
vendored
Normal file
164
.gitignore
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
.creds
|
||||
tools/*.json
|
||||
.devcontainer
|
10
Dockerfile
Normal file
10
Dockerfile
Normal file
@@ -0,0 +1,10 @@
|
||||
FROM python:3.11-bookworm

WORKDIR /api

# Install dependencies before copying the application code so the pip
# layer is cached and only re-runs when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app/ ./app

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
18
README.md
Normal file
18
README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# API
|
||||
A simple API built using FastAPI with some personal routes
|
||||
|
||||
## Running
|
||||
You can run the API by running:
|
||||
```
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||
```
|
||||
|
||||
## Routes
|
||||
### /ag-metadata
|
||||
Gets anime metadata (title, studios, genres, mean score, start season) from MyAnimeList for a given MAL ID
|
||||
|
||||
### /ag-search
|
||||
Performs a fuzzy search similar to animeguess.moe
|
||||
|
||||
# Tools
|
||||
Extra tools
|
0
app/__init__.py
Normal file
0
app/__init__.py
Normal file
20
app/dependencies.py
Normal file
20
app/dependencies.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import json
|
||||
|
||||
# Helper functions
|
||||
def load_animeguess_anime_list():
    """
    Load the parsed anime list from disk into the module-level cache.

    Reads app/parsed-anime-list-mini.json and stores the decoded JSON in
    the module-level ``animeguess_anime_list`` global so that
    return_animeguess_anime_list() can serve it without re-reading the file.

    Returns:
        The decoded anime list (also cached in the global).
    """
    global animeguess_anime_list
    # Context manager closes the handle on all paths; the original left
    # the file open for the lifetime of the process.
    with open('app/parsed-anime-list-mini.json', encoding='utf-8') as f:
        animeguess_anime_list = json.load(f)
    return animeguess_anime_list
|
||||
|
||||
def return_animeguess_anime_list():
    """
    Accessor for the cached parsed anime list.

    Returns:
        JSON: the anime list previously loaded into the module-level
        global by load_animeguess_anime_list().
    """
    return animeguess_anime_list
|
28
app/main.py
Normal file
28
app/main.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""FastAPI application entry point: loads data, wires CORS, routers, and a health check."""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from .dependencies import load_animeguess_anime_list

from .routers import animeguess

# Load anime list once at import time so request handlers read the cached copy.
load_animeguess_anime_list()

app = FastAPI()

# Middleware CORS
# NOTE(review): wildcard allow_origins combined with allow_credentials=True
# is rejected by browsers per the CORS spec -- confirm credentials are needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"]
)

# Routes
app.include_router(animeguess.router)

# Health check
@app.get("/ping")
async def ping():
    """Liveness probe; always answers with a fixed Ping/Pong payload."""
    return {"Ping":"Pong"}
|
1
app/parsed-anime-list-mini.json
Normal file
1
app/parsed-anime-list-mini.json
Normal file
File diff suppressed because one or more lines are too long
0
app/routers/__init__.py
Normal file
0
app/routers/__init__.py
Normal file
145
app/routers/animeguess.py
Normal file
145
app/routers/animeguess.py
Normal file
@@ -0,0 +1,145 @@
|
||||
import os
import requests

from ..dependencies import return_animeguess_anime_list
from fastapi import APIRouter
from pydantic import BaseModel

from rapidfuzz import fuzz
from operator import itemgetter

# Router holding the /ag-* routes; registered by app.main via include_router.
router = APIRouter()
|
||||
|
||||
# Helper functions
|
||||
def return_bad():
    """
    Build the JSON payload returned for a malformed request.

    Returns:
        dict: statusCode 400, a text/plain Content-Type header, and a
        plain-text '400: Bad Request' body.
    """
    headers = {'Content-Type': 'text/plain'}
    response = {
        'statusCode': 400,
        'headers': headers,
        'body': '400: Bad Request',
    }
    return response
|
||||
|
||||
# Models
|
||||
class Metadata(BaseModel):
    """Request body for the /ag-metadata route."""

    # MyAnimeList ID as a string; ag_metadata rejects non-digit values.
    id: str
|
||||
|
||||
class Search(BaseModel):
    """Request body for the /ag-search route."""

    # Free-text title query; ag_search rejects queries shorter than 3 chars.
    query: str
|
||||
|
||||
# Routes
|
||||
@router.post("/ag-metadata")
async def ag_metadata(metadata: Metadata):
    """
    Get anime metadata from MyAnimeList for the given MAL ID.

    Requires:
        POST:
            'id': MyAnimeList ID of the anime (decimal string)

    Returns:
        JSON:
            metadata from MyAnimeList (title, studios, genres, mean,
            start_season), or a 400 payload when 'id' is not numeric
    """
    # Reject anything that is not a purely numeric ID.
    if not metadata.id.isdigit():
        return return_bad()

    # MAL API key. NOTE(review): if MAL_CLIENT_ID is unset this sends a
    # None header and MAL will reject the request -- confirm deployment env.
    CLIENT_ID = os.getenv('MAL_CLIENT_ID')

    # Get data from MAL. metadata.id is already a str (no str() needed),
    # and a timeout keeps a stalled MAL API from hanging the worker forever.
    url = f'https://api.myanimelist.net/v2/anime/{metadata.id}?fields=title,studios,genres,mean,start_season'
    resp = requests.get(
        url,
        headers={'X-MAL-CLIENT-ID': CLIENT_ID},
        timeout=10,
    )

    return resp.json()
|
||||
|
||||
@router.post("/ag-search")
async def ag_search(search: Search):
    """
    Perform a fuzzy search for an anime title.

    Requires:
        POST - 'query': search query (minimum 3 characters)

    Returns:
        JSON - possible anime titles with mal_id, best matches first,
        or a 400 payload when the query is too short
    """
    # Ensure search is at least 3 characters
    if len(search.query) < 3:
        return return_bad()

    # Lowercase the query once instead of on every comparison below.
    query = search.query.lower()

    # Minimum fuzz.ratio for a title/synonym to count as a match
    # (was a magic number duplicated in both branches).
    FUZZY_THRESHOLD = 65

    # Keep track of titles, ids, and scores that are possible
    data = return_animeguess_anime_list()
    anime_score_list = []

    # Cycle through parsed list
    for item in data:
        title = item['title']
        title_lower = title.lower()

        # Fuzzy-match the query against the canonical title.
        fuzzy_score = fuzz.ratio(query, title_lower)
        in_title = query in title_lower

        if in_title or fuzzy_score > FUZZY_THRESHOLD:
            # Substring hits outrank fuzzy hits; an exact match outranks both.
            if in_title:
                score = 200 if query == title_lower else 100
            else:
                score = fuzzy_score
            anime_score_list.append({'title': title, 'score': score, 'mal_id': item['mal_id']})
        else:
            # Title did not match; fall back to the synonyms. The first
            # matching synonym wins and the rest are skipped.
            for synonym in item['synonyms']:
                synonym_lower = synonym.lower()
                fuzzy_score = fuzz.ratio(query, synonym_lower)
                in_synonym = query in synonym_lower

                if in_synonym or fuzzy_score > FUZZY_THRESHOLD:
                    if in_synonym:
                        score = 200 if query == synonym_lower else 100
                    else:
                        score = fuzzy_score
                    anime_score_list.append({
                        'title': f'{title} [{synonym}]',
                        'score': score,
                        'mal_id': item['mal_id'],
                    })
                    break

    # Sort possible anime titles by score, best first (stable sort).
    anime_score_list.sort(key=itemgetter('score'), reverse=True)

    # Remove the score key since it is un-needed after the sort
    # (the original also rebound the loop variable to the popped value,
    # which did nothing).
    for anime in anime_score_list:
        anime.pop('score')

    return anime_score_list
|
17
requirements.txt
Normal file
17
requirements.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
annotated-types==0.5.0
|
||||
anyio==3.7.1
|
||||
certifi==2023.5.7
|
||||
charset-normalizer==3.2.0
|
||||
click==8.1.6
|
||||
fastapi==0.100.0
|
||||
h11==0.14.0
|
||||
idna==3.4
|
||||
pydantic==2.0.3
|
||||
pydantic_core==2.3.0
|
||||
rapidfuzz==3.1.1
|
||||
requests==2.31.0
|
||||
sniffio==1.3.0
|
||||
starlette==0.27.0
|
||||
typing_extensions==4.7.1
|
||||
urllib3==2.0.3
|
||||
uvicorn==0.23.1
|
53
tools/get-mal.py
Normal file
53
tools/get-mal.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import requests
import time
import json
import os

# Setup
CLIENT_ID = os.getenv('MAL_CLIENT_ID')  # MAL API key; NOTE(review): None when unset
RANKING_URL = 'https://api.myanimelist.net/v2/anime/ranking'


def _fetch_ranking(ranking_type, page_size, total):
    """
    Fetch the top `total` ranked anime of `ranking_type` from the MAL API.

    The ranking endpoint caps results per request, so pages of `page_size`
    are pulled until `total` is reached, sleeping 1s between requests.
    (Replaces two near-identical copy/pasted while loops.)

    Returns:
        list[dict]: one {'title', 'id'} dict per anime.
    """
    entries = []
    offset = 0
    while offset < total:
        url = f'{RANKING_URL}?ranking_type={ranking_type}&limit={page_size}&offset={offset}'
        # Timeout keeps the script from hanging forever on a stalled request.
        resp = requests.get(url, headers={'X-MAL-CLIENT-ID': CLIENT_ID}, timeout=30)
        anime = resp.json()

        # Add into our list
        for node in anime['data']:
            entries.append({'title': node['node']['title'], 'id': node['node']['id']})

        # Start at the next page
        offset = offset + page_size

        # Let's not spam the MAL API
        time.sleep(1)
    return entries


# Get the list of TV animes (500 per request, top 5000), then movies (top 250).
anime_list = _fetch_ranking('tv', 500, 5000)
anime_list.extend(_fetch_ranking('movie', 250, 250))

# Write to disk (explicit UTF-8 so ensure_ascii=False output is portable).
with open('mal.json', 'w', encoding='utf-8') as f:
    json.dump(anime_list, f, ensure_ascii=False, indent=4)
|
37
tools/match-mal-offlinedb.py
Normal file
37
tools/match-mal-offlinedb.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import json

# From get-mal.py
with open('mal.json', encoding='utf-8') as f:
    mal_anime = json.load(f)

# File from https://github.com/manami-project/anime-offline-database
with open('anime-offline-database.json', encoding='utf-8') as f:
    anime_db = json.load(f)

# Setup
matched_list = {'data': []}
unmatched_list = []

# Index the offline DB by title once (first occurrence wins, matching the
# original linear scan) instead of rescanning the whole DB per MAL entry.
db_by_title = {}
for anime in anime_db['data']:
    db_by_title.setdefault(anime['title'], anime)

# Match the title from the MAL API to the offline DB
for mal in mal_anime:
    anime = db_by_title.get(mal['title'])
    if anime is None:
        # Create a list of unmatched titles for manual follow-up
        unmatched_list.append(mal['title'])
    else:
        anime['mal_id'] = mal['id']
        matched_list['data'].append(anime)

# Write to disk the matched titles
with open('matched-anime-list.json', 'w', encoding='utf-8') as f:
    json.dump(matched_list, f, ensure_ascii=False, indent=2)

# Print out unmatched titles
print(f'Could not match the following, add manually (if wanted)\n: {unmatched_list}')
|
||||
|
||||
|
388
tools/parse-anime.py
Normal file
388
tools/parse-anime.py
Normal file
@@ -0,0 +1,388 @@
|
||||
import json
|
||||
import pandas as pd
|
||||
|
||||
# Skip entries if these match exactly.
# NOTE(review): entries written with "\/" (e.g. "Fate\/Zero") contain a
# literal backslash in the Python string, so they can never equal a real
# title -- confirm whether they were meant to be the plain "/" forms that
# also appear below.
remove_anime = [
    # Other seasons
    "Initial D Fifth Stage",
    "Initial D Final Stage",
    "Initial D Fourth Stage",
    "Initial D Second Stage",
    "Initial D Third Stage",
    "Tottoko Hamtaro (2012)",
    "Tottoko Hamtaro Dechu",
    "Tottoko Hamtarou Hai!",
    "Tottoko Hamtarou: Hamu Hamu Paradichu!",
    "Naruto (Shinsaku Anime)",
    "Naruto SD: Rock Lee no Seishun Full-Power Ninden",
    ".hack//Roots",
    ".hack//Tasogare no Udewa Densetsu",
    ".hack//The Movie: Sekai no Mukou ni",
    "Akira (Shin Anime)",
    "Eureka Seven AO",
    "Escaflowne",
    "Psycho-Pass RE:Start",
    "Psycho-Pass 3",
    "Gundam Seed Destiny HD Remaster",
    "Gundam: G no Reconguista",
    "Kidou Senshi Gundam SEED Destiny",
    "Kidou Senshi Gundam: Tekketsu no Orphans - Tokubetsu-hen",
    "Mobile Suit Gundam 00: 10th Anniversary Project",
    "Mobile Suit Gundam Seed HD Remaster",
    "Mobile Suit Gundam UC2",
    "Mobile Suit SD Gundam The Movie: Musha Knight Commando: SD Gundam Scramble",
    "Space Gundam V",
    "Gundam Build Fighters",
    "Bleach: Sennen Kessen-hen",
    "BLEACH: Sennen Kessen-hen 3rd Cour",
    "BLEACH: Sennen Kessen-hen 4th Cour",
    "Bocchi the Rock! Movie",
    "Jujutsu Kaisen 0 Movie",
    "Dragon Ball GT",
    "Dragon Ball Kai",
    "Dragon Ball Kai (2014)",
    "Shingeki! Kyojin Chuugakkou",
    "Meitantei Conan: Zero no Tea Time",
    "Meitantei Conan: Hannin no Hanzawa-san",
    "Mashin Eiyuuden Wataru 2",
    "One Piece: Mugiwara no Ichimi \u2013 Minna e \u201cTearai, Suimin o!\u201d Kodomo-tachi Ouen SP",
    "Gintama.: Porori-hen",
    "Gintama.: Shirogane no Tamashii-hen",
    "Hunter x Hunter (2011)",
    "Huoyan Shan Lixian Ji",
    "Huyao Xiao Hongniang Movie: Xia Sha",
    "Fullmetal Alchemist",
    "Fushigi Dagashiya: Zenitendou Movie - Tsuri Taiyaki",
    "Mirai Shounen Conan 2: Taiga Daibouken",
    "MIRROR",
    "Pokemon Housoukyoku",
    "Pokemon (2019)",
    "Sword Art Online Alternative: Gun Gale Online",
    "Sword Art Online II",
    "Sword Art Online: Alicization",
    "Sylvanian Families: Freya no Happy Diary",
    "Sylvanian Families: Mini Story",
    "Kino no Tabi: The Beautiful World - The Animated Series",
    "Kanon",
    "Clannad Movie",
    "Toaru Majutsu no Index Movie: Endymion no Kiseki",
    "Toaru Majutsu no Index II",
    "Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie",
    "Cowboy Bebop: Tengoku no Tobira",
    "Suzumiya Haruhi no Shoushitsu",
    "Koukaku Kidoutai Nyuumon Arise",
    "Koukaku Kidoutai Arise: Alternative Architecture",
    "Koukaku Kidoutai: Stand Alone Complex - Tachikoma na Hibi (TV)",
    "Koukaku Kidoutai: Stand Alone Complex 2nd GIG",
    "Yu\u2606Gi\u2606Oh! 5D's",
    "Yu\u2606Gi\u2606Oh! Arc-V",
    "Yu\u2606Gi\u2606Oh! (Movie)",
    "Yu\u2606Gi\u2606Oh! Duel Monsters ALEX",
    "Yu\u2606Gi\u2606Oh! Go Rush!",
    "Yu\u2606Gi\u2606Oh! Go Rush!!",
    "Yu\u2606Gi\u2606Oh! Sevens",
    "Yu\u2606Gi\u2606Oh! VRAINS",
    "Yu\u2606Gi\u2606Oh! Zexal",
    "Yu\u2606Gi\u2606Oh! Zexal Second",
    "InuYasha: Kanketsu-hen",
    "Lupin the Third: Mine Fujiko to Iu Onna",
    "Hidan no Aria AA",
    "Higashi no Eden: Air Communication",
    "Higurashi no Naku Koro ni Gou",
    "Higurashi no Naku Koro ni Sotsu",
    "Himawari!!",
    "Zutto Mae kara Suki deshita. Kokuhaku Jikkou Iinkai",
    "Fairy Tail: 100 Years Quest",
    "Hong Mao Lan Tu MTV",
    "Fate\/stay night: Unlimited Blade Works",
    "Fate\/Zero",
    "Fate\/Zero Cafe",
    "Final Fantasy VII: Advent Children - Venice Film Festival Footage",
    "FLCL: Shoegaze",
    "Free! Dive to the Future: Ima kara demo Wakaru \u201cFree! Series\u201d",
    "Fruits Basket 1st Season",
    "Fruits Basket: Prelude",
    "Fate/Extra: Last Encore",
    "Fate/Apocrypha",
    "Fate/Grand Order: Zettai Majuu Sensen Babylonia",
    "Fate/Extra: Last Encore - Illustrias Tendousetsu",
    "Fate/kaleid liner Prisma\u2606Illya: Prisma\u2606Phantasm",
    "Fate/stay night: Unlimited Blade Works",
    "Fate/Zero",
    "Fate/Zero Cafe",
    "Time Bokan 2000: Kaitou Kiramekiman",
    "Time Bokan 24",
    "Zombieland Saga Movie",
    "Zoids: Chaotic Century",
    "Zoids: Guardian Force",
    "Queen's Blade: Rebellion",
    "Queen's Blade: Gyokuza wo Tsugu Mono",
    "Shen Bing Xiaojiang Movie",
    "Kono Subarashii Sekai ni Bakuen wo!",
    "Kono Subarashii Sekai ni Shukufuku wo! 2",
    "Kono Subarashii Sekai ni Shukufuku wo! Movie: Kurenai Densetsu",
    "Little Witch Academia: Mahoujikake no Parade",
    "Gochuumon wa Usagi desu ka?? Dear My Sister",
    "Break Blade Movie 3: Kyoujin no Ato",
    "Saint\u2606Oniisan (Movie)",
    "Bungou Stray Dogs: Dead Apple",
    "Kidou Keisatsu Patlabor 2 the Movie",
    "Quanzhi Gaoshou: Dianfeng Rongyao",
    "Persona 3 the Movie 4: Winter of Rebirth",
    "Luo Xiao Hei Zhan Ji (Movie)",
    "Chuunibyou demo Koi ga Shitai! Movie: Take On Me",
    "Mahou Shoujo Lyrical Nanoha: The Movie 2nd A's",
    "Black Clover: Mahou Tei no Ken",
    "Natsume Yuujinchou: Ishi Okoshi to Ayashiki Raihousha",
    "Kyoukai no Kanata Movie 2: I'll Be Here - Mirai-hen",
    "Doraemon Movie 31: Shin Nobita to Tetsujin Heidan - Habatake Tenshi-tachi",
    "Stand By Me Doraemon 2",
    "Berserk: Ougon Jidai-hen III - Kourin",
    "K-On! Movie",
    "Violet Evergarden Gaiden: Eien to Jidou Shuki Ningyou",
    "Saenai Heroine no Sodatekata Fine",
    "Yuru Camp\u25b3 Movie",
    "The First Slam Dunk",
    "Kaguya-sama wa Kokurasetai: First Kiss wa Owaranai",

    # Similar synonyms
    "Shi Er Shengxiao: Fuxing Gao Zhao Zhu Xiao Ba",
    "Fuxing Ba Jie",
    "Onigiri",

]

# Skip these entries if it's a movie AND contains one of these
skip_movie_entries = [
    "Detective Conan",
    "Naruto",
    "Psycho-Pass",
    "Girls & Panzer",
    "Eureka Seven",
    "Hamtarou",
    "Initial D",
    "Gundam",
    "Kimetsu no Yaiba",
    "Boku no Hero Academia",
    "Bleach",
    "Dragon Ball",
    "Attack on Titan",
    "Code Geass",
    "Made in Abyss",
    "One Piece",
    "JoJo's Bizarre Adventure",
    "YuYu Hakusho",
    "Haikyu!!",
    "Gintama",
    "Hunter x Hunter",
    "Fullmetal Alchemist",
    "Mirai Shounen Conan",
    "Pokemon",
    "Pororo",
    "Power Battle Watch Car",
    "Precure",
    "Sword Art Online",
    "Sylvanian Families",
    "Kino no Tabi",
    "Gekijouban",
    "Ginga Tetsudou",
    "GHOST IN THE SHELL",
    "Ghost in the Shell",
    "Yu\u2606Gi\u2606Oh!",
    "InuYasha",
    "Lupin III",
    "Hibike! Euphonium",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Hinomaru Hatanosuke",
    "FLCL",
    "Free!",
    "Fate/Grand Order",
]

# Skip these entries if it's a TV and contains one of these:
# (several keywords carry a trailing space on purpose, to match whole words)
skip_tv_entries = [
    "Huo Xing Wa",
    "Huoli Shaonian Wang",
    "Huoxing Wa",
    "Pocket Monsters XY",
    "Pororo",
    "Hime Chen",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Flowering Heart",
    "Fu Guo",
    "Fate/kaleid liner Prisma",
    "Fei ",
    "Gangtie Feilong",
    "Kuaile ",
    "Tianyan",
    "Time Bokan Series",
    "Lixian",
    "Zhang ",
    "Zhen ",
    "Zhi ",
    "Zui ",
    "Zoids ",
    "Zi ",
    "Qi ",
    "Quwei",
    "Mengxiang",
    "Xiao ",
    "Xun",
    "Liang",
    "Xiaojiang",
    "Shen ",
    "Konglong",
    "Xi ",
    "Xiaolong",
    "Xiaoxiong",
    "Xiaoyuan",
    "Xin ",
    "Xing ",
    "Xiaokang",
    "Xiaohu",
    "Xianggu",
    "Wu ",
    "Wudang"
]

# Skip entries if the title contains 'Season xx' (or another later-season marker)
skip_seasons_entries = [
    "Season 0",
    "Season 2",
    "Season 3",
    "Season 4",
    "Season 5",
    "Season 6",
    "Season 7",
    "Season 8",
    "Season 9",
    "Season 10",
    "Season 11",
    "Season 12",
    "Season 13",
    "Season 14",
    "Season 15",
    "Season 16",
    "Season 17",
    "Season 18",
    "Season 19",
    "Season 20",
    "season 2",
    "season 3",
    "season 4",
    "season 5",
    "season 6",
    "season 7",
    "season 8",
    "season 9",
    "2nd Season",
    "3rd Season",
    "4th Season",
    "5th Season",
    "6th Season",
    "7th Season",
    "8th Season",
    "9th Season",
    "10th Season",
    "11th Season",
    "Second Season",
    "Third Season",
    "Season II",
    "Season III",
    "Season Two",
    "Part 2",
    "Part 3",
    "Part 4",
    "Part 5",
    "Part 6",
]
|
||||
|
||||
# Matched MAL/offline-DB entries produced by match-mal-offlinedb.py.
with open('matched-anime-list.json', encoding='utf-8') as f:
    data = json.load(f)
parsed = []  # list of parsed entries

for i in data['data']:

    # Only keep movies or TV shows
    if i['type'] not in ('MOVIE', 'TV'):
        continue

    # Remove exact-title entries on the manual blocklist
    if i['title'] in remove_anime:
        continue

    # Remove unwanted entries if a blocked keyword is in the title AND a movie
    if i['type'] == 'MOVIE' and any(movies in i['title'] for movies in skip_movie_entries):
        continue

    # Remove unwanted entries if a blocked keyword is in the title AND a TV
    if i['type'] == 'TV' and any(tv in i['title'] for tv in skip_tv_entries):
        continue

    # Remove entries whose title looks like a later season/part
    if any(seasons in i['title'] for seasons in skip_seasons_entries):
        continue

    # Cycle through the synonyms: toss the whole entry when a synonym
    # reveals it is a later season (or, for movies, a blocked franchise);
    # otherwise keep only the ASCII synonyms.
    toss_based_on_synonym = False
    new_synonyms = []
    for j in i['synonyms']:
        if any(seasons in j for seasons in skip_seasons_entries):
            toss_based_on_synonym = True
            break
        if i['type'] == 'MOVIE' and any(movies in j for movies in skip_movie_entries):
            toss_based_on_synonym = True
            break
        # Only keep synonyms that don't have unicode in them
        if j.isascii():
            new_synonyms.append(j)

    if toss_based_on_synonym:
        continue

    i['synonyms'] = new_synonyms
    parsed.append(i)


# Convert to dataframe for further parsing
df = pd.DataFrame(parsed)
df = df.drop(['sources', 'status', 'picture', 'thumbnail', 'relations', 'tags', 'episodes', 'animeSeason'], axis=1)  # remove columns

# Outputs
df.reset_index().to_json(r'parsed-anime-list.json', orient='records', indent=2)

# Remove additional columns for mini version
df = df.drop(['type'], axis=1)  # remove columns
df.reset_index().to_json(r'parsed-anime-list-mini.json', orient='records')
|
Reference in New Issue
Block a user