initial upload
This commit is contained in:
164
.gitignore
vendored
Normal file
164
.gitignore
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
.creds
|
||||
tools/*.json
|
||||
.devcontainer
|
10
Dockerfile
Normal file
10
Dockerfile
Normal file
@@ -0,0 +1,10 @@
|
||||
FROM python:3.11-bookworm

WORKDIR /api

# Install dependencies before copying the application code so the pip
# layer is cached and only re-runs when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app/ ./app

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
18
README.md
Normal file
18
README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# API
|
||||
A simple API built using FastAPI with some personal routes
|
||||
|
||||
## Running
|
||||
You can run the API by running:
|
||||
```
|
||||
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
||||
```
|
||||
|
||||
## Routes
|
||||
### /ag-metadata
|
||||
Gets anime metadata (title, studios, genres, mean score, start season) from MyAnimeList for a given MAL ID
|
||||
|
||||
### /ag-search
|
||||
Performs a fuzzy search similar to animeguess.moe
|
||||
|
||||
# Tools
|
||||
Extra tools
|
0
app/__init__.py
Normal file
0
app/__init__.py
Normal file
20
app/dependencies.py
Normal file
20
app/dependencies.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import json
|
||||
|
||||
# Helper functions
|
||||
def load_animeguess_anime_list():
    """
    Load the parsed anime list from disk into the module-level cache.

    Reads app/parsed-anime-list-mini.json and stores the decoded JSON in
    the module-level ``animeguess_anime_list`` global so that
    return_animeguess_anime_list() can serve it without re-reading the file.

    Returns:
        The decoded anime list (also cached in the global).
    """
    global animeguess_anime_list
    # Context manager closes the handle on all paths; the original left
    # the file open for the lifetime of the process.
    with open('app/parsed-anime-list-mini.json', encoding='utf-8') as f:
        animeguess_anime_list = json.load(f)
    return animeguess_anime_list
|
||||
|
||||
def return_animeguess_anime_list():
    """
    Accessor for the cached parsed anime list.

    Returns:
        JSON: the anime list previously loaded into the module-level
        global by load_animeguess_anime_list().
    """
    return animeguess_anime_list
|
28
app/main.py
Normal file
28
app/main.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""FastAPI application entry point: loads data, wires CORS, routers, and a health check."""

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from .dependencies import load_animeguess_anime_list

from .routers import animeguess

# Load anime list once at import time so request handlers read the cached copy.
load_animeguess_anime_list()

app = FastAPI()

# Middleware CORS
# NOTE(review): wildcard allow_origins combined with allow_credentials=True
# is rejected by browsers per the CORS spec -- confirm credentials are needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"]
)

# Routes
app.include_router(animeguess.router)

# Health check
@app.get("/ping")
async def ping():
    """Liveness probe; always answers with a fixed Ping/Pong payload."""
    return {"Ping":"Pong"}
|
1
app/parsed-anime-list-mini.json
Normal file
1
app/parsed-anime-list-mini.json
Normal file
File diff suppressed because one or more lines are too long
0
app/routers/__init__.py
Normal file
0
app/routers/__init__.py
Normal file
145
app/routers/animeguess.py
Normal file
145
app/routers/animeguess.py
Normal file
@@ -0,0 +1,145 @@
|
||||
import os
import requests

from ..dependencies import return_animeguess_anime_list
from fastapi import APIRouter
from pydantic import BaseModel

from rapidfuzz import fuzz
from operator import itemgetter

# Router holding the /ag-* routes; registered by app.main via include_router.
router = APIRouter()
|
||||
|
||||
# Helper functions
|
||||
def return_bad():
    """
    Build the JSON payload returned for a malformed request.

    Returns:
        dict: statusCode 400, a text/plain Content-Type header, and a
        plain-text '400: Bad Request' body.
    """
    headers = {'Content-Type': 'text/plain'}
    response = {
        'statusCode': 400,
        'headers': headers,
        'body': '400: Bad Request',
    }
    return response
|
||||
|
||||
# Models
|
||||
class Metadata(BaseModel):
    """Request body for the /ag-metadata route."""

    # MyAnimeList ID as a string; ag_metadata rejects non-digit values.
    id: str
|
||||
|
||||
class Search(BaseModel):
    """Request body for the /ag-search route."""

    # Free-text title query; ag_search rejects queries shorter than 3 chars.
    query: str
|
||||
|
||||
# Routes
|
||||
@router.post("/ag-metadata")
async def ag_metadata(metadata: Metadata):
    """
    Get anime metadata from MyAnimeList for the given MAL ID.

    Requires:
        POST:
            'id': MyAnimeList ID of the anime (decimal string)

    Returns:
        JSON:
            metadata from MyAnimeList (title, studios, genres, mean,
            start_season), or a 400 payload when 'id' is not numeric
    """
    # Reject anything that is not a purely numeric ID.
    if not metadata.id.isdigit():
        return return_bad()

    # MAL API key. NOTE(review): if MAL_CLIENT_ID is unset this sends a
    # None header and MAL will reject the request -- confirm deployment env.
    CLIENT_ID = os.getenv('MAL_CLIENT_ID')

    # Get data from MAL. metadata.id is already a str (no str() needed),
    # and a timeout keeps a stalled MAL API from hanging the worker forever.
    url = f'https://api.myanimelist.net/v2/anime/{metadata.id}?fields=title,studios,genres,mean,start_season'
    resp = requests.get(
        url,
        headers={'X-MAL-CLIENT-ID': CLIENT_ID},
        timeout=10,
    )

    return resp.json()
|
||||
|
||||
@router.post("/ag-search")
async def ag_search(search: Search):
    """
    Perform a fuzzy search for an anime title.

    Requires:
        POST - 'query': search query (minimum 3 characters)

    Returns:
        JSON - possible anime titles with mal_id, best matches first,
        or a 400 payload when the query is too short
    """
    # Ensure search is at least 3 characters
    if len(search.query) < 3:
        return return_bad()

    # Lowercase the query once instead of on every comparison below.
    query = search.query.lower()

    # Minimum fuzz.ratio for a title/synonym to count as a match
    # (was a magic number duplicated in both branches).
    FUZZY_THRESHOLD = 65

    # Keep track of titles, ids, and scores that are possible
    data = return_animeguess_anime_list()
    anime_score_list = []

    # Cycle through parsed list
    for item in data:
        title = item['title']
        title_lower = title.lower()

        # Fuzzy-match the query against the canonical title.
        fuzzy_score = fuzz.ratio(query, title_lower)
        in_title = query in title_lower

        if in_title or fuzzy_score > FUZZY_THRESHOLD:
            # Substring hits outrank fuzzy hits; an exact match outranks both.
            if in_title:
                score = 200 if query == title_lower else 100
            else:
                score = fuzzy_score
            anime_score_list.append({'title': title, 'score': score, 'mal_id': item['mal_id']})
        else:
            # Title did not match; fall back to the synonyms. The first
            # matching synonym wins and the rest are skipped.
            for synonym in item['synonyms']:
                synonym_lower = synonym.lower()
                fuzzy_score = fuzz.ratio(query, synonym_lower)
                in_synonym = query in synonym_lower

                if in_synonym or fuzzy_score > FUZZY_THRESHOLD:
                    if in_synonym:
                        score = 200 if query == synonym_lower else 100
                    else:
                        score = fuzzy_score
                    anime_score_list.append({
                        'title': f'{title} [{synonym}]',
                        'score': score,
                        'mal_id': item['mal_id'],
                    })
                    break

    # Sort possible anime titles by score, best first (stable sort).
    anime_score_list.sort(key=itemgetter('score'), reverse=True)

    # Remove the score key since it is un-needed after the sort
    # (the original also rebound the loop variable to the popped value,
    # which did nothing).
    for anime in anime_score_list:
        anime.pop('score')

    return anime_score_list
|
17
requirements.txt
Normal file
17
requirements.txt
Normal file
@@ -0,0 +1,17 @@
|
||||
annotated-types==0.5.0
|
||||
anyio==3.7.1
|
||||
certifi==2023.5.7
|
||||
charset-normalizer==3.2.0
|
||||
click==8.1.6
|
||||
fastapi==0.100.0
|
||||
h11==0.14.0
|
||||
idna==3.4
|
||||
pydantic==2.0.3
|
||||
pydantic_core==2.3.0
|
||||
rapidfuzz==3.1.1
|
||||
requests==2.31.0
|
||||
sniffio==1.3.0
|
||||
starlette==0.27.0
|
||||
typing_extensions==4.7.1
|
||||
urllib3==2.0.3
|
||||
uvicorn==0.23.1
|
53
tools/get-mal.py
Normal file
53
tools/get-mal.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import requests
import time
import json
import os

# Setup
CLIENT_ID = os.getenv('MAL_CLIENT_ID')  # MAL API key; NOTE(review): None when unset
RANKING_URL = 'https://api.myanimelist.net/v2/anime/ranking'


def _fetch_ranking(ranking_type, page_size, total):
    """
    Fetch the top `total` ranked anime of `ranking_type` from the MAL API.

    The ranking endpoint caps results per request, so pages of `page_size`
    are pulled until `total` is reached, sleeping 1s between requests.
    (Replaces two near-identical copy/pasted while loops.)

    Returns:
        list[dict]: one {'title', 'id'} dict per anime.
    """
    entries = []
    offset = 0
    while offset < total:
        url = f'{RANKING_URL}?ranking_type={ranking_type}&limit={page_size}&offset={offset}'
        # Timeout keeps the script from hanging forever on a stalled request.
        resp = requests.get(url, headers={'X-MAL-CLIENT-ID': CLIENT_ID}, timeout=30)
        anime = resp.json()

        # Add into our list
        for node in anime['data']:
            entries.append({'title': node['node']['title'], 'id': node['node']['id']})

        # Start at the next page
        offset = offset + page_size

        # Let's not spam the MAL API
        time.sleep(1)
    return entries


# Get the list of TV animes (500 per request, top 5000), then movies (top 250).
anime_list = _fetch_ranking('tv', 500, 5000)
anime_list.extend(_fetch_ranking('movie', 250, 250))

# Write to disk (explicit UTF-8 so ensure_ascii=False output is portable).
with open('mal.json', 'w', encoding='utf-8') as f:
    json.dump(anime_list, f, ensure_ascii=False, indent=4)
|
37
tools/match-mal-offlinedb.py
Normal file
37
tools/match-mal-offlinedb.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import json

# From get-mal.py
with open('mal.json', encoding='utf-8') as f:
    mal_anime = json.load(f)

# File from https://github.com/manami-project/anime-offline-database
with open('anime-offline-database.json', encoding='utf-8') as f:
    anime_db = json.load(f)

# Setup
matched_list = {'data': []}
unmatched_list = []

# Index the offline DB by title once (first occurrence wins, matching the
# original linear scan) instead of rescanning the whole DB per MAL entry.
db_by_title = {}
for anime in anime_db['data']:
    db_by_title.setdefault(anime['title'], anime)

# Match the title from the MAL API to the offline DB
for mal in mal_anime:
    anime = db_by_title.get(mal['title'])
    if anime is None:
        # Create a list of unmatched titles for manual follow-up
        unmatched_list.append(mal['title'])
    else:
        anime['mal_id'] = mal['id']
        matched_list['data'].append(anime)

# Write to disk the matched titles
with open('matched-anime-list.json', 'w', encoding='utf-8') as f:
    json.dump(matched_list, f, ensure_ascii=False, indent=2)

# Print out unmatched titles
print(f'Could not match the following, add manually (if wanted)\n: {unmatched_list}')
|
||||
|
||||
|
388
tools/parse-anime.py
Normal file
388
tools/parse-anime.py
Normal file
@@ -0,0 +1,388 @@
|
||||
import json
|
||||
import pandas as pd
|
||||
|
||||
# Skip entries if these match exactly.
# NOTE(review): entries written with "\/" (e.g. "Fate\/Zero") contain a
# literal backslash in the Python string, so they can never equal a real
# title -- confirm whether they were meant to be the plain "/" forms that
# also appear below.
remove_anime = [
    # Other seasons
    "Initial D Fifth Stage",
    "Initial D Final Stage",
    "Initial D Fourth Stage",
    "Initial D Second Stage",
    "Initial D Third Stage",
    "Tottoko Hamtaro (2012)",
    "Tottoko Hamtaro Dechu",
    "Tottoko Hamtarou Hai!",
    "Tottoko Hamtarou: Hamu Hamu Paradichu!",
    "Naruto (Shinsaku Anime)",
    "Naruto SD: Rock Lee no Seishun Full-Power Ninden",
    ".hack//Roots",
    ".hack//Tasogare no Udewa Densetsu",
    ".hack//The Movie: Sekai no Mukou ni",
    "Akira (Shin Anime)",
    "Eureka Seven AO",
    "Escaflowne",
    "Psycho-Pass RE:Start",
    "Psycho-Pass 3",
    "Gundam Seed Destiny HD Remaster",
    "Gundam: G no Reconguista",
    "Kidou Senshi Gundam SEED Destiny",
    "Kidou Senshi Gundam: Tekketsu no Orphans - Tokubetsu-hen",
    "Mobile Suit Gundam 00: 10th Anniversary Project",
    "Mobile Suit Gundam Seed HD Remaster",
    "Mobile Suit Gundam UC2",
    "Mobile Suit SD Gundam The Movie: Musha Knight Commando: SD Gundam Scramble",
    "Space Gundam V",
    "Gundam Build Fighters",
    "Bleach: Sennen Kessen-hen",
    "BLEACH: Sennen Kessen-hen 3rd Cour",
    "BLEACH: Sennen Kessen-hen 4th Cour",
    "Bocchi the Rock! Movie",
    "Jujutsu Kaisen 0 Movie",
    "Dragon Ball GT",
    "Dragon Ball Kai",
    "Dragon Ball Kai (2014)",
    "Shingeki! Kyojin Chuugakkou",
    "Meitantei Conan: Zero no Tea Time",
    "Meitantei Conan: Hannin no Hanzawa-san",
    "Mashin Eiyuuden Wataru 2",
    "One Piece: Mugiwara no Ichimi \u2013 Minna e \u201cTearai, Suimin o!\u201d Kodomo-tachi Ouen SP",
    "Gintama.: Porori-hen",
    "Gintama.: Shirogane no Tamashii-hen",
    "Hunter x Hunter (2011)",
    "Huoyan Shan Lixian Ji",
    "Huyao Xiao Hongniang Movie: Xia Sha",
    "Fullmetal Alchemist",
    "Fushigi Dagashiya: Zenitendou Movie - Tsuri Taiyaki",
    "Mirai Shounen Conan 2: Taiga Daibouken",
    "MIRROR",
    "Pokemon Housoukyoku",
    "Pokemon (2019)",
    "Sword Art Online Alternative: Gun Gale Online",
    "Sword Art Online II",
    "Sword Art Online: Alicization",
    "Sylvanian Families: Freya no Happy Diary",
    "Sylvanian Families: Mini Story",
    "Kino no Tabi: The Beautiful World - The Animated Series",
    "Kanon",
    "Clannad Movie",
    "Toaru Majutsu no Index Movie: Endymion no Kiseki",
    "Toaru Majutsu no Index II",
    "Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie",
    "Cowboy Bebop: Tengoku no Tobira",
    "Suzumiya Haruhi no Shoushitsu",
    "Koukaku Kidoutai Nyuumon Arise",
    "Koukaku Kidoutai Arise: Alternative Architecture",
    "Koukaku Kidoutai: Stand Alone Complex - Tachikoma na Hibi (TV)",
    "Koukaku Kidoutai: Stand Alone Complex 2nd GIG",
    "Yu\u2606Gi\u2606Oh! 5D's",
    "Yu\u2606Gi\u2606Oh! Arc-V",
    "Yu\u2606Gi\u2606Oh! (Movie)",
    "Yu\u2606Gi\u2606Oh! Duel Monsters ALEX",
    "Yu\u2606Gi\u2606Oh! Go Rush!",
    "Yu\u2606Gi\u2606Oh! Go Rush!!",
    "Yu\u2606Gi\u2606Oh! Sevens",
    "Yu\u2606Gi\u2606Oh! VRAINS",
    "Yu\u2606Gi\u2606Oh! Zexal",
    "Yu\u2606Gi\u2606Oh! Zexal Second",
    "InuYasha: Kanketsu-hen",
    "Lupin the Third: Mine Fujiko to Iu Onna",
    "Hidan no Aria AA",
    "Higashi no Eden: Air Communication",
    "Higurashi no Naku Koro ni Gou",
    "Higurashi no Naku Koro ni Sotsu",
    "Himawari!!",
    "Zutto Mae kara Suki deshita. Kokuhaku Jikkou Iinkai",
    "Fairy Tail: 100 Years Quest",
    "Hong Mao Lan Tu MTV",
    "Fate\/stay night: Unlimited Blade Works",
    "Fate\/Zero",
    "Fate\/Zero Cafe",
    "Final Fantasy VII: Advent Children - Venice Film Festival Footage",
    "FLCL: Shoegaze",
    "Free! Dive to the Future: Ima kara demo Wakaru \u201cFree! Series\u201d",
    "Fruits Basket 1st Season",
    "Fruits Basket: Prelude",
    "Fate/Extra: Last Encore",
    "Fate/Apocrypha",
    "Fate/Grand Order: Zettai Majuu Sensen Babylonia",
    "Fate/Extra: Last Encore - Illustrias Tendousetsu",
    "Fate/kaleid liner Prisma\u2606Illya: Prisma\u2606Phantasm",
    "Fate/stay night: Unlimited Blade Works",
    "Fate/Zero",
    "Fate/Zero Cafe",
    "Time Bokan 2000: Kaitou Kiramekiman",
    "Time Bokan 24",
    "Zombieland Saga Movie",
    "Zoids: Chaotic Century",
    "Zoids: Guardian Force",
    "Queen's Blade: Rebellion",
    "Queen's Blade: Gyokuza wo Tsugu Mono",
    "Shen Bing Xiaojiang Movie",
    "Kono Subarashii Sekai ni Bakuen wo!",
    "Kono Subarashii Sekai ni Shukufuku wo! 2",
    "Kono Subarashii Sekai ni Shukufuku wo! Movie: Kurenai Densetsu",
    "Little Witch Academia: Mahoujikake no Parade",
    "Gochuumon wa Usagi desu ka?? Dear My Sister",
    "Break Blade Movie 3: Kyoujin no Ato",
    "Saint\u2606Oniisan (Movie)",
    "Bungou Stray Dogs: Dead Apple",
    "Kidou Keisatsu Patlabor 2 the Movie",
    "Quanzhi Gaoshou: Dianfeng Rongyao",
    "Persona 3 the Movie 4: Winter of Rebirth",
    "Luo Xiao Hei Zhan Ji (Movie)",
    "Chuunibyou demo Koi ga Shitai! Movie: Take On Me",
    "Mahou Shoujo Lyrical Nanoha: The Movie 2nd A's",
    "Black Clover: Mahou Tei no Ken",
    "Natsume Yuujinchou: Ishi Okoshi to Ayashiki Raihousha",
    "Kyoukai no Kanata Movie 2: I'll Be Here - Mirai-hen",
    "Doraemon Movie 31: Shin Nobita to Tetsujin Heidan - Habatake Tenshi-tachi",
    "Stand By Me Doraemon 2",
    "Berserk: Ougon Jidai-hen III - Kourin",
    "K-On! Movie",
    "Violet Evergarden Gaiden: Eien to Jidou Shuki Ningyou",
    "Saenai Heroine no Sodatekata Fine",
    "Yuru Camp\u25b3 Movie",
    "The First Slam Dunk",
    "Kaguya-sama wa Kokurasetai: First Kiss wa Owaranai",

    # Similar synonyms
    "Shi Er Shengxiao: Fuxing Gao Zhao Zhu Xiao Ba",
    "Fuxing Ba Jie",
    "Onigiri",

]

# Skip these entries if it's a movie AND contains one of these
skip_movie_entries = [
    "Detective Conan",
    "Naruto",
    "Psycho-Pass",
    "Girls & Panzer",
    "Eureka Seven",
    "Hamtarou",
    "Initial D",
    "Gundam",
    "Kimetsu no Yaiba",
    "Boku no Hero Academia",
    "Bleach",
    "Dragon Ball",
    "Attack on Titan",
    "Code Geass",
    "Made in Abyss",
    "One Piece",
    "JoJo's Bizarre Adventure",
    "YuYu Hakusho",
    "Haikyu!!",
    "Gintama",
    "Hunter x Hunter",
    "Fullmetal Alchemist",
    "Mirai Shounen Conan",
    "Pokemon",
    "Pororo",
    "Power Battle Watch Car",
    "Precure",
    "Sword Art Online",
    "Sylvanian Families",
    "Kino no Tabi",
    "Gekijouban",
    "Ginga Tetsudou",
    "GHOST IN THE SHELL",
    "Ghost in the Shell",
    "Yu\u2606Gi\u2606Oh!",
    "InuYasha",
    "Lupin III",
    "Hibike! Euphonium",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Hinomaru Hatanosuke",
    "FLCL",
    "Free!",
    "Fate/Grand Order",
]

# Skip these entries if it's a TV and contains one of these:
# (several keywords carry a trailing space on purpose, to match whole words)
skip_tv_entries = [
    "Huo Xing Wa",
    "Huoli Shaonian Wang",
    "Huoxing Wa",
    "Pocket Monsters XY",
    "Pororo",
    "Hime Chen",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Flowering Heart",
    "Fu Guo",
    "Fate/kaleid liner Prisma",
    "Fei ",
    "Gangtie Feilong",
    "Kuaile ",
    "Tianyan",
    "Time Bokan Series",
    "Lixian",
    "Zhang ",
    "Zhen ",
    "Zhi ",
    "Zui ",
    "Zoids ",
    "Zi ",
    "Qi ",
    "Quwei",
    "Mengxiang",
    "Xiao ",
    "Xun",
    "Liang",
    "Xiaojiang",
    "Shen ",
    "Konglong",
    "Xi ",
    "Xiaolong",
    "Xiaoxiong",
    "Xiaoyuan",
    "Xin ",
    "Xing ",
    "Xiaokang",
    "Xiaohu",
    "Xianggu",
    "Wu ",
    "Wudang"
]

# Skip entries if the title contains 'Season xx' (or another later-season marker)
skip_seasons_entries = [
    "Season 0",
    "Season 2",
    "Season 3",
    "Season 4",
    "Season 5",
    "Season 6",
    "Season 7",
    "Season 8",
    "Season 9",
    "Season 10",
    "Season 11",
    "Season 12",
    "Season 13",
    "Season 14",
    "Season 15",
    "Season 16",
    "Season 17",
    "Season 18",
    "Season 19",
    "Season 20",
    "season 2",
    "season 3",
    "season 4",
    "season 5",
    "season 6",
    "season 7",
    "season 8",
    "season 9",
    "2nd Season",
    "3rd Season",
    "4th Season",
    "5th Season",
    "6th Season",
    "7th Season",
    "8th Season",
    "9th Season",
    "10th Season",
    "11th Season",
    "Second Season",
    "Third Season",
    "Season II",
    "Season III",
    "Season Two",
    "Part 2",
    "Part 3",
    "Part 4",
    "Part 5",
    "Part 6",
]
|
||||
|
||||
# Matched MAL/offline-DB entries produced by match-mal-offlinedb.py.
with open('matched-anime-list.json', encoding='utf-8') as f:
    data = json.load(f)
parsed = []  # list of parsed entries

for i in data['data']:

    # Only keep movies or TV shows
    if i['type'] not in ('MOVIE', 'TV'):
        continue

    # Remove exact-title entries on the manual blocklist
    if i['title'] in remove_anime:
        continue

    # Remove unwanted entries if a blocked keyword is in the title AND a movie
    if i['type'] == 'MOVIE' and any(movies in i['title'] for movies in skip_movie_entries):
        continue

    # Remove unwanted entries if a blocked keyword is in the title AND a TV
    if i['type'] == 'TV' and any(tv in i['title'] for tv in skip_tv_entries):
        continue

    # Remove entries whose title looks like a later season/part
    if any(seasons in i['title'] for seasons in skip_seasons_entries):
        continue

    # Cycle through the synonyms: toss the whole entry when a synonym
    # reveals it is a later season (or, for movies, a blocked franchise);
    # otherwise keep only the ASCII synonyms.
    toss_based_on_synonym = False
    new_synonyms = []
    for j in i['synonyms']:
        if any(seasons in j for seasons in skip_seasons_entries):
            toss_based_on_synonym = True
            break
        if i['type'] == 'MOVIE' and any(movies in j for movies in skip_movie_entries):
            toss_based_on_synonym = True
            break
        # Only keep synonyms that don't have unicode in them
        if j.isascii():
            new_synonyms.append(j)

    if toss_based_on_synonym:
        continue

    i['synonyms'] = new_synonyms
    parsed.append(i)


# Convert to dataframe for further parsing
df = pd.DataFrame(parsed)
df = df.drop(['sources', 'status', 'picture', 'thumbnail', 'relations', 'tags', 'episodes', 'animeSeason'], axis=1)  # remove columns

# Outputs
df.reset_index().to_json(r'parsed-anime-list.json', orient='records', indent=2)

# Remove additional columns for mini version
df = df.drop(['type'], axis=1)  # remove columns
df.reset_index().to_json(r'parsed-anime-list-mini.json', orient='records')
|
Reference in New Issue
Block a user