initial upload
.gitignore (vendored, Normal file, 164 lines)
@@ -0,0 +1,164 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

.creds
tools/*.json
.devcontainer
Dockerfile (Normal file, 10 lines)
@@ -0,0 +1,10 @@
FROM python:3.11-bookworm

WORKDIR /api

COPY requirements.txt .
COPY app/ ./app

RUN pip install --no-cache-dir -r requirements.txt

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
README.md (Normal file, 18 lines)
@@ -0,0 +1,18 @@
# API
A simple API built with FastAPI that exposes a few personal routes.

## Running
Run the API with:
```
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
```

## Routes
### /ag-metadata
Fetches anime metadata from MyAnimeList for a given MAL ID.

### /ag-search
Performs a fuzzy title search over the parsed anime list, similar to animeguess.moe.

# Tools
Extra standalone scripts used to build and parse the anime list data.
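As a quick way to exercise the two routes documented in README.md above, here is a minimal sketch using the `requests` library (already pinned in requirements.txt). It assumes the server is running locally on port 8000 as shown in the Running section; the MAL ID and the query string are placeholder example values, and the request bodies follow the `Metadata` and `Search` models defined in app/routers/animeguess.py.
```
import requests

BASE = 'http://localhost:8000'  # assumes the uvicorn command from the Running section

# /ag-metadata expects a JSON body matching the Metadata model: {'id': '<MAL id>'}
meta = requests.post(f'{BASE}/ag-metadata', json={'id': '5114'})  # 5114 is an example MAL ID
print(meta.json())

# /ag-search expects a JSON body matching the Search model: {'query': '<at least 3 characters>'}
search = requests.post(f'{BASE}/ag-search', json={'query': 'fullmetal'})  # example query
print(search.json())
```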
app/__init__.py (Normal file, 0 lines)
app/dependencies.py (Normal file, 20 lines)
@@ -0,0 +1,20 @@
import json

# Helper functions
def load_animeguess_anime_list():
    """
    Load the parsed anime list from disk into a module-level global
    """
    global animeguess_anime_list
    with open('app/parsed-anime-list-mini.json') as f:
        animeguess_anime_list = json.load(f)

def return_animeguess_anime_list():
    """
    Return the parsed anime list

    Returns:
        JSON:
            parsed anime list
    """
    return animeguess_anime_list
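A minimal sketch of how these two helpers are meant to be used together, which is the same pattern app/main.py follows: load once at startup, then read the module-level global through the accessor. It assumes it is run from the repository root so the relative path to app/parsed-anime-list-mini.json resolves.
```
from app.dependencies import load_animeguess_anime_list, return_animeguess_anime_list

# Populate the module-level global from app/parsed-anime-list-mini.json
load_animeguess_anime_list()

# Any later caller reads the already-loaded list through the accessor
anime_list = return_animeguess_anime_list()
print(len(anime_list), 'entries loaded')
```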
app/main.py (Normal file, 28 lines)
@@ -0,0 +1,28 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from .dependencies import load_animeguess_anime_list

from .routers import animeguess

# Load the anime list at import time so the routers can read it
load_animeguess_anime_list()

app = FastAPI()

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"]
)

# Routes
app.include_router(animeguess.router)

# Health check
@app.get("/ping")
async def ping():
    return {"Ping": "Pong"}
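A minimal sketch for checking the app wiring end to end with FastAPI's TestClient, hitting the /ping health check. Run it from the repository root so the data file path resolves; note that the TestClient in this Starlette version relies on httpx, which is not pinned in requirements.txt, so treat that extra dependency as an assumption.
```
from fastapi.testclient import TestClient

from app.main import app  # importing this also loads the anime list

client = TestClient(app)

response = client.get("/ping")
assert response.status_code == 200
assert response.json() == {"Ping": "Pong"}
```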
app/parsed-anime-list-mini.json (Normal file, 1 line)
File diff suppressed because one or more lines are too long
app/routers/__init__.py (Normal file, 0 lines)
app/routers/animeguess.py (Normal file, 145 lines)
@@ -0,0 +1,145 @@
import os
import requests

from ..dependencies import return_animeguess_anime_list
from fastapi import APIRouter
from pydantic import BaseModel

from rapidfuzz import fuzz
from operator import itemgetter

router = APIRouter()

# Helper functions
def return_bad():
    """
    Returns a JSON payload describing a bad request

    Returns:
        JSON:
            statusCode: 400
            headers: Content-Type text/plain
            body: 400: Bad Request
    """
    return {
        'statusCode': 400,
        'headers': {
            'Content-Type': 'text/plain'
        },
        'body': '400: Bad Request'
    }

# Models
class Metadata(BaseModel):
    id: str

class Search(BaseModel):
    query: str

# Routes
@router.post("/ag-metadata")
async def ag_metadata(metadata: Metadata):
    """
    Gets the metadata from MyAnimeList for the supplied MAL ID

    Requires:
        POST:
            'id': MyAnimeList ID of the anime

    Returns:
        JSON:
            metadata from MyAnimeList
    """

    if not metadata.id.isdigit():
        return return_bad()

    # MAL API key
    CLIENT_ID = os.getenv('MAL_CLIENT_ID')

    # Get data from MAL
    url = f'https://api.myanimelist.net/v2/anime/{metadata.id}?fields=title,studios,genres,mean,start_season'
    resp = requests.get(url, headers={
        'X-MAL-CLIENT-ID': CLIENT_ID
    })

    return resp.json()

@router.post("/ag-search")
async def ag_search(search: Search):
    """
    Perform a fuzzy search for an anime title

    Requires:
        POST - 'query': search query

    Returns:
        JSON - possible anime titles
    """

    # Ensure the search query is at least 3 characters
    if len(search.query) < 3:
        return return_bad()
    search_item = search.query

    # Keep track of titles, ids, and scores that are possible matches
    data = return_animeguess_anime_list()
    anime_score_list = []

    # Cycle through the parsed list
    for item in data:

        # Perform a fuzzy search against the title
        fuzzy_search = fuzz.ratio(search_item.lower(), item['title'].lower())

        # Check if the search item is in the title
        in_title = search_item.lower() in item['title'].lower()

        # If the query is in the title or has a high fuzzy search score
        if in_title or fuzzy_search > 65:

            # If the query is in the title, give it a higher score
            if in_title:
                # If the query is the same as the title, give it the highest score
                if search_item.lower() == item['title'].lower():
                    anime_score_list.append({'title': item['title'], 'score': 200, 'mal_id': item['mal_id']})
                else:
                    anime_score_list.append({'title': item['title'], 'score': 100, 'mal_id': item['mal_id']})
            # Else, set it to the fuzzy search score
            else:
                anime_score_list.append({'title': item['title'], 'score': fuzzy_search, 'mal_id': item['mal_id']})

        # Check the synonyms if the title did not match directly or fuzzily
        else:
            for synonym in item['synonyms']:

                # Perform a fuzzy search against each synonym
                fuzzy_search = fuzz.ratio(search_item.lower(), synonym.lower())

                # Check if the search item is in the synonym
                in_synonym = search_item.lower() in synonym.lower()

                # If the query is in the synonym or has a high fuzzy search score
                if in_synonym or fuzzy_search > 65:

                    # If the query is in the synonym, give it a higher score
                    if in_synonym:
                        # If the query is the same as the synonym, give it the highest score
                        if search_item.lower() == synonym.lower():
                            anime_score_list.append({'title': f'{item["title"]} [{synonym}]', 'score': 200, 'mal_id': item['mal_id']})
                        else:
                            anime_score_list.append({'title': f'{item["title"]} [{synonym}]', 'score': 100, 'mal_id': item['mal_id']})
                        break
                    # Else, give it the fuzzy search score
                    else:
                        anime_score_list.append({'title': f'{item["title"]} [{synonym}]', 'score': fuzzy_search, 'mal_id': item['mal_id']})
                        break

    # Sort the possible anime titles by score
    anime_score_list = sorted(anime_score_list, key=itemgetter('score'), reverse=True)

    # Remove the score key since it is not needed after the sort
    for anime in anime_score_list:
        anime.pop('score')

    return anime_score_list
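To make the scoring tiers in /ag-search concrete, here is a small standalone sketch of the same logic applied to a hand-made list: an exact title match scores 200, a substring match scores 100, and anything else falls back to the fuzz.ratio value, kept only when it exceeds 65. The sample entries are illustrative and not taken from the real data file.
```
from operator import itemgetter
from rapidfuzz import fuzz

sample = [
    {'title': 'Fullmetal Alchemist: Brotherhood', 'mal_id': 5114},
    {'title': 'Full Metal Panic!', 'mal_id': 71},
]
query = 'fullmetal alchemist: brotherhood'

scored = []
for item in sample:
    ratio = fuzz.ratio(query.lower(), item['title'].lower())
    in_title = query.lower() in item['title'].lower()
    if query.lower() == item['title'].lower():
        score = 200      # exact match
    elif in_title:
        score = 100      # query contained in the title
    elif ratio > 65:
        score = ratio    # close fuzzy match
    else:
        continue         # too dissimilar, drop it
    scored.append({'title': item['title'], 'score': score, 'mal_id': item['mal_id']})

print(sorted(scored, key=itemgetter('score'), reverse=True))
```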
requirements.txt (Normal file, 17 lines)
@@ -0,0 +1,17 @@
annotated-types==0.5.0
anyio==3.7.1
certifi==2023.5.7
charset-normalizer==3.2.0
click==8.1.6
fastapi==0.100.0
h11==0.14.0
idna==3.4
pydantic==2.0.3
pydantic_core==2.3.0
rapidfuzz==3.1.1
requests==2.31.0
sniffio==1.3.0
starlette==0.27.0
typing_extensions==4.7.1
urllib3==2.0.3
uvicorn==0.23.1
tools/get-mal.py (Normal file, 53 lines)
@@ -0,0 +1,53 @@
import requests
import time
import json
import os
# Setup
CLIENT_ID = os.getenv('MAL_CLIENT_ID')
offset = 0
anime_list = []

# Get the list of TV anime using the MAL API
while offset < 5000:

    # Can only get 500 at a time
    url = f'https://api.myanimelist.net/v2/anime/ranking?ranking_type=tv&limit=500&offset={offset}'
    resp = requests.get(url, headers={
        'X-MAL-CLIENT-ID': CLIENT_ID
    })
    anime = resp.json()

    # Add into our list
    for node in anime['data']:
        anime_list.append({'title': node['node']['title'], 'id': node['node']['id']})

    # Start at the next 500
    offset = offset + 500

    # Let's not spam the MAL API
    time.sleep(1)

# Get the list of movie anime using the MAL API
offset = 0
while offset < 250:

    # Limit to 250 entries at a time
    url = f'https://api.myanimelist.net/v2/anime/ranking?ranking_type=movie&limit=250&offset={offset}'
    resp = requests.get(url, headers={
        'X-MAL-CLIENT-ID': CLIENT_ID
    })
    anime = resp.json()

    # Add into our list
    for node in anime['data']:
        anime_list.append({'title': node['node']['title'], 'id': node['node']['id']})

    # Start at the next 250
    offset = offset + 250

    # Let's not spam the MAL API
    time.sleep(1)

# Write to disk
with open('mal.json', 'w') as f:
    json.dump(anime_list, f, ensure_ascii=False, indent=4)
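The two loops above share one pattern: page through the MAL ranking endpoint with limit/offset, collect each node's id and title, and sleep between requests. A sketch of that pattern factored into a single helper is below; `fetch_ranking` is a hypothetical name, not part of the commit, and it assumes the same MAL_CLIENT_ID environment variable.
```
import os
import time

import requests


def fetch_ranking(ranking_type, page_size, total):
    """Hypothetical helper: collect {'title', 'id'} pairs from the MAL ranking API."""
    client_id = os.getenv('MAL_CLIENT_ID')
    collected = []
    for offset in range(0, total, page_size):
        url = (f'https://api.myanimelist.net/v2/anime/ranking'
               f'?ranking_type={ranking_type}&limit={page_size}&offset={offset}')
        resp = requests.get(url, headers={'X-MAL-CLIENT-ID': client_id})
        for node in resp.json()['data']:
            collected.append({'title': node['node']['title'], 'id': node['node']['id']})
        time.sleep(1)  # be polite to the MAL API
    return collected


# Same coverage as the script above: top 5000 TV entries plus top 250 movies
anime_list = fetch_ranking('tv', 500, 5000) + fetch_ranking('movie', 250, 250)
```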
tools/match-mal-offlinedb.py (Normal file, 37 lines)
@@ -0,0 +1,37 @@
import json

# From get-mal.py
with open('mal.json') as f:
    mal_anime = json.load(f)

# File from https://github.com/manami-project/anime-offline-database
with open('anime-offline-database.json') as f:
    anime_db = json.load(f)

# Setup
matched_list = {'data': []}
unmatched_list = []

# Match the title from the MAL API to the offline DB
for mal in mal_anime:
    found = False

    for anime in anime_db['data']:
        if anime['title'] == mal['title']:
            anime['mal_id'] = mal['id']
            matched_list['data'].append(anime)
            found = True
            break

    # Build a list of unmatched titles
    if not found:
        unmatched_list.append(mal['title'])

# Write the matched titles to disk
with open('matched-anime-list.json', 'w') as f:
    json.dump(matched_list, f, ensure_ascii=False, indent=2)

# Print out the unmatched titles
print(f'Could not match the following, add manually (if wanted):\n{unmatched_list}')
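Because the matching above is an exact comparison on title, the nested loop can also be expressed with a one-time dict index over the offline database, turning each lookup into a single dictionary access. This is only an illustrative alternative, not what the script does, and it ignores the edge case of duplicate titles in the offline DB (the index keeps the last record per title, the loop above keeps the first).
```
import json

# Same inputs as the script above
with open('mal.json') as f:
    mal_anime = json.load(f)
with open('anime-offline-database.json') as f:
    anime_db = json.load(f)

# Build a title -> record index once, then match each MAL entry with a dict lookup
by_title = {anime['title']: anime for anime in anime_db['data']}

matched, unmatched = {'data': []}, []
for mal in mal_anime:
    anime = by_title.get(mal['title'])
    if anime is not None:
        anime['mal_id'] = mal['id']
        matched['data'].append(anime)
    else:
        unmatched.append(mal['title'])
```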
tools/parse-anime.py (Normal file, 388 lines)
@@ -0,0 +1,388 @@
import json
import pandas as pd

# Skip entries if these match exactly
remove_anime = [
    # Other seasons
    "Initial D Fifth Stage",
    "Initial D Final Stage",
    "Initial D Fourth Stage",
    "Initial D Second Stage",
    "Initial D Third Stage",
    "Tottoko Hamtaro (2012)",
    "Tottoko Hamtaro Dechu",
    "Tottoko Hamtarou Hai!",
    "Tottoko Hamtarou: Hamu Hamu Paradichu!",
    "Naruto (Shinsaku Anime)",
    "Naruto SD: Rock Lee no Seishun Full-Power Ninden",
    ".hack//Roots",
    ".hack//Tasogare no Udewa Densetsu",
    ".hack//The Movie: Sekai no Mukou ni",
    "Akira (Shin Anime)",
    "Eureka Seven AO",
    "Escaflowne",
    "Psycho-Pass RE:Start",
    "Psycho-Pass 3",
    "Gundam Seed Destiny HD Remaster",
    "Gundam: G no Reconguista",
    "Kidou Senshi Gundam SEED Destiny",
    "Kidou Senshi Gundam: Tekketsu no Orphans - Tokubetsu-hen",
    "Mobile Suit Gundam 00: 10th Anniversary Project",
    "Mobile Suit Gundam Seed HD Remaster",
    "Mobile Suit Gundam UC2",
    "Mobile Suit SD Gundam The Movie: Musha Knight Commando: SD Gundam Scramble",
    "Space Gundam V",
    "Gundam Build Fighters",
    "Bleach: Sennen Kessen-hen",
    "BLEACH: Sennen Kessen-hen 3rd Cour",
    "BLEACH: Sennen Kessen-hen 4th Cour",
    "Bocchi the Rock! Movie",
    "Jujutsu Kaisen 0 Movie",
    "Dragon Ball GT",
    "Dragon Ball Kai",
    "Dragon Ball Kai (2014)",
    "Shingeki! Kyojin Chuugakkou",
    "Meitantei Conan: Zero no Tea Time",
    "Meitantei Conan: Hannin no Hanzawa-san",
    "Mashin Eiyuuden Wataru 2",
    "One Piece: Mugiwara no Ichimi \u2013 Minna e \u201cTearai, Suimin o!\u201d Kodomo-tachi Ouen SP",
    "Gintama.: Porori-hen",
    "Gintama.: Shirogane no Tamashii-hen",
    "Hunter x Hunter (2011)",
    "Huoyan Shan Lixian Ji",
    "Huyao Xiao Hongniang Movie: Xia Sha",
    "Fullmetal Alchemist",
    "Fushigi Dagashiya: Zenitendou Movie - Tsuri Taiyaki",
    "Mirai Shounen Conan 2: Taiga Daibouken",
    "MIRROR",
    "Pokemon Housoukyoku",
    "Pokemon (2019)",
    "Sword Art Online Alternative: Gun Gale Online",
    "Sword Art Online II",
    "Sword Art Online: Alicization",
    "Sylvanian Families: Freya no Happy Diary",
    "Sylvanian Families: Mini Story",
    "Kino no Tabi: The Beautiful World - The Animated Series",
    "Kanon",
    "Clannad Movie",
    "Toaru Majutsu no Index Movie: Endymion no Kiseki",
    "Toaru Majutsu no Index II",
    "Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie",
    "Cowboy Bebop: Tengoku no Tobira",
    "Suzumiya Haruhi no Shoushitsu",
    "Koukaku Kidoutai Nyuumon Arise",
    "Koukaku Kidoutai Arise: Alternative Architecture",
    "Koukaku Kidoutai: Stand Alone Complex - Tachikoma na Hibi (TV)",
    "Koukaku Kidoutai: Stand Alone Complex 2nd GIG",
    "Yu\u2606Gi\u2606Oh! 5D's",
    "Yu\u2606Gi\u2606Oh! Arc-V",
    "Yu\u2606Gi\u2606Oh! (Movie)",
    "Yu\u2606Gi\u2606Oh! Duel Monsters ALEX",
    "Yu\u2606Gi\u2606Oh! Go Rush!",
    "Yu\u2606Gi\u2606Oh! Go Rush!!",
    "Yu\u2606Gi\u2606Oh! Sevens",
    "Yu\u2606Gi\u2606Oh! VRAINS",
    "Yu\u2606Gi\u2606Oh! Zexal",
    "Yu\u2606Gi\u2606Oh! Zexal Second",
    "InuYasha: Kanketsu-hen",
    "Lupin the Third: Mine Fujiko to Iu Onna",
    "Hidan no Aria AA",
    "Higashi no Eden: Air Communication",
    "Higurashi no Naku Koro ni Gou",
    "Higurashi no Naku Koro ni Sotsu",
    "Himawari!!",
    "Zutto Mae kara Suki deshita. Kokuhaku Jikkou Iinkai",
    "Fairy Tail: 100 Years Quest",
    "Hong Mao Lan Tu MTV",
    "Fate/stay night: Unlimited Blade Works",
    "Fate/Zero",
    "Fate/Zero Cafe",
    "Final Fantasy VII: Advent Children - Venice Film Festival Footage",
    "FLCL: Shoegaze",
    "Free! Dive to the Future: Ima kara demo Wakaru \u201cFree! Series\u201d",
    "Fruits Basket 1st Season",
    "Fruits Basket: Prelude",
    "Fate/Extra: Last Encore",
    "Fate/Apocrypha",
    "Fate/Grand Order: Zettai Majuu Sensen Babylonia",
    "Fate/Extra: Last Encore - Illustrias Tendousetsu",
    "Fate/kaleid liner Prisma\u2606Illya: Prisma\u2606Phantasm",
    "Fate/stay night: Unlimited Blade Works",
    "Fate/Zero",
    "Fate/Zero Cafe",
    "Time Bokan 2000: Kaitou Kiramekiman",
    "Time Bokan 24",
    "Zombieland Saga Movie",
    "Zoids: Chaotic Century",
    "Zoids: Guardian Force",
    "Queen's Blade: Rebellion",
    "Queen's Blade: Gyokuza wo Tsugu Mono",
    "Shen Bing Xiaojiang Movie",
    "Kono Subarashii Sekai ni Bakuen wo!",
    "Kono Subarashii Sekai ni Shukufuku wo! 2",
    "Kono Subarashii Sekai ni Shukufuku wo! Movie: Kurenai Densetsu",
    "Little Witch Academia: Mahoujikake no Parade",
    "Gochuumon wa Usagi desu ka?? Dear My Sister",
    "Break Blade Movie 3: Kyoujin no Ato",
    "Saint\u2606Oniisan (Movie)",
    "Bungou Stray Dogs: Dead Apple",
    "Kidou Keisatsu Patlabor 2 the Movie",
    "Quanzhi Gaoshou: Dianfeng Rongyao",
    "Persona 3 the Movie 4: Winter of Rebirth",
    "Luo Xiao Hei Zhan Ji (Movie)",
    "Chuunibyou demo Koi ga Shitai! Movie: Take On Me",
    "Mahou Shoujo Lyrical Nanoha: The Movie 2nd A's",
    "Black Clover: Mahou Tei no Ken",
    "Natsume Yuujinchou: Ishi Okoshi to Ayashiki Raihousha",
    "Kyoukai no Kanata Movie 2: I'll Be Here - Mirai-hen",
    "Doraemon Movie 31: Shin Nobita to Tetsujin Heidan - Habatake Tenshi-tachi",
    "Stand By Me Doraemon 2",
    "Berserk: Ougon Jidai-hen III - Kourin",
    "K-On! Movie",
    "Violet Evergarden Gaiden: Eien to Jidou Shuki Ningyou",
    "Saenai Heroine no Sodatekata Fine",
    "Yuru Camp\u25b3 Movie",
    "The First Slam Dunk",
    "Kaguya-sama wa Kokurasetai: First Kiss wa Owaranai",

    # Similar synonyms
    "Shi Er Shengxiao: Fuxing Gao Zhao Zhu Xiao Ba",
    "Fuxing Ba Jie",
    "Onigiri",

]

# Skip these entries if it's a movie AND the title contains one of these
skip_movie_entries = [
    "Detective Conan",
    "Naruto",
    "Psycho-Pass",
    "Girls & Panzer",
    "Eureka Seven",
    "Hamtarou",
    "Initial D",
    "Gundam",
    "Kimetsu no Yaiba",
    "Boku no Hero Academia",
    "Bleach",
    "Dragon Ball",
    "Attack on Titan",
    "Code Geass",
    "Made in Abyss",
    "One Piece",
    "JoJo's Bizarre Adventure",
    "YuYu Hakusho",
    "Haikyu!!",
    "Gintama",
    "Hunter x Hunter",
    "Fullmetal Alchemist",
    "Mirai Shounen Conan",
    "Pokemon",
    "Pororo",
    "Power Battle Watch Car",
    "Precure",
    "Sword Art Online",
    "Sylvanian Families",
    "Kino no Tabi",
    "Gekijouban",
    "Ginga Tetsudou",
    "GHOST IN THE SHELL",
    "Ghost in the Shell",
    "Yu\u2606Gi\u2606Oh!",
    "InuYasha",
    "Lupin III",
    "Hibike! Euphonium",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Hinomaru Hatanosuke",
    "FLCL",
    "Free!",
    "Fate/Grand Order",
]

# Skip these entries if it's a TV show AND the title contains one of these:
skip_tv_entries = [
    "Huo Xing Wa",
    "Huoli Shaonian Wang",
    "Huoxing Wa",
    "Pocket Monsters XY",
    "Pororo",
    "Hime Chen",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Flowering Heart",
    "Fu Guo",
    "Fate/kaleid liner Prisma",
    "Fei ",
    "Gangtie Feilong",
    "Kuaile ",
    "Tianyan",
    "Time Bokan Series",
    "Lixian",
    "Zhang ",
    "Zhen ",
    "Zhi ",
    "Zui ",
    "Zoids ",
    "Zi ",
    "Qi ",
    "Quwei",
    "Mengxiang",
    "Xiao ",
    "Xun",
    "Liang",
    "Xiaojiang",
    "Shen ",
    "Konglong",
    "Xi ",
    "Xiaolong",
    "Xiaoxiong",
    "Xiaoyuan",
    "Xin ",
    "Xing ",
    "Xiaokang",
    "Xiaohu",
    "Xianggu",
    "Wu ",
    "Wudang"
]

# Skip entries if the title contains 'Season xx'
skip_seasons_entries = [
    "Season 0",
    "Season 2",
    "Season 3",
    "Season 4",
    "Season 5",
    "Season 6",
    "Season 7",
    "Season 8",
    "Season 9",
    "Season 10",
    "Season 11",
    "Season 12",
    "Season 13",
    "Season 14",
    "Season 15",
    "Season 16",
    "Season 17",
    "Season 18",
    "Season 19",
    "Season 20",
    "season 2",
    "season 3",
    "season 4",
    "season 5",
    "season 6",
    "season 7",
    "season 8",
    "season 9",
    "2nd Season",
    "3rd Season",
    "4th Season",
    "5th Season",
    "6th Season",
    "7th Season",
    "8th Season",
    "9th Season",
    "10th Season",
    "11th Season",
    "Second Season",
    "Third Season",
    "Season II",
    "Season III",
    "Season Two",
    "Part 2",
    "Part 3",
    "Part 4",
    "Part 5",
    "Part 6",
]

with open('matched-anime-list.json') as f:
    data = json.load(f)
parsed = []  # list of parsed entries

for i in data['data']:

    # Only keep movies or TV shows
    if i['type'] == 'MOVIE' or i['type'] == 'TV':

        skip_loop = False

        # Skip unwanted entries whose title matches exactly
        if i['title'] in remove_anime:
            continue

        # Skip unwanted entries if it's a movie and the title contains a skip phrase
        if i['type'] == 'MOVIE':
            for movies in skip_movie_entries:
                if movies in i['title']:
                    skip_loop = True
                    break

            if skip_loop:
                continue

        # Skip unwanted entries if it's a TV show and the title contains a skip phrase
        if i['type'] == 'TV':
            for tv in skip_tv_entries:
                if tv in i['title']:
                    skip_loop = True
                    break

            if skip_loop:
                continue

        # Skip unwanted entries if the title contains a season marker
        for seasons in skip_seasons_entries:
            if seasons in i['title']:
                skip_loop = True
                break

        if skip_loop:
            continue

        toss_based_on_synonym = False

        # Cycle through the synonyms
        new_synonyms = []
        for j in i['synonyms']:

            # Toss the entry if a synonym contains a season marker
            for seasons in skip_seasons_entries:
                if seasons in j:
                    toss_based_on_synonym = True
                    break

            # Toss the entry if it's a movie and a synonym contains a skip phrase
            if i['type'] == 'MOVIE':
                for movies in skip_movie_entries:
                    if movies in j:
                        toss_based_on_synonym = True
                        break

            if toss_based_on_synonym:
                break

            # Only keep synonyms that are plain ASCII
            if j.isascii():
                new_synonyms.append(j)

        if toss_based_on_synonym:
            continue

        i['synonyms'] = new_synonyms
        parsed.append(i)


# Convert to a dataframe for further parsing
df = pd.DataFrame(parsed)
df = df.drop(['sources', 'status', 'picture', 'thumbnail', 'relations', 'tags', 'episodes', 'animeSeason'], axis=1)  # remove columns

# Outputs
df.reset_index().to_json(r'parsed-anime-list.json', orient='records', indent=2)

# Remove additional columns for the mini version
df = df.drop(['type'], axis=1)  # remove columns
df.reset_index().to_json(r'parsed-anime-list-mini.json', orient='records')
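After running this script, the mini output should contain only the fields the API actually reads (title, synonyms, mal_id, plus the index column added by reset_index), assuming the offline-database snapshot carries no extra fields beyond the ones dropped above. A small sketch for sanity-checking the generated file:
```
import json

with open('parsed-anime-list-mini.json') as f:
    parsed = json.load(f)

print(len(parsed), 'entries')
print(sorted(parsed[0].keys()))  # typically ['index', 'mal_id', 'synonyms', 'title']
print(parsed[0]['title'], parsed[0]['mal_id'])
```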