initial upload

This commit is contained in:
2023-07-19 00:47:09 -05:00
commit c993ea8b6a
13 changed files with 881 additions and 0 deletions

53
tools/get-mal.py Normal file
View File

@@ -0,0 +1,53 @@
import requests
import time
import json
import os
# Setup
CLIENT_ID = os.getenv('MAL_CLIENT_ID')
if not CLIENT_ID:
    # Fail early with a clear message instead of an obscure error once the
    # API response turns out not to contain the expected payload.
    raise SystemExit('MAL_CLIENT_ID environment variable is not set')


def fetch_ranking(ranking_type, page_size, total):
    """Download one MAL ranking list page by page.

    Args:
        ranking_type: Value for the ``ranking_type`` query parameter
            (this script uses ``'tv'`` and ``'movie'``).
        page_size: Number of entries requested per call (``limit``).
        total: Exclusive upper bound for the ``offset`` values requested.

    Returns:
        A list of ``{'title': ..., 'id': ...}`` dicts in ranking order.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    entries = []
    for offset in range(0, total, page_size):
        url = f'https://api.myanimelist.net/v2/anime/ranking?ranking_type={ranking_type}&limit={page_size}&offset={offset}'
        resp = requests.get(
            url,
            headers={'X-MAL-CLIENT-ID': CLIENT_ID},
            timeout=30,  # never hang forever on a stalled connection
        )
        # Surface 4xx/5xx responses instead of failing later on a missing key
        resp.raise_for_status()
        # Add into our list
        for item in resp.json()['data']:
            node = item['node']
            entries.append({'title': node['title'], 'id': node['id']})
        # Let's not spam the MAL API
        time.sleep(1)
    return entries


# Get list of TV animes using MAL API (500 entries per request, 5000 total)
anime_list = fetch_ranking('tv', 500, 5000)
# Get the list of movie animes using MAL API (a single page of 250 entries)
anime_list.extend(fetch_ranking('movie', 250, 250))
# Write to disk; ensure_ascii=False emits raw non-ASCII titles, so the file
# encoding is pinned to UTF-8 rather than left to the platform default.
with open('mal.json', 'w', encoding='utf-8') as f:
    json.dump(anime_list, f, ensure_ascii=False, indent=4)

View File

@@ -0,0 +1,37 @@
import json
# From get-mal.py
with open('mal.json', encoding='utf-8') as f:
    mal_anime = json.load(f)
# File from https://github.com/manami-project/anime-offline-database
with open('anime-offline-database.json', encoding='utf-8') as f:
    anime_db = json.load(f)
# Index the offline DB by title so each lookup is O(1). setdefault keeps the
# FIRST entry for a duplicated title, which is the entry a linear scan from
# the top of the list would have found.
anime_by_title = {}
for anime in anime_db['data']:
    anime_by_title.setdefault(anime['title'], anime)
# Setup
matched_list = {'data': []}
unmatched_list = []
# Match the title from the MAL API to the offline DB
for mal in mal_anime:
    anime = anime_by_title.get(mal['title'])
    if anime is None:
        # Create a list of unmatched titles
        unmatched_list.append(mal['title'])
        continue
    # Annotate the offline-DB record (in place) with the MAL id
    anime['mal_id'] = mal['id']
    matched_list['data'].append(anime)
# Write to disk the matched titles; ensure_ascii=False emits raw non-ASCII
# text, so the file encoding is pinned to UTF-8.
with open('matched-anime-list.json', 'w', encoding='utf-8') as f:
    json.dump(matched_list, f, ensure_ascii=False, indent=2)
# Print out unmatched titles
print(f'Could not match the following, add manually (if wanted)\n: {unmatched_list}')

388
tools/parse-anime.py Normal file
View File

@@ -0,0 +1,388 @@
import json
import pandas as pd
# Skip entries if these match exactly (compared against the entry's title).
# NOTE: titles are matched after JSON decoding, so a slash must be written as
# "/" - the JSON-style "\/" is an invalid Python escape that keeps a literal
# backslash and can never match. The three entries that were spelled that way
# were removed; their correct spellings are in the "Fate/..." group below.
remove_anime = [
    # Other seasons
    "Initial D Fifth Stage",
    "Initial D Final Stage",
    "Initial D Fourth Stage",
    "Initial D Second Stage",
    "Initial D Third Stage",
    "Tottoko Hamtaro (2012)",
    "Tottoko Hamtaro Dechu",
    "Tottoko Hamtarou Hai!",
    "Tottoko Hamtarou: Hamu Hamu Paradichu!",
    "Naruto (Shinsaku Anime)",
    "Naruto SD: Rock Lee no Seishun Full-Power Ninden",
    ".hack//Roots",
    ".hack//Tasogare no Udewa Densetsu",
    ".hack//The Movie: Sekai no Mukou ni",
    "Akira (Shin Anime)",
    "Eureka Seven AO",
    "Escaflowne",
    "Psycho-Pass RE:Start",
    "Psycho-Pass 3",
    "Gundam Seed Destiny HD Remaster",
    "Gundam: G no Reconguista",
    "Kidou Senshi Gundam SEED Destiny",
    "Kidou Senshi Gundam: Tekketsu no Orphans - Tokubetsu-hen",
    "Mobile Suit Gundam 00: 10th Anniversary Project",
    "Mobile Suit Gundam Seed HD Remaster",
    "Mobile Suit Gundam UC2",
    "Mobile Suit SD Gundam The Movie: Musha Knight Commando: SD Gundam Scramble",
    "Space Gundam V",
    "Gundam Build Fighters",
    "Bleach: Sennen Kessen-hen",
    "BLEACH: Sennen Kessen-hen 3rd Cour",
    "BLEACH: Sennen Kessen-hen 4th Cour",
    "Bocchi the Rock! Movie",
    "Jujutsu Kaisen 0 Movie",
    "Dragon Ball GT",
    "Dragon Ball Kai",
    "Dragon Ball Kai (2014)",
    "Shingeki! Kyojin Chuugakkou",
    "Meitantei Conan: Zero no Tea Time",
    "Meitantei Conan: Hannin no Hanzawa-san",
    "Mashin Eiyuuden Wataru 2",
    "One Piece: Mugiwara no Ichimi \u2013 Minna e \u201cTearai, Suimin o!\u201d Kodomo-tachi Ouen SP",
    "Gintama.: Porori-hen",
    "Gintama.: Shirogane no Tamashii-hen",
    "Hunter x Hunter (2011)",
    "Huoyan Shan Lixian Ji",
    "Huyao Xiao Hongniang Movie: Xia Sha",
    "Fullmetal Alchemist",
    "Fushigi Dagashiya: Zenitendou Movie - Tsuri Taiyaki",
    "Mirai Shounen Conan 2: Taiga Daibouken",
    "MIRROR",
    "Pokemon Housoukyoku",
    "Pokemon (2019)",
    "Sword Art Online Alternative: Gun Gale Online",
    "Sword Art Online II",
    "Sword Art Online: Alicization",
    "Sylvanian Families: Freya no Happy Diary",
    "Sylvanian Families: Mini Story",
    "Kino no Tabi: The Beautiful World - The Animated Series",
    "Kanon",
    "Clannad Movie",
    "Toaru Majutsu no Index Movie: Endymion no Kiseki",
    "Toaru Majutsu no Index II",
    "Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie",
    "Cowboy Bebop: Tengoku no Tobira",
    "Suzumiya Haruhi no Shoushitsu",
    "Koukaku Kidoutai Nyuumon Arise",
    "Koukaku Kidoutai Arise: Alternative Architecture",
    "Koukaku Kidoutai: Stand Alone Complex - Tachikoma na Hibi (TV)",
    "Koukaku Kidoutai: Stand Alone Complex 2nd GIG",
    "Yu\u2606Gi\u2606Oh! 5D's",
    "Yu\u2606Gi\u2606Oh! Arc-V",
    "Yu\u2606Gi\u2606Oh! (Movie)",
    "Yu\u2606Gi\u2606Oh! Duel Monsters ALEX",
    "Yu\u2606Gi\u2606Oh! Go Rush!",
    "Yu\u2606Gi\u2606Oh! Go Rush!!",
    "Yu\u2606Gi\u2606Oh! Sevens",
    "Yu\u2606Gi\u2606Oh! VRAINS",
    "Yu\u2606Gi\u2606Oh! Zexal",
    "Yu\u2606Gi\u2606Oh! Zexal Second",
    "InuYasha: Kanketsu-hen",
    "Lupin the Third: Mine Fujiko to Iu Onna",
    "Hidan no Aria AA",
    "Higashi no Eden: Air Communication",
    "Higurashi no Naku Koro ni Gou",
    "Higurashi no Naku Koro ni Sotsu",
    "Himawari!!",
    "Zutto Mae kara Suki deshita. Kokuhaku Jikkou Iinkai",
    "Fairy Tail: 100 Years Quest",
    "Hong Mao Lan Tu MTV",
    "Final Fantasy VII: Advent Children - Venice Film Festival Footage",
    "FLCL: Shoegaze",
    "Free! Dive to the Future: Ima kara demo Wakaru \u201cFree! Series\u201d",
    "Fruits Basket 1st Season",
    "Fruits Basket: Prelude",
    "Fate/Extra: Last Encore",
    "Fate/Apocrypha",
    "Fate/Grand Order: Zettai Majuu Sensen Babylonia",
    "Fate/Extra: Last Encore - Illustrias Tendousetsu",
    "Fate/kaleid liner Prisma\u2606Illya: Prisma\u2606Phantasm",
    "Fate/stay night: Unlimited Blade Works",
    "Fate/Zero",
    "Fate/Zero Cafe",
    "Time Bokan 2000: Kaitou Kiramekiman",
    "Time Bokan 24",
    "Zombieland Saga Movie",
    "Zoids: Chaotic Century",
    "Zoids: Guardian Force",
    "Queen's Blade: Rebellion",
    "Queen's Blade: Gyokuza wo Tsugu Mono",
    "Shen Bing Xiaojiang Movie",
    "Kono Subarashii Sekai ni Bakuen wo!",
    "Kono Subarashii Sekai ni Shukufuku wo! 2",
    "Kono Subarashii Sekai ni Shukufuku wo! Movie: Kurenai Densetsu",
    "Little Witch Academia: Mahoujikake no Parade",
    "Gochuumon wa Usagi desu ka?? Dear My Sister",
    "Break Blade Movie 3: Kyoujin no Ato",
    "Saint\u2606Oniisan (Movie)",
    "Bungou Stray Dogs: Dead Apple",
    "Kidou Keisatsu Patlabor 2 the Movie",
    "Quanzhi Gaoshou: Dianfeng Rongyao",
    "Persona 3 the Movie 4: Winter of Rebirth",
    "Luo Xiao Hei Zhan Ji (Movie)",
    "Chuunibyou demo Koi ga Shitai! Movie: Take On Me",
    "Mahou Shoujo Lyrical Nanoha: The Movie 2nd A's",
    "Black Clover: Mahou Tei no Ken",
    "Natsume Yuujinchou: Ishi Okoshi to Ayashiki Raihousha",
    "Kyoukai no Kanata Movie 2: I'll Be Here - Mirai-hen",
    "Doraemon Movie 31: Shin Nobita to Tetsujin Heidan - Habatake Tenshi-tachi",
    "Stand By Me Doraemon 2",
    "Berserk: Ougon Jidai-hen III - Kourin",
    "K-On! Movie",
    "Violet Evergarden Gaiden: Eien to Jidou Shuki Ningyou",
    "Saenai Heroine no Sodatekata Fine",
    "Yuru Camp\u25b3 Movie",
    "The First Slam Dunk",
    "Kaguya-sama wa Kokurasetai: First Kiss wa Owaranai",
    # Similar synonyms
    "Shi Er Shengxiao: Fuxing Gao Zhao Zhu Xiao Ba",
    "Fuxing Ba Jie",
    "Onigiri",
]
# Substrings that disqualify a MOVIE entry: a movie is skipped when any of
# these occurs anywhere in its title or in one of its synonyms.
skip_movie_entries = [
    "Detective Conan",
    "Naruto",
    "Psycho-Pass",
    "Girls & Panzer",
    "Eureka Seven",
    "Hamtarou",
    "Initial D",
    "Gundam",
    "Kimetsu no Yaiba",
    "Boku no Hero Academia",
    "Bleach",
    "Dragon Ball",
    "Attack on Titan",
    "Code Geass",
    "Made in Abyss",
    "One Piece",
    "JoJo's Bizarre Adventure",
    "YuYu Hakusho",
    "Haikyu!!",
    "Gintama",
    "Hunter x Hunter",
    "Fullmetal Alchemist",
    "Mirai Shounen Conan",
    "Pokemon",
    "Pororo",
    "Power Battle Watch Car",
    "Precure",
    "Sword Art Online",
    "Sylvanian Families",
    "Kino no Tabi",
    "Gekijouban",
    "Ginga Tetsudou",
    "GHOST IN THE SHELL",
    "Ghost in the Shell",
    "Yu\u2606Gi\u2606Oh!",
    "InuYasha",
    "Lupin III",
    "Hibike! Euphonium",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Hinomaru Hatanosuke",
    "FLCL",
    "Free!",
    "Fate/Grand Order",
]
# Substrings that disqualify a TV entry: a TV show is skipped when any of
# these occurs anywhere in its title. Trailing spaces are significant - they
# restrict short fragments such as "Fei " to whole leading words.
skip_tv_entries = [
    "Huo Xing Wa",
    "Huoli Shaonian Wang",
    "Huoxing Wa",
    "Pocket Monsters XY",
    "Pororo",
    "Hime Chen",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Flowering Heart",
    "Fu Guo",
    "Fate/kaleid liner Prisma",
    "Fei ",
    "Gangtie Feilong",
    "Kuaile ",
    "Tianyan",
    "Time Bokan Series",
    "Lixian",
    "Zhang ",
    "Zhen ",
    "Zhi ",
    "Zui ",
    "Zoids ",
    "Zi ",
    "Qi ",
    "Quwei",
    "Mengxiang",
    "Xiao ",
    "Xun",
    "Liang",
    "Xiaojiang",
    "Shen ",
    "Konglong",
    "Xi ",
    "Xiaolong",
    "Xiaoxiong",
    "Xiaoyuan",
    "Xin ",
    "Xing ",
    "Xiaokang",
    "Xiaohu",
    "Xianggu",
    "Wu ",
    "Wudang",
]
# Skip entries whose title or synonym contains a sequel marker such as
# 'Season xx', 'Nth Season' or 'Part x'. The numbered runs are generated
# rather than listed by hand; order matches the hand-written list.
skip_seasons_entries = [
    # "Season 0", then "Season 2" .. "Season 20" (there is no "Season 1")
    *(f"Season {number}" for number in (0, *range(2, 21))),
    # lower-case "season 2" .. "season 9"
    *(f"season {number}" for number in range(2, 10)),
    # ordinals: "2nd Season" .. "11th Season"
    "2nd Season",
    "3rd Season",
    *(f"{number}th Season" for number in range(4, 12)),
    # spelled-out and roman-numeral variants
    "Second Season",
    "Third Season",
    "Season II",
    "Season III",
    "Season Two",
    # "Part 2" .. "Part 6"
    *(f"Part {number}" for number in range(2, 7)),
]
def _contains_any(text, fragments):
    """Return True if any string in *fragments* is a substring of *text*."""
    return any(fragment in text for fragment in fragments)


def _keep_entry(entry):
    """Decide whether a matched-anime record survives the skip lists.

    Args:
        entry: One record from ``matched-anime-list.json``; the keys read
            here are ``'type'``, ``'title'`` and ``'synonyms'``.

    Returns:
        False when the record is neither a MOVIE nor a TV entry, when its
        title is in ``remove_anime``, or when its title/synonyms hit one of
        the skip lists; True otherwise.
    """
    kind = entry['type']
    # Only keep movies or TV shows
    if kind not in ('MOVIE', 'TV'):
        return False
    title = entry['title']
    # Exact-title blacklist
    if title in remove_anime:
        return False
    # Type-specific substring blacklists, applied to the title
    if kind == 'MOVIE' and _contains_any(title, skip_movie_entries):
        return False
    if kind == 'TV' and _contains_any(title, skip_tv_entries):
        return False
    # Sequel markers in the title
    if _contains_any(title, skip_seasons_entries):
        return False
    # A single offending synonym tosses the whole entry. Synonyms are only
    # checked against the season list and (for movies) the movie list - not
    # against the TV list or the exact-title list.
    for synonym in entry['synonyms']:
        if _contains_any(synonym, skip_seasons_entries):
            return False
        if kind == 'MOVIE' and _contains_any(synonym, skip_movie_entries):
            return False
    return True


# Load the list produced by the matching step; the context manager closes
# the handle, and the encoding is explicit instead of platform-dependent.
with open('matched-anime-list.json', encoding='utf-8') as f:
    data = json.load(f)
parsed = []  # list of kept entries
for entry in data['data']:
    if not _keep_entry(entry):
        continue
    # Only keep synonyms that don't have unicode in them
    entry['synonyms'] = [synonym for synonym in entry['synonyms'] if synonym.isascii()]
    parsed.append(entry)
# Convert to dataframe for further parsing
df = pd.DataFrame(parsed)
df = df.drop(['sources', 'status', 'picture', 'thumbnail', 'relations', 'tags', 'episodes', 'animeSeason'], axis=1)  # remove columns
# Outputs (reset_index adds an 'index' column to every record)
df.reset_index().to_json(r'parsed-anime-list.json', orient='records', indent=2)
# Remove additional columns for mini version
df = df.drop(['type'], axis=1)  # remove columns
df.reset_index().to_json(r'parsed-anime-list-mini.json', orient='records')