initial upload
This commit is contained in:
53
tools/get-mal.py
Normal file
53
tools/get-mal.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
import os
|
||||
# Setup
|
||||
CLIENT_ID = os.getenv('MAL_CLIENT_ID')
|
||||
offset = 0
|
||||
anime_list = []
|
||||
|
||||
# Get list of TV animes using MAL API
|
||||
while offset < 5000:
|
||||
|
||||
# Can only get 500 at a time
|
||||
url = f'https://api.myanimelist.net/v2/anime/ranking?ranking_type=tv&limit=500&offset={offset}'
|
||||
resp = requests.get(url, headers={
|
||||
'X-MAL-CLIENT-ID': CLIENT_ID
|
||||
})
|
||||
anime = resp.json()
|
||||
|
||||
# Add into our list
|
||||
for node in anime['data']:
|
||||
anime_list.append({'title': node['node']['title'], 'id': node['node']['id']})
|
||||
|
||||
# Start at the next 500
|
||||
offset = offset + 500
|
||||
|
||||
# Let's not spam the MAL API
|
||||
time.sleep(1)
|
||||
|
||||
# Get the list of movie animes using MAL API
|
||||
offset = 0
|
||||
while offset < 250:
|
||||
|
||||
# Limit to 250 entries at a time
|
||||
url = f'https://api.myanimelist.net/v2/anime/ranking?ranking_type=movie&limit=250&offset={offset}'
|
||||
resp = requests.get(url, headers={
|
||||
'X-MAL-CLIENT-ID': CLIENT_ID
|
||||
})
|
||||
anime = resp.json()
|
||||
|
||||
# Add into our list
|
||||
for node in anime['data']:
|
||||
anime_list.append({'title': node['node']['title'], 'id': node['node']['id']})
|
||||
|
||||
# Start at the next 250
|
||||
offset = offset + 250
|
||||
|
||||
# Let's not spam the MAL API
|
||||
time.sleep(1)
|
||||
|
||||
# Write to disk
|
||||
with open('mal.json', 'w') as f:
|
||||
json.dump(anime_list, f, ensure_ascii=False, indent=4)
|
37
tools/match-mal-offlinedb.py
Normal file
37
tools/match-mal-offlinedb.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import json
|
||||
|
||||
# From get-mal.py
|
||||
f = open('mal.json')
|
||||
mal_anime = json.load(f)
|
||||
|
||||
# File from https://github.com/manami-project/anime-offline-database
|
||||
f = open('anime-offline-database.json')
|
||||
anime_db = json.load(f)
|
||||
|
||||
# Setup
|
||||
matched_list = {'data': []}
|
||||
unmatched_list = []
|
||||
|
||||
# Match the title from the MAL API to the offline DB
|
||||
for mal in mal_anime:
|
||||
found = False
|
||||
|
||||
for anime in anime_db['data']:
|
||||
if anime['title'] == mal['title']:
|
||||
anime['mal_id'] = mal['id']
|
||||
matched_list['data'].append(anime)
|
||||
found = True
|
||||
break
|
||||
|
||||
# Create an list of unmatched titles
|
||||
if found == False:
|
||||
unmatched_list.append(mal['title'])
|
||||
|
||||
# Write to disk the matched titles
|
||||
with open('matched-anime-list.json', 'w') as f:
|
||||
json.dump(matched_list, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# Print out unmatched titles
|
||||
print(f'Could not match the following, add manually (if wanted)\n: {unmatched_list}')
|
||||
|
||||
|
388
tools/parse-anime.py
Normal file
388
tools/parse-anime.py
Normal file
@@ -0,0 +1,388 @@
|
||||
import json
|
||||
import pandas as pd
|
||||
|
||||
# Skip entries if these match exactly
|
||||
remove_anime = [
|
||||
# Other seasons
|
||||
"Initial D Fifth Stage",
|
||||
"Initial D Final Stage",
|
||||
"Initial D Fourth Stage",
|
||||
"Initial D Second Stage",
|
||||
"Initial D Third Stage",
|
||||
"Tottoko Hamtaro (2012)",
|
||||
"Tottoko Hamtaro Dechu",
|
||||
"Tottoko Hamtarou Hai!",
|
||||
"Tottoko Hamtarou: Hamu Hamu Paradichu!",
|
||||
"Naruto (Shinsaku Anime)",
|
||||
"Naruto SD: Rock Lee no Seishun Full-Power Ninden",
|
||||
".hack//Roots",
|
||||
".hack//Tasogare no Udewa Densetsu",
|
||||
".hack//The Movie: Sekai no Mukou ni",
|
||||
"Akira (Shin Anime)",
|
||||
"Eureka Seven AO",
|
||||
"Escaflowne",
|
||||
"Psycho-Pass RE:Start",
|
||||
"Psycho-Pass 3",
|
||||
"Gundam Seed Destiny HD Remaster",
|
||||
"Gundam: G no Reconguista",
|
||||
"Kidou Senshi Gundam SEED Destiny",
|
||||
"Kidou Senshi Gundam: Tekketsu no Orphans - Tokubetsu-hen",
|
||||
"Mobile Suit Gundam 00: 10th Anniversary Project",
|
||||
"Mobile Suit Gundam Seed HD Remaster",
|
||||
"Mobile Suit Gundam UC2",
|
||||
"Mobile Suit SD Gundam The Movie: Musha Knight Commando: SD Gundam Scramble",
|
||||
"Space Gundam V",
|
||||
"Gundam Build Fighters",
|
||||
"Bleach: Sennen Kessen-hen",
|
||||
"BLEACH: Sennen Kessen-hen 3rd Cour",
|
||||
"BLEACH: Sennen Kessen-hen 4th Cour",
|
||||
"Bocchi the Rock! Movie",
|
||||
"Jujutsu Kaisen 0 Movie",
|
||||
"Dragon Ball GT",
|
||||
"Dragon Ball Kai",
|
||||
"Dragon Ball Kai (2014)",
|
||||
"Shingeki! Kyojin Chuugakkou",
|
||||
"Meitantei Conan: Zero no Tea Time",
|
||||
"Meitantei Conan: Hannin no Hanzawa-san",
|
||||
"Mashin Eiyuuden Wataru 2",
|
||||
"One Piece: Mugiwara no Ichimi \u2013 Minna e \u201cTearai, Suimin o!\u201d Kodomo-tachi Ouen SP",
|
||||
"Gintama.: Porori-hen",
|
||||
"Gintama.: Shirogane no Tamashii-hen",
|
||||
"Hunter x Hunter (2011)",
|
||||
"Huoyan Shan Lixian Ji",
|
||||
"Huyao Xiao Hongniang Movie: Xia Sha",
|
||||
"Fullmetal Alchemist",
|
||||
"Fushigi Dagashiya: Zenitendou Movie - Tsuri Taiyaki",
|
||||
"Mirai Shounen Conan 2: Taiga Daibouken",
|
||||
"MIRROR",
|
||||
"Pokemon Housoukyoku",
|
||||
"Pokemon (2019)",
|
||||
"Sword Art Online Alternative: Gun Gale Online",
|
||||
"Sword Art Online II",
|
||||
"Sword Art Online: Alicization",
|
||||
"Sylvanian Families: Freya no Happy Diary",
|
||||
"Sylvanian Families: Mini Story",
|
||||
"Kino no Tabi: The Beautiful World - The Animated Series",
|
||||
"Kanon",
|
||||
"Clannad Movie",
|
||||
"Toaru Majutsu no Index Movie: Endymion no Kiseki",
|
||||
"Toaru Majutsu no Index II",
|
||||
"Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie",
|
||||
"Cowboy Bebop: Tengoku no Tobira",
|
||||
"Suzumiya Haruhi no Shoushitsu",
|
||||
"Koukaku Kidoutai Nyuumon Arise",
|
||||
"Koukaku Kidoutai Arise: Alternative Architecture",
|
||||
"Koukaku Kidoutai: Stand Alone Complex - Tachikoma na Hibi (TV)",
|
||||
"Koukaku Kidoutai: Stand Alone Complex 2nd GIG",
|
||||
"Yu\u2606Gi\u2606Oh! 5D's",
|
||||
"Yu\u2606Gi\u2606Oh! Arc-V",
|
||||
"Yu\u2606Gi\u2606Oh! (Movie)",
|
||||
"Yu\u2606Gi\u2606Oh! Duel Monsters ALEX",
|
||||
"Yu\u2606Gi\u2606Oh! Go Rush!",
|
||||
"Yu\u2606Gi\u2606Oh! Go Rush!!",
|
||||
"Yu\u2606Gi\u2606Oh! Sevens",
|
||||
"Yu\u2606Gi\u2606Oh! VRAINS",
|
||||
"Yu\u2606Gi\u2606Oh! Zexal",
|
||||
"Yu\u2606Gi\u2606Oh! Zexal Second",
|
||||
"InuYasha: Kanketsu-hen",
|
||||
"Lupin the Third: Mine Fujiko to Iu Onna",
|
||||
"Hidan no Aria AA",
|
||||
"Higashi no Eden: Air Communication",
|
||||
"Higurashi no Naku Koro ni Gou",
|
||||
"Higurashi no Naku Koro ni Sotsu",
|
||||
"Himawari!!",
|
||||
"Zutto Mae kara Suki deshita. Kokuhaku Jikkou Iinkai",
|
||||
"Fairy Tail: 100 Years Quest",
|
||||
"Hong Mao Lan Tu MTV",
|
||||
"Fate\/stay night: Unlimited Blade Works",
|
||||
"Fate\/Zero",
|
||||
"Fate\/Zero Cafe",
|
||||
"Final Fantasy VII: Advent Children - Venice Film Festival Footage",
|
||||
"FLCL: Shoegaze",
|
||||
"Free! Dive to the Future: Ima kara demo Wakaru \u201cFree! Series\u201d",
|
||||
"Fruits Basket 1st Season",
|
||||
"Fruits Basket: Prelude",
|
||||
"Fate/Extra: Last Encore",
|
||||
"Fate/Apocrypha",
|
||||
"Fate/Grand Order: Zettai Majuu Sensen Babylonia",
|
||||
"Fate/Extra: Last Encore - Illustrias Tendousetsu",
|
||||
"Fate/kaleid liner Prisma\u2606Illya: Prisma\u2606Phantasm",
|
||||
"Fate/stay night: Unlimited Blade Works",
|
||||
"Fate/Zero",
|
||||
"Fate/Zero Cafe",
|
||||
"Time Bokan 2000: Kaitou Kiramekiman",
|
||||
"Time Bokan 24",
|
||||
"Zombieland Saga Movie",
|
||||
"Zoids: Chaotic Century",
|
||||
"Zoids: Guardian Force",
|
||||
"Queen's Blade: Rebellion",
|
||||
"Queen's Blade: Gyokuza wo Tsugu Mono",
|
||||
"Shen Bing Xiaojiang Movie",
|
||||
"Kono Subarashii Sekai ni Bakuen wo!",
|
||||
"Kono Subarashii Sekai ni Shukufuku wo! 2",
|
||||
"Kono Subarashii Sekai ni Shukufuku wo! Movie: Kurenai Densetsu",
|
||||
"Little Witch Academia: Mahoujikake no Parade",
|
||||
"Gochuumon wa Usagi desu ka?? Dear My Sister",
|
||||
"Break Blade Movie 3: Kyoujin no Ato",
|
||||
"Saint\u2606Oniisan (Movie)",
|
||||
"Bungou Stray Dogs: Dead Apple",
|
||||
"Kidou Keisatsu Patlabor 2 the Movie",
|
||||
"Quanzhi Gaoshou: Dianfeng Rongyao",
|
||||
"Persona 3 the Movie 4: Winter of Rebirth",
|
||||
"Luo Xiao Hei Zhan Ji (Movie)",
|
||||
"Chuunibyou demo Koi ga Shitai! Movie: Take On Me",
|
||||
"Mahou Shoujo Lyrical Nanoha: The Movie 2nd A's",
|
||||
"Black Clover: Mahou Tei no Ken",
|
||||
"Natsume Yuujinchou: Ishi Okoshi to Ayashiki Raihousha",
|
||||
"Kyoukai no Kanata Movie 2: I'll Be Here - Mirai-hen",
|
||||
"Doraemon Movie 31: Shin Nobita to Tetsujin Heidan - Habatake Tenshi-tachi",
|
||||
"Stand By Me Doraemon 2",
|
||||
"Berserk: Ougon Jidai-hen III - Kourin",
|
||||
"K-On! Movie",
|
||||
"Violet Evergarden Gaiden: Eien to Jidou Shuki Ningyou",
|
||||
"Saenai Heroine no Sodatekata Fine",
|
||||
"Yuru Camp\u25b3 Movie",
|
||||
"The First Slam Dunk",
|
||||
"Kaguya-sama wa Kokurasetai: First Kiss wa Owaranai",
|
||||
|
||||
# Similar synonyms
|
||||
"Shi Er Shengxiao: Fuxing Gao Zhao Zhu Xiao Ba",
|
||||
"Fuxing Ba Jie",
|
||||
"Onigiri",
|
||||
|
||||
]
|
||||
|
||||
# Skip these entries if it's a movie AND contains one of these
|
||||
skip_movie_entries = [
|
||||
"Detective Conan",
|
||||
"Naruto",
|
||||
"Psycho-Pass",
|
||||
"Girls & Panzer",
|
||||
"Eureka Seven",
|
||||
"Hamtarou",
|
||||
"Initial D",
|
||||
"Gundam",
|
||||
"Kimetsu no Yaiba",
|
||||
"Boku no Hero Academia",
|
||||
"Bleach",
|
||||
"Dragon Ball",
|
||||
"Attack on Titan",
|
||||
"Code Geass",
|
||||
"Made in Abyss",
|
||||
"One Piece",
|
||||
"JoJo's Bizarre Adventure",
|
||||
"YuYu Hakusho",
|
||||
"Haikyu!!",
|
||||
"Gintama",
|
||||
"Hunter x Hunter",
|
||||
"Fullmetal Alchemist",
|
||||
"Mirai Shounen Conan",
|
||||
"Pokemon",
|
||||
"Pororo",
|
||||
"Power Battle Watch Car",
|
||||
"Precure",
|
||||
"Sword Art Online",
|
||||
"Sylvanian Families",
|
||||
"Kino no Tabi",
|
||||
"Gekijouban",
|
||||
"Ginga Tetsudou",
|
||||
"GHOST IN THE SHELL",
|
||||
"Ghost in the Shell",
|
||||
"Yu\u2606Gi\u2606Oh!",
|
||||
"InuYasha",
|
||||
"Lupin III",
|
||||
"Hibike! Euphonium",
|
||||
"Himitsu no Akko-chan",
|
||||
"Himitsukessha Taka no Tsume",
|
||||
"Hinomaru Hatanosuke",
|
||||
"FLCL",
|
||||
"Free!",
|
||||
"Fate/Grand Order",
|
||||
]
|
||||
|
||||
# Skip these entries if it's a TV and contains one of these:
|
||||
skip_tv_entries = [
|
||||
"Huo Xing Wa",
|
||||
"Huoli Shaonian Wang",
|
||||
"Huoxing Wa",
|
||||
"Pocket Monsters XY",
|
||||
"Pororo",
|
||||
"Hime Chen",
|
||||
"Himitsu no Akko-chan",
|
||||
"Himitsukessha Taka no Tsume",
|
||||
"Flowering Heart",
|
||||
"Fu Guo",
|
||||
"Fate/kaleid liner Prisma",
|
||||
"Fei ",
|
||||
"Gangtie Feilong",
|
||||
"Kuaile ",
|
||||
"Tianyan",
|
||||
"Time Bokan Series",
|
||||
"Lixian",
|
||||
"Zhang ",
|
||||
"Zhen ",
|
||||
"Zhi ",
|
||||
"Zui ",
|
||||
"Zoids ",
|
||||
"Zi ",
|
||||
"Qi ",
|
||||
"Quwei",
|
||||
"Mengxiang",
|
||||
"Xiao ",
|
||||
"Xun",
|
||||
"Liang",
|
||||
"Xiaojiang",
|
||||
"Shen ",
|
||||
"Konglong",
|
||||
"Xi ",
|
||||
"Xiaolong",
|
||||
"Xiaoxiong",
|
||||
"Xiaoyuan",
|
||||
"Xin ",
|
||||
"Xing ",
|
||||
"Xiaokang",
|
||||
"Xiaohu",
|
||||
"Xianggu",
|
||||
"Wu ",
|
||||
"Wudang"
|
||||
]
|
||||
|
||||
# Skip enteries if it contains 'Season xx'
|
||||
skip_seasons_entries = [
|
||||
"Season 0",
|
||||
"Season 2",
|
||||
"Season 3",
|
||||
"Season 4",
|
||||
"Season 5",
|
||||
"Season 6",
|
||||
"Season 7",
|
||||
"Season 8",
|
||||
"Season 9",
|
||||
"Season 10",
|
||||
"Season 11",
|
||||
"Season 12",
|
||||
"Season 13",
|
||||
"Season 14",
|
||||
"Season 15",
|
||||
"Season 16",
|
||||
"Season 17",
|
||||
"Season 18",
|
||||
"Season 19",
|
||||
"Season 20",
|
||||
"season 2",
|
||||
"season 3",
|
||||
"season 4",
|
||||
"season 5",
|
||||
"season 6",
|
||||
"season 7",
|
||||
"season 8",
|
||||
"season 9",
|
||||
"2nd Season",
|
||||
"3rd Season",
|
||||
"4th Season",
|
||||
"5th Season",
|
||||
"6th Season",
|
||||
"7th Season",
|
||||
"8th Season",
|
||||
"9th Season",
|
||||
"10th Season",
|
||||
"11th Season",
|
||||
"Second Season",
|
||||
"Third Season",
|
||||
"Season II",
|
||||
"Season III",
|
||||
"Season Two",
|
||||
"Part 2",
|
||||
"Part 3",
|
||||
"Part 4",
|
||||
"Part 5",
|
||||
"Part 6",
|
||||
]
|
||||
|
||||
f = open('matched-anime-list.json')
|
||||
data = json.load(f)
|
||||
parsed = [] # list of parsed names
|
||||
|
||||
for i in data['data']:
|
||||
|
||||
# Only keep movies or TV shows
|
||||
if i['type'] == 'MOVIE' or i['type'] == 'TV':
|
||||
|
||||
skip_loop = False
|
||||
|
||||
# Remove extra unwanted entries if it's in the title
|
||||
if i['title'] in remove_anime:
|
||||
continue
|
||||
|
||||
# Remove unwanted entries if it's in the title AND a movie
|
||||
if i['type'] == 'MOVIE':
|
||||
for movies in skip_movie_entries:
|
||||
if movies in i['title']:
|
||||
skip_loop = True
|
||||
break
|
||||
|
||||
if skip_loop == True:
|
||||
continue
|
||||
|
||||
# Remove unwanted entries if it's in the title AND a TV
|
||||
if i['type'] == 'TV':
|
||||
for tv in skip_tv_entries:
|
||||
if tv in i['title']:
|
||||
skip_loop = True
|
||||
break
|
||||
|
||||
if skip_loop == True:
|
||||
continue
|
||||
|
||||
# Remove unwanted if it's in the seasons
|
||||
for seasons in skip_seasons_entries:
|
||||
if seasons in i['title']:
|
||||
skip_loop = True
|
||||
break
|
||||
|
||||
if skip_loop == True:
|
||||
continue
|
||||
|
||||
toss_based_on_synonym = False
|
||||
|
||||
# Cycle through the synonymns
|
||||
new_synonyms = []
|
||||
for j in i['synonyms']:
|
||||
|
||||
# Remove extra unwanted enteries if it's in the synonym
|
||||
for seasons in skip_seasons_entries:
|
||||
if seasons in j:
|
||||
toss_based_on_synonym = True
|
||||
break
|
||||
|
||||
# Remove unwanted entries if it's a synonym AND a movie
|
||||
if i['type'] == 'MOVIE':
|
||||
for movies in skip_movie_entries:
|
||||
if movies in j:
|
||||
toss_based_on_synonym = True
|
||||
break
|
||||
|
||||
if toss_based_on_synonym == True:
|
||||
break
|
||||
|
||||
# Only keep synonyms that don't have unicode in them
|
||||
if j.isascii():
|
||||
new_synonyms.append(j)
|
||||
|
||||
if toss_based_on_synonym == True:
|
||||
continue
|
||||
|
||||
i['synonyms'] = new_synonyms
|
||||
parsed.append(i)
|
||||
|
||||
|
||||
# Convert to dataframe for further parsing
|
||||
df = pd.DataFrame(parsed)
|
||||
df = df.drop(['sources', 'status', 'picture', 'thumbnail', 'relations', 'tags', 'episodes', 'animeSeason'], axis=1) # remove columns
|
||||
|
||||
# Outputs
|
||||
df.reset_index().to_json(r'parsed-anime-list.json', orient='records', indent=2)
|
||||
|
||||
# Remove additional columns for mini version
|
||||
df = df.drop(['type'], axis=1) # remove columns
|
||||
df.reset_index().to_json(r'parsed-anime-list-mini.json', orient='records')
|
Reference in New Issue
Block a user