# File listing metadata: 400 lines, 11 KiB, Python
import json

import pandas as pd

# Skip entries whose title matches one of these strings exactly.
#
# Every element must be followed by a comma: adjacent string literals are
# silently concatenated by Python, which would turn two titles into one
# string that matches neither.
#
# The JSON-escaped "Fate\/..." spellings were dropped: in Python "\/" is not
# an escape sequence, so those strings contained a literal backslash and
# only duplicated the plain "Fate/..." entries further down the list.
remove_anime = [
    # Other seasons
    "Initial D Fifth Stage",
    "Initial D Final Stage",
    "Initial D Fourth Stage",
    "Initial D Second Stage",
    "Initial D Third Stage",
    "Tottoko Hamtaro (2012)",
    "Tottoko Hamtaro Dechu",
    "Tottoko Hamtarou Hai!",
    "Tottoko Hamtarou: Hamu Hamu Paradichu!",
    "Naruto (Shinsaku Anime)",
    "Naruto SD: Rock Lee no Seishun Full-Power Ninden",
    ".hack//Roots",
    ".hack//Tasogare no Udewa Densetsu",
    ".hack//The Movie: Sekai no Mukou ni",
    "Akira (Shin Anime)",
    "Eureka Seven AO",
    "Escaflowne",
    "Psycho-Pass RE:Start",
    "Psycho-Pass 3",
    "Gundam Seed Destiny HD Remaster",
    "Gundam: G no Reconguista",
    "Kidou Senshi Gundam SEED Destiny",
    "Kidou Senshi Gundam: Tekketsu no Orphans - Tokubetsu-hen",
    "Mobile Suit Gundam 00: 10th Anniversary Project",
    "Mobile Suit Gundam Seed HD Remaster",
    "Mobile Suit Gundam UC2",
    "Mobile Suit SD Gundam The Movie: Musha Knight Commando: SD Gundam Scramble",
    "Space Gundam V",
    "Gundam Build Fighters",
    "Bleach: Sennen Kessen-hen",
    "BLEACH: Sennen Kessen-hen 3rd Cour",
    "BLEACH: Sennen Kessen-hen 4th Cour",
    "Bocchi the Rock! Movie",
    "Jujutsu Kaisen 0 Movie",
    "Dragon Ball GT",
    "Dragon Ball Kai",
    "Dragon Ball Kai (2014)",
    "Shingeki! Kyojin Chuugakkou",
    "Meitantei Conan: Zero no Tea Time",
    "Meitantei Conan: Hannin no Hanzawa-san",
    "Mashin Eiyuuden Wataru 2",
    "One Piece: Mugiwara no Ichimi \u2013 Minna e \u201cTearai, Suimin o!\u201d Kodomo-tachi Ouen SP",
    "Gintama.: Porori-hen",
    "Gintama.: Shirogane no Tamashii-hen",
    "Hunter x Hunter (2011)",
    "Huoyan Shan Lixian Ji",
    "Huyao Xiao Hongniang Movie: Xia Sha",
    "Fullmetal Alchemist",
    "Fushigi Dagashiya: Zenitendou Movie - Tsuri Taiyaki",
    "Mirai Shounen Conan 2: Taiga Daibouken",
    "MIRROR",
    "Pokemon Housoukyoku",
    "Pokemon (2019)",
    "Sword Art Online Alternative: Gun Gale Online",
    "Sword Art Online II",
    "Sword Art Online: Alicization",
    "Sylvanian Families: Freya no Happy Diary",
    "Sylvanian Families: Mini Story",
    "Kino no Tabi: The Beautiful World - The Animated Series",
    "Kanon",
    "Clannad Movie",
    "Toaru Majutsu no Index Movie: Endymion no Kiseki",
    "Toaru Majutsu no Index II",
    "Ano Hi Mita Hana no Namae wo Bokutachi wa Mada Shiranai. Movie",
    "Cowboy Bebop: Tengoku no Tobira",
    "Suzumiya Haruhi no Shoushitsu",
    "Koukaku Kidoutai Nyuumon Arise",
    "Koukaku Kidoutai Arise: Alternative Architecture",
    "Koukaku Kidoutai: Stand Alone Complex - Tachikoma na Hibi (TV)",
    "Koukaku Kidoutai: Stand Alone Complex 2nd GIG",
    "Yu\u2606Gi\u2606Oh! 5D's",
    "Yu\u2606Gi\u2606Oh! Arc-V",
    "Yu\u2606Gi\u2606Oh! (Movie)",
    "Yu\u2606Gi\u2606Oh! Duel Monsters ALEX",
    "Yu\u2606Gi\u2606Oh! Go Rush!",
    "Yu\u2606Gi\u2606Oh! Go Rush!!",
    "Yu\u2606Gi\u2606Oh! Sevens",
    "Yu\u2606Gi\u2606Oh! VRAINS",
    "Yu\u2606Gi\u2606Oh! Zexal",
    "Yu\u2606Gi\u2606Oh! Zexal Second",
    "InuYasha: Kanketsu-hen",
    "Lupin the Third: Mine Fujiko to Iu Onna",
    "Hidan no Aria AA",
    "Higashi no Eden: Air Communication",
    "Higurashi no Naku Koro ni Gou",
    "Higurashi no Naku Koro ni Sotsu",
    "Himawari!!",
    "Zutto Mae kara Suki deshita. Kokuhaku Jikkou Iinkai",
    "Fairy Tail: 100 Years Quest",
    "Hong Mao Lan Tu MTV",
    "Final Fantasy VII: Advent Children - Venice Film Festival Footage",
    "FLCL: Shoegaze",
    "Free! Dive to the Future: Ima kara demo Wakaru \u201cFree! Series\u201d",
    "Fruits Basket 1st Season",
    "Fruits Basket: Prelude",
    "Fate/Extra: Last Encore",
    "Fate/Apocrypha",
    "Fate/Grand Order: Zettai Majuu Sensen Babylonia",
    "Fate/Extra: Last Encore - Illustrias Tendousetsu",
    "Fate/kaleid liner Prisma\u2606Illya: Prisma\u2606Phantasm",
    "Fate/stay night: Unlimited Blade Works",
    "Fate/Zero",
    "Fate/Zero Cafe",
    "Time Bokan 2000: Kaitou Kiramekiman",
    "Time Bokan 24",
    "Zombieland Saga Movie",
    "Zoids: Chaotic Century",
    "Zoids: Guardian Force",
    "Queen's Blade: Rebellion",
    "Queen's Blade: Gyokuza wo Tsugu Mono",
    "Shen Bing Xiaojiang Movie",
    "Kono Subarashii Sekai ni Bakuen wo!",
    "Kono Subarashii Sekai ni Shukufuku wo! 2",
    "Kono Subarashii Sekai ni Shukufuku wo! Movie: Kurenai Densetsu",
    "Little Witch Academia: Mahoujikake no Parade",
    "Gochuumon wa Usagi desu ka?? Dear My Sister",
    "Break Blade Movie 3: Kyoujin no Ato",
    "Saint\u2606Oniisan (Movie)",
    "Bungou Stray Dogs: Dead Apple",
    "Kidou Keisatsu Patlabor 2 the Movie",
    "Quanzhi Gaoshou: Dianfeng Rongyao",
    "Persona 3 the Movie 4: Winter of Rebirth",
    "Luo Xiao Hei Zhan Ji (Movie)",
    "Chuunibyou demo Koi ga Shitai! Movie: Take On Me",
    "Mahou Shoujo Lyrical Nanoha: The Movie 2nd A's",
    "Black Clover: Mahou Tei no Ken",
    "Natsume Yuujinchou: Ishi Okoshi to Ayashiki Raihousha",
    "Kyoukai no Kanata Movie 2: I'll Be Here - Mirai-hen",
    "Doraemon Movie 31: Shin Nobita to Tetsujin Heidan - Habatake Tenshi-tachi",
    "Stand By Me Doraemon 2",
    "Berserk: Ougon Jidai-hen III - Kourin",
    "K-On! Movie",
    "Violet Evergarden Gaiden: Eien to Jidou Shuki Ningyou",
    "Saenai Heroine no Sodatekata Fine",
    "Yuru Camp\u25b3 Movie",
    "The First Slam Dunk",
    "Kaguya-sama wa Kokurasetai: First Kiss wa Owaranai",
    "White Album",

    # Similar synonyms
    "Shi Er Shengxiao: Fuxing Gao Zhao Zhu Xiao Ba",
    "Fuxing Ba Jie",
    "Onigiri",
]
|
|
|
|
# Skip an entry when its type is MOVIE and its title (or one of its synonyms)
# contains any of these strings. Matching is a case-sensitive substring test,
# which is why both spellings of "Ghost in the Shell" are listed.
skip_movie_entries = [
    "Detective Conan",
    "Naruto",
    "Psycho-Pass",
    "Girls & Panzer",
    "Eureka Seven",
    "Hamtarou",
    "Initial D",
    "Gundam",
    "Kimetsu no Yaiba",
    "Boku no Hero Academia",
    "Bleach",
    "Dragon Ball",
    "Attack on Titan",
    "Code Geass",
    "Made in Abyss",
    "One Piece",
    "JoJo's Bizarre Adventure",
    "YuYu Hakusho",
    "Haikyu!!",
    "Gintama",
    "Hunter x Hunter",
    "Fullmetal Alchemist",
    "Mirai Shounen Conan",
    "Pokemon",
    "Pororo",
    "Power Battle Watch Car",
    "Precure",
    "Sword Art Online",
    "Sylvanian Families",
    "Kino no Tabi",
    "Gekijouban",
    "Ginga Tetsudou",
    "GHOST IN THE SHELL",
    "Ghost in the Shell",
    "Yu\u2606Gi\u2606Oh!",
    "InuYasha",
    "Lupin III",
    "Hibike! Euphonium",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Hinomaru Hatanosuke",
    "FLCL",
    "Free!",
    "Fate/Grand Order",
]
|
|
|
|
# Skip an entry when its type is TV and its title contains any of these
# strings (case-sensitive substring test).
# Entries that end in a space keep that trailing space: it is part of the
# substring being matched, so do not strip it.
skip_tv_entries = [
    "Huo Xing Wa",
    "Huoli Shaonian Wang",
    "Huoxing Wa",
    "Pocket Monsters XY",
    "Pororo",
    "Hime Chen",
    "Himitsu no Akko-chan",
    "Himitsukessha Taka no Tsume",
    "Flowering Heart",
    "Fu Guo",
    "Fate/kaleid liner Prisma",
    "Fei ",
    "Gangtie Feilong",
    "Kuaile ",
    "Tianyan",
    "Time Bokan Series",
    "Lixian",
    "Zhang ",
    "Zhen ",
    "Zhi ",
    "Zui ",
    "Zoids ",
    "Zi ",
    "Qi ",
    "Quwei",
    "Mengxiang",
    "Xiao ",
    "Xun",
    "Liang",
    "Xiaojiang",
    "Shen ",
    "Konglong",
    "Xi ",
    "Xiaolong",
    "Xiaoxiong",
    "Xiaoyuan",
    "Xin ",
    "Xing ",
    "Xiaokang",
    "Xiaohu",
    "Xianggu",
    "Wu ",
    "Wudang",
]
|
|
|
|
# Skip entries whose title or synonym contains one of these season/part
# markers (case-sensitive substring test). The numbered runs are generated
# from ranges; the resulting list has the same strings in the same order as
# the hand-written one it replaces.
skip_seasons_entries = (
    ["Season 0"]
    + [f"Season {number}" for number in range(2, 21)]    # "Season 2" .. "Season 20"
    + [f"season {number}" for number in range(2, 10)]    # "season 2" .. "season 9"
    + ["2nd Season", "3rd Season"]
    + [f"{number}th Season" for number in range(4, 12)]  # "4th Season" .. "11th Season"
    + [
        "Second Season",
        "Third Season",
        "Season II",
        "Season III",
        "Season Two",
    ]
    + [f"Part {number}" for number in range(2, 7)]       # "Part 2" .. "Part 6"
)
|
|
|
|
# Titles that are always kept: an exact match here bypasses every skip rule
# (exact-title, movie, TV, season and synonym checks).
exclude_from_removal = [
    "Kara no Kyoukai Movie: Mirai Fukuin",
    "White Album 2",
]
|
|
|
|
def _contains_any(text, fragments):
    """Return True if any string in ``fragments`` is a substring of ``text``."""
    return any(fragment in text for fragment in fragments)


def _is_unwanted(entry):
    """Decide whether ``entry`` must be left out of the parsed list.

    Args:
        entry: One record from ``data['data']``. Its ``title``, ``type`` and
            ``synonyms`` keys are read; nothing is modified.

    Returns:
        False for titles listed in ``exclude_from_removal``. Otherwise True
        when the title is in ``remove_anime``, the title contains a
        type-specific or season fragment, or any synonym contains a season
        fragment (or, for movies, a movie fragment).
    """
    title = entry['title']

    # The exclusion list wins over every skip rule.
    if title in exclude_from_removal:
        return False

    if title in remove_anime:
        return True

    is_movie = entry['type'] == 'MOVIE'
    if is_movie and _contains_any(title, skip_movie_entries):
        return True
    if entry['type'] == 'TV' and _contains_any(title, skip_tv_entries):
        return True
    if _contains_any(title, skip_seasons_entries):
        return True

    # Synonyms are checked against the season list for every type, and
    # against the movie list for movies only; the TV list applies to titles
    # only.
    for synonym in entry['synonyms']:
        if _contains_any(synonym, skip_seasons_entries):
            return True
        if is_movie and _contains_any(synonym, skip_movie_entries):
            return True

    return False


# Read the source list. The context manager closes the file handle, and the
# explicit encoding keeps non-ASCII titles readable regardless of the
# platform's default encoding.
with open('matched-anime-list.json', encoding='utf-8') as f:
    data = json.load(f)

parsed = []  # entries that survived every filter

for entry in data['data']:
    # Only keep movies or TV shows
    if entry['type'] not in ('MOVIE', 'TV'):
        continue

    if _is_unwanted(entry):
        continue

    # Only keep synonyms that are pure ASCII
    entry['synonyms'] = [
        synonym for synonym in entry['synonyms'] if synonym.isascii()
    ]
    parsed.append(entry)

# Convert to dataframe for further parsing
df = pd.DataFrame(parsed)
df = df.drop(
    ['sources', 'status', 'picture', 'thumbnail', 'relations', 'tags',
     'episodes', 'animeSeason'],
    axis=1,
)  # remove columns

# Outputs
df.reset_index().to_json(r'parsed-anime-list.json', orient='records', indent=2)

# Remove additional columns for mini version
df = df.drop(['type'], axis=1)  # remove columns

# count must be *called*; printing the bare attribute only shows the bound
# method's repr rather than the per-column counts.
print(df.count())

df.reset_index().to_json(r'parsed-anime-list-mini.json', orient='records')