Initial commit: Add webtoon downloader

2024-12-19 13:58:12 +01:00
commit d5a73f342e
53 changed files with 1173 additions and 0 deletions

MyWebtoon.bat (new file, 2 lines)

@@ -0,0 +1,2 @@
@echo off
python "%~dp0main.py" %*

converter/__init__.py (new file, empty)

converter/converter.py (new file, 178 lines)

@@ -0,0 +1,178 @@
import json
import shutil
from pathlib import Path

from PIL import Image

from data.path_constant import ANDROID_ASSETS, NETWORK_DIR


class WebtoonConverter:
    def __init__(self, webtoon_path: Path):
        self.webtoon_path = webtoon_path
        self.webtoon_path_network = NETWORK_DIR / webtoon_path.name
        self.img_extensions = {'.png', '.jpg', '.jpeg', '.webp'}

    def do_convert(self):
        if self.webtoon_path.is_dir() and self.has_new_episode():
            print(self.webtoon_path)
            self.copy_information()
            for item_path in self.webtoon_path.iterdir():
                if item_path.is_dir():
                    episode_path = item_path
                    if self.is_new_episode(episode_path):
                        print(f"new episode: {episode_path}")
                        self.delete_over_width_image(episode_path)
                        self.concat_images(episode_path)
                elif item_path.suffix.lower() in self.img_extensions:
                    thumbnail_path = item_path
                    self.copy_thumbnail(thumbnail_path)

    def has_new_episode(self) -> bool:
        self.webtoon_path_network.mkdir(parents=True, exist_ok=True)
        return self._count_episodes(self.webtoon_path) > self._count_episodes(self.webtoon_path_network)

    def _count_episodes(self, path: Path) -> int:
        episodes = [d for d in path.iterdir() if d.is_dir()]
        return len(episodes)

    def copy_information(self):
        info_path_local = self.webtoon_path / 'information.json'
        info_path_network = self.webtoon_path_network / 'information.json'
        self.update_tag(info_path_local)
        copy_necessary = True
        if info_path_network.exists():
            with open(info_path_local, "r", encoding='utf-8') as json_file:
                local_information = json.load(json_file)
            with open(info_path_network, "r", encoding='utf-8') as json_file:
                network_information = json.load(json_file)
            if (
                local_information["title"] == network_information["title"] and
                local_information["author"] == network_information["author"] and
                local_information["tag"] == network_information["tag"] and
                local_information["description"] == network_information["description"] and
                local_information["thumbnail_name"] == network_information["thumbnail_name"]
            ):
                copy_necessary = False
        if copy_necessary:
            try:
                shutil.copyfile(info_path_local, info_path_network)
                print(f"File '{info_path_local}' copied to '{info_path_network}'.")
            except FileNotFoundError:
                print(f"Source file '{info_path_local}' not found.")

    def update_tag(self, path: Path):
        # Normalize the tag in information.json to one of the viewer's main categories.
        update_necessary = False
        if path.exists():
            with open(path, "r", encoding='utf-8') as json_file:
                existing_information = json.load(json_file)
            tag = existing_information["tag"]
            print(tag)
            if '冒險' in tag and tag != '冒險':
                tag = '冒險'
                update_necessary = True
            elif '愛情' in tag and tag != '愛情':
                tag = '愛情'
                update_necessary = True
            elif 'BL' in tag and tag != 'BL':
                tag = 'BL'
                update_necessary = True
            elif '武俠' in tag:
                tag = '冒險'
                update_necessary = True
            elif '大人系' in tag:
                tag = '愛情'
                update_necessary = True
            elif any(keyword in tag for keyword in ('驚悚', '奇幻', '宮廷', '懸疑')):
                tag = '劇情'
                update_necessary = True
            if update_necessary:
                information = {
                    "title": existing_information["title"],
                    "author": existing_information["author"],
                    "tag": tag,
                    "description": existing_information["description"],
                    "thumbnail_name": existing_information["thumbnail_name"]
                }
                with open(path, 'w', encoding='utf-8') as json_file:
                    json.dump(information, json_file, ensure_ascii=False, indent=2)
                print(f"{path} is saved.")

    def copy_thumbnail(self, thumbnail_path: Path):
        assets_path = ANDROID_ASSETS / thumbnail_path.name
        if not assets_path.exists():
            try:
                shutil.copyfile(thumbnail_path, assets_path)
                print(f"File '{thumbnail_path}' copied to '{assets_path}'.")
            except FileNotFoundError:
                print(f"Source file '{thumbnail_path}' not found.")

    def delete_over_width_image(self, episode_path: Path):
        # Remove images that are 800 px or wider before concatenation.
        for img_path in episode_path.iterdir():
            if self._is_image_800(img_path):
                img_path.unlink()
                print(f"delete {img_path}")

    def _is_image_800(self, image_path: Path) -> bool:
        try:
            with Image.open(image_path) as img:
                return img.width >= 800
        except Exception as e:
            print(f"Error opening image {image_path}: {e}")
            return False

    def is_new_episode(self, episode_path: Path) -> bool:
        episode_path_network = self.webtoon_path_network / episode_path.name
        return not episode_path_network.exists()

    def concat_images(self, episode_path: Path):
        episode_path_network = self.webtoon_path_network / episode_path.name
        episode_path_network.mkdir(parents=True, exist_ok=True)
        result_images = []
        total_height = 0
        result_index = 1
        # Sort by file name so the strip is assembled in reading order.
        for img_path in sorted(episode_path.iterdir()):
            if img_path.suffix.lower() in self.img_extensions:
                with open(img_path, 'rb') as img_file:
                    img = Image.open(img_file)
                    img.load()
                # Start a new output image before exceeding 28800 px in height.
                if total_height + img.height > 28800:
                    self.save_concatenated_image(result_images, episode_path_network, result_index)
                    result_index += 1
                    result_images = []
                    total_height = 0
                result_images.append(img)
                total_height += img.height
        if result_images:
            self.save_concatenated_image(result_images, episode_path_network, result_index)

    def save_concatenated_image(self, images: list, output_episode_path: Path, index: int):
        img_width = images[0].width  # assuming all images have the same width
        total_height = sum(img.height for img in images)
        result_image = Image.new('RGB', (img_width, total_height))
        y_offset = 0
        for img in images:
            result_image.paste(img, (0, y_offset))
            y_offset += img.height
        output_file = output_episode_path / f"{index}.jpg"
        print(f"saving '{output_file}'")
        result_image.save(output_file)
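
A minimal usage sketch for the converter, assuming the layout from data/path_constant.py (one folder per webtoon under DOWNLOAD_DIR, containing information.json, a thumbnail, and one subfolder per episode); the folder name here is hypothetical:

from converter.converter import WebtoonConverter
from data.path_constant import DOWNLOAD_DIR

# Copy metadata to the NAS share and concatenate each new
# episode's images into tall JPEG strips.
converter = WebtoonConverter(DOWNLOAD_DIR / 'SomeWebtoon')  # hypothetical folder name
converter.do_convert()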

data/__init__.py (new file, empty)

data/kakao_cookie.py (new file, 25 lines)

@@ -0,0 +1,25 @@
from dataclasses import dataclass

CLIENT_ID = 2155768539


@dataclass
class Cookie:
    name: str
    userID: str
    ant: str


COOKIES = [
    Cookie(name="ithi", userID="twnu7577d258564215", ant="MFivJ2uk0eyBd7G28D0_4WSk3QXdpHXxp1rkDaNXdCU~"),  # ok
    Cookie(name="ym", userID="twnu18c780bce30104", ant=""),
    Cookie(name="83", userID="twnud41942de09830d", ant=""),
    Cookie(name="bjl", userID="twnuf8429dee79c3d3", ant=""),  # ok
    Cookie(name="yy", userID="twnucbb3bdfce95b85", ant=""),
    Cookie(name="hk", userID="twnuf622dd45e496ea", ant="ypc2JaDoKwfgghdheiFRCJvBjWid78M9djJooqOeMnY~"),
    Cookie(name="aa", userID="twnuc0728a46c25738", ant=""),  # ok
    Cookie(name="bb", userID="twnu407ef7f1a046fd", ant="pSQPuFHTEVSztUuDcP4eboMqyY5La0Hb5JRWYILj1z8~"),
    Cookie(name="wn", userID="twnu7322f207fb75ab", ant="4q3ArCVX_yx5fTq0kWWCanc60SXEnUU3QyuF0wys8Hc~")
]

COOKIE_NAME = 'ithi'
URL_TYPE = '1'  # any of: 1, 3, 7, m, p
TASK_TYPE = 'dc'  # any of: d, c
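
As read by main.py, COOKIE_NAME picks which account from COOKIES is used, URL_TYPE selects which of the KAKAO_* id lists in data/special_list.py are eligible for download ('1', '3', '7', 'm', 'p'), and TASK_TYPE enables the download ('d') and convert ('c') steps.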

data/kakao_request.py (new file, 42 lines)

@@ -0,0 +1,42 @@
from data.kakao_cookie import CLIENT_ID


class KakaoRequest:
    def __init__(self, timestamp, nonce):
        self.client_id = CLIENT_ID
        self.timestamp = timestamp
        self.nonce = nonce
        self.app_id = f"KP.{self.client_id}.{self.timestamp + 1}"

    def get_episode_headers(self, ant):
        return {
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zht",
            "Cache-Control": "no-cache",
            "Cookie": f"theme=dark; _kp_collector={self.app_id}; atn={ant}",
            "Dnt": "1",
            "Origin": "https://tw.kakaowebtoon.com",
            "Pragma": "no-cache",
            "Referer": "https://tw.kakaowebtoon.com/",
            "Sec-Ch-Ua": '"Not A(Brand";v="99", "Microsoft Edge";v="121", "Chromium";v="121"',
            "Sec-Ch-Ua-Mobile": "?0",
            "Sec-Ch-Ua-Platform": '"Windows"',
            "Sec-Fetch-Dest": "empty",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Site": "same-site",
            "Sec-Gpc": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        }

    def get_post_headers(self, ant):
        # Dict union (Python 3.9+): the episode headers plus the JSON content type.
        return self.get_episode_headers(ant) | {"Content-Type": "application/json;charset=UTF-8"}

    def get_payload(self, episode_id):
        return {
            "id": episode_id,
            "type": "AES_CBC_WEBP",
            "nonce": self.nonce,
            "timestamp": str(self.timestamp),
            "download": False,
            "webAppId": self.app_id,
        }

data/path_constant.py (new file, 11 lines)

@@ -0,0 +1,11 @@
from pathlib import Path

DOWNLOAD_DIR = Path('E:/') / 'Webtoon'
NETWORK_DIR = Path('//TRUENAS') / 'Media' / 'Webtoon'
TEMP_DOWNLOAD_DIR = Path('E:/') / 'Temp_Webtoon'
DOWNLOAD_LIST_TXT = DOWNLOAD_DIR / 'download.txt'
TEMP_DOWNLOAD_LIST_TXT = TEMP_DOWNLOAD_DIR / 'download_kakao.txt'
ANDROID_ASSETS = Path('E:/') / 'Projects' / 'AndroidStudioProjects' / 'WebtoonViewer' / 'app' / 'src' / 'main' / 'assets'

data/special_list.py (new file, 62 lines)

@@ -0,0 +1,62 @@
WEBTOON_NOT_PROCESSED = [
    '陷阱',  # completed
    '8級魔法師再臨',  # completed
    '婚姻這門生意[18+]',  # completed
    '守護女主角哥哥的方法',  # completed on KakaoTW
    '轉生後變成天才',  # completed on KakaoTW
    '兩個繼承人',  # completed on KakaoTW
    '患上不出道就會死的病',  # completed on KakaoTW
    '無法品味的男人',  # completed on KakaoTW
    '摘下偽善男主角的面具',  # completed on KakaoTW
    '皇家婚姻',  # completed on KakaoTW
    '鐵血家族獵犬的重生',  # completed on KakaoTW
    '重生百次的最強玩家',  # completed on KakaoTW
    '我獨自升級',  # completed on KakaoTW
    '結局創造者',  # no longer updating
    '黑影之夜',  # on season break
    '狂魔重生記',  # on season break
    '在魔法學院偽裝教師',  # on season break
    '兔子與黑豹的共生關係',  # paywalled / on season break
    '成為家人的方法',  # remaining episodes downloaded via YDS
]

WEBTOON_18_BONUS = [
    '婚姻這門生意[18+]'
]

# Kakao webtoon ids grouped by URL_TYPE; the comments give the titles.
KAKAO_ONLY_MAIN_ACCOUNT = [
    '152',  # 骷髏士兵卷土重來
    '167',  # 試著改變故事類型吧 P
    '222',  # 成為我筆下男主角的妻子
    '247',  # 領主夫人罷工中
    '322',  # 婚姻這門生意 P
    '330',  # 同情的形態 P
    '399',  # 噬魔法師
    '424',  # 地下城見聞錄
    '587',  # Pickmeup
    '591',  # 武當奇俠
    '736',  # Boss大人請振作
    '784',  # 永遠的謊言
    '787',  # 魔法師的校園生存法則
    '862',  # 符文之子
]

KAKAO_1 = [
    '41'
]

KAKAO_3 = [
    '303',  # 天才詐欺犯的雙重身分
]

KAKAO_7 = [
    '41',  # 反派角色只有死亡結局
    '116',  # 惡女重生
    '200',  # 暴君就該配惡女
    '233',  # 少女賭神愛黛兒
]

KAKAO_PAY = [
    '230',  # 兔子與黑豹的共生關係
    '516',  # 結局創造者
]

data/webtoon_request.py (new file, 39 lines)

@@ -0,0 +1,39 @@
def get_webtoon_headers():
    return {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        "Referer": "http://www.webtoons.com",
        "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
        "Sec-Ch-Ua-Arch": '"x86"',
        "Sec-Ch-Ua-Bitness": '"64"',
        "Sec-Ch-Ua-Full-Version-List": '"Not_A Brand";v="8.0.0.0", "Chromium";v="120.0.6099.130", "Google Chrome";v="120.0.6099.130"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Model": '""',
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Ch-Ua-Platform-Version": '"15.0.0"',
        "Sec-Ch-Ua-Wow64": "?0",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    }


def get_bomtoon_headers():
    return {
        "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7,zh-CN;q=0.6,zh;q=0.5",
        "Priority": "i",
        "Referer": "https://www.bomtoon.tw/",
        "Sec-Ch-Ua": '"Not_A Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Windows"',
        "Sec-Fetch-Dest": "image",
        "Sec-Fetch-Mode": "no-cors",
        "Sec-Fetch-Site": "cross-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    }

downloaders/__init__.py (new file, empty)

downloaders/bomtoon.py (new file, 62 lines)

@@ -0,0 +1,62 @@
from pathlib import Path

import requests
from bs4 import BeautifulSoup

from data.webtoon_request import get_bomtoon_headers
from downloaders.downloader import Downloader


class Bomtoon(Downloader):
    def __init__(self, webtoon_id: str):
        super().__init__(webtoon_id)
        self.headers = get_bomtoon_headers()

    def _fetch_information(self, url):
        res = requests.get(url, headers=self.headers)
        if res.status_code == 200:
            soup = BeautifulSoup(res.content, 'html.parser')
            title = soup.find('title')
            if title:
                self.title = title.get_text().split('-')[0].strip()
            author = soup.find('meta', attrs={'name': 'author'})
            if author:
                self.author = author.get('content')
            description = soup.find('meta', attrs={'property': 'og:description'})
            if description:
                self.description = description.get('content')
            tags = soup.find('meta', attrs={'name': 'keywords'})
            if tags:
                tags_list = tags.get('content').split(',')
                # The first keyword may be the serialization status ('連載'); skip it.
                if '連載' in tags_list[0]:
                    self.tag = tags_list[1]
                else:
                    self.tag = tags_list[0]
            self.thumbnail_url = ""
            self.thumbnail_name = self.webtoon_id + '.jpg'
        else:
            print(f"fetch_information: {res.status_code}")

    # Episode listing and image download are not implemented yet.
    def _fetch_episode_information(self):
        pass

    def _get_episode_image_urls(self, episode_index) -> list[str] | None:
        pass

    async def _download_image(self, episode_path: Path, url: str, image_no: int) -> None:
        pass

downloaders/decrypt.py (new file, 43 lines)

@@ -0,0 +1,43 @@
import base64
import hashlib
from contextlib import suppress


class Decrypt:
    def __init__(self, aid, episode_id, timestamp, nonce, user_id, zid):
        self._aid = aid
        self._episode_id = episode_id
        self._timestamp = timestamp
        self._nonce = nonce
        self._user_id = user_id
        self._zid = zid

    @classmethod
    def get_aes(cls):
        # Import pycryptodomex lazily and cache the AES module on the class.
        with suppress(AttributeError):
            return cls.AES
        try:
            from Cryptodome.Cipher import AES
        except ImportError:
            raise ImportError("Missing optional dependency 'pycryptodomex'. Please install it to use this functionality.")
        cls.AES = AES
        return cls.AES

    @classmethod
    def _decrypt(cls, data: bytes, key: bytes, iv: bytes) -> bytes:
        AES = cls.get_aes()
        cipher = AES.new(key, AES.MODE_CBC, iv)
        return cipher.decrypt(data)

    def get_decrypt_information(self) -> tuple[bytes, bytes]:
        # The real key and IV are themselves AES-CBC encrypted inside aid/zid;
        # the temporary key/IV that unwraps them is derived from request metadata.
        temp_key = hashlib.sha256(f"{self._user_id}{self._episode_id}{self._timestamp}".encode()).digest()
        temp_iv = hashlib.sha256(f"{self._nonce}{self._timestamp}".encode()).digest()[:16]
        encrypted_key = base64.b64decode(self._aid)
        encrypted_iv = base64.b64decode(self._zid)
        key = self._decrypt(encrypted_key, temp_key, temp_iv)[:16]
        iv = self._decrypt(encrypted_iv, temp_key, temp_iv)[:16]
        return key, iv
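
How the pieces fit together, as a sketch: the media-resources response carries base64-encoded aid/zid blobs, a temporary key/IV derived from the request metadata unwraps the per-episode key and IV, and those decrypt each downloaded file. All values below are hypothetical placeholders:

from pathlib import Path
from downloaders.decrypt import Decrypt

# Placeholder arguments; real values come from the media-resources
# response and the KakaoRequest that issued it.
decrypt = Decrypt(aid="...", episode_id=12345, timestamp=1734614292000,
                  nonce="a1b2c3d4e5", user_id="twnu...", zid="...")
key, iv = decrypt.get_decrypt_information()    # unwrap the per-episode key and IV
encrypted = Path("000.webp.enc").read_bytes()  # hypothetical encrypted download
Path("000.webp").write_bytes(decrypt._decrypt(encrypted, key, iv))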

downloaders/downloader.py (new file, 168 lines)

@@ -0,0 +1,168 @@
import asyncio
import html
import json
import shutil
from pathlib import Path

import pyfilename as pf
import requests
from httpx import AsyncClient

from data.special_list import WEBTOON_18_BONUS


class Downloader:
    def __init__(self, webtoon_id: str) -> None:
        self.webtoon_id = webtoon_id
        self.client = AsyncClient()
        self.lately_downloaded_episode: list[Path] = []
        self.new_webtoon = ""

    def download_webtoon(self, url, path: Path) -> None:
        self._fetch_information(url)
        self.webtoon_path = path / self.title
        self.webtoon_path.mkdir(parents=True, exist_ok=True)
        self._save_information()
        if self.thumbnail_url != "":
            self._download_thumbnail()
        self._fetch_episode_information()
        unobtained_episodes = self._get_unobtained_episodes()
        if len(unobtained_episodes) > 0:
            self.new_webtoon = self.title
            try:
                asyncio.run(self._download_episodes(unobtained_episodes))
            except Exception as e:
                print(f"Error _download_episodes: {e}")

    def _fetch_information(self, url) -> None:
        pass

    def _save_information(self) -> None:
        information_path = self.webtoon_path / 'information.json'
        save_necessary = True
        if information_path.exists():
            with open(information_path, "r", encoding='utf-8') as json_file:
                existing_information = json.load(json_file)
            if (
                existing_information["title"] == self.title and
                existing_information["author"] == self.author and
                existing_information["description"] == self.description and
                existing_information["thumbnail_name"] == self.thumbnail_name
            ):
                save_necessary = False
        if save_necessary:
            information = {
                "title": self.title,
                "author": self.author,
                "tag": self.tag,
                "description": self.description,
                "thumbnail_name": self.thumbnail_name
            }
            with open(information_path, 'w', encoding='utf-8') as json_file:
                json.dump(information, json_file, ensure_ascii=False, indent=2)
            print(f"{information_path} is saved.")

    def _download_thumbnail(self) -> None:
        thumbnail_path = self.webtoon_path / self.thumbnail_name
        if not thumbnail_path.exists():
            response = requests.get(self.thumbnail_url)
            if response.status_code == 200:
                thumbnail_path.write_bytes(response.content)
                print(f"{thumbnail_path} is saved.")
            else:
                print(response.status_code)

    def _fetch_episode_information(self) -> None:
        pass

    def _get_unobtained_episodes(self) -> list[int]:
        downloaded_episodes = []
        for dir in self.webtoon_path.glob('*'):
            if dir.is_dir():
                downloaded_episodes.append(int(dir.name.split('.')[0]))
        if self.title in WEBTOON_18_BONUS:
            # Bonus episodes shift the numbering, so compare counts instead of indices.
            count = len(self.readabilities_index_list) - len(downloaded_episodes)
            episodes = self.readabilities_index_list[-count:] if count > 0 else []
        else:
            difference = set(self.readabilities_index_list) - set(downloaded_episodes)
            episodes = sorted(difference)
        print(f"{self.title} unobtained episodes: {episodes}")
        return episodes

    async def _download_episodes(self, episode_index_list: list[int]) -> None:
        async with self.client:
            for episode_index in episode_index_list:
                episode_name = self.episode_titles[episode_index]
                episode_title = self._get_safe_file_name(episode_index, episode_name)
                print(episode_title)
                episode_path = self.webtoon_path / episode_title
                episode_path.mkdir(parents=True, exist_ok=True)
                # Throttle between episodes without blocking the event loop.
                await asyncio.sleep(2)
                is_download_successful = await self._download_episode(episode_index, episode_path)
                if is_download_successful:
                    self.lately_downloaded_episode.append(episode_path)
                    print(f"Download {episode_name} successful.")
                else:
                    print(f"Error _download_episode: {episode_name}")
                    break

    async def _download_episode(self, episode_index: int, episode_path: Path) -> bool:
        episode_images_url = self._get_episode_image_urls(episode_index)
        if not episode_images_url:
            print(f"Failed to get image urls for: {episode_path}")
            return False
        try:
            await asyncio.gather(
                *(
                    self._download_image(episode_path, element, i)
                    for i, element in enumerate(episode_images_url)
                )
            )
        except Exception:
            # Remove the partially downloaded episode before propagating the error.
            shutil.rmtree(episode_path)
            raise
        return True

    def _get_episode_image_urls(self, episode_index: int) -> list[str] | None:
        pass

    async def _download_image(self, episode_path: Path, url: str, image_no: int) -> None:
        pass

    def _get_safe_file_name(self, episode_index: int, episode_name: str) -> str:
        if self.title == '全知讀者視角':
            episode_name = f"Ep{episode_name.split('.')[2]}"
            # Normalize full-width parentheses to ASCII.
            episode_name = episode_name.replace("（", " (")
            episode_name = episode_name.replace("）", ")")
        elif self.title == '怪力亂神':
            episode_name = episode_name.replace('話. ', '')
        episode_title = f"{episode_index}.{episode_name}"
        return pf.convert(html.unescape(episode_title))
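
The class is a template method: download_webtoon drives the flow, and each site subclass fills in the fetch and download hooks, as the Kakao, webtoons.com and Bomtoon downloaders do. A minimal, hypothetical subclass showing the contract (the site and all values are invented for illustration):

class ExampleSite(Downloader):  # hypothetical site, for illustration only
    def _fetch_information(self, url) -> None:
        # Must set: title, author, tag, description, thumbnail_url, thumbnail_name.
        self.title = "Example"
        self.author = "Someone"
        self.tag = "劇情"
        self.description = ""
        self.thumbnail_url = ""  # empty string skips the thumbnail download
        self.thumbnail_name = "example.jpg"

    def _fetch_episode_information(self) -> None:
        # Must set: episode_titles and readabilities_index_list.
        self.episode_titles = ["1.Episode One"]
        self.readabilities_index_list = [0]

    def _get_episode_image_urls(self, episode_index) -> list[str] | None:
        return ["https://example.com/image.jpg"]

    async def _download_image(self, episode_path, url, image_no) -> None:
        data = (await self.client.get(url)).content
        (episode_path / f"{image_no:03d}.jpg").write_bytes(data)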

downloaders/kakao_webtoon.py (new file, 141 lines)

@@ -0,0 +1,141 @@
import random
import sys
import time
from pathlib import Path

import httpx
import requests
from bs4 import BeautifulSoup

from data.kakao_cookie import Cookie
from data.kakao_request import KakaoRequest
from downloaders.decrypt import Decrypt
from downloaders.downloader import Downloader


class KakaoWebtoon(Downloader):
    def __init__(self, webtoon_id: str, cookie: Cookie):
        super().__init__(webtoon_id)
        self._timestamp = int(time.time() * 1000)
        # Random 10-character nonce over [0-9a-z].
        chars = [*range(0x30, 0x3A), *range(0x61, 0x7B)]
        self._nonce = "".join(chr(i) for i in random.choices(chars, k=10))
        self.kakao_request = KakaoRequest(self._timestamp, self._nonce)
        self.cookie = cookie
        self.episode_headers = self.kakao_request.get_episode_headers(self.cookie.ant)
        self.post_headers = self.kakao_request.get_post_headers(self.cookie.ant)

    def verify_cookie(self) -> bool:
        url = f"https://gateway.tw.kakaowebtoon.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset=0&limit=30"
        res = requests.get(url, headers=self.episode_headers)
        return res.status_code == 200

    def _fetch_information(self, url):
        res = requests.get(url, headers=self.episode_headers)
        if res.status_code == 200:
            soup = BeautifulSoup(res.content, 'html.parser')
            description = soup.find('meta', attrs={'name': 'description'})
            if description:
                self.description = description.get('content')
            thumbnail_url = soup.find('meta', attrs={'property': 'og:image'})
            if thumbnail_url:
                self.thumbnail_url = thumbnail_url.get('content')
            # Title, author and tag are the first three <p> tags on the page.
            all_p = soup.find_all('p')
            self.title = all_p[0].get_text()
            self.author = all_p[1].get_text()
            self.tag = all_p[2].get_text()
            self.thumbnail_name = self.webtoon_id + '.' + self.thumbnail_url.split('.')[-1]

    def _fetch_episode_information(self):
        offset = 0
        limit = 30
        is_last: bool = False
        webtoon_episodes_data = []
        while not is_last:
            url = f"https://gateway.tw.kakaowebtoon.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset={offset}&limit={limit}"
            res = requests.get(url, headers=self.episode_headers)
            if res.status_code == 200:
                json_data = res.json()
                webtoon_episodes_data += json_data["data"]["episodes"]
                offset += limit
                is_last = json_data["meta"]["pagination"]["last"]
            else:
                print("_fetch_episode_information")
                print(self.cookie.name)
                print(res.status_code)
                sys.exit()
        episode_ids: list[int] = []
        seo_ids: list[str] = []
        episode_titles: list[str] = []
        readabilities: list[bool] = []
        for information in reversed(webtoon_episodes_data):
            episode_ids.append(information["id"])
            seo_ids.append(information["seoId"])
            episode_titles.append(information["title"])
            readabilities.append(information["readable"])
        self.episode_ids = episode_ids
        self.seo_ids = seo_ids
        self.episode_titles = episode_titles
        self.readabilities_index_list = [index for index, value in enumerate(readabilities) if value]

    def _get_episode_image_urls(self, episode_index) -> list[tuple[str, bytes, bytes]] | None:
        episode_id = self.episode_ids[episode_index]
        url = f"https://gateway.tw.kakaowebtoon.com/episode/v1/views/viewer/episodes/{episode_id}/media-resources"
        payload = self.kakao_request.get_payload(episode_id)
        res = requests.post(url, headers=self.post_headers, json=payload)
        data = res.json()["data"]
        aid = data["media"]["aid"]
        zid = data["media"]["zid"]
        self.decrypt = Decrypt(aid, episode_id, self._timestamp, self._nonce, self.cookie.userID, zid)
        key, iv = self.decrypt.get_decrypt_information()
        return [(i["url"], key, iv) for i in data["media"]["files"]]

    async def _download_image(
        self,
        episode_path: Path,
        url: tuple[str, bytes, bytes],
        image_no: int
    ) -> None:
        real_url, key, iv = url
        file_name = f"{image_no:03d}.webp"
        file_path = episode_path / file_name
        try:
            image_raw: bytes = (await self.client.get(real_url, headers=self.episode_headers)).content
        except httpx.TimeoutException as e:
            print(f"Timeout error occurred: {e}")
            return
        except httpx.RequestError as e:
            print(f"An error occurred while requesting {real_url}: {e}")
            return
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return
        decrypted_data = self.decrypt._decrypt(image_raw, key, iv)
        file_path.write_bytes(decrypted_data)

    async def close(self):
        await self.client.aclose()

downloaders/webtoon_com.py (new file, 122 lines)

@@ -0,0 +1,122 @@
from pathlib import Path
from typing import TYPE_CHECKING

import httpx
import requests
from bs4 import BeautifulSoup

from data.webtoon_request import get_webtoon_headers
from downloaders.downloader import Downloader


class Webtoon(Downloader):
    def __init__(self, webtoon_id: str):
        super().__init__(webtoon_id)
        self.headers = get_webtoon_headers()
        # Fixed base path; episode pages are addressed by the title_no and
        # episode_no query parameters below.
        self.base_url = "https://www.webtoons.com/en/action/jungle-juice"

    def _fetch_information(self, url):
        res = requests.get(url, headers=self.headers)
        if res.status_code == 200:
            soup = BeautifulSoup(res.content, 'html.parser')
            title = soup.find('meta', attrs={'property': 'og:title'})
            if title:
                self.title = title.get('content')
            description = soup.find('meta', attrs={'property': 'og:description'})
            if description:
                self.description = description.get('content')
            thumbnail_url = soup.find('meta', attrs={'property': 'og:image'})
            if thumbnail_url:
                self.thumbnail_url = thumbnail_url.get('content')
            author_list = soup.find_all('h3')
            h3_texts = [h3.get_text().strip() for h3 in author_list]
            self.author = ', '.join(h3_texts)
            self.tag = soup.find('h2', class_='genre').get_text()
            seo_id = url.split('/')[-2]
            thumbnail_type = 'png' if 'png' in self.thumbnail_url else 'jpg'
            self.thumbnail_name = seo_id + '.' + thumbnail_type
            self.latest_episode_no = soup.find('li', class_='_episodeItem').get('data-episode-no')
        else:
            print(f"fetch_information: {res.status_code}")

    def _fetch_episode_information(self):
        url = f"{self.base_url}/prologue/viewer?title_no={self.webtoon_id}&episode_no={self.latest_episode_no}"
        res = requests.get(url, headers=self.headers)
        if res.status_code == 200:
            soup = BeautifulSoup(res.content, 'html.parser')
            li_tags = soup.find('div', class_='episode_cont').find_all('li', attrs={'data-episode-no': True})
            self.episode_titles = [li.find('span', class_='subj').get_text() for li in li_tags if li.find('span', class_='subj')]
            self.episode_urls = [li.find('a')['href'] for li in li_tags]
            self.episode_ids = [int(li.get('data-episode-no')) for li in li_tags]  # episode numbers start at 1, not 0
            self.readabilities_index_list = [id - 1 for id in self.episode_ids]
        else:
            print(f"fetch_episode_information: {res.status_code}")

    def _get_episode_image_urls(self, episode_index) -> list[str]:
        episode_id = self.episode_ids[episode_index]
        url = f"{self.base_url}/prologue/viewer?title_no={self.webtoon_id}&episode_no={episode_id}"
        episode_image_urls = []
        res = requests.get(url, headers=self.headers)
        if res.status_code == 200:
            soup = BeautifulSoup(res.content, 'html.parser')
            img_tags = soup.select("#_imageList > img")
            episode_image_urls = [element["data-url"] for element in img_tags]
            if TYPE_CHECKING:
                # Narrow the element type for the type checker only.
                episode_image_urls = [
                    episode_image_url for episode_image_url in episode_image_urls if isinstance(episode_image_url, str)
                ]
        else:
            print(f"get_episode_image_urls: {res.status_code}")
        return episode_image_urls

    async def _download_image(
        self,
        episode_path: Path,
        url: str,
        image_no: int
    ) -> None:
        file_name = f"{image_no:03d}.jpg"
        file_path = episode_path / file_name
        try:
            response = await self.client.get(url, headers=self.headers)
            response.raise_for_status()  # raises HTTPStatusError for 4xx/5xx responses
            image_raw: bytes = response.content
        except httpx.TimeoutException as e:
            print(f"Timeout error occurred: {e}")
            return
        except httpx.HTTPStatusError as e:
            print(f"HTTP error occurred: {e}")
            return
        except httpx.RequestError as e:
            print(f"An error occurred while requesting {url}: {e}")
            return
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return
        file_path.write_bytes(image_raw)

helper.py (new file, 23 lines)

@@ -0,0 +1,23 @@
from pathlib import Path

from data.path_constant import DOWNLOAD_DIR, NETWORK_DIR
from helper.missing_episode import get_missing_episodes
from helper.missing_images import get_missing_images, resize_and_overwrite
from prerequisite import delete_all_empty_episodes

# delete_all_empty_episodes(DOWNLOAD_DIR)
# delete_all_empty_episodes(NETWORK_DIR)

get_missing_episodes(DOWNLOAD_DIR)
get_missing_images()

# Example: shrink a range of images in one episode to 720 px wide.
# episode_path = Path(DOWNLOAD_DIR) / '結局創造者' / '0.第1話'
# images_path = []
# for i in range(11, 29 + 1):
#     file_name = '0' + str(i) + '.webp'
#     path = Path(episode_path) / file_name
#     images_path.append(path)
# for path in images_path:
#     resize_and_overwrite(path, 720)

helper/__init__.py (new file, empty)

helper/missing_episode.py (new file, 18 lines)

@@ -0,0 +1,18 @@
from pathlib import Path


def get_missing_episodes(path: Path):
    for first_level_path in path.iterdir():
        if first_level_path.is_dir():
            episodes = []
            missing_episodes = []
            for second_level_path in first_level_path.iterdir():
                if second_level_path.is_dir():
                    episodes.append(second_level_path.name.split('.')[0])
            if not episodes:
                continue
            sorted_episodes = sorted(episodes, key=int)
            max_index = int(sorted_episodes[-1])
            for i in range(0, max_index):
                if str(i) not in episodes:
                    missing_episodes.append(i)
            if len(missing_episodes) > 0:
                print(first_level_path.name)
                print(missing_episodes)

helper/missing_images.py (new file, 44 lines)

@@ -0,0 +1,44 @@
from PIL import Image

from data.path_constant import DOWNLOAD_DIR


def get_missing_images():
    for first_level_path in DOWNLOAD_DIR.iterdir():
        if first_level_path.is_dir():
            for second_level_path in first_level_path.iterdir():
                if second_level_path.is_dir():
                    images = []
                    missing_images = []
                    for third_level_path in second_level_path.iterdir():
                        images.append(int(third_level_path.name.split('.')[0]))
                    if not images:
                        continue
                    max_index = max(images)
                    for i in range(2, max_index):
                        if i not in images:
                            missing_images.append(i)
                    if len(missing_images) > 0:
                        print(first_level_path.name)
                        print(second_level_path.name)
                        print(missing_images)


def resize_and_overwrite(input_path, target_width):
    # Open the original webp image.
    with Image.open(input_path) as img:
        width, height = img.size
        # Scale the height to keep the aspect ratio.
        target_height = int((target_width / width) * height)
        resized_img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
        # Overwrite the original file, saved as webp.
        resized_img.save(input_path, "WEBP")

# Usage example:
# resize_and_overwrite("input_image.webp", 720)  # target width of 720 px

main.py (new file, 99 lines)

@@ -0,0 +1,99 @@
import argparse

from converter.converter import WebtoonConverter
from data.kakao_cookie import COOKIE_NAME, COOKIES, TASK_TYPE, URL_TYPE
from data.path_constant import DOWNLOAD_DIR, DOWNLOAD_LIST_TXT
from data.special_list import KAKAO_1, KAKAO_3, KAKAO_7, KAKAO_ONLY_MAIN_ACCOUNT, KAKAO_PAY, WEBTOON_NOT_PROCESSED
from downloaders.bomtoon import Bomtoon
from downloaders.kakao_webtoon import KakaoWebtoon
from downloaders.webtoon_com import Webtoon
from prerequisite import get_download_list

DOWNLOAD_WEBTOON = True
CONVERT_ALL = False

valid_cookies = []
new_webtoons = []


def set_valid_cookie():
    for cookie in COOKIES:
        if cookie.name == COOKIE_NAME:
            print(cookie.name)
            valid_cookies.append(cookie)


def get_kakao_urls(inputs):
    result = []
    if '1' in inputs:
        result += KAKAO_1
    if '3' in inputs:
        result += KAKAO_3
    if '7' in inputs:
        result += KAKAO_7
    if 'm' in inputs:
        result += KAKAO_ONLY_MAIN_ACCOUNT
    if 'p' in inputs:
        result += KAKAO_PAY
    return result


def download():
    if len(valid_cookies) > 0:
        url_list = get_download_list(DOWNLOAD_LIST_TXT)
        for url in url_list:
            webtoon = None
            if 'tw.kakaowebtoon.com' in url:
                webtoon_id = url.split('/')[-1]
                for cookie in valid_cookies:
                    if webtoon_id in get_kakao_urls(URL_TYPE):
                        webtoon = KakaoWebtoon(webtoon_id, cookie)
                        webtoon.download_webtoon(url, DOWNLOAD_DIR)
            elif DOWNLOAD_WEBTOON and 'www.webtoons.com' in url:
                webtoon_id = url.split('=')[1]
                webtoon = Webtoon(webtoon_id)
                webtoon.download_webtoon(url, DOWNLOAD_DIR)
            elif 'www.bomtoon.tw' in url:
                webtoon_id = url.split('/')[-1]
                webtoon = Bomtoon(webtoon_id)
                webtoon.download_webtoon(url, DOWNLOAD_DIR)
            if webtoon is not None and webtoon.new_webtoon != "":
                new_webtoons.append(webtoon.new_webtoon)
        print(new_webtoons)


def convert():
    for webtoon_path in DOWNLOAD_DIR.iterdir():
        if len(new_webtoons) > 0:
            # Only convert webtoons that just received new episodes.
            if webtoon_path.is_dir() and webtoon_path.name in new_webtoons:
                print(webtoon_path)
                converter = WebtoonConverter(webtoon_path)
                converter.do_convert()
        elif webtoon_path.is_dir() and CONVERT_ALL and webtoon_path.name not in WEBTOON_NOT_PROCESSED:
            print(webtoon_path)
            converter = WebtoonConverter(webtoon_path)
            converter.do_convert()


def main():
    parser = argparse.ArgumentParser(description="Run download or convert")
    parser.add_argument('function', nargs='?', choices=['download', 'convert'], help="Function to run")
    args = parser.parse_args()
    if args.function == 'download':
        download()
    elif args.function == 'convert':
        convert()
    else:
        # No argument given: fall back to the TASK_TYPE configuration.
        if 'd' in TASK_TYPE:
            download()
        if 'c' in TASK_TYPE:
            convert()


if __name__ == "__main__":
    set_valid_cookie()
    main()
    print('MyWebtoon')
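
Typical invocations (MyWebtoon.bat forwards its arguments to main.py):

python main.py download   # only fetch new episodes
python main.py convert    # only convert for the viewer
python main.py            # no argument: follow TASK_TYPE from data/kakao_cookie.py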

prerequisite.py (new file, 62 lines)

@@ -0,0 +1,62 @@
import os
import shutil
from pathlib import Path

from data.path_constant import NETWORK_DIR


# input: DOWNLOAD_DIR or NETWORK_DIR
def delete_all_empty_episodes(path: Path):
    for first_level_path in path.iterdir():
        if first_level_path.is_dir():
            for second_level_path in first_level_path.iterdir():
                if second_level_path.is_dir() and not any(second_level_path.iterdir()):
                    print(f"Deleting directory: {second_level_path}")
                    shutil.rmtree(second_level_path)
                    print(f"Empty directory '{second_level_path}' deleted successfully.")


def delete_all_webtoons_without_episodes():
    for first_level_path in NETWORK_DIR.iterdir():
        if first_level_path.is_dir():
            contains_dir = any(item.is_dir() for item in first_level_path.iterdir())
            if not contains_dir:
                # No subdirectories, safe to delete.
                print(f"Deleting directory: {first_level_path}")
                shutil.rmtree(first_level_path)
                print(f"Directory '{first_level_path}' deleted successfully.")


def get_download_list(path: Path):
    url_list = []
    try:
        with open(path, 'r') as file:
            for url in file:
                if 'https://' in url:
                    url_list.append(url.strip())
    except FileNotFoundError:
        print(f"The file at {path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    return url_list


def rename_episodes(path: Path):
    for first_level_path in path.iterdir():
        if first_level_path.is_dir():
            for second_level_path in first_level_path.iterdir():
                if second_level_path.is_dir() and '. ' in second_level_path.name:
                    print(second_level_path)
                    new_name = second_level_path.name.replace(". ", ".")
                    new_path = first_level_path / new_name
                    os.rename(second_level_path, new_path)
                if second_level_path.is_dir() and '（' in second_level_path.name:
                    print(second_level_path)
                    # Normalize full-width parentheses to ASCII.
                    new_name = second_level_path.name.replace("（", " (")
                    new_name = new_name.replace("）", ")")
                    new_path = first_level_path / new_name
                    os.rename(second_level_path, new_path)


def get_episodes_with_wrong_name(path: Path):
    for first_level_path in path.iterdir():
        if first_level_path.is_dir():
            for second_level_path in first_level_path.iterdir():
                if second_level_path.is_dir() and len(second_level_path.name.split('.')) > 2:
                    print(second_level_path)
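
Since get_download_list keeps only lines that contain https://, download.txt can freely mix URLs with blank lines and notes. A hypothetical example of its contents (main.py takes the Kakao and Bomtoon id from the last path segment, and the webtoons.com id from the title_no parameter):

https://tw.kakaowebtoon.com/content/example-title/41
https://www.webtoons.com/en/action/jungle-juice/list?title_no=123
https://www.bomtoon.tw/comic/example_id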

rename.py (new file, 17 lines)

@@ -0,0 +1,17 @@
import os

from data.path_constant import DOWNLOAD_DIR, NETWORK_DIR
from prerequisite import rename_episodes

rename_episodes(DOWNLOAD_DIR)
rename_episodes(NETWORK_DIR)

# Strip the "話." prefix from the episode directory names of 怪力亂神.
for first_level_path in NETWORK_DIR.iterdir():
    if first_level_path.name == '怪力亂神':
        for second_level_path in first_level_path.iterdir():
            if "話." in second_level_path.name:
                episode_name = second_level_path.name.replace("話.", "")
                new_path = first_level_path / episode_name
                print(second_level_path)
                print(new_path)
                os.rename(second_level_path, new_path)

rename_drawable.py (new file, 15 lines)

@@ -0,0 +1,15 @@
import os
from pathlib import Path

res = Path('E:/') / 'Projects' / 'AndroidStudioProjects' / 'WebtoonViewer' / 'app' / 'src' / 'main' / 'res'

for folder_path in res.iterdir():
    if "drawable" in folder_path.name and folder_path.is_dir():
        for file_path in folder_path.iterdir():
            if "menu-dots-vertical" in file_path.name:
                new_path = folder_path / ("menu_" + file_path.name.split("_")[1])
                # if new_path.exists():
                #     os.remove(new_path)
                os.rename(file_path, new_path)