Files
my-webtoon/downloaders/kakao_webtoon.py
2025-05-11 18:58:43 +02:00

149 lines
5.8 KiB
Python

from pathlib import Path
import random
import sys
import time
from bs4 import BeautifulSoup
from httpx import AsyncClient, RequestError, HTTPStatusError
import httpx
import requests
from data.path_constant import DOWNLOAD_DIR, DOWNLOAD_LIST_TXT
from data.kakao_cookie import Cookie
from data.kakao_request import KakaoRequest
from data.special_list import KAKAO_TO_TW
from downloaders.decrypt import Decrypt
from downloaders.downloader import Downloader
class KakaoWebtoon(Downloader):
    """Downloader implementation for titles served by Kakao Webtoon.

    Every instance creates its own millisecond timestamp and a 10-character
    nonce. Both are handed to ``KakaoRequest`` (to build headers and
    payloads) and later to ``Decrypt`` (to derive the image key and IV).
    """

    def __init__(self, webtoon_id: int, cookie: Cookie):
        """Prepare request headers for ``webtoon_id`` using ``cookie``.

        Args:
            webtoon_id: Identifier of the title; forwarded to ``Downloader``.
            cookie: Account cookie; its ``ant`` value is used to build the
                episode and POST headers.
        """
        super().__init__(webtoon_id)
        self._timestamp = int(time.time() * 1000)
        # Nonce alphabet: ASCII digits followed by lowercase letters.
        alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
        self._nonce = "".join(random.choices(alphabet, k=10))
        self.kakaoRequest = KakaoRequest(self._timestamp, self._nonce)
        self.cookie = cookie
        self.episode_headers = self.kakaoRequest.get_episode_headers(self.cookie.ant)
        self.post_headers = self.kakaoRequest.get_post_headers(self.cookie.ant)

    def verify_cookie(self) -> bool:
        """Return True when the episode-list endpoint answers 200 for this cookie."""
        url = (
            "https://gateway.webtoon.kakao.com/episode/v2/views/content-home/"
            f"contents/{self.webtoon_id}/episodes?sort=-NO&offset=0&limit=30"
        )
        res = requests.get(url, headers=self.episode_headers)
        return res.status_code == 200

    def _fetch_information(self, url):
        """Scrape title metadata from the HTML page at ``url``.

        On a 200 response this sets ``description``, ``thumbnail_url`` (when
        an ``og:image`` meta tag exists), ``title``, ``author``, ``tag`` and
        ``thumbnail_name``. Any other status leaves the instance untouched.

        Raises:
            IndexError: If the page has fewer ``<p>`` elements than needed.
        """
        res = requests.get(url, headers=self.episode_headers)
        if res.status_code != 200:
            return

        soup = BeautifulSoup(res.content, 'html.parser')

        # Use the meta description when present and fall back to an empty
        # string otherwise, instead of unconditionally discarding it.
        description = soup.find('meta', attrs={'name': 'description'})
        self.description = description.get('content', "") if description else ""

        thumbnail = soup.find('meta', attrs={'property': 'og:image'})
        if thumbnail:
            self.thumbnail_url = thumbnail.get('content')

        paragraphs = soup.find_all('p')
        title = paragraphs[0].get_text()
        if title in KAKAO_TO_TW:
            # Titles with a KAKAO_TO_TW mapping take the mapped name and
            # deliberately carry no author/tag.
            self.title = KAKAO_TO_TW[title]
            self.author = ""
            self.tag = ""
        else:
            self.title = title
            self.author = paragraphs[1].get_text()
            self.tag = paragraphs[2].get_text()

        # f-string formatting works whether webtoon_id is an int or a str;
        # plain "+" concatenation raises TypeError for an int id.
        extension = self.thumbnail_url.split('.')[-1]
        self.thumbnail_name = f"{self.webtoon_id}.{extension}"

    def _fetch_episode_information(self):
        """Page through the episode list and cache per-episode metadata.

        Sets ``episode_ids``, ``seo_ids``, ``episode_titles`` and
        ``readablities_index_list`` (indexes of episodes flagged readable),
        all in the reverse of the order the API returned them.

        Raises:
            SystemExit: With status 1 when any page request is not a 200.
        """
        offset = 0
        limit = 30
        is_last = False
        episodes = []
        while not is_last:
            url = (
                "https://gateway-kw.kakao.com/episode/v2/views/content-home/"
                f"contents/{self.webtoon_id}/episodes?sort=-NO&offset={offset}&limit={limit}"
            )
            res = requests.get(url, headers=self.episode_headers)
            if res.status_code != 200:
                print("_fetch_episode_information")
                print(self.cookie.name)
                print(res.status_code)
                # Exit with a failure status; a bare sys.exit() reports success.
                sys.exit(1)
            json_data = res.json()
            episodes += json_data["data"]["episodes"]
            offset += limit
            is_last = json_data["meta"]["pagination"]["last"]

        # The request sorts by "-NO" (presumably newest first); reversing
        # puts the collected episodes in the opposite order.
        ordered = episodes[::-1]
        self.episode_ids = [episode["id"] for episode in ordered]
        self.seo_ids = [episode["seoId"] for episode in ordered]
        self.episode_titles = [episode["title"] for episode in ordered]
        self.readablities_index_list = [
            index for index, episode in enumerate(ordered) if episode["readable"]
        ]

    def _get_episode_image_urls(self, episode_index) -> list[tuple[str, bytes, bytes]] | None:
        """Resolve the image URLs and decryption material for one episode.

        Stores a fresh ``Decrypt`` instance on ``self.decrypt``; that object
        is what ``_download_image`` later uses to decrypt image bytes.

        Args:
            episode_index: Index into ``self.episode_ids``.

        Returns:
            One ``(url, key, iv)`` tuple per file listed in the response.
            Failures are not converted to ``None`` here; a malformed or
            error response surfaces as the exception raised while parsing.
        """
        episode_id = self.episode_ids[episode_index]
        url = f"https://gateway-kw.kakao.com/episode/v1/views/viewer/episodes/{episode_id}/media-resources"
        payload = self.kakaoRequest.get_payload(episode_id)
        res = requests.post(url, headers=self.post_headers, json=payload)
        media = res.json()["data"]["media"]
        self.decrypt = Decrypt(
            media["aid"],
            episode_id,
            self._timestamp,
            self._nonce,
            self.cookie.userID,
            media["zid"],
        )
        key, iv = self.decrypt.get_decrypt_infomations()
        return [(file_info["url"], key, iv) for file_info in media["files"]]

    async def _download_image(
        self,
        episode_path: Path,
        url: tuple[str, bytes, bytes],
        image_no: int
    ) -> None:
        """Download, decrypt and store one image as ``NNN.webp``.

        Relies on ``self.decrypt`` having been set by
        ``_get_episode_image_urls``. When the download fails, the error is
        printed and the image is skipped; nothing is written.

        Args:
            episode_path: Directory the image file is written into.
            url: ``(real_url, key, iv)`` as produced by
                ``_get_episode_image_urls``.
            image_no: Number used for the zero-padded file name.
        """
        real_url, key, iv = url
        file_extension = 'webp'
        file_path = episode_path / f"{image_no:03d}.{file_extension}"

        try:
            response = await self.client.get(real_url, headers=self.episode_headers)
            # Without this call a 4xx/5xx body would be decrypted and saved.
            response.raise_for_status()
        except httpx.HTTPStatusError as e:
            print(f"HTTP error occurred: {e}")
            return
        # TimeoutException and UnsupportedProtocol derive from RequestError,
        # so they must be listed before it to be reachable.
        except httpx.TimeoutException as e:
            print(f"Timeout error occurred: {e}")
            return
        except httpx.UnsupportedProtocol as e:
            print(f"Unsupported protocol error occurred: {e}")
            return
        except httpx.RequestError as e:
            # Log only the URL; the tuple also carries the key and IV.
            print(f"An error occurred while requesting {real_url}: {e}")
            return
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return

        decrypted_data = self.decrypt._decrypt(response.content, key, iv)
        file_path.write_bytes(decrypted_data)

    async def close(self):
        """Close the async HTTP client held in ``self.client``."""
        # NOTE(review): self.client is not created in this class; presumably
        # the Downloader base provides it - confirm.
        await self.client.aclose()