from pathlib import Path
import random
import sys
import time

from bs4 import BeautifulSoup
from httpx import AsyncClient, RequestError, HTTPStatusError
import httpx
import requests

from data.path_constant import DOWNLOAD_DIR, DOWNLOAD_LIST_TXT
from data.kakao_cookie import Cookie
from data.kakao_request import KakaoRequest
from downloaders.decrypt import Decrypt
from downloaders.downloader import Downloader


class KakaoWebtoon(Downloader):
    """Downloader for webtoons served by the Kakao Webtoon (tw) gateway.

    Episode metadata is fetched synchronously with ``requests``; image
    payloads are fetched through ``self.client`` and decrypted with
    :class:`Decrypt` before being written to disk.

    NOTE(review): ``self.client`` is not created in this class; it is
    presumably provided by ``Downloader`` -- confirm against the base class.
    """

    def __init__(self, webtoon_id: int, cookie: Cookie):
        """Prepare request headers for ``webtoon_id`` using ``cookie``.

        Args:
            webtoon_id: Identifier of the webtoon, forwarded to ``Downloader``.
            cookie: Cookie object; its ``ant`` attribute is used to build the
                request headers.
        """
        super().__init__(webtoon_id)
        # Millisecond timestamp shared by the request builder and decryption.
        self._timestamp = int(time.time() * 1000)
        # Nonce alphabet: ASCII digits (0x30-0x39) and lowercase letters
        # (0x61-0x7A); ten characters are drawn with replacement.
        chars = [*range(0x30, 0x3A), *range(0x61, 0x7B)]
        self._nonce = "".join(chr(i) for i in random.choices(chars, k=10))

        self.kakaoRequest = KakaoRequest(self._timestamp, self._nonce)
        self.cookie = cookie
        self.episode_headers = self.kakaoRequest.get_episode_headers(self.cookie.ant)
        self.post_headers = self.kakaoRequest.get_post_headers(self.cookie.ant)

    def verify_cookie(self) -> bool:
        """Return True when the episode-list endpoint answers with HTTP 200."""
        url = f"https://gateway.tw.kakaowebtoon.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset=0&limit=30"
        res = requests.get(url, headers=self.episode_headers)
        return res.status_code == 200

    def _fetch_information(self, url) -> None:
        """Scrape title, author, tag, description and thumbnail from ``url``.

        Nothing is set when the response status is not 200. ``description``
        and ``thumbnail_url`` are only set when the matching ``<meta>`` tag
        exists; title, author and tag come from the first three ``<p>``
        elements of the page.
        """
        res = requests.get(url, headers=self.episode_headers)
        if res.status_code != 200:
            return

        soup = BeautifulSoup(res.content, 'html.parser')

        description = soup.find('meta', attrs={'name': 'description'})
        if description:
            self.description = description.get('content')

        thumbnail_url = soup.find('meta', attrs={'property': 'og:image'})
        if thumbnail_url:
            self.thumbnail_url = thumbnail_url.get('content')

        all_p = soup.find_all('p')
        self.title = all_p[0].get_text()
        self.author = all_p[1].get_text()
        self.tag = all_p[2].get_text()

        # webtoon_id is annotated as int, so it is formatted rather than
        # concatenated with "+" (int + str raises TypeError).
        extension = self.thumbnail_url.split('.')[-1]
        self.thumbnail_name = f"{self.webtoon_id}.{extension}"

    def _fetch_episode_information(self) -> None:
        """Page through the episode list and store per-episode metadata.

        Sets ``episode_ids``, ``seo_ids``, ``episode_titles`` and
        ``readablities_index_list`` (indices of episodes flagged readable).
        The pages are requested with ``sort=-NO`` and the combined list is
        reversed before being stored. On a non-200 response, diagnostics are
        printed and the process exits via ``sys.exit()``.
        """
        offset = 0
        limit = 30
        is_last: bool = False
        webtoon_episodes_data = []

        while not is_last:
            url = f"https://gateway.tw.kakaowebtoon.com/episode/v2/views/content-home/contents/{self.webtoon_id}/episodes?sort=-NO&offset={offset}&limit={limit}"
            res = requests.get(url, headers=self.episode_headers)
            if res.status_code == 200:
                json_data = res.json()
                webtoon_episodes_data += json_data["data"]["episodes"]
                offset += limit
                is_last = json_data["meta"]["pagination"]["last"]
            else:
                print("_fetch_episode_information")
                print(self.cookie.name)
                print(res.status_code)
                sys.exit()

        # Reverse once so every derived list shares the same ordering.
        episodes = webtoon_episodes_data[::-1]

        self.episode_ids = [episode["id"] for episode in episodes]
        self.seo_ids = [episode["seoId"] for episode in episodes]
        self.episode_titles = [episode["title"] for episode in episodes]
        self.readablities_index_list = [
            index for index, episode in enumerate(episodes) if episode["readable"]
        ]

    def _get_episode_image_urls(self, episode_index) -> list[tuple[str, bytes, bytes]] | None:
        """Return ``(url, key, iv)`` for every image file of one episode.

        Also stores the :class:`Decrypt` instance built for this episode on
        ``self.decrypt``.

        NOTE(review): the annotation allows ``None`` but this implementation
        never returns it; a failed request surfaces as an exception from
        ``res.json()`` or the key lookups.
        """
        episode_id = self.episode_ids[episode_index]
        url = f"https://gateway.tw.kakaowebtoon.com/episode/v1/views/viewer/episodes/{episode_id}/media-resources"
        payload = self.kakaoRequest.get_payload(episode_id)

        res = requests.post(url, headers=self.post_headers, json=payload)
        data = res.json()["data"]
        media = data["media"]

        self.decrypt = Decrypt(
            media["aid"],
            episode_id,
            self._timestamp,
            self._nonce,
            self.cookie.userID,
            media["zid"],
        )
        key, iv = self.decrypt.get_decrypt_infomations()
        return [(image_file["url"], key, iv) for image_file in media["files"]]

    async def _download_image(
        self,
        episode_path: Path,
        url: tuple[str, bytes, bytes],
        image_no: int
    ) -> None:
        """Download, decrypt and save one image as ``NNN.webp``.

        Args:
            episode_path: Directory the image is written into.
            url: ``(real_url, key, iv)`` tuple as produced by
                ``_get_episode_image_urls``.
            image_no: Image number, zero-padded to three digits in the
                file name.

        If the download fails, the error is printed and nothing is written.
        """
        real_url, key, iv = url
        file_extension = 'webp'
        file_name = f"{image_no:03d}.{file_extension}"
        file_path = episode_path / file_name

        # TimeoutException and UnsupportedProtocol are subclasses of
        # RequestError, so they must be listed before it to be reachable.
        try:
            response = await self.client.get(real_url, headers=self.episode_headers)
            image_raw: bytes = response.content
        except httpx.TimeoutException as e:
            print(f"Timeout error occurred: {e}")
            return
        except httpx.UnsupportedProtocol as e:
            print(f"Unsupported protocol error occurred: {e}")
            return
        except httpx.RequestError as e:
            # Log only the URL; the tuple also carries the key and IV.
            print(f"An error occurred while requesting {real_url}: {e}")
            return
        except httpx.HTTPStatusError as e:
            print(f"HTTP error occurred: {e}")
            return
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
            return

        decrypted_data = self.decrypt._decrypt(image_raw, key, iv)
        file_path.write_bytes(decrypted_data)

    async def close(self):
        """Close the async HTTP client."""
        await self.client.aclose()