自动下载toptoon指定漫画页面并转换为PDF文件

以下为脚本内容:

#!/bin/bash
# Find comic episodes that have not been downloaded yet and queue their URLs.
#
# Steps:
#   1. Read https://toptoon.com/weekly and extract the JSON index URLs it
#      references (the "remakeComic" index is excluded).
#   2. For every entry under ".adult" in each index, take the comic id, the
#      id of its latest episode and its episode-list URL.
#   3. If no PDF for that episode exists under Image_Path, append the episode
#      viewer URL to the per-day queue file /tmp/YYYY_MM_DD.
# Requires: curl, sed, jq, grep.
Web_Path='/data/nginx/wwwroot/520ym_dh/TopToon'
Image_Path="${Web_Path}/Image"
# Resolve the queue file name once, so a run that crosses midnight does not
# spread its entries over two different files.
Queue_File="/tmp/$(date "+%Y_%m_%d")"
TopToon_Weekly=$(curl -s 'https://toptoon.com/weekly' \
	| sed -n "s/^v.*jsonFileUrl.*=[[:space:]]'\(.*\)';.*/\1/p" \
	| jq -r '.[]' \
	| grep -v 'remakeComic')
# Word splitting is intentional here: one index URL per line.
for List_Name in $TopToon_Weekly; do
	Index_Code=$(curl -s "$List_Name" | jq -r ".adult|.[]|{id:.id,lastupdated:.lastUpdated.episodeId,updated:.lastUpdated.publishedAt,urld:.meta.comicsListUrl}")
	# Three line-aligned lists: line N of each one belongs to the same comic.
	Cartoon_Name_Id=$(echo "$Index_Code" | jq -r .id)
	Cartoon_LastUpdateD=$(echo "$Index_Code" | jq -r .lastupdated)
	Cartoon_Url=$(echo "$Index_Code" | jq -r .urld)
	Num=0

	#echo "${Index_Code}"

	for Name_Id in $Cartoon_Name_Id; do
		Num=$((Num + 1))
		Cartoon_LastUpdateD_Id=$(echo "${Cartoon_LastUpdateD}" | sed -n "${Num}p")
		# Turn the episode-list path into the episode-viewer path; the result is
		# empty when the path does not contain "ep_list".
		Cartoon_UrlD=$(echo "${Cartoon_Url}" | sed -n "${Num}p" | sed -n 's/ep_list/ep_view/p')

		# 'e0' is skipped exactly as before (presumably "no episode yet" - TODO
		# confirm). Empty and "null" (what jq -r prints for a missing field) are
		# skipped too, because they cannot form a valid episode URL.
		case "${Cartoon_LastUpdateD_Id}" in
			'' | null | e0) continue ;;
		esac
		# Without a viewer path the URL built below would be meaningless.
		if [ -z "${Cartoon_UrlD}" ]; then
			continue
		fi
		# Already converted: nothing to do.
		if [ -f "${Image_Path}/${Name_Id}/${Name_Id}_${Cartoon_LastUpdateD_Id}.pdf" ]; then
			continue
		fi

		Episode_Url="https://toptoon.com${Cartoon_UrlD}/${Cartoon_LastUpdateD_Id}"
		# Queue each URL only once per day, so reruns do not fetch it repeatedly.
		if ! grep -qxF -- "${Episode_Url}" "${Queue_File}" 2>/dev/null; then
			printf '%s\n' "${Episode_Url}" >> "${Queue_File}"
		fi
	done
done
# Fill in the cookie value here, WITHOUT the leading "Cookie:" header name.
Cookie=''

# Fetch every queued episode page with the cookie above and store the raw
# response in /tmp/<6th URL field>_<7th URL field>.txt (fields split on "/").
# Page_List is left set for the code that follows.
Queue_File="/tmp/$(date "+%Y_%m_%d")"
Page_List=''
# The queue file only exists when something new was found today.
if [ -f "${Queue_File}" ]; then
	Page_List=$(cat "${Queue_File}")
fi
# Word splitting is intentional here: one URL per line.
for List_Url in $Page_List; do
	Url_Name_Id=$(echo "$List_Url" | awk -F '/' '{print "/tmp/"$6"_"$7".txt"}')
	Page_Code_D=$(curl -s -H "cookie: ${Cookie}" "$List_Url")
	# The site answers with this exact body when the account is banned; stop
	# fetching immediately in that case.
	if [ "${Page_Code_D}" = 'ERROR:505' ]; then
		echo "账号已被封禁!$Url_Name_Id"
		break
	fi
	printf '%s\n' "${Page_Code_D}" > "${Url_Name_Id}"
done
# Turn every fetched episode page into a PDF:
#   /tmp/<name>_<episode>.txt  ->  ${Image_Path}/<name>/<name>_<episode>.pdf
# Images are downloaded to ${Image_Path}/<name>/<episode>/ and then merged
# with ImageMagick's `convert`.
# Reads: Page_List (newline-separated episode URLs), Image_Path.
# Requires: sed, awk, wget, convert.

#######################################
# Print the image URLs found in an episode page, one per line.
# Only lines mentioning "c_img" are considered. The URL is expected to be an
# attribute value followed by whitespace and a "c"; the double-quoted form is
# tried first, then the single-quoted one.
# HTML-escaped "&amp;" is decoded to "&" so query strings stay valid.
# Arguments: $1 - raw HTML of the episode page
# Outputs:   the URLs on stdout (nothing when none are found)
#######################################
Extract_Image_Urls() {
	local Html=$1
	local Urls
	Urls=$(printf '%s\n' "${Html}" | sed -n -e 's/&amp;/\&/g' -e 's/.*c_img.*\(http.*\)"[[:space:]]c.*/\1/p')
	if [ -z "${Urls}" ]; then
		Urls=$(printf '%s\n' "${Html}" | sed -n -e 's/&amp;/\&/g' -e "s/.*c_img.*\(http.*\)'[[:space:]]c.*/\1/p")
	fi
	if [ -n "${Urls}" ]; then
		printf '%s\n' "${Urls}"
	fi
}

# An empty list must stay empty: feeding awk a blank line would yield "_".
Page_List_File=''
if [ -n "${Page_List:-}" ]; then
	Page_List_File=$(echo "$Page_List" | awk -F '/' '{print $6"_"$7}')
fi
for Page_File_Name in ${Page_List_File}; do
	# The page may be missing, e.g. when fetching stopped early.
	if [ ! -f "/tmp/${Page_File_Name}.txt" ]; then
		echo "skip ${Page_File_Name}: /tmp/${Page_File_Name}.txt not found" >&2
		continue
	fi
	Page_Code=$(cat "/tmp/${Page_File_Name}.txt")
	# Comic index embedded in the page script ("comicIdx = ...;").
	Page_ComicIdx=$(echo "${Page_Code}" | sed -n 's/.*comicIdx[[:space:]]=[[:space:]]\(.*\);/\1/p')
	# Split "<name>_<number>" into its two parts.
	Page_Name=$(echo "${Page_File_Name}" | sed -n 's/\(.*\)_\([0-9]\{1,5\}\)/\1/p')
	Num=$(echo "${Page_File_Name}" | sed -n 's/\(.*\)_\([0-9]\{1,5\}\)/\2/p')
	# Without both parts every path below would collapse onto Image_Path itself.
	if [ -z "${Page_Name}" ] || [ -z "${Num}" ]; then
		echo "skip ${Page_File_Name}: cannot split into name and number" >&2
		continue
	fi
	mkdir -p "${Image_Path}/${Page_Name}/${Num}"

	Pdf_Path="${Image_Path}/${Page_Name}/${Page_Name}_${Num}.pdf"
	if [ -f "${Pdf_Path}" ]; then
		continue
	fi

	# Collect the image URLs of this episode.
	Image_Down_Url=$(Extract_Image_Urls "${Page_Code}")
	if [ -z "${Image_Down_Url}" ]; then
		echo "skip ${Page_File_Name}: no image URLs found in page" >&2
		continue
	fi

	Nums=1
	# Array instead of a space-joined string, so paths survive intact.
	Image_All_Name=()
	Download_Failed=0
	for Image_Name in ${Image_Down_Url}; do
		# File name = last path component, query string removed.
		Image_Name_De=$(echo "${Image_Name}" | sed -n 's/.*\/\(.*\)?.*/\1/p')
		if [ -z "${Image_Name_De}" ]; then
			# URL without a query string: use the last path component as is.
			Image_Name_De=${Image_Name##*/}
		fi
		if [ "${Image_Name_De}" = "ep_content" ]; then
			# Generic endpoint name: synthesize a .jpg name from the comic index
			# and the current epoch time.
			TempTime=$(date +%s)
			TempTime_De=${TempTime:0:5}
			Image_Name_De="${Page_ComicIdx}_${TempTime}.${TempTime_De}.jpg"
		fi
		Image_Down_Path="${Image_Path}/${Page_Name}/${Num}/${Nums}_${Image_Name_De}"
		# wget -O creates the file even on failure, so check for content as well.
		if ! wget --no-check-certificate -t 3 -nv -O "${Image_Down_Path}" "${Image_Name}" \
			|| [ ! -s "${Image_Down_Path}" ]; then
			Download_Failed=1
			break
		fi
		Image_All_Name+=("${Image_Down_Path}")
		Nums=$((Nums + 1))
	done

	# The PDF marks an episode as done, so never write an incomplete one; the
	# episode is picked up again on a later run.
	if [ "${Download_Failed}" -ne 0 ]; then
		echo "skip ${Page_File_Name}: image download failed" >&2
		continue
	fi
	convert "${Image_All_Name[@]}" "${Pdf_Path}"
done

脚本执行方式为: bash 脚本名称 image 用户名 密码 漫画地址
(注意: 上面给出的脚本并不读取这些命令行参数; 运行前请先在脚本中的 Cookie 变量里填写登录后的 Cookie。)


该文章采用「CC 协议」,转载必须注明作者和本文链接.
分类: Linux