385 lines
17 KiB
Python
385 lines
17 KiB
Python
import pickle
|
||
import traceback
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
from datetime import datetime
|
||
from typing import Dict
|
||
from typing import List, Optional
|
||
|
||
from app.chain import ChainBase
|
||
from app.core.context import Context
|
||
from app.core.context import MediaInfo, TorrentInfo
|
||
from app.core.event import eventmanager, Event
|
||
from app.core.metainfo import MetaInfo
|
||
from app.db.systemconfig_oper import SystemConfigOper
|
||
from app.helper.progress import ProgressHelper
|
||
from app.helper.sites import SitesHelper
|
||
from app.helper.torrent import TorrentHelper
|
||
from app.log import logger
|
||
from app.schemas import NotExistMediaInfo
|
||
from app.schemas.types import MediaType, ProgressKey, SystemConfigKey, EventType
|
||
|
||
|
||
class SearchChain(ChainBase):
    """
    Site resource search chain.

    Searches the configured indexer sites for torrents, matches the results
    against recognised media information, applies the priority and filter
    rules and returns the surviving resources as sorted contexts.
    """

    def __init__(self):
        super().__init__()
        self.siteshelper = SitesHelper()
        self.progress = ProgressHelper()
        self.systemconfig = SystemConfigOper()
        self.torrenthelper = TorrentHelper()

    def search_by_id(self, tmdbid: Optional[int] = None, doubanid: Optional[str] = None,
                     mtype: Optional[MediaType] = None, area: str = "title",
                     season: Optional[int] = None) -> List[Context]:
        """
        Search resources by TMDB ID / Douban ID with exact matching.
        Resources that already exist locally are NOT filtered out.

        :param tmdbid: TMDB ID
        :param doubanid: Douban ID
        :param mtype: media type, movie or TV series
        :param area: search scope, "title" or "imdbid"
        :param season: season number; when given, only that season is searched
        :return: matched contexts, or an empty list when recognition fails
        """
        mediainfo = self.recognize_media(tmdbid=tmdbid, doubanid=doubanid, mtype=mtype)
        if not mediainfo:
            # Log whichever ID was actually supplied (tmdbid is None for Douban-only lookups)
            logger.error(f'{tmdbid or doubanid} 媒体信息识别失败!')
            return []
        no_exists = None
        if season:
            # An empty episode list for the requested season restricts the search to it
            no_exists = {
                tmdbid or doubanid: {
                    season: NotExistMediaInfo(episodes=[])
                }
            }
        results = self.process(mediainfo=mediainfo, area=area, no_exists=no_exists)
        # Persist the results so that last_search_results() can return them later
        bytes_results = pickle.dumps(results)
        self.systemconfig.set(SystemConfigKey.SearchResults, bytes_results)
        return results

    def search_by_title(self, title: str, page: int = 0, site: Optional[int] = None) -> List[TorrentInfo]:
        """
        Search resources by title without recognition or filtering; the site
        results are returned as-is.

        :param title: title; when empty, the sites are browsed instead of searched
        :param page: page number
        :param site: site ID; when empty, the configured sites are used
        :return: torrents returned by the sites (empty list when there are none)
        """
        if title:
            logger.info(f'开始搜索资源,关键词:{title} ...')
        else:
            logger.info(f'开始浏览资源,站点:{site} ...')
        # Search
        return self.__search_all_sites(keywords=[title], sites=[site] if site else None, page=page) or []

    def last_search_results(self) -> List[Context]:
        """
        Return the results stored by the last search_by_id() call.

        :return: the stored contexts, or an empty list when nothing is stored
                 or the stored data cannot be loaded
        """
        results = self.systemconfig.get(SystemConfigKey.SearchResults)
        if not results:
            return []
        try:
            # NOTE(review): pickle.loads can execute arbitrary code. This is only
            # safe while the stored value is exclusively written by search_by_id();
            # confirm nothing else can write SystemConfigKey.SearchResults.
            return pickle.loads(results)
        except Exception as e:
            logger.error(f'加载搜索结果失败:{str(e)} - {traceback.format_exc()}')
            return []

    def process(self, mediainfo: MediaInfo,
                keyword: Optional[str] = None,
                no_exists: Optional[Dict[int, Dict[int, NotExistMediaInfo]]] = None,
                sites: Optional[List[int]] = None,
                priority_rule: Optional[str] = None,
                filter_rule: Optional[Dict[str, str]] = None,
                area: str = "title") -> List[Context]:
        """
        Search torrents for the given media with exact matching, apply the
        priority and filter rules, and restrict the result to the seasons /
        episodes listed in no_exists.

        :param mediainfo: media information
        :param keyword: search keyword; when empty, the media titles are used
        :param no_exists: missing media information, keyed by TMDB ID (or Douban ID
                          when the media has no TMDB ID), then by season
        :param sites: site ID list; when empty, all configured sites are searched
        :param priority_rule: priority rule; when None, the search priority rule is used
        :param filter_rule: filter rule; when empty, the default filter rule is used
        :param area: search scope, "title" or "imdbid"
        :return: sorted contexts of the resources that passed matching and filtering
        """
        # Douban title handling: split the season out of the title
        if not mediainfo.tmdb_id:
            meta = MetaInfo(title=mediainfo.title)
            mediainfo.title = meta.name
            mediainfo.season = meta.begin_season
        logger.info(f'开始搜索资源,关键词:{keyword or mediainfo.title} ...')
        # Complete the media information when the aliases are missing
        if not mediainfo.names:
            mediainfo = self.recognize_media(mtype=mediainfo.type,
                                             tmdbid=mediainfo.tmdb_id,
                                             doubanid=mediainfo.douban_id)
            if not mediainfo:
                logger.error(f'媒体信息识别失败!')
                return []
        # Missing seasons / episodes
        mediakey = mediainfo.tmdb_id or mediainfo.douban_id
        if no_exists and no_exists.get(mediakey):
            # Filter episodes. Index with the same key that was just checked:
            # using tmdb_id here raised KeyError for Douban-only media.
            season_episodes = {sea: info.episodes
                               for sea, info in no_exists[mediakey].items()}
        elif mediainfo.season:
            # Douban: only search the current season
            season_episodes = {mediainfo.season: []}
        else:
            season_episodes = None
        # Search keywords
        if keyword:
            keywords = [keyword]
        else:
            # Drop empty values and duplicates while keeping the order
            candidates = (mediainfo.title,
                          mediainfo.original_title,
                          mediainfo.en_title,
                          mediainfo.sg_title)
            keywords = list(dict.fromkeys(k for k in candidates if k))

        # Run the search
        torrents: List[TorrentInfo] = self.__search_all_sites(
            mediainfo=mediainfo,
            keywords=keywords,
            sites=sites,
            area=area
        )
        if not torrents:
            logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
            return []
        # Start a new progress
        self.progress.start(ProgressKey.Search)
        # Matched resources
        _match_torrents = []
        # Total number of resources
        _total = len(torrents)
        if mediainfo:
            # The English title should already be among the aliases / original title,
            # so it does not need to be matched separately
            logger.info(f"开始匹配结果 标题:{mediainfo.title},原标题:{mediainfo.original_title},别名:{mediainfo.names}")
            self.progress.update(value=0, text=f'开始匹配,总 {_total} 个资源 ...', key=ProgressKey.Search)
            for _count, torrent in enumerate(torrents, start=1):
                self.progress.update(value=(_count / _total) * 96,
                                     text=f'正在匹配 {torrent.site_name},已完成 {_count} / {_total} ...',
                                     key=ProgressKey.Search)
                if not torrent.title:
                    continue
                # Compare the IMDB ID
                if torrent.imdbid \
                        and mediainfo.imdb_id \
                        and torrent.imdbid == mediainfo.imdb_id:
                    logger.info(f'{mediainfo.title} 通过IMDBID匹配到资源:{torrent.site_name} - {torrent.title}')
                    _match_torrents.append(torrent)
                    continue
                # Recognise the torrent name
                torrent_meta = MetaInfo(title=torrent.title, subtitle=torrent.description)
                if torrent.title != torrent_meta.org_string:
                    logger.info(f"种子名称应用识别词后发生改变:{torrent.title} => {torrent_meta.org_string}")
                # Compare the TMDB / Douban ID specified by the recognition words
                if torrent_meta.tmdbid or torrent_meta.doubanid:
                    if torrent_meta.tmdbid and torrent_meta.tmdbid == mediainfo.tmdb_id:
                        logger.info(f'{mediainfo.title} 通过词表指定TMDBID匹配到资源:{torrent.site_name} - {torrent.title}')
                        _match_torrents.append(torrent)
                        continue
                    if torrent_meta.doubanid and torrent_meta.doubanid == mediainfo.douban_id:
                        logger.info(f'{mediainfo.title} 通过词表指定豆瓣ID匹配到资源:{torrent.site_name} - {torrent.title}')
                        _match_torrents.append(torrent)
                        continue

                # Compare the torrent against the media information
                if self.torrenthelper.match_torrent(mediainfo=mediainfo,
                                                    torrent_meta=torrent_meta,
                                                    torrent=torrent):
                    # Matched
                    _match_torrents.append(torrent)
                    continue
            # Matching finished
            logger.info(f"匹配完成,共匹配到 {len(_match_torrents)} 个资源")
            self.progress.update(value=97,
                                 text=f'匹配完成,共匹配到 {len(_match_torrents)} 个资源',
                                 key=ProgressKey.Search)
        else:
            _match_torrents = torrents
        # Start filtering
        self.progress.update(value=98, text=f'开始过滤,总 {len(_match_torrents)} 个资源,请稍候...',
                             key=ProgressKey.Search)
        # Priority filtering
        if priority_rule is None:
            # Fall back to the configured search priority rule
            priority_rule = self.systemconfig.get(SystemConfigKey.SearchFilterRules)
        if priority_rule:
            logger.info(f'开始优先级规则/剧集过滤,当前规则:{priority_rule} ...')
            result: List[TorrentInfo] = self.filter_torrents(rule_string=priority_rule,
                                                             torrent_list=_match_torrents,
                                                             season_episodes=season_episodes,
                                                             mediainfo=mediainfo)
            if result is not None:
                _match_torrents = result
            if not _match_torrents:
                # NOTE(review): this returns without calling self.progress.end();
                # confirm that leaving the progress open here is intended.
                logger.warn(f'{keyword or mediainfo.title} 没有符合优先级规则的资源')
                return []
        # Filter again with the filter rule
        if _match_torrents:
            logger.info(f'开始过滤规则过滤,当前规则:{filter_rule} ...')
            _match_torrents = self.filter_torrents_by_rule(torrents=_match_torrents,
                                                           mediainfo=mediainfo,
                                                           filter_rule=filter_rule)
            if not _match_torrents:
                logger.warn(f'{keyword or mediainfo.title} 没有符合过滤规则的资源')
                return []
        # Drop the superfluous data from mediainfo
        mediainfo.clear()
        # Assemble the contexts
        contexts = [Context(meta_info=MetaInfo(title=torrent.title, subtitle=torrent.description),
                            media_info=mediainfo,
                            torrent_info=torrent) for torrent in _match_torrents]

        logger.info(f"过滤完成,剩余 {len(contexts)} 个资源")
        self.progress.update(value=99, text=f'过滤完成,剩余 {len(contexts)} 个资源', key=ProgressKey.Search)
        # Sort
        self.progress.update(value=100,
                             text=f'正在对 {len(contexts)} 个资源进行排序,请稍候...',
                             key=ProgressKey.Search)
        contexts = self.torrenthelper.sort_torrents(contexts)
        # End the progress
        self.progress.end(ProgressKey.Search)
        # Return
        return contexts

    def __search_all_sites(self, keywords: List[str],
                           mediainfo: Optional[MediaInfo] = None,
                           sites: Optional[List[int]] = None,
                           page: int = 0,
                           area: str = "title") -> Optional[List[TorrentInfo]]:
        """
        Search several sites concurrently, one worker thread per site.

        :param mediainfo: recognised media information
        :param keywords: list of search keywords
        :param sites: site ID list; when given only these sites are searched,
                      otherwise the configured indexer sites are used
        :param page: search page number
        :param area: search scope, "title" or "imdbid"
        :return: list of resources found on all sites
        """
        # Sites that are not enabled are not searched
        indexer_sites = []

        # Configured indexer sites
        if not sites:
            sites = self.systemconfig.get(SystemConfigKey.IndexerSites) or []

        for indexer in self.siteshelper.get_indexers():
            # Check the site indexing switch (an empty selection means every site)
            if not sites or indexer.get("id") in sites:
                # Site flow control: skip the site when the check flags it
                state, msg = self.siteshelper.check(indexer.get("domain"))
                if state:
                    logger.warn(msg)
                    continue
                indexer_sites.append(indexer)
        if not indexer_sites:
            logger.warn('未开启任何有效站点,无法搜索资源')
            return []

        # Start the progress
        self.progress.start(ProgressKey.Search)
        # Start timing
        start_time = datetime.now()
        # Total number of sites
        total_num = len(indexer_sites)
        # Number of finished sites
        finish_count = 0
        # Update the progress
        self.progress.update(value=0,
                             text=f"开始搜索,共 {total_num} 个站点 ...",
                             key=ProgressKey.Search)
        # The search arguments are identical for every site, so compute them once
        mtype = mediainfo.type if mediainfo else None
        if area == "imdbid":
            # Search by IMDB ID
            search_keywords = [mediainfo.imdb_id] if mediainfo else None
        else:
            # Search by title
            search_keywords = keywords
        # Result set
        results = []
        # The context manager shuts the pool down, so worker threads are
        # released even when a site search raises
        with ThreadPoolExecutor(max_workers=total_num) as executor:
            all_task = [executor.submit(self.search_torrents, site=site,
                                        keywords=search_keywords,
                                        mtype=mtype,
                                        page=page)
                        for site in indexer_sites]
            for future in as_completed(all_task):
                finish_count += 1
                result = future.result()
                if result:
                    results.extend(result)
                logger.info(f"站点搜索进度:{finish_count} / {total_num}")
                self.progress.update(value=finish_count / total_num * 100,
                                     text=f"正在搜索{keywords or ''},已完成 {finish_count} / {total_num} 个站点 ...",
                                     key=ProgressKey.Search)
        # Compute the elapsed time
        end_time = datetime.now()
        # Update the progress
        self.progress.update(value=100,
                             text=f"站点搜索完成,有效资源数:{len(results)},总耗时 {(end_time - start_time).seconds} 秒",
                             key=ProgressKey.Search)
        logger.info(f"站点搜索完成,有效资源数:{len(results)},总耗时 {(end_time - start_time).seconds} 秒")
        # End the progress
        self.progress.end(ProgressKey.Search)
        # Return
        return results

    def filter_torrents_by_rule(self,
                                torrents: List[TorrentInfo],
                                mediainfo: MediaInfo,
                                filter_rule: Optional[Dict[str, str]] = None,
                                ) -> List[TorrentInfo]:
        """
        Filter torrents with the filter rule.

        :param torrents: torrent list
        :param mediainfo: media information
        :param filter_rule: filter rule; when empty, the default search filter rule is used
        :return: the torrents accepted by the rule; the input list unchanged when
                 no rule is available
        """
        if not filter_rule:
            # Fall back to the default search filter rule
            filter_rule = self.systemconfig.get(SystemConfigKey.DefaultSearchFilterRules)
        if not filter_rule:
            return torrents

        # Keep only the torrents accepted by the rule
        return [
            torrent for torrent in torrents
            if self.torrenthelper.filter_torrent(
                torrent_info=torrent,
                filter_rule=filter_rule,
                mediainfo=mediainfo
            )
        ]

    @eventmanager.register(EventType.SiteDeleted)
    def remove_site(self, event: Event):
        """
        Remove the settings related to a deleted site from the search sites.

        :param event: site-deleted event; its event_data carries "site_id",
                      where "*" clears the whole selection
        """
        if not event:
            return
        event_data = event.event_data or {}
        site_id = event_data.get("site_id")
        if not site_id:
            return
        if site_id == "*":
            # Clear the search sites
            SystemConfigOper().set(SystemConfigKey.IndexerSites, [])
            return
        # Remove the deleted site from the selected search sites
        selected_sites = SystemConfigOper().get(SystemConfigKey.IndexerSites) or []
        if site_id in selected_sites:
            selected_sites.remove(site_id)
            SystemConfigOper().set(SystemConfigKey.IndexerSites, selected_sites)