fix #694 按站点多次检索

This commit is contained in:
jxxghp 2023-09-29 08:20:55 +08:00
parent dbb3bead6b
commit 2e89eeca2c
5 changed files with 97 additions and 102 deletions

View File

@ -197,21 +197,19 @@ class ChainBase(metaclass=ABCMeta):
return self.run_module("search_medias", meta=meta) return self.run_module("search_medias", meta=meta)
def search_torrents(self, site: CommentedMap, def search_torrents(self, site: CommentedMap,
mediainfo: MediaInfo, keywords: List[str],
keyword: str = None, mtype: MediaType = None,
page: int = 0, page: int = 0) -> List[TorrentInfo]:
area: str = "title") -> List[TorrentInfo]:
""" """
搜索一个站点的种子资源 搜索一个站点的种子资源
:param site: 站点 :param site: 站点
:param mediainfo: 识别的媒体信息 :param keywords: 搜索关键词列表
:param keyword: 搜索关键词如有按关键词搜索否则按媒体信息名称搜索 :param mtype: 媒体类型
:param page: 页码 :param page: 页码
:param area: 搜索区域
:return: 资源列表 :return: 资源列表
""" """
return self.run_module("search_torrents", mediainfo=mediainfo, site=site, return self.run_module("search_torrents", site=site, keywords=keywords,
keyword=keyword, page=page, area=area) mtype=mtype, page=page)
def refresh_torrents(self, site: CommentedMap) -> List[TorrentInfo]: def refresh_torrents(self, site: CommentedMap) -> List[TorrentInfo]:
""" """

View File

@ -62,7 +62,7 @@ class SearchChain(ChainBase):
else: else:
logger.info(f'开始浏览资源,站点:{site} ...') logger.info(f'开始浏览资源,站点:{site} ...')
# 搜索 # 搜索
return self.__search_all_sites(keyword=title, sites=[site] if site else None, page=page) or [] return self.__search_all_sites(keywords=[title], sites=[site] if site else None, page=page) or []
def last_search_results(self) -> List[Context]: def last_search_results(self) -> List[Context]:
""" """
@ -117,16 +117,12 @@ class SearchChain(ChainBase):
else: else:
keywords = [mediainfo.title] keywords = [mediainfo.title]
# 执行搜索 # 执行搜索
torrents: List[TorrentInfo] = [] torrents: List[TorrentInfo] = self.__search_all_sites(
for keyword in keywords:
torrents = self.__search_all_sites(
mediainfo=mediainfo, mediainfo=mediainfo,
keyword=keyword, keywords=keywords,
sites=sites, sites=sites,
area=area area=area
) )
if torrents:
break
if not torrents: if not torrents:
logger.warn(f'{keyword or mediainfo.title} 未搜索到资源') logger.warn(f'{keyword or mediainfo.title} 未搜索到资源')
return [] return []
@ -241,15 +237,15 @@ class SearchChain(ChainBase):
# 返回 # 返回
return contexts return contexts
def __search_all_sites(self, mediainfo: Optional[MediaInfo] = None, def __search_all_sites(self, keywords: List[str],
keyword: str = None, mediainfo: Optional[MediaInfo] = None,
sites: List[int] = None, sites: List[int] = None,
page: int = 0, page: int = 0,
area: str = "title") -> Optional[List[TorrentInfo]]: area: str = "title") -> Optional[List[TorrentInfo]]:
""" """
多线程搜索多个站点 多线程搜索多个站点
:param mediainfo: 识别的媒体信息 :param mediainfo: 识别的媒体信息
:param keyword: 搜索关键词如有按关键词搜索否则按媒体信息名称搜索 :param keywords: 搜索关键词列表
:param sites: 指定站点ID列表如有则只搜索指定站点否则搜索所有站点 :param sites: 指定站点ID列表如有则只搜索指定站点否则搜索所有站点
:param page: 搜索页码 :param page: 搜索页码
:param area: 搜索区域 title or imdbid :param area: 搜索区域 title or imdbid
@ -291,8 +287,18 @@ class SearchChain(ChainBase):
executor = ThreadPoolExecutor(max_workers=len(indexer_sites)) executor = ThreadPoolExecutor(max_workers=len(indexer_sites))
all_task = [] all_task = []
for site in indexer_sites: for site in indexer_sites:
task = executor.submit(self.search_torrents, mediainfo=mediainfo, if area == "imdbid":
site=site, keyword=keyword, page=page, area=area) # 搜索IMDBID
task = executor.submit(self.search_torrents, site=site,
keywords=[mediainfo.imdb_id] if mediainfo else None,
mtype=mediainfo.type if mediainfo else None,
page=page)
else:
# 搜索标题
task = executor.submit(self.search_torrents, site=site,
keywords=keywords,
mtype=mediainfo.type if mediainfo else None,
page=page)
all_task.append(task) all_task.append(task)
# 结果集 # 结果集
results = [] results = []
@ -303,7 +309,7 @@ class SearchChain(ChainBase):
results.extend(result) results.extend(result)
logger.info(f"站点搜索进度:{finish_count} / {total_num}") logger.info(f"站点搜索进度:{finish_count} / {total_num}")
self.progress.update(value=finish_count / total_num * 100, self.progress.update(value=finish_count / total_num * 100,
text=f"正在搜索{keyword or ''},已完成 {finish_count} / {total_num} 个站点 ...", text=f"正在搜索{keywords or ''},已完成 {finish_count} / {total_num} 个站点 ...",
key=ProgressKey.Search) key=ProgressKey.Search)
# 计算耗时 # 计算耗时
end_time = datetime.now() end_time = datetime.now()

View File

@ -3,7 +3,7 @@ from typing import List, Optional, Tuple, Union
from ruamel.yaml import CommentedMap from ruamel.yaml import CommentedMap
from app.core.context import MediaInfo, TorrentInfo from app.core.context import TorrentInfo
from app.log import logger from app.log import logger
from app.modules import _ModuleBase from app.modules import _ModuleBase
from app.modules.indexer.mtorrent import MTorrentSpider from app.modules.indexer.mtorrent import MTorrentSpider
@ -28,46 +28,46 @@ class IndexerModule(_ModuleBase):
def init_setting(self) -> Tuple[str, Union[str, bool]]: def init_setting(self) -> Tuple[str, Union[str, bool]]:
return "INDEXER", "builtin" return "INDEXER", "builtin"
def search_torrents(self, site: CommentedMap, mediainfo: MediaInfo = None, def search_torrents(self, site: CommentedMap,
keyword: str = None, page: int = 0, area: str = "title") -> List[TorrentInfo]: keywords: List[str] = None,
mtype: MediaType = None,
page: int = 0) -> List[TorrentInfo]:
""" """
搜索一个站点 搜索一个站点
:param mediainfo: 识别的媒体信息
:param site: 站点 :param site: 站点
:param keyword: 搜索关键词如有按关键词搜索否则按媒体信息名称搜索 :param keywords: 搜索关键词列表
:param mtype: 媒体类型
:param page: 页码 :param page: 页码
:param area: 搜索区域 title or imdbid
:return: 资源列表 :return: 资源列表
""" """
# 确认搜索的名字 # 确认搜索的名字
if keyword: if not keywords:
search_word = keyword # 浏览种子页
elif mediainfo: keywords = [None]
search_word = mediainfo.title
else:
search_word = None
if search_word \
and site.get('language') == "en" \
and StringUtils.is_chinese(search_word):
# 不支持中文
logger.warn(f"{site.get('name')} 不支持中文搜索")
return []
# 去除搜索关键字中的特殊字符
if search_word:
search_word = StringUtils.clear(search_word, replace_word=" ", allow_space=True)
# 开始索引 # 开始索引
result_array = [] result_array = []
# 开始计时 # 开始计时
start_time = datetime.now() start_time = datetime.now()
# 搜索多个关键字
for search_word in keywords:
# 可能为关键字或ttxxxx
if search_word \
and site.get('language') == "en" \
and StringUtils.is_chinese(search_word):
# 不支持中文
logger.warn(f"{site.get('name')} 不支持中文搜索")
continue
# 去除搜索关键字中的特殊字符
if search_word:
search_word = StringUtils.clear(search_word, replace_word=" ", allow_space=True)
try: try:
imdbid = mediainfo.imdb_id if mediainfo and area == "imdbid" else None
if site.get('parser') == "TNodeSpider": if site.get('parser') == "TNodeSpider":
error_flag, result_array = TNodeSpider(site).search( error_flag, result_array = TNodeSpider(site).search(
keyword=search_word, keyword=search_word,
imdbid=imdbid,
page=page page=page
) )
elif site.get('parser') == "TorrentLeech": elif site.get('parser') == "TorrentLeech":
@ -78,17 +78,19 @@ class IndexerModule(_ModuleBase):
elif site.get('parser') == "mTorrent": elif site.get('parser') == "mTorrent":
error_flag, result_array = MTorrentSpider(site).search( error_flag, result_array = MTorrentSpider(site).search(
keyword=search_word, keyword=search_word,
mtype=mediainfo.type if mediainfo else None, mtype=mtype,
page=page page=page
) )
else: else:
error_flag, result_array = self.__spider_search( error_flag, result_array = self.__spider_search(
keyword=search_word, search_word=search_word,
imdbid=imdbid,
indexer=site, indexer=site,
mtype=mediainfo.type if mediainfo else None, mtype=mtype,
page=page page=page
) )
# 有结果后停止
if result_array:
break
except Exception as err: except Exception as err:
logger.error(f"{site.get('name')} 搜索出错:{err}") logger.error(f"{site.get('name')} 搜索出错:{err}")
@ -112,15 +114,13 @@ class IndexerModule(_ModuleBase):
@staticmethod @staticmethod
def __spider_search(indexer: CommentedMap, def __spider_search(indexer: CommentedMap,
keyword: str = None, search_word: str = None,
imdbid: str = None,
mtype: MediaType = None, mtype: MediaType = None,
page: int = 0) -> (bool, List[dict]): page: int = 0) -> (bool, List[dict]):
""" """
根据关键字搜索单个站点 根据关键字搜索单个站点
:param: indexer: 站点配置 :param: indexer: 站点配置
:param: keyword: 关键字 :param: search_word: 关键字
:param: imdbid: imdbid
:param: page: 页码 :param: page: 页码
:param: mtype: 媒体类型 :param: mtype: 媒体类型
:param: timeout: 超时时间 :param: timeout: 超时时间
@ -128,8 +128,7 @@ class IndexerModule(_ModuleBase):
""" """
_spider = TorrentSpider(indexer=indexer, _spider = TorrentSpider(indexer=indexer,
mtype=mtype, mtype=mtype,
keyword=keyword, keyword=search_word,
imdbid=imdbid,
page=page) page=page)
return _spider.is_error, _spider.get_torrents() return _spider.is_error, _spider.get_torrents()

View File

@ -40,8 +40,6 @@ class TorrentSpider:
referer: str = None referer: str = None
# 搜索关键字 # 搜索关键字
keyword: str = None keyword: str = None
# 搜索IMDBID
imdbid: str = None
# 媒体类型 # 媒体类型
mtype: MediaType = None mtype: MediaType = None
# 搜索路径、方式配置 # 搜索路径、方式配置
@ -68,7 +66,6 @@ class TorrentSpider:
def __init__(self, def __init__(self,
indexer: CommentedMap, indexer: CommentedMap,
keyword: [str, list] = None, keyword: [str, list] = None,
imdbid: str = None,
page: int = 0, page: int = 0,
referer: str = None, referer: str = None,
mtype: MediaType = None): mtype: MediaType = None):
@ -76,7 +73,6 @@ class TorrentSpider:
设置查询参数 设置查询参数
:param indexer: 索引器 :param indexer: 索引器
:param keyword: 搜索关键字如果数组则为批量搜索 :param keyword: 搜索关键字如果数组则为批量搜索
:param imdbid: IMDB ID
:param page: 页码 :param page: 页码
:param referer: Referer :param referer: Referer
:param mtype: 媒体类型 :param mtype: 媒体类型
@ -84,7 +80,6 @@ class TorrentSpider:
if not indexer: if not indexer:
return return
self.keyword = keyword self.keyword = keyword
self.imdbid = imdbid
self.mtype = mtype self.mtype = mtype
self.indexerid = indexer.get('id') self.indexerid = indexer.get('id')
self.indexername = indexer.get('name') self.indexername = indexer.get('name')
@ -159,20 +154,17 @@ class TorrentSpider:
# 搜索URL # 搜索URL
indexer_params = self.search.get("params") or {} indexer_params = self.search.get("params") or {}
if indexer_params: if indexer_params:
# 支持IMDBID时优先使用IMDBID搜索 search_area = indexer_params.get('search_area')
search_area = indexer_params.get("search_area") or 0 # search_area非0表示支持imdbid搜索
if self.imdbid and search_area: if (search_area and
search_word = self.imdbid (not self.keyword or not self.keyword.startswith('tt'))):
else: # 支持imdbid搜索但关键字不是imdbid时不启用imdbid搜索
search_word = self.keyword
# 不启用IMDBID搜索时需要将search_area移除
if search_area:
indexer_params.pop('search_area') indexer_params.pop('search_area')
# 变量字典 # 变量字典
inputs_dict = { inputs_dict = {
"keyword": search_word "keyword": search_word
} }
# 查询参数 # 查询参数,默认查询标题
params = { params = {
"search_mode": search_mode, "search_mode": search_mode,
"search_area": 0, "search_area": 0,

View File

@ -49,16 +49,16 @@ class TNodeSpider:
if csrf_token: if csrf_token:
self._token = csrf_token.group(1) self._token = csrf_token.group(1)
def search(self, keyword: str, imdbid: str = None, page: int = 0) -> Tuple[bool, List[dict]]: def search(self, keyword: str, page: int = 0) -> Tuple[bool, List[dict]]:
if not self._token: if not self._token:
logger.warn(f"{self._name} 未获取到token无法搜索") logger.warn(f"{self._name} 未获取到token无法搜索")
return True, [] return True, []
search_type = "imdbid" if imdbid else "title" search_type = "imdbid" if (keyword and keyword.startswith('tt')) else "title"
params = { params = {
"page": int(page) + 1, "page": int(page) + 1,
"size": self._size, "size": self._size,
"type": search_type, "type": search_type,
"keyword": imdbid or keyword or "", "keyword": keyword or "",
"sorter": "id", "sorter": "id",
"order": "desc", "order": "desc",
"tags": [], "tags": [],