# build-in lib import json import secrets import typing from pathlib import Path # third party lib import opencc from lxml import etree # project wide definitions import config from ADC_function import (translate, load_cookies, file_modification_days, delete_all_elements_in_str, delete_all_elements_in_list ) from scrapinglib.api import search def get_data_from_json( file_number: str, open_cc: opencc.OpenCC, specified_source: str, specified_url: str) -> typing.Optional[dict]: """ iterate through all services and fetch the data 从网站上查询片名解析JSON返回元数据 :param file_number: 影片名称 :param open_cc: 简繁转换器 :param specified_source: 指定的媒体数据源 :param specified_url: 指定的数据查询地址, 目前未使用 :return 给定影片名称的具体信息 """ try: actor_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_actor.xml')) info_mapping_data = etree.parse(str(Path.home() / '.local' / 'share' / 'mdc' / 'mapping_info.xml')) except: actor_mapping_data = etree.fromstring("", etree.HTMLParser()) info_mapping_data = etree.fromstring("", etree.HTMLParser()) conf = config.getInstance() # default fetch order list, from the beginning to the end sources = conf.sources() # TODO 准备参数 # - 清理 ADC_function, webcrawler proxies: dict = None config_proxy = conf.proxy() if config_proxy.enable: proxies = config_proxy.proxies() ca_cert = None if conf.cacert_file(): ca_cert = conf.cacert_file() json_data = search(file_number, sources, proxies=proxies, verify=ca_cert, morestoryline=conf.is_storyline(), specifiedSource=specified_source, specifiedUrl=specified_url, debug = conf.debug()) # Return if data not found in all sources if not json_data: print('[-]Movie Number not found!') return None # 增加number严格判断,避免提交任何number if str(json_data.get('number')).upper() != file_number.upper(): try: if json_data.get('allow_number_change'): pass except: print('[-]Movie number has changed! [{}]->[{}]'.format(file_number, str(json_data.get('number')))) return None # ================================================网站规则添加结束================================================ if json_data.get('title') == '': print('[-]Movie Number or Title not found!') return None title = json_data.get('title') actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表 actor_list = [actor.strip() for actor in actor_list] # 去除空白 director = json_data.get('director') release = json_data.get('release') number = json_data.get('number') studio = json_data.get('studio') source = json_data.get('source') runtime = json_data.get('runtime') outline = json_data.get('outline') label = json_data.get('label') series = json_data.get('series') year = json_data.get('year') if json_data.get('cover_small'): cover_small = json_data.get('cover_small') else: cover_small = '' if json_data.get('trailer'): trailer = json_data.get('trailer') else: trailer = '' if json_data.get('extrafanart'): extrafanart = json_data.get('extrafanart') else: extrafanart = '' imagecut = json_data.get('imagecut') tag = str(json_data.get('tag')).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ while 'XXXX' in tag: tag.remove('XXXX') while 'xxx' in tag: tag.remove('xxx') actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') # if imagecut == '3': # DownloadFileWithFilename() # ====================处理异常字符====================== #\/:*?"<>| actor = special_characters_replacement(actor) actor_list = [special_characters_replacement(a) for a in actor_list] title = special_characters_replacement(title) label = special_characters_replacement(label) outline = special_characters_replacement(outline) series = special_characters_replacement(series) studio = special_characters_replacement(studio) director = special_characters_replacement(director) tag = [special_characters_replacement(t) for t in tag] release = release.replace('/', '-') tmpArr = cover_small.split(',') if len(tmpArr) > 0: cover_small = tmpArr[0].strip('\"').strip('\'') # ====================处理异常字符 END================== #\/:*?"<>| # 处理大写 if conf.number_uppercase(): json_data['number'] = number.upper() # 返回处理后的json_data json_data['title'] = title json_data['original_title'] = title json_data['actor'] = actor json_data['release'] = release json_data['cover_small'] = cover_small json_data['tag'] = tag json_data['year'] = year json_data['actor_list'] = actor_list json_data['trailer'] = trailer json_data['extrafanart'] = extrafanart json_data['label'] = label json_data['outline'] = outline json_data['series'] = series json_data['studio'] = studio json_data['director'] = director if conf.is_translate(): translate_values = conf.translate_values().split(",") for translate_value in translate_values: if json_data[translate_value] == "": continue if conf.get_translate_engine() == "azure": t = translate( json_data[translate_value], target_language="zh-Hans", engine=conf.get_translate_engine(), key=conf.get_translate_key(), ) else: if len(json_data[translate_value]): if type(json_data[translate_value]) == str: json_data[translate_value] = special_characters_replacement(json_data[translate_value]) json_data[translate_value] = translate(json_data[translate_value]) else: for i in range(len(json_data[translate_value])): json_data[translate_value][i] = special_characters_replacement( json_data[translate_value][i]) list_in_str = ",".join(json_data[translate_value]) json_data[translate_value] = translate(list_in_str).split(',') if open_cc: cc_vars = conf.cc_convert_vars().split(",") ccm = conf.cc_convert_mode() def convert_list(mapping_data, language, vars): total = [] for i in vars: if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=f",{i},")) != 0: i = mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=f",{i},")[0] total.append(i) return total def convert(mapping_data, language, vars): if len(mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=vars)) != 0: return mapping_data.xpath('a[contains(@keyword, $name)]/@' + language, name=vars)[0] else: raise IndexError('keyword not found') for cc in cc_vars: if json_data[cc] == "" or len(json_data[cc]) == 0: continue try: if ccm == 1: json_data[cc] = convert(info_mapping_data, "zh_cn", json_data[cc]) json_data[cc] = delete_all_elements_in_str("删除", json_data[cc]) elif ccm == 2: json_data[cc] = convert(info_mapping_data, "zh_tw", json_data[cc]) json_data[cc] = delete_all_elements_in_str("删除", json_data[cc]) elif ccm == 3: json_data[cc] = convert(info_mapping_data, "jp", json_data[cc]) json_data[cc] = delete_all_elements_in_str("删除", json_data[cc]) except IndexError: json_data[cc] = open_cc.convert(json_data[cc]) except: pass naming_rule = "" original_naming_rule = "" for i in conf.naming_rule().split("+"): if i not in json_data: naming_rule += i.strip("'").strip('"') original_naming_rule += i.strip("'").strip('"') else: item = json_data.get(i) naming_rule += item if type(item) is not list else "&".join(item) # PATCH:处理[title]存在翻译的情况,后续NFO文件的original_name只会直接沿用naming_rule,这导致original_name非原始名 # 理应在翻译处处理 naming_rule和original_naming_rule if i == 'title': item = json_data.get('original_title') original_naming_rule += item if type(item) is not list else "&".join(item) json_data['naming_rule'] = naming_rule json_data['original_naming_rule'] = original_naming_rule return json_data def special_characters_replacement(text) -> str: if not isinstance(text, str): return text return (text.replace('\\', '∖'). # U+2216 SET MINUS @ Basic Multilingual Plane replace('/', '∕'). # U+2215 DIVISION SLASH @ Basic Multilingual Plane replace(':', '꞉'). # U+A789 MODIFIER LETTER COLON @ Latin Extended-D replace('*', '∗'). # U+2217 ASTERISK OPERATOR @ Basic Multilingual Plane replace('?', '?'). # U+FF1F FULLWIDTH QUESTION MARK @ Basic Multilingual Plane replace('"', '"'). # U+FF02 FULLWIDTH QUOTATION MARK @ Basic Multilingual Plane replace('<', 'ᐸ'). # U+1438 CANADIAN SYLLABICS PA @ Basic Multilingual Plane replace('>', 'ᐳ'). # U+1433 CANADIAN SYLLABICS PO @ Basic Multilingual Plane replace('|', 'ǀ'). # U+01C0 LATIN LETTER DENTAL CLICK @ Basic Multilingual Plane replace('‘', '‘'). # U+02018 LEFT SINGLE QUOTATION MARK replace('’', '’'). # U+02019 RIGHT SINGLE QUOTATION MARK replace('…', '…'). replace('&', '&'). replace("&", '&') )