Upload kaggle-util.ipynb
Browse files- kaggle-util.ipynb +1 -1
kaggle-util.ipynb
CHANGED
@@ -1 +1 @@
|
|
1 |
-
# Built-in parameter defaults. A variable with the un-prefixed name that is
# already defined in the notebook context overrides the default.
def _context_value(name, default, *scopes):
    """Return the first non-None value bound to *name* in *scopes*, else *default*.

    Unlike ``scope.get(name) or default``, an explicit falsy override such as
    ``False`` is honoured; only a missing name or ``None`` selects the default.

    Args:
        name: Variable name to look up.
        default: Value returned when no scope provides a non-None value.
        *scopes: Mappings (for example ``locals()`` and ``globals()``) searched
            in order.
    """
    for scope in scopes:
        value = scope.get(name)
        if value is not None:
            return value
    return default


# Flag read by the ngrok keep-alive loop and cleared by stop_solo_threads().
_runing = False

# Boolean switches: `... or True` could never evaluate to False, so an explicit
# `useFrpc = False` / `useNgrok = False` in the context used to be ignored.
_useFrpc = _context_value('useFrpc', True, locals(), globals())

_useNgrok = _context_value('useNgrok', True, locals(), globals())

# For the remaining settings a falsy override (0, '', {}) falls back to the
# default, exactly as before.
_server_port = locals().get('server_port') or globals().get('server_port') or 7860

_huggingface_token = locals().get('huggingface_token') or globals().get('huggingface_token') or '{input_path}/configs/huggingface_token.txt'
_huggingface_token = _huggingface_token\
    .replace('{sdwui}','stable-diffusion-webui')\
    .replace('{wui}',"webui")

# NOTE(review): the override variable is called `hidden_console_info`, but its
# value is used as-is for `show_shell_info` - confirm the intended polarity.
show_shell_info = locals().get('hidden_console_info') or globals().get('hidden_console_info')
if show_shell_info is None: show_shell_info = False

run_by_none_device = False

_proxy_path = locals().get('proxy_path') or globals().get('proxy_path') or {}

_config_args:dict[str, str] = locals().get('config_args') or globals().get('config_args') or {}
def echoToFile(content:str,path:str):
    """Write *content* to the text file at *path*, replacing any existing file.

    The parent directory of *path* is created first when it does not exist.

    Args:
        content: Text to write.
        path: Destination file path.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    parent = os.path.dirname(path)
    if parent:
        # Create the directory in-process instead of via a `mkdir -p` shell
        # string, so paths with spaces or shell metacharacters work.
        os.makedirs(parent, exist_ok=True)
    # Encode explicitly: the platform default encoding is not guaranteed to be
    # UTF-8, and the written text may contain non-ASCII characters.
    with open(path, 'w', encoding='utf-8') as sh:
        sh.write(content)
def replace_path(input_str:str):
    """Expand the path placeholders contained in *input_str*.

    Every key of ``_config_args`` is replaced by its value first. Then the
    built-in placeholders are substituted: ``$install_path``/``{install_path}``,
    ``$input_path``/``{input_path}``, ``$output_path``/``{output_path}``,
    ``{sdwui}`` and ``{wui}``.

    Args:
        input_str: Text that may contain placeholders.

    Returns:
        The expanded text, or ``''`` when *input_str* is empty or None.
    """
    if not input_str:
        return ''

    expanded = input_str
    # User-supplied substitutions are applied before the built-in ones.
    for placeholder, value in _config_args.items():
        expanded = expanded.replace(placeholder, value)

    builtin_substitutions = (
        ('$install_path', _install_path),
        ('{install_path}', _install_path),
        ('$input_path', _input_path),
        ('{input_path}', _input_path),
        ('$output_path', _output_path),
        ('{output_path}', _output_path),
        ('{sdwui}', 'stable-diffusion-webui'),
        ('{wui}', "webui"),
    )
    for placeholder, value in builtin_substitutions:
        expanded = expanded.replace(placeholder, value)
    return expanded
'/kaggle/working/configs/config.json':\n"," _setting_file = os.path.join(_output_path,'configs/config.json')\n"," if _ui_config_file == '/kaggle/working/configs/ui-config.json':\n"," _ui_config_file = os.path.join(_output_path,'configs/ui-config.json')\n"," \n","frpcStartArg = ''\n","freefrp_url = ''\n","_frp_temp_config_file = ''\n","_frp_config_or_file = replace_path(locals().get('frp_config_or_file') or globals().get('frp_config_or_file')) or frpcConfigFile\n","run(f'''mkdir -p {_install_path}/configFiles''')\n","if _frp_config_or_file:\n"," if '[common]' in _frp_config_or_file:\n"," echoToFile(_frp_config_or_file,f'{_install_path}/configFiles/temp_frpc_webui.ini')\n"," _frp_temp_config_file = f'{_install_path}/configFiles/temp_frpc_webui.ini'\n"," elif '.ini' in _frp_config_or_file:\n"," _frp_temp_config_file = _frp_config_or_file.strip()\n"," \n"," if _frp_temp_config_file:\n"," if Path(_frp_temp_config_file).exists():\n"," run(f'''cp -f {_frp_temp_config_file} {_install_path}/configFiles/frpc_webui.ini''')\n"," run(f'''sed -i \"s/local_port = .*/local_port = {_server_port}/g\" {_install_path}/configFiles/frpc_webui.ini''')\n"," frpcStartArg = f' -c {_install_path}/configFiles/frpc_webui.ini'\n"," elif _frp_config_or_file.strip().startswith('-f'):\n"," frpcStartArg = _frp_config_or_file.strip()\n"," \n","if not frpcStartArg:\n"," conf,url = get_freefrp_confog(_server_port)\n"," echoToFile(conf,f'{_install_path}/configFiles/frpc_webui.ini')\n"," freefrp_url = url\n"," frpcStartArg = f' -c {_install_path}/configFiles/frpc_webui.ini'\n","\n","ngrokToken=''\n","_ngrok_config_or_file = replace_path(locals().get('ngrok_config_or_file') or globals().get('ngrok_config_or_file')) or ngrokTokenFile\n","if _ngrok_config_or_file:\n"," if Path(_ngrok_config_or_file.strip()).exists():\n"," ngrokTokenFile = _ngrok_config_or_file.strip()\n"," if Path(ngrokTokenFile).exists():\n"," with open(ngrokTokenFile,encoding = \"utf-8\") as nkfile:\n"," ngrokToken = 
def is_installed(package):
    """Return True when the module or package named *package* can be found.

    Args:
        package: Absolute module name; dotted names are allowed.

    Returns:
        True if an import spec exists for *package* (or the module is already
        loaded), otherwise False.
    """
    # `import importlib` alone does not guarantee that the `importlib.util`
    # submodule has been loaded, so import it explicitly here.
    import importlib.util
    import sys

    try:
        spec = importlib.util.find_spec(package)
    except ModuleNotFoundError:
        # A parent package of a dotted name is missing.
        return False
    except ValueError:
        # find_spec raises ValueError for a loaded module whose __spec__ is
        # None; such a module is importable because it is already present.
        return package in sys.modules

    return spec is not None
# Turn one configuration entry into a download-list item.
def pause_url(url:str,dist_path:str):
    """Build the download descriptor for one ``[name:]url`` entry.

    Args:
        url: Either a bare http(s)/ftp(s) URL, or ``name:URL`` where *name* is
            the file name to save the download under.
        dist_path: Directory the downloaded file belongs in.

    Returns:
        A dict with the keys ``file_name`` (whitespace runs replaced by ``_``,
        ``''`` when no name was given), ``path_hash`` (cache-directory name
        derived from the URL), ``url`` and ``dist_path``; or None when the
        entry does not contain an http(s)/ftp(s) URL.
    """
    import hashlib

    file_name = ''
    if re.match(r'^[^:]+:(https?|ftps?)://', url, flags=0):
        # Split the "name:" prefix from the real URL.
        file_name = re.findall(r'^[^:]+:',url)[0][:-1]
        url = url[len(file_name)+1:]
    if not re.match(r'^(https?|ftps?)://',url):
        return
    file_name = re.sub(r'\s+','_',file_name or '')
    # The built-in hash() of a str is salted per interpreter process, so it
    # produced a different cache directory on every run and the cache was
    # never reused. A content digest is stable across runs.
    path_hash = hashlib.sha256(url.encode('utf-8')).hexdigest()

    return {'file_name':file_name,'path_hash':path_hash,'url':url,'dist_path':dist_path}
def parse_config(config:str) -> dict[str, list[str]]:
    """Parse a download configuration into ``{section: [entries]}``.

    Each line is one entry. Text after ``#`` is a comment, and surrounding
    spaces, quotes and commas are stripped. A line of the form ``[name]``
    starts a section; the following entries belong to it.

    Entries of a section that appears more than once are merged, and a section
    explicitly named ``default`` is kept like any other section.

    Args:
        config: Configuration text.

    Returns:
        A dict mapping section names, in order of first appearance, to their
        entries. When the text has no section header, all entries are returned
        under the key ``'default'``. Entries placed before the first header are
        discarded when a header follows.
    """
    space_string = ' \n\r\t\'\",'
    entries = []
    for raw_line in config.split('\n'):
        if not raw_line.strip(space_string):
            continue
        entry = raw_line.split('#')[0].strip(space_string).strip()
        if entry:
            entries.append(entry)

    sections = {}
    current = None  # None until the first [section] header has been seen
    leading = []    # entries that precede every header
    for entry in entries:
        if entry.startswith('[') and entry.endswith(']'):
            current = entry[1:-1]
            # setdefault keeps the entries of a section that is reopened later.
            sections.setdefault(current, [])
        elif current is None:
            leading.append(entry)
        else:
            sections[current].append(entry)

    if current is None:
        return {'default': leading}
    return sections
# Install the Kaggle API token file.
def initKaggleConfig():
    """Make sure ``~/.kaggle/kaggle.json`` exists.

    When the file is missing and ``kaggleApiTokenFile`` exists, the token file
    is copied to ``~/.kaggle/kaggle.json`` and its mode is set to 600.

    Returns:
        True when the token file is already installed or the copy commands
        were run; False when no token file is available.
    """
    # pathlib does not expand "~" by itself: without expanduser() this check
    # tested a literal "~" directory and was always False.
    if Path('~/.kaggle/kaggle.json').expanduser().exists():
        return True
    if Path(kaggleApiTokenFile).exists():
        run(f'''mkdir -p ~/.kaggle/''')
        run('cp '+kaggleApiTokenFile+' ~/.kaggle/kaggle.json')
        run(f'''chmod 600 ~/.kaggle/kaggle.json''')
        return True
    print('缺少kaggle的apiToken文件,访问:https://www.kaggle.com/你的kaggle用户名/account 获取')
    return False
def downloadDatasetFiles(datasetName:str,outputPath:str):
    """Download and unzip one of the current user's Kaggle datasets.

    Args:
        datasetName: Dataset name without the ``username/`` prefix.
        outputPath: Directory to download into; created with ``mkdir -p``.

    Returns:
        None when ``initKaggleConfig()`` fails, False when
        ``username/datasetName`` is not among the datasets returned by
        ``kaggle.api.dataset_list(mine=True, search=...)``, True after the
        download call.
    """
    if not initKaggleConfig():
        return
    print('下载数据集文件 '+datasetName)
    import kaggle

    owner = getUserName()
    dataset_ref = owner+'/'+datasetName
    listed_refs = {str(item) for item in kaggle.api.dataset_list(mine=True,search=dataset_ref)}
    if dataset_ref not in listed_refs:
        return False

    run('mkdir -p '+outputPath)
    kaggle.api.dataset_download_files(dataset_ref,path=outputPath,unzip=True)
    return True
def init_huggingface():
    """Log in to the Hugging Face Hub with the configured token.

    ``_huggingface_token`` holds either a token (starting with ``hf_``) or the
    path of a text file whose first line is the token; path placeholders are
    expanded with ``replace_path``. On success the module-level
    ``huggingface_headers`` is set to the matching ``Authorization`` header
    and ``huggingface_is_init`` is set to True.

    Returns:
        True after a login call with a well-formed token; False when no token
        is configured or the token does not start with ``hf_``.
    """
    if not _huggingface_token:
        return False

    global huggingface_headers
    global huggingface_is_init

    token = replace_path(_huggingface_token)
    if not _huggingface_token.startswith('hf_') and Path(token).exists():
        with open(token,encoding = "utf-8") as nkfile:
            token = nkfile.readline()
    # readline() keeps the trailing newline; left in place it would become
    # part of the Authorization header value.
    token = token.strip()
    if not token.startswith('hf_'):
        print('huggingface token 不正确,请将 token 或 仅存放token 的txt文件路径填入 _huggingface_token 配置')
        return False

    # Imported only once a usable token is known, so a missing token does not
    # require huggingface_hub to be installed.
    from huggingface_hub import login
    login(token,add_to_git_credential=True)
    huggingface_headers = {'Authorization': 'Bearer '+token}
    print('huggingface token 已经加载,可以下载私有仓库或文件')

    huggingface_is_init = True
    return True
file_name[file_name.rindex('.'):] not in file_types: return\n"," print(file_name,'>>','huggingface')\n"," try:\n"," self.api.upload_file(\n"," path_or_fileobj=file_path,\n"," path_in_repo=file_path.replace(self.directory_to_watch,''),\n"," repo_id=self.repo_id,\n"," repo_type=self.repo_type,\n"," )\n"," except IOError as error:\n"," print(error)\n","\n"," def on_deleted(self, event):\n"," if not event.is_directory:\n"," # 从 Hugging Face 仓库删除文件\n"," file_path = event.src_path\n"," file_name = os.path.basename(file_path)\n"," if file_name[file_name.rindex('.'):] not in file_types: return\n"," try:\n"," self.api.delete_file(\n"," path_in_repo=file_path.replace(self.directory_to_watch,''),\n"," repo_id=self.repo_id,\n"," repo_type=self.repo_type,\n"," )\n"," except IOError as error:\n"," print(error)\n","\n"," def on_modified(self, event):\n"," if not event.is_directory:\n"," # 更新 Hugging Face 仓库中的文件\n"," file_path = event.src_path\n"," file_name = os.path.basename(file_path)\n"," if file_name[file_name.rindex('.'):] not in ['.png','.jpg','.txt','.webp','.jpeg']: return\n"," try:\n"," self.api.upload_file(\n"," path_or_fileobj=file_path,\n"," path_in_repo=file_path.replace(self.directory_to_watch,''),\n"," repo_id=self.repo_id,\n"," repo_type=self.repo_type,\n"," )\n"," except IOError as error:\n"," print(error)\n","\n"," def on_moved(self, event):\n"," if not event.is_directory:\n"," file_path = event.dest_path\n"," file_name = os.path.basename(file_path)\n"," if file_name[file_name.rindex('.'):] not in file_types: return\n"," if event.dest_path.startswith(self.directory_to_watch):\n"," try:\n"," self.api.upload_file(\n"," path_or_fileobj=file_path,\n"," path_in_repo=file_path.replace(self.directory_to_watch,''),\n"," repo_id=self.repo_id,\n"," repo_type=self.repo_type,\n"," )\n"," except IOError as error:\n"," print(error)\n","\n"," api = HfApi()\n"," \n"," # 创建观察者对象并注册文件变化处理程序\n"," event_handler = FileChangeHandler(api,repo_id,repo_type,directory_to_watch)\n"," 
def zipPath(path:str,zipName:str,format='tar'):
    """Archive the directory *path* into the output directory.

    A leading ``$install_path`` or ``$output_path`` placeholder is expanded. A
    path that still does not start with ``/`` is resolved relative to
    ``{_install_path}/{_ui_dir_name}``.

    Args:
        path: Directory to archive.
        zipName: Archive file name without extension.
        format: ``'tar'`` writes ``<zipName>.tar``; ``'gz'`` writes
            ``<zipName>.tar.gz``. Any other value creates no archive.
    """
    if path.startswith('$install_path'):
        path = path.replace('$install_path',_install_path)
    if path.startswith('$output_path'):
        # This branch used to replace '$install_path', so '$output_path/...'
        # was never expanded and was reported as a missing directory.
        path = path.replace('$output_path',_output_path)
    if not path.startswith('/'):
        path = f'{_install_path}/{_ui_dir_name}/{path}'
    if Path(path).exists():
        if 'tar' == format:
            run(f'tar -cf {_output_path}/'+ zipName +'.tar -C '+ path +' . ')
        elif 'gz' == format:
            run(f'tar -czf {_output_path}/'+ zipName +'.tar.gz -C '+ path +' . ')
        return
    print('指定的目录不存在:'+path)
{_install_path}/frpc')\n"," if Path(frpcExePath).exists():\n"," run(f'cp -f -n {frpcExePath} {_install_path}/frpc/frpc')\n"," else:\n"," run(f'wget \"https://huggingface.co/datasets/ACCA225/Frp/resolve/main/frpc\" -O {_install_path}/frpc/frpc')\n"," \n"," for ssl in frpcSSLFFlies:\n"," if Path(ssl).exists():\n"," run(f'cp -f -n {ssl}/* {_install_path}/frpc/')\n"," run(f'chmod +x {_install_path}/frpc/frpc')\n"," run(f'{_install_path}/frpc/frpc -v')\n","\n","def startProxy():\n"," if _useNgrok:\n"," startNgrok(ngrokToken,_server_port)\n"," if _useFrpc:\n"," startFrpc('frpc_proxy',frpcStartArg)"]},{"cell_type":"markdown","metadata":{},"source":["## NGINX 反向代理\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"_kg_hide-output":true,"trusted":true},"outputs":[],"source":["\n","# nginx 反向代理配置文件\n","def localProxy():\n"," def getProxyLocation(subPath:str, localServer:str):\n"," return '''\n"," location '''+ subPath +'''\n"," {\n"," proxy_pass '''+ localServer +''';\n"," proxy_set_header Host $host;\n"," proxy_set_header X-Real-IP $remote_addr;\n"," proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n"," proxy_set_header REMOTE-HOST $remote_addr;\n"," proxy_set_header Upgrade $http_upgrade;\n"," proxy_set_header Connection upgrade;\n"," proxy_http_version 1.1;\n"," proxy_connect_timeout 10m;\n"," proxy_read_timeout 10m;\n"," }\n"," \n"," '''\n"," \n"," conf = '''\n","server\n","{\n"," listen '''+str(_server_port)+''';\n"," listen [::]:'''+str(_server_port)+''';\n"," server_name 127.0.0.1 localhost 0.0.0.0 \"\";\n"," \n"," if ($request_method = OPTIONS) {\n"," return 200;\n"," }\n"," fastcgi_send_timeout 10m;\n"," fastcgi_read_timeout 10m;\n"," fastcgi_connect_timeout 10m;\n"," \n"," '''+ ''.join([getProxyLocation(key,_proxy_path[key]) for key in sorted(_proxy_path.keys(), key=len)[::-1]]) +'''\n","}\n","'''\n"," echoToFile(conf,'/etc/nginx/conf.d/proxy_nginx.conf')\n"," if not 
import inspect
import ctypes

def _async_raise(tid, exctype):
    """Ask the interpreter to raise *exctype* inside the thread with id *tid*.

    Args:
        tid: Thread identifier (``Thread.ident``).
        exctype: Exception class, or an instance whose class is used.

    Raises:
        ValueError: If no thread state has the given id.
        SystemError: If more than one thread state was modified; the request
            is reverted first.
    """
    # PyThreadState_SetAsyncExc declares its id parameter as unsigned long.
    tid = ctypes.c_ulong(tid)
    if not inspect.isclass(exctype):
        exctype = type(exctype)
    res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))
    if res == 0:
        raise ValueError("invalid thread id")
    elif res != 1:
        # More than one thread state was affected: call again with NULL to
        # revert the effect before reporting the failure.
        ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)
        raise SystemError("PyThreadState_SetAsyncExc failed")

def stop_thread(thread):
    """Request *thread* to exit by raising SystemExit inside it."""
    _async_raise(thread.ident, SystemExit)

def stop_solo_threads():
    """Stop every live thread whose name starts with ``solo_``.

    Also clears the module-level ``_runing`` flag. A thread that cannot be
    signalled is reported and skipped so the remaining threads are still
    stopped.
    """
    global _runing
    _runing = False
    # Walk the currently active threads and stop the ones created here.
    for thread in threading.enumerate():
        if not thread.name.startswith('solo_'):
            continue
        print(f'结束线程:{thread.name}')
        try:
            stop_thread(thread)
        except (ValueError, SystemError):
            # These are the errors _async_raise raises (for example when the
            # thread finished in the meantime); the previous
            # `except socket.error` could never match them.
            print(f'结束线程:{thread.name} 执行失败')
工具函数\n","执行cmd命令:\n","run(command, cwd=None, desc=None, errdesc=None, custom_env=None,try_error:bool=True)\n","检查网络是否可以访问\n","check_service(host, port)\n","检查gpu是否存在\n","check_gpu()\n","写入文本到文件\n","echoToFile(content:str,path:str)\n","获取一个免费frp配置\n","get_freefrp_confog(local_port)\n","\n","下载文件\n","link_or_download_flie(config:str, \n","skip_url:bool=False, \n","_link_instead_of_copy:bool=True, \n","base_path:str = '',\n","sync:bool=False, # 同步且按顺序下载\n","thread_num:int=None, \n","is_await:bool=False) # 异步不按顺序但等待下载完成\n","---- 下载文件功能的配置的格式\n","[目标目录] # 中括号必须写\n","下载链接1\n","名称:下载链接2 # 链接前面的名称可以用于对文件重命名\n","# 备注 井号后面的内容会被忽略\n","可以下载git仓库\n","可以下载huggingfacec仓库或者文件,会使用 huggingface_token 作为凭证下载\n","可以下载其他可以直接下载的互联网资源\n","可以作为文件链接功能,将某个目录或目录下的全部文件链接到目标目录\n","---- 下载文件功能结束结束\n","\n","创建kaggle数据集\n","createOrUpdateDataSet(path:str,datasetName:str)\n","下载kaggle数据集\n","downloadDatasetFiles(datasetName:str,outputPath:str)\n","同步目录到huggingface,可指定同步的文件类型\n","start_sync_log_to_huggingface(repo_id:str,directory_to_watch,repo_type='dataset',file_types=['.png','.jpg','.txt','.webp','.jpeg'])\n","压缩指定目录到指定位置\n","zipPath(path:str,zipName:str,format='tar') # 可用tar或gz\n","停止后台线程 # 仅停止线程名称前缀为 solo_ 的线程\n","stop_solo_threads()\n","'''\n","def get_docs():\n"," print(docs)\n"," \n","print('可以执行get_docs()查看文档')"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["if not (True if os.getenv('IS_INSTALL_NGINX','False') == 'True' else False):\n"," run('git lfs install')\n"," run('git config --global credential.helper store')\n"," run('sudo apt update -y')\n"," run('sudo apt install nginx -y')\n"," os.environ['IS_INSTALL_NGINX'] = 'True'\n","stop_solo_threads()\n","time.sleep(2)\n","threading.Thread(target = startProxy, daemon=True, 
name='solo_startProxy').start()\n","localProxy()\n","init_huggingface()"]}],"metadata":{"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"datasetId":2716934,"sourceId":6167400,"sourceType":"datasetVersion"},{"datasetId":3654544,"sourceId":6346544,"sourceType":"datasetVersion"},{"datasetId":2962375,"sourceId":6720235,"sourceType":"datasetVersion"},{"datasetId":3074484,"sourceId":6817788,"sourceType":"datasetVersion"}],"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":4}
|
|
|
1 |
+
{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["from pathlib import Path\n","import os\n","import time\n","import re\n","import subprocess\n","import threading\n","import sys\n","import socket\n","import torch\n","from typing import List\n","import uuid"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["# 内置参数默认值,当上下文有参数时可覆盖默认值\n","_runing = False\n","_useFrpc = locals().get('useFrpc') or globals().get('useFrpc') or True\n","\n","_useNgrok = locals().get('useNgrok') or globals().get('useNgrok') or True\n","\n","_server_port = locals().get('server_port') or globals().get('server_port') or 7860\n"," \n","_huggingface_token = locals().get('huggingface_token') or globals().get('huggingface_token') or '{input_path}/configs/huggingface_token.txt'\n","_huggingface_token = _huggingface_token\\\n"," .replace('{sdwui}','stable-diffusion-webui')\\\n"," .replace('{wui}',\"webui\")\n","\n","show_shell_info = locals().get('hidden_console_info') or globals().get('hidden_console_info')\n","if show_shell_info is None: show_shell_info = False\n","\n","run_by_none_device = False\n","\n","_proxy_path = locals().get('proxy_path') or globals().get('proxy_path') or {}\n","\n","_config_args:dict[str, str] = locals().get('config_args') or globals().get('config_args') or {}"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["\n","def run(command, cwd=None, desc=None, errdesc=None, custom_env=None,try_error:bool=True) -> str:\n"," global show_shell_info\n"," if desc is not None:\n"," print(desc)\n","\n"," run_kwargs = {\n"," \"args\": command,\n"," \"shell\": True,\n"," \"cwd\": cwd,\n"," \"env\": os.environ if custom_env is None else custom_env,\n"," \"encoding\": 'utf8',\n"," \"errors\": 'ignore',\n"," }\n","\n"," if not show_shell_info:\n"," 
run_kwargs[\"stdout\"] = run_kwargs[\"stderr\"] = subprocess.PIPE\n","\n"," result = subprocess.run(**run_kwargs)\n","\n"," if result.returncode != 0:\n"," error_bits = [\n"," f\"{errdesc or 'Error running command'}.\",\n"," f\"Command: {command}\",\n"," f\"Error code: {result.returncode}\",\n"," ]\n"," if result.stdout:\n"," error_bits.append(f\"stdout: {result.stdout}\")\n"," if result.stderr:\n"," error_bits.append(f\"stderr: {result.stderr}\")\n"," if try_error:\n"," print((RuntimeError(\"\\n\".join(error_bits))))\n"," else:\n"," raise RuntimeError(\"\\n\".join(error_bits))\n","\n"," if show_shell_info:\n"," print((result.stdout or \"\"))\n"," return (result.stdout or \"\")\n","\n","def mkdirs(path, exist_ok=True):\n"," if path and not Path(path).exists():\n"," os.makedirs(path,exist_ok=exist_ok)\n","\n","\n","# 检查网络\n","def check_service(host, port):\n"," try:\n"," socket.create_connection((host, port), timeout=5)\n"," return True\n"," except socket.error:\n"," return False\n","\n","\n","# 检查gpu是否存在\n","def check_gpu():\n"," if not run_by_none_device and torch.cuda.device_count() == 0:\n"," raise Exception('当前环境没有GPU')\n","\n","\n","def echoToFile(content:str,path:str):\n"," if path.find('/') >= 0:\n"," _path = '/'.join(path.split('/')[:-1])\n"," run(f'''mkdir -p {_path}''')\n"," with open(path,'w') as sh:\n"," sh.write(content)\n"," \n","def get_freefrp_confog(local_port):\n"," rd_str = uuid.uuid1()\n"," return (f'''\n","[common]\n","server_addr = frp1.freefrp.net\n","server_port = 7000\n","token = freefrp.net\n","\n","[{rd_str}_http]\n","type = http\n","local_ip = 127.0.0.1\n","local_port = {local_port}\n","custom_domains = {rd_str}.frp.eaias.com\n","''',f'http://{rd_str}.frp.eaias.com')"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["\n","_install_path = f\"{os.environ['HOME']}/sd_webui\" # 安装目录\n","_output_path = '/kaggle/working' if os.path.exists('/kaggle/working/') else 
f\"{os.environ['HOME']}/.sdwui/Output\" # 输出目录 如果使用google云盘 会在google云盘增加sdwebui/Output\n","_input_path = '/kaggle/input' # 输入目录\n","_ui_dir_name = 'sd_main_dir'\n","\n","_install_path = locals().get('install_path') or globals().get('install_path') or _install_path\n","_output_path = locals().get('output_path') or globals().get('output_path') or _output_path\n","_input_path = locals().get('input_path') or globals().get('input_path') or _input_path\n","_ui_dir_name = locals().get('ui_dir_name') or globals().get('ui_dir_name') or _ui_dir_name\n","\n","install_path = _install_path\n","output_path = _output_path\n","input_path = _input_path\n","ui_dir_name = _ui_dir_name\n"," \n","google_drive = '' \n","\n","\n","_useGooglrDrive = locals().get('useGooglrDrive') or globals().get('useGooglrDrive') or True\n","\n","# 连接谷歌云\n","try:\n"," if _useGooglrDrive:\n"," from google.colab import drive\n"," drive.mount(f'~/google_drive')\n"," google_drive = f\"{os.environ['HOME']}/google_drive/MyDrive\"\n"," _output_path = f'{google_drive}/sdwebui/Output'\n"," _input_path = f'{google_drive}/sdwebui/Input'\n"," run(f'''mkdir -p {_input_path}''')\n"," print('''\n","已经链接到谷歌云盘\n","已在云盘创建Input和Output目录\n"," ''')\n","except:\n"," _useGooglrDrive = False\n","\n","run(f'''mkdir -p {_install_path}''')\n","run(f'''mkdir -p {_output_path}''')\n","\n","\n","os.environ['install_path'] = _install_path\n","os.environ['output_path'] = _output_path\n","os.environ['google_drive'] = google_drive\n","os.environ['input_path'] = _input_path\n","\n","def replace_path(input_str:str):\n"," if not input_str: return ''\n"," for key in _config_args:\n"," input_str = input_str.replace(key,_config_args[key])\n"," \n"," return input_str.replace('$install_path',_install_path)\\\n"," .replace('{install_path}',_install_path)\\\n"," .replace('$input_path',_input_path)\\\n"," .replace('{input_path}',_input_path)\\\n"," .replace('$output_path',_output_path)\\\n"," .replace('{output_path}',_output_path)\\\n"," 
.replace('{sdwui}','stable-diffusion-webui')\\\n"," .replace('{wui}',\"webui\")\n","\n","space_string = ' \\n\\r\\t\\'\\\",'\n","\n","def config_reader(conf:str):\n"," args = [replace_path(item.split('#')[0].strip(space_string)) for item in conf.split('\\n') if item.strip(space_string)]\n"," return [item.strip() for item in args if item.strip()]\n"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"id":"i3LhnwYHLCtC","trusted":true},"outputs":[],"source":["ngrokTokenFile = os.path.join(_input_path,'configs/ngrok_token.txt') # 非必填 存放ngrokToken的文件的路径\n","frpcConfigFile = os.path.join(_input_path,'configs/frpc_koishi.ini') # 非必填 frp 配置文件\n","# ss证书目录 下载nginx的版本,把pem格式改成crt格式\n","frpcSSLFFlies = [os.path.join(_input_path,'configs/koishi_ssl')]\n","if 'frp_ssl_dir' in locals() or 'frp_ssl_dir' in globals():\n"," frpcSSLFFlies = frpcSSLFFlies + config_reader(locals().get('frp_ssl_dir') or globals().get('frp_ssl_dir'))\n","# frpc 文件目录 如果目录不存在,会自动下载,也可以在数据集搜索 viyiviyi/utils 添加\n","frpcExePath = os.path.join(_input_path,'utils-tools/frpc')\n","# 其他需要加载的webui启动参数 写到【参数列表】这个配置去\n","\n","# 用于使用kaggle api的token文件 参考 https://www.kaggle.com/docs/api\n","# 此文件用于自动上传koishi的相关配置 也可以用于保存重要的输出文件\n","kaggleApiTokenFile = locals().get('kaggle_api_token') or globals().get('kaggle_api_token') or os.path.join(_input_path,'configs/kaggle.json')\n","\n","requirements = []\n"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"id":"a_GtG2ayLCtD","trusted":true},"outputs":[],"source":["# 这下面的是用于初始化一些值或者环境变量的,轻易别改\n","_setting_file = replace_path(locals().get('setting_file') or globals().get('setting_file') or '/kaggle/working/configs/config.json')\n","\n","_ui_config_file = replace_path(locals().get('ui_config_file') or globals().get('ui_config_file') or '/kaggle/working/configs/ui-config.json')\n","\n","# 设置文件路径\n","if Path(f\"{os.environ['HOME']}/google_drive/MyDrive\").exists():\n"," if _setting_file == 
'/kaggle/working/configs/config.json':\n"," _setting_file = os.path.join(_output_path,'configs/config.json')\n"," if _ui_config_file == '/kaggle/working/configs/ui-config.json':\n"," _ui_config_file = os.path.join(_output_path,'configs/ui-config.json')\n"," \n","frpcStartArg = ''\n","freefrp_url = ''\n","_frp_temp_config_file = ''\n","_frp_config_or_file = replace_path(locals().get('frp_config_or_file') or globals().get('frp_config_or_file')) or frpcConfigFile\n","run(f'''mkdir -p {_install_path}/configFiles''')\n","if _frp_config_or_file:\n"," if '[common]' in _frp_config_or_file:\n"," echoToFile(_frp_config_or_file,f'{_install_path}/configFiles/temp_frpc_webui.ini')\n"," _frp_temp_config_file = f'{_install_path}/configFiles/temp_frpc_webui.ini'\n"," elif '.ini' in _frp_config_or_file:\n"," _frp_temp_config_file = _frp_config_or_file.strip()\n"," \n"," if _frp_temp_config_file:\n"," if Path(_frp_temp_config_file).exists():\n"," run(f'''cp -f {_frp_temp_config_file} {_install_path}/configFiles/frpc_webui.ini''')\n"," run(f'''sed -i \"s/local_port = .*/local_port = {_server_port}/g\" {_install_path}/configFiles/frpc_webui.ini''')\n"," frpcStartArg = f' -c {_install_path}/configFiles/frpc_webui.ini'\n"," elif _frp_config_or_file.strip().startswith('-f'):\n"," frpcStartArg = _frp_config_or_file.strip()\n"," \n","if not frpcStartArg:\n"," conf,url = get_freefrp_confog(_server_port)\n"," echoToFile(conf,f'{_install_path}/configFiles/frpc_webui.ini')\n"," freefrp_url = url\n"," frpcStartArg = f' -c {_install_path}/configFiles/frpc_webui.ini'\n","\n","ngrokToken=''\n","_ngrok_config_or_file = replace_path(locals().get('ngrok_config_or_file') or globals().get('ngrok_config_or_file')) or ngrokTokenFile\n","if _ngrok_config_or_file:\n"," if Path(_ngrok_config_or_file.strip()).exists():\n"," ngrokTokenFile = _ngrok_config_or_file.strip()\n"," if Path(ngrokTokenFile).exists():\n"," with open(ngrokTokenFile,encoding = \"utf-8\") as nkfile:\n"," ngrokToken = 
nkfile.readline()\n"," elif not _ngrok_config_or_file.strip().startswith('/'):\n"," ngrokToken=_ngrok_config_or_file.strip()\n"," \n","huggingface_headers:dict = None "]},{"cell_type":"markdown","metadata":{},"source":["## 文件下载工具\n","\n","---\n","\n","link_or_download_flie(config:str, skip_url:bool=False, _link_instead_of_copy:bool=True, base_path:str = '',sync:bool=False,thread_num:int=None)"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["import concurrent.futures\n","import importlib\n","import os\n","import pprint\n","import re\n","from pathlib import Path\n","from typing import List\n","\n","import requests\n","\n","show_shell_info = False\n","\n","def is_installed(package):\n"," try:\n"," spec = importlib.util.find_spec(package)\n"," except ModuleNotFoundError:\n"," return False\n","\n"," return spec is not None\n","\n","def download_file(url:str, filename:str, dist_path:str, cache_path = '',_link_instead_of_copy:bool=True,headers={}):\n"," # 获取文件的真实文件名\n"," if not filename:\n"," with requests.get(url, stream=True,headers=headers) as r:\n"," if 'Content-Disposition' in r.headers:\n"," filename = r.headers['Content-Disposition'].split('filename=')[1].strip('\"')\n"," r.close()\n"," if not filename and re.search(r'/[^/]+\\.[^/]+$',url):\n"," filename = url.split('/')[-1].split('?')[0]\n"," \n"," filename = re.sub(r'[\\\\/:*?\"<>|;]', '', filename)\n"," filename = re.sub(r'[\\s\\t]+', '_', filename)\n"," \n"," print(f'下载 {filename} url: {url} --> {dist_path}')\n"," \n"," # 创建目录\n"," if cache_path and not Path(cache_path).exists():\n"," os.makedirs(cache_path,exist_ok=True)\n"," if dist_path and not Path(dist_path).exists():\n"," os.makedirs(dist_path,exist_ok=True)\n"," \n"," # 拼接文件的完整路径\n"," filepath = os.path.join(dist_path, filename)\n","\n"," if cache_path:\n"," cache_path = os.path.join(cache_path, filename)\n"," \n"," # 判断文件是否已存在\n"," if Path(filepath).exists():\n"," print(f'文件 {filename} 已存在 
{dist_path}')\n"," return\n"," \n"," if cache_path and Path(cache_path).exists():\n"," run(f'cp -n -r -f {\"-s\" if _link_instead_of_copy else \"\"} {cache_path} {dist_path}')\n"," print(f'文件缓存 {cache_path} --> {dist_path}')\n"," return\n"," # 下载文件\n"," with requests.get(url, stream=True, headers=headers) as r:\n"," r.raise_for_status()\n"," with open(cache_path or filepath, 'wb') as f:\n"," for chunk in r.iter_content(chunk_size=1024):\n"," if chunk:\n"," f.write(chunk)\n"," # 如果使用了缓存目录 需要复制或链接文件到目标目录\n"," if cache_path:\n"," run(f'cp -n -r -f {\"-s\" if _link_instead_of_copy else \"\"} {cache_path} {dist_path}')\n"," print(f'下载完成 {filename} --> {dist_path}')\n"," \n","def download_git(url, dist_path, cache_path = '',_link_instead_of_copy:bool=True):\n"," if not Path(dist_path).exists():\n"," os.makedirs(dist_path,exist_ok=True)\n"," if show_shell_info:\n"," print(f'git 下载 {url} --> {dist_path}')\n"," if cache_path and not Path(cache_path).exists():\n"," os.makedirs(cache_path,exist_ok=True)\n"," run(f'git clone {url}',cwd = cache_path)\n"," if cache_path:\n"," run(f'cp -n -r -f {cache_path}/* {dist_path}')\n"," else:\n"," run(f'git clone {url}',cwd = dist_path)\n"," if show_shell_info:\n"," print(f'git 下载完成 {url} --> {dist_path}')\n"," \n"," \n","def download_huggingface(url:str, filename:str, dist_path, cache_path = '',_link_instead_of_copy:bool=True):\n"," fileReg = r'^https:\\/\\/huggingface.co(\\/([^\\/]+\\/)?[^\\/]+\\/[^\\/]+\\/(resolve|blob)\\/[^\\/]+\\/|[^\\.]+\\.[^\\.]+$|download=true)'\n"," def isFile(url:str):\n"," if re.match(fileReg,url):\n"," return True\n"," return False\n"," if isFile(url):\n"," download_file(url,filename,dist_path,cache_path,_link_instead_of_copy,headers=huggingface_headers)\n"," else:\n"," download_git(url,dist_path,cache_path,_link_instead_of_copy)\n"," \n","# 加入文件到下载列表\n","def pause_url(url:str,dist_path:str):\n"," file_name = ''\n"," if re.match(r'^[^:]+:(https?|ftps?)://', url, flags=0):\n"," file_name = 
re.findall(r'^[^:]+:',url)[0][:-1]\n"," url = url[len(file_name)+1:]\n"," if not re.match(r'^(https?|ftps?)://',url):\n"," return\n"," file_name = re.sub(r'\\s+','_',file_name or '')\n"," path_hash = str(hash(url)).replace('-','')\n"," \n"," return {'file_name':file_name,'path_hash':path_hash,'url':url,'dist_path':dist_path}\n","\n","def download_urls(download_list:List[dict],sync:bool=False,thread_num:int=5, \n"," cache_path:str=os.path.join(os.environ['HOME'],'.cache','download_util'),\n"," _link_instead_of_copy:bool=True,is_await:bool=False):\n"," if sync:\n"," for conf in download_list:\n"," cache_dir = os.path.join(cache_path,conf['path_hash'])\n"," if conf['url'].startswith('https://github.com'):\n"," download_git(conf['url'],conf['dist_path'],cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy)\n"," continue\n"," if conf['url'].startswith('https://huggingface.co'):\n"," download_huggingface(conf['url'],conf['file_name'],conf['dist_path'],cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy)\n"," continue\n"," if conf['url'].startswith('https://civitai.com'):\n"," if not re.search(r'token=.+', conf['url']):\n"," if conf['url'].find('?') == -1:\n"," conf['url'] = conf['url']+'?token=fee8bb78b75566eddfd04d061996185c'\n"," else:\n"," conf['url'] = conf['url']+'&token=fee8bb78b75566eddfd04d061996185c'\n"," download_file(conf['url'],conf['file_name'],conf['dist_path'],cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy)\n"," else:\n"," executor = concurrent.futures.ThreadPoolExecutor(max_workers=thread_num)\n"," futures = []\n"," for conf in download_list:\n"," cache_dir = os.path.join(cache_path,conf['path_hash'])\n"," if conf['url'].startswith('https://github.com'):\n"," futures.append(executor.submit(download_git, conf['url'],conf['dist_path'],\n"," cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy))\n"," continue\n"," if conf['url'].startswith('https://huggingface.co'):\n"," 
futures.append(executor.submit(download_huggingface,conf['url'],conf['file_name'],conf['dist_path'],cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy))\n"," continue\n"," if conf['url'].startswith('https://civitai.com'):\n"," if not re.search(r'token=.+', conf['url']):\n"," if conf['url'].find('?') == -1:\n"," conf['url'] = conf['url']+'?token=fee8bb78b75566eddfd04d061996185c'\n"," else:\n"," conf['url'] = conf['url']+'&token=fee8bb78b75566eddfd04d061996185c'\n"," futures.append(executor.submit(download_file, conf['url'],conf['file_name'],conf['dist_path'],\n"," cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy))\n"," if is_await:\n"," concurrent.futures.wait(futures)\n"," \n"," \n","def parse_config(config:str):\n"," space_string = ' \\n\\r\\t\\'\\\",'\n"," other_flie_list = [item.split('#')[0].strip(space_string) for item in config.split('\\n') if item.strip(space_string)]\n"," other_flie_list = [item.strip() for item in other_flie_list if item.strip()]\n"," other_flie_list_store = {}\n"," other_flie_list_store_name='default'\n"," other_flie_list_store_list_cache=[]\n"," \n"," for item in other_flie_list:\n"," if item.startswith('[') and item.endswith(']'):\n"," if not other_flie_list_store_name == 'default':\n"," other_flie_list_store[other_flie_list_store_name]=other_flie_list_store_list_cache\n"," other_flie_list_store_list_cache = []\n"," other_flie_list_store_name = item[1:-1]\n"," else:\n"," other_flie_list_store_list_cache.append(item)\n"," other_flie_list_store[other_flie_list_store_name]=other_flie_list_store_list_cache\n"," \n"," return other_flie_list_store\n","\n","\n","def link_or_download_flie(config:str, skip_url:bool=False, _link_instead_of_copy:bool=True, base_path:str = '',\n"," sync:bool=False,thread_num:int=None, is_await:bool=False):\n"," store:dict[str,List[str]] = parse_config(config)\n"," download_list = []\n"," for dist_dir in store.keys():\n"," dist_path = os.path.join(base_path,dist_dir)\n"," 
os.makedirs(dist_path,exist_ok=True)\n"," for path in store[dist_dir]:\n"," if 'https://' in path or 'http://' in path:\n"," if skip_url:\n"," continue\n"," if sync:\n"," download_urls([pause_url(path,dist_path)],_link_instead_of_copy = _link_instead_of_copy, sync=sync)\n"," continue\n"," download_list.append(pause_url(path,dist_path))\n"," else:\n"," run(f'cp -n -r -f {\"-s\" if _link_instead_of_copy else \"\"} {path} {dist_path}')\n"," if show_shell_info:\n"," print(f'{\"链接\" if _link_instead_of_copy else \"复制\"} {path} --> {dist_path}')\n"," run(f'rm -f {dist_path}/\\*.* ')\n"," if not skip_url:\n"," if show_shell_info:\n"," pprint.pprint(download_list)\n"," download_urls(download_list,_link_instead_of_copy = _link_instead_of_copy, sync=sync, thread_num=thread_num or 3,is_await=is_await)"]},{"cell_type":"markdown","metadata":{"id":"p0uS-BLULCtD"},"source":["## kaggle public API\n","\n","**不能使用%cd这种会改变当前工作目录的命令,会导致和其他线程冲突**\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"id":"m8FJi4j0LCtD","trusted":true},"outputs":[],"source":["# 安装kaggle的api token文件\n","def initKaggleConfig():\n"," if Path('~/.kaggle/kaggle.json').exists():\n"," return True\n"," if Path(kaggleApiTokenFile).exists():\n"," run(f'''mkdir -p ~/.kaggle/''')\n"," run('cp '+kaggleApiTokenFile+' ~/.kaggle/kaggle.json')\n"," run(f'''chmod 600 ~/.kaggle/kaggle.json''')\n"," return True\n"," print('缺少kaggle的apiToken文件,访问:https://www.kaggle.com/你的kaggle用户名/account 获取')\n"," return False\n","\n","def getUserName():\n"," if not initKaggleConfig(): return\n"," import kaggle\n"," return kaggle.KaggleApi().read_config_file()['username']\n","\n","def createOrUpdateDataSet(path:str,datasetName:str):\n"," if not initKaggleConfig(): return\n"," print('创建或更新数据集 '+datasetName)\n"," import kaggle\n"," run(f'mkdir -p {_install_path}/kaggle_cache')\n"," run(f'rm -rf {_install_path}/kaggle_cache/*')\n"," datasetDirPath = _install_path+'/kaggle_cache/'+datasetName\n"," 
run('mkdir -p '+datasetDirPath)\n"," run('cp -f '+path+' '+datasetDirPath+'/')\n"," username = getUserName()\n"," print(\"kaggle username:\"+username)\n"," datasetPath = username+'/'+datasetName\n"," datasetList = kaggle.api.dataset_list(mine=True,search=datasetPath)\n"," print(datasetList)\n"," if len(datasetList) == 0 or datasetPath not in [str(d) for d in datasetList]: # 创建 create\n"," run('kaggle datasets init -p' + datasetDirPath)\n"," metadataFile = datasetDirPath+'/dataset-metadata.json'\n"," run('sed -i s/INSERT_TITLE_HERE/'+ datasetName + '/g ' + metadataFile)\n"," run('sed -i s/INSERT_SLUG_HERE/'+ datasetName + '/g ' + metadataFile)\n"," run('cat '+metadataFile)\n"," run('kaggle datasets create -p '+datasetDirPath)\n"," print('create database done')\n"," else:\n"," kaggle.api.dataset_metadata(datasetPath,datasetDirPath)\n"," kaggle.api.dataset_create_version(datasetDirPath, 'auto update',dir_mode='zip')\n"," print('upload database done')\n","\n","def downloadDatasetFiles(datasetName:str,outputPath:str):\n"," if not initKaggleConfig(): return\n"," print('下载数据集文件 '+datasetName)\n"," import kaggle\n"," username = getUserName()\n"," datasetPath = username+'/'+datasetName\n"," datasetList = kaggle.api.dataset_list(mine=True,search=datasetPath)\n"," if datasetPath not in [str(d) for d in datasetList]:\n"," return False\n"," run('mkdir -p '+outputPath)\n"," kaggle.api.dataset_download_files(datasetPath,path=outputPath,unzip=True)\n"," return True\n","\n"]},{"cell_type":"markdown","metadata":{},"source":["## 同步文件夹到 huggingface\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["# 文件夹与 huggingface 同步\n","if _huggingface_token:\n"," if not is_installed('watchdog'):\n"," requirements.append('watchdog')\n"," if not is_installed('huggingface_hub'):\n"," requirements.append('huggingface_hub')\n"," else:\n"," try:\n"," from huggingface_hub import HfApi,login,snapshot_download\n"," except:\n"," 
requirements.append('huggingface_hub')\n","\n","huggingface_is_init = False\n","\n","def init_huggingface():\n"," if not _huggingface_token:\n"," return False\n","\n"," global huggingface_headers\n"," global huggingface_is_init\n"," \n"," from huggingface_hub import login\n"," token = replace_path(_huggingface_token)\n"," if not _huggingface_token.startswith('hf_') and Path(token).exists():\n"," with open(token,encoding = \"utf-8\") as nkfile:\n"," token = nkfile.readline()\n"," if not token.startswith('hf_'):\n"," print('huggingface token 不正确,请将 token 或 仅存放token 的txt文件路径填入 _huggingface_token 配置')\n"," return False\n"," login(token,add_to_git_credential=True)\n"," huggingface_headers = {'Authorization': 'Bearer '+token}\n"," print('huggingface token 已经加载,可以下载私有仓库或文件')\n"," \n"," huggingface_is_init = True\n"," return True\n","\n","\n","def start_sync_log_to_huggingface(repo_id:str,directory_to_watch,repo_type='dataset',file_types=['.png','.jpg','.txt','.webp','.jpeg']):\n"," if not huggingface_is_init:\n"," print('huggingface 相关功能未初始化 请调用 init_huggingface() 初始化')\n"," \n"," if not directory_to_watch:\n"," print('请指定需要同步的本地目录 directory_to_watch')\n"," return\n"," if not Path(directory_to_watch).exists():\n"," run(f'mkdir -p {directory_to_watch}')\n"," from watchdog.observers import Observer\n"," from watchdog.events import FileSystemEventHandler\n"," from huggingface_hub import HfApi,login,snapshot_download\n"," \n"," # 配置监视的目录和 Hugging Face 仓库信息\n"," class FileChangeHandler(FileSystemEventHandler):\n"," def __init__(self, api, repo_id, repo_type,directory_to_watch):\n"," self.api = api\n"," self.repo_id = repo_id\n"," self.repo_type = repo_type\n"," self.directory_to_watch = directory_to_watch\n"," def on_created(self, event):\n"," if not event.is_directory:\n"," # 上传新文件到 Hugging Face 仓库\n"," file_path = event.src_path\n"," file_name:str = os.path.basename(file_path)\n"," print(file_name)\n"," if file_name[file_name.rindex('.'):] not in file_types: return\n"," 
print(file_name,'>>','huggingface')\n"," try:\n"," self.api.upload_file(\n"," path_or_fileobj=file_path,\n"," path_in_repo=file_path.replace(self.directory_to_watch,''),\n"," repo_id=self.repo_id,\n"," repo_type=self.repo_type,\n"," )\n"," except IOError as error:\n"," print(error)\n","\n"," def on_deleted(self, event):\n"," if not event.is_directory:\n"," # 从 Hugging Face 仓库删除文件\n"," file_path = event.src_path\n"," file_name = os.path.basename(file_path)\n"," if file_name[file_name.rindex('.'):] not in file_types: return\n"," try:\n"," self.api.delete_file(\n"," path_in_repo=file_path.replace(self.directory_to_watch,''),\n"," repo_id=self.repo_id,\n"," repo_type=self.repo_type,\n"," )\n"," except IOError as error:\n"," print(error)\n","\n"," def on_modified(self, event):\n"," if not event.is_directory:\n"," # 更新 Hugging Face 仓库中的文件\n"," file_path = event.src_path\n"," file_name = os.path.basename(file_path)\n"," if file_name[file_name.rindex('.'):] not in ['.png','.jpg','.txt','.webp','.jpeg']: return\n"," try:\n"," self.api.upload_file(\n"," path_or_fileobj=file_path,\n"," path_in_repo=file_path.replace(self.directory_to_watch,''),\n"," repo_id=self.repo_id,\n"," repo_type=self.repo_type,\n"," )\n"," except IOError as error:\n"," print(error)\n","\n"," def on_moved(self, event):\n"," if not event.is_directory:\n"," file_path = event.dest_path\n"," file_name = os.path.basename(file_path)\n"," if file_name[file_name.rindex('.'):] not in file_types: return\n"," if event.dest_path.startswith(self.directory_to_watch):\n"," try:\n"," self.api.upload_file(\n"," path_or_fileobj=file_path,\n"," path_in_repo=file_path.replace(self.directory_to_watch,''),\n"," repo_id=self.repo_id,\n"," repo_type=self.repo_type,\n"," )\n"," except IOError as error:\n"," print(error)\n","\n"," api = HfApi()\n"," \n"," # 创建观察者对象并注册文件变化处理程序\n"," event_handler = FileChangeHandler(api,repo_id,repo_type,directory_to_watch)\n"," observer = Observer()\n"," observer.schedule(event_handler, 
directory_to_watch, recursive=True)\n","\n"," # 启动观察者\n"," observer.name = \"solo_directory_to_watch\"\n"," print(f'启动目录同步,{directory_to_watch} 将自动同步到 huggingface {repo_type} : {repo_id}')\n"," observer.start()"]},{"cell_type":"markdown","metadata":{"id":"sswa04veLCtE"},"source":["## 工具函数\n","**不能使用%cd这种会改变当前工作目录的命令,会导致和其他线程冲突**\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"trusted":true},"outputs":[],"source":["\n","def zipPath(path:str,zipName:str,format='tar'):\n"," if path.startswith('$install_path'):\n"," path = path.replace('$install_path',_install_path)\n"," if path.startswith('$output_path'):\n"," path = path.replace('$install_path',_output_path)\n"," if not path.startswith('/'):\n"," path = f'{_install_path}/{_ui_dir_name}/{path}'\n"," if Path(path).exists():\n"," if 'tar' == format:\n"," run(f'tar -cf {_output_path}/'+ zipName +'.tar -C '+ path +' . ')\n"," elif 'gz' == format:\n"," run(f'tar -czf {_output_path}/'+ zipName +'.tar.gz -C '+ path +' . 
')\n"," return\n"," print('指定的目录不存在:'+path)\n"]},{"cell_type":"markdown","metadata":{},"source":["## 内网穿透\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"_kg_hide-output":true,"id":"coqQvTSLLCtE","trusted":true},"outputs":[],"source":["def printUrl(url,name=''):\n"," print(f'{name} 访问地址:{url}')\n"," for key in sorted(_proxy_path.keys(), key=len)[::-1]:\n"," print(f'{name} 本地服务:{_proxy_path[key]} 访问地址:{url}{key}')\n","# ngrok\n","def startNgrok(ngrokToken:str,ngrokLocalPort:int):\n"," if not is_installed('pyngrok'):\n"," run('pip install pyngrok')\n"," from pyngrok import conf, ngrok\n"," try:\n"," conf.get_default().auth_token = ngrokToken\n"," conf.get_default().monitor_thread = False\n"," ssh_tunnels = ngrok.get_tunnels(conf.get_default())\n"," url = ''\n"," if len(ssh_tunnels) == 0:\n"," ssh_tunnel = ngrok.connect(ngrokLocalPort)\n"," url = ssh_tunnel.public_url\n"," print('ngrok 访问地址:'+ssh_tunnel.public_url)\n"," else:\n"," print('ngrok 访问地址:'+ssh_tunnels[0].public_url)\n"," url = ssh_tunnels[0].public_url\n"," printUrl(url,'ngrok')\n"," def auto_request_ngrok():\n"," if url:\n"," while(_runing):\n"," time.sleep(60*1)\n"," try:\n"," res = requests.get(url+'/',headers={\"ngrok-skip-browser-warning\" : \"1\"},timeout=10)\n"," except:\n"," ''\n"," # print('自动调用ngrok链接以保存链接不会断开',res.status_code)\n","\n"," # threading.Thread(target = auto_request_ngrok,daemon=True,name='solo_auto_request_ngrok').start()\n"," except:\n"," print('启动ngrok出错')\n"," \n","def startFrpc(name,configFile):\n"," if not Path(f'{_install_path}/frpc/frpc').exists():\n"," installFrpExe()\n"," if freefrp_url:\n"," printUrl(freefrp_url,'freefrp')\n"," echoToFile(f'''\n","cd {_install_path}/frpc/\n","{_install_path}/frpc/frpc {configFile}\n","''',f'{_install_path}/frpc/start.sh')\n"," get_ipython().system(f'''bash {_install_path}/frpc/start.sh''')\n"," \n","def installFrpExe():\n"," if _useFrpc:\n"," print('安装frpc')\n"," run(f'mkdir -p 
{_install_path}/frpc')\n"," if Path(frpcExePath).exists():\n"," run(f'cp -f -n {frpcExePath} {_install_path}/frpc/frpc')\n"," else:\n"," run(f'wget \"https://huggingface.co/datasets/ACCA225/Frp/resolve/main/frpc\" -O {_install_path}/frpc/frpc')\n"," \n"," for ssl in frpcSSLFFlies:\n"," if Path(ssl).exists():\n"," run(f'cp -f -n {ssl}/* {_install_path}/frpc/')\n"," run(f'chmod +x {_install_path}/frpc/frpc')\n"," run(f'{_install_path}/frpc/frpc -v')\n","\n","def startProxy():\n"," if _useNgrok:\n"," startNgrok(ngrokToken,_server_port)\n"," if _useFrpc:\n"," startFrpc('frpc_proxy',frpcStartArg)"]},{"cell_type":"markdown","metadata":{},"source":["## NGINX 反向代理\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"_kg_hide-output":true,"trusted":true},"outputs":[],"source":["\n","# nginx 反向代理配置文件\n","def localProxy():\n"," def getProxyLocation(subPath:str, localServer:str):\n"," return '''\n"," location '''+ subPath +'''\n"," {\n"," proxy_pass '''+ localServer +''';\n"," proxy_set_header Host $host;\n"," proxy_set_header X-Real-IP $remote_addr;\n"," proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n"," proxy_set_header REMOTE-HOST $remote_addr;\n"," proxy_set_header Upgrade $http_upgrade;\n"," proxy_set_header Connection upgrade;\n"," proxy_http_version 1.1;\n"," proxy_connect_timeout 10m;\n"," proxy_read_timeout 10m;\n"," }\n"," \n"," '''\n"," \n"," conf = '''\n","server\n","{\n"," listen '''+str(_server_port)+''';\n"," listen [::]:'''+str(_server_port)+''';\n"," server_name 127.0.0.1 localhost 0.0.0.0 \"\";\n"," \n"," if ($request_method = OPTIONS) {\n"," return 200;\n"," }\n"," fastcgi_send_timeout 10m;\n"," fastcgi_read_timeout 10m;\n"," fastcgi_connect_timeout 10m;\n"," \n"," '''+ ''.join([getProxyLocation(key,_proxy_path[key]) for key in sorted(_proxy_path.keys(), key=len)[::-1]]) +'''\n","}\n","'''\n"," echoToFile(conf,'/etc/nginx/conf.d/proxy_nginx.conf')\n"," if not 
check_service('localhost',_server_port):\n"," run(f'''nginx -c /etc/nginx/nginx.conf''')\n"," run(f'''nginx -s reload''')"]},{"cell_type":"markdown","metadata":{},"source":["## 线程清理工具\n","\n","---\n","\n","清理线程名以 solo_ 开头的所有线程"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"trusted":true},"outputs":[],"source":["import inspect\n","import ctypes\n","\n","def _async_raise(tid, exctype):\n"," \"\"\"raises the exception, performs cleanup if needed\"\"\"\n"," tid = ctypes.c_long(tid)\n"," if not inspect.isclass(exctype):\n"," exctype = type(exctype)\n"," res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))\n"," if res == 0:\n"," raise ValueError(\"invalid thread id\")\n"," elif res != 1:\n"," # \"\"\"if it returns a number greater than one, you're in trouble,\n"," # and you should call it again with exc=NULL to revert the effect\"\"\"\n"," ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)\n"," raise SystemError(\"PyThreadState_SetAsyncExc failed\")\n","\n","def stop_thread(thread):\n"," _async_raise(thread.ident, SystemExit)\n","\n","def stop_solo_threads():\n"," global _runing\n"," _runing = False\n"," # 获取当前所有活动的线程\n"," threads = threading.enumerate()\n"," # 关闭之前创建的子线程\n"," for thread in threads:\n"," if thread.name.startswith('solo_'):\n"," print(f'结束线程:{thread.name}')\n"," try:\n"," stop_thread(thread)\n"," except socket.error:\n"," print(f'结束线程:{thread.name} 执行失败')\n"," "]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["docs = '''\n","# 配置项\n","server_port int 本地服务端口,穿透使用的端口\n","huggingface_token string huggingface token,用于同步文件或者下载需要登录的文件\n","hidden_console_info bool 是否隐藏控制台信息\n","proxy_path {string:string} 将哪个路径映射到哪个服务,格式为:{'/':'http://127.0.0.1:5000/'}\n","kaggle_api_token string kaggle api token,用于访问kaggle的api\n","ngrok_config_or_file ngrok的token文件内容或者放token的文件的路径\n","frp_config_or_file frp的配置内容或者配置文件的路径\n","frp_ssl_dir frp的https证书存放目录\n","# 
工具函数\n","执行cmd命令:\n","run(command, cwd=None, desc=None, errdesc=None, custom_env=None,try_error:bool=True)\n","检查网络是否可以访问\n","check_service(host, port)\n","检查gpu是否存在\n","check_gpu()\n","写入文本到文件\n","echoToFile(content:str,path:str)\n","获取一个免费frp配置\n","get_freefrp_confog(local_port)\n","\n","下载文件\n","link_or_download_flie(config:str, \n","skip_url:bool=False, \n","_link_instead_of_copy:bool=True, \n","base_path:str = '',\n","sync:bool=False, # 同步且按顺序下载\n","thread_num:int=None, \n","is_await:bool=False) # 异步不按顺序但等待下载完成\n","---- 下载文件功能的配置的格式\n","[目标目录] # 中括号必须写\n","下载链接1\n","名称:下载链接2 # 链接前面的名称可以用于对文件重命名\n","# 备注 井号后面的内容会被忽略\n","可以下载git仓库\n","可以下载huggingfacec仓库或者文件,会使用 huggingface_token 作为凭证下载\n","可以下载其他可以直接下载的互联网资源\n","可以作为文件链接功能,将某个目录或目录下的全部文件链接到目标目录\n","---- 下载文件功能结束结束\n","\n","创建kaggle数据集\n","createOrUpdateDataSet(path:str,datasetName:str)\n","下载kaggle数据集\n","downloadDatasetFiles(datasetName:str,outputPath:str)\n","同步目录到huggingface,可指定同步的文件类型\n","start_sync_log_to_huggingface(repo_id:str,directory_to_watch,repo_type='dataset',file_types=['.png','.jpg','.txt','.webp','.jpeg'])\n","压缩指定目录到指定位置\n","zipPath(path:str,zipName:str,format='tar') # 可用tar或gz\n","停止后台线程 # 仅停止线程名称前缀为 solo_ 的线程\n","stop_solo_threads()\n","'''\n","def get_docs():\n"," print(docs)\n"," \n","print('可以执行get_docs()查看文档')"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["if not (True if os.getenv('IS_INSTALL_NGINX','False') == 'True' else False):\n"," run('git lfs install')\n"," run('git config --global credential.helper store')\n"," run('sudo apt update -y')\n"," run('sudo apt install nginx -y')\n"," os.environ['IS_INSTALL_NGINX'] = 'True'\n","stop_solo_threads()\n","time.sleep(2)\n","threading.Thread(target = startProxy, daemon=True, 
name='solo_startProxy').start()\n","localProxy()\n","init_huggingface()"]}],"metadata":{"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"datasetId":2716934,"sourceId":6167400,"sourceType":"datasetVersion"},{"datasetId":3654544,"sourceId":6346544,"sourceType":"datasetVersion"},{"datasetId":2962375,"sourceId":6720235,"sourceType":"datasetVersion"},{"datasetId":3074484,"sourceId":6817788,"sourceType":"datasetVersion"}],"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":4}
|