2024-05-15 12:57:15 +08:00
|
|
|
|
# pip install scenedetect opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple
|
|
|
|
|
|
|
|
|
|
|
|
from scenedetect.video_manager import VideoManager
|
|
|
|
|
|
from scenedetect.scene_manager import SceneManager
|
|
|
|
|
|
from scenedetect.stats_manager import StatsManager
|
|
|
|
|
|
from scenedetect.detectors.content_detector import ContentDetector
|
|
|
|
|
|
import os
|
|
|
|
|
|
import sys
|
|
|
|
|
|
import subprocess
|
|
|
|
|
|
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
from faster_whisper import WhisperModel
|
|
|
|
|
|
import public_tools
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 获取智能画面分割的时间或者秒数
|
|
|
|
|
|
def find_scenes(video_path, sensitivity):
    """Detect scene cuts in *video_path* using PySceneDetect's ContentDetector.

    Args:
        video_path: path to the source video file.
        sensitivity: detection threshold, converted to float and passed to
            ContentDetector (lower = more cuts).

    Returns:
        A list of ``[start_timecode, end_timecode]`` string pairs, one per
        detected scene, in playback order.
    """
    print(
        "正在计算分镜数据" + "sensitivity:" + str(sensitivity) + "path : " + video_path
    )
    sys.stdout.flush()

    manager = VideoManager([video_path])
    detection = SceneManager(StatsManager())
    # Content-aware detector; threshold comes from the caller.
    detection.add_detector(ContentDetector(threshold=float(sensitivity)))

    timecode_pairs = []
    try:
        # Downscale for speed, then run detection over the whole video.
        manager.set_downscale_factor()
        manager.start()
        detection.detect_scenes(frame_source=manager)

        print("分镜数据列表:")
        sys.stdout.flush()
        for index, scene in enumerate(detection.get_scene_list()):
            begin, finish = scene[0], scene[1]
            timecode_pairs.append([begin.get_timecode(), finish.get_timecode()])
            print(
                "Scene %2d: Start %s / Frame %d, End %s / Frame %d"
                % (
                    index + 1,
                    begin.get_timecode(),
                    begin.get_frames(),
                    finish.get_timecode(),
                    finish.get_frames(),
                )
            )
            sys.stdout.flush()
    finally:
        # Always release the capture, even if detection raised.
        manager.release()

    return timecode_pairs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 如果不存在就创建
|
|
|
|
|
|
def createDir(file_dir):
    """Create directory *file_dir* if it does not already exist.

    Args:
        file_dir: directory path to ensure.

    Raises:
        FileExistsError: if *file_dir* exists but is not a directory
            (same behavior as the previous mkdir-based version).
    """
    # makedirs + exist_ok is race-free (no check-then-act window) and,
    # unlike os.mkdir, also creates missing intermediate directories.
    os.makedirs(file_dir, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 切分一个视频
|
2024-06-01 15:08:22 +08:00
|
|
|
|
def _timecode_to_seconds(timecode):
    """Convert an ``HH:MM:SS[.mmm]`` timecode string to seconds (float)."""
    h, m, s = timecode.split(":")
    return int(h) * 3600 + int(m) * 60 + float(s)


def ClipVideo(video_path, out_folder, image_out_folder, sensitivity, gpu_type):
    """Split *video_path* into per-scene clips and extract one frame per clip.

    Scene boundaries come from find_scenes(). Each scene is re-encoded with
    ffmpeg into *out_folder* as ``00001.mp4``, ``00002.mp4``, …, and one
    representative frame (the temporal middle of the clip) is written as a
    PNG of the same name into *image_out_folder*.

    Args:
        video_path: source video file.
        out_folder: directory receiving the numbered .mp4 clips.
        image_out_folder: directory receiving the numbered .png frames.
        sensitivity: ContentDetector threshold, forwarded to find_scenes().
        gpu_type: "NVIDIA" (h264_nvenc), "AMD" (h264_vaapi) or anything
            else (CPU libx264).

    Returns:
        List of dicts with keys ``start_time_str``, ``end_time_str``,
        ``out_video_file`` and ``video_name``, one per detected scene.

    Raises:
        subprocess.CalledProcessError: if any ffmpeg invocation fails.
    """
    shijian_list = find_scenes(video_path, sensitivity)  # [[start, end], ...]
    print("总共有%s个场景" % str(len(shijian_list)))
    sys.stdout.flush()

    # Pick the video encoder from the GPU vendor; fall back to CPU x264.
    # NOTE(review): h264_vaapi usually also needs -vaapi_device plus a
    # hwupload filter — confirm the AMD path actually works on target hosts.
    if gpu_type == "NVIDIA":
        encoder = "h264_nvenc"
    elif gpu_type == "AMD":
        encoder = "h264_vaapi"
    else:
        encoder = "libx264"

    video_list = []
    for i, (start_time_str, end_time_str) in enumerate(shijian_list):
        print("开始输出第" + str(i + 1) + "个分镜")
        video_name = "{:05d}".format(i + 1)
        out_video_file = os.path.join(out_folder, video_name + ".mp4")
        sys.stdout.flush()
        video_list.append(
            {
                "start_time_str": start_time_str,
                "end_time_str": end_time_str,
                "out_video_file": out_video_file,
                "video_name": video_name,
            }
        )

        # Cut the scene with ffmpeg.
        # BUGFIX: "-loglevel error" used to be appended AFTER the output
        # file; ffmpeg ignores trailing options, so it never took effect.
        # It is a global option and now comes first.
        command = [
            "ffmpeg",
            "-loglevel", "error",
            "-i", video_path,
            "-ss", start_time_str,
            "-to", end_time_str,
            "-c:v", encoder,
            "-preset", "fast",
            "-c:a", "copy",
            out_video_file,
        ]
        subprocess.run(
            command,
            check=True,
            stderr=subprocess.PIPE,
        )

    print("分镜输出完成。开始抽帧")
    sys.stdout.flush()

    for vi in video_list:
        # Seek to the temporal middle of the clip and grab a single frame.
        start_seconds = _timecode_to_seconds(vi["start_time_str"])
        end_seconds = _timecode_to_seconds(vi["end_time_str"])

        print("正在抽帧:" + vi["video_name"])
        sys.stdout.flush()
        subprocess.run(
            [
                "ffmpeg",
                # BUGFIX: was appended after the output file (ignored as a
                # trailing option); moved up so it actually applies.
                "-loglevel", "error",
                "-ss", str((end_seconds - start_seconds) / 2),
                "-i", vi["out_video_file"],
                "-frames:v", "1",
                os.path.join(image_out_folder, vi["video_name"] + ".png"),
            ],
            # Consistency: the clip command above already used check=True.
            check=True,
        )

    print("抽帧完成,开始识别文案")
    sys.stdout.flush()
    return video_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def SplitAudio(video_out_folder, video_list):
    """Extract the audio track of every clip in *video_list* as a 128k MP3.

    Args:
        video_out_folder: directory receiving the .mp3 files.
        video_list: dicts as produced by ClipVideo(); only the
            ``video_name`` and ``out_video_file`` keys are used.

    Returns:
        List of the generated mp3 paths, in input order.

    Raises:
        subprocess.CalledProcessError: if an ffmpeg invocation fails.
    """
    # Equivalent of: ffmpeg -i input_file.mp4 -vn -ab 128k output_file.mp3
    print("正在分离音频!!")
    mp3_list = []
    sys.stdout.flush()
    for v in video_list:
        mp3_path = os.path.join(video_out_folder, v["video_name"] + ".mp3")
        mp3_list.append(mp3_path)
        subprocess.run(
            [
                "ffmpeg",
                # BUGFIX: "-loglevel error" was appended after the output
                # file, where ffmpeg discards it as a trailing option; as a
                # global option it now precedes the input.
                "-loglevel", "error",
                "-i", v["out_video_file"],
                "-vn",
                "-ab", "128k",
                mp3_path,
            ],
            check=True,
        )
    return mp3_list
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def GetText(out_folder, mp3_list):
    """Transcribe every mp3 in *mp3_list* with faster-whisper large-v3.

    Downloads (or reuses the cached copy of) the model files from the
    HuggingFace hub, runs Chinese speech recognition with VAD filtering on
    each file, and writes all recognized texts to "文案.txt" inside
    *out_folder* via public_tools.PublicTools.
    """
    recognized = []

    # Fetch the weights plus the auxiliary config files; hf_hub_download
    # returns cached paths on subsequent runs.
    print("正在下载或加载模型")
    sys.stdout.flush()
    repo = "Systran/faster-whisper-large-v3"
    model_path = Path(hf_hub_download(repo_id=repo, filename="model.bin"))
    for aux_file in (
        "config.json",
        "preprocessor_config.json",
        "tokenizer.json",
        "vocabulary.json",
    ):
        hf_hub_download(repo_id=repo, filename=aux_file)

    # All files land in the same cache directory as model.bin.
    model = WhisperModel(
        model_size_or_path=os.path.dirname(model_path),
        device="auto",
        local_files_only=True,
    )
    print("模型加载成功,开始识别")
    sys.stdout.flush()

    for mp in mp3_list:
        segments, info = model.transcribe(
            mp,
            beam_size=5,
            language="zh",
            vad_filter=True,
            vad_parameters=dict(min_silence_duration_ms=1000),
        )
        # Concatenate the segments, terminating each with a Chinese full stop.
        transcript = "".join(segment.text + "。" for segment in segments)
        print(mp + "识别完成")
        sys.stdout.flush()
        recognized.append(transcript)

    # Persist all transcripts in one file.
    print("文本全部识别成功,正在写出")
    sys.stdout.flush()
    public_tools.PublicTools().write_to_file(
        recognized, os.path.join(out_folder, "文案.txt")
    )
    print("写出完成")
    sys.stdout.flush()
|
|
|
|
|
|
|
|
|
|
|
|
|
2024-06-01 15:08:22 +08:00
|
|
|
|
def init(video_path, video_out_folder, image_out_folder, sensitivity, gpu_type):
    """Run the full pipeline: scene-split the video, rip audio, transcribe.

    Args:
        video_path: source video file.
        video_out_folder: directory for the per-scene clips and mp3s.
        image_out_folder: directory for the per-scene preview frames.
        sensitivity: scene-detection threshold (see find_scenes).
        gpu_type: encoder selector forwarded to ClipVideo.
    """
    clips = ClipVideo(
        video_path, video_out_folder, image_out_folder, sensitivity, gpu_type
    )
    # Split off the audio track of every clip.
    audio_files = SplitAudio(video_out_folder, clips)
    # Recognize the speech; the transcript lands next to the output folder.
    GetText(os.path.dirname(video_out_folder), audio_files)
|