    def check_values(self):
        super().check_values()
        c = asdict(self)
        assert (
            c["model_params"]["input_dim"] == self.audio.num_mels
        ), " [!] model input dimension must be equal to melspectrogram dimension."
        # get classid
        class_id = self.classname_to_classid[class_name]
        # load wav file
        wav = self.load_wav(utter_path)
        offset = random.randint(0, wav.shape[0] - self.seq_len)
        wav = wav[offset : offset + self.seq_len]

        if self.augmentator is not None and self.data_augmentation_p:
            if random.random() < self.data_augmentation_p:
                wav = self.augmentator.apply_one(wav)

        if not self.use_torch_spec:
        self.loss_method = loss_method

        print(" > Initialized Generalized End-to-End loss")

        assert self.loss_method in ["softmax", "contrast"]

        if self.loss_method == "softmax":
            self.embed_loss = self.embed_loss_softmax
107 """ 108 Calculates the GE2E loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats) 109 """ 110 111 assert x.size()[1] >= 2 112 113 centroids = torch.mean(x, 1) 114 cos_sim_matrix = self.calc_cosine_sim(x, centroids)
145 """ 146 Calculates the AngleProto loss for an input of dimensions (num_speakers, num_utts_per_speaker, dvec_feats) 147 """ 148 149 assert x.size()[1] >= 2 150 151 out_anchor = torch.mean(x[:, 1:, :], 1) 152 out_positive = x[:, 0, :]
        self.coefficient = coefficient
        self.register_buffer("filter", torch.FloatTensor([-self.coefficient, 1.0]).unsqueeze(0).unsqueeze(0))

    def forward(self, x):
        assert len(x.size()) == 2

        x = torch.nn.functional.pad(x.unsqueeze(1), (1, 0), "reflect")
        return torch.nn.functional.conv1d(x, self.filter).squeeze(1)
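# A minimal numpy sketch (illustration only, not from the library) of the same
# pre-emphasis filter: the conv above computes y[t] = x[t] - coefficient * x[t-1],
# written here directly, assuming zero history at t = 0 instead of the reflect padding.
import numpy as np

coefficient = 0.97  # hypothetical value
x = np.random.randn(16000).astype(np.float32)
y = np.empty_like(x)
y[0] = x[0]
y[1:] = x[1:] - coefficient * x[:-1]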
            criterion = criterion.cuda()

        if eval:
            self.eval()
            assert not self.training

        if not eval:
            return criterion, state["step"]
        clean_db = 10 * np.log10(np.mean(audio**2) + 1e-4)

        noise_list = random.sample(
            self.noise_list[noise_type],
            random.randint(
                self.additive_noise_config[noise_type]["min_num_noises"],
                self.additive_noise_config[noise_type]["max_num_noises"],
            ),
        )

        audio_len = audio.shape[0]

            if noiseaudio.shape[0] < audio_len:
                continue

            # sample a target SNR (in dB) from the configured range
            noise_snr = random.uniform(
                self.additive_noise_config[noise_type]["min_snr_in_db"],
                self.additive_noise_config[noise_type]["max_snr_in_db"],
            )
            noise_db = 10 * np.log10(np.mean(noiseaudio**2) + 1e-4)
            noise_wav = np.sqrt(10 ** ((clean_db - noise_db - noise_snr) / 10)) * noiseaudio
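# Why the gain above hits the sampled SNR (illustration only, not from the library):
# scaling the noise by g raises its power by 20*log10(g) dB, and solving
# clean_db - (noise_db + 20*log10(g)) == noise_snr for g gives
# g = 10 ** ((clean_db - noise_db - noise_snr) / 20), i.e. the sqrt(10 ** (.../10)) factor.
import numpy as np

clean_db, noise_db, noise_snr = -20.0, -30.0, 15.0  # hypothetical values
g = 10 ** ((clean_db - noise_db - noise_snr) / 20)
assert np.isclose(g, np.sqrt(10 ** ((clean_db - noise_db - noise_snr) / 10)))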

    def reverberate(self, audio):
        audio_len = audio.shape[0]

        rir_file = random.choice(self.rir_files)
        rir = self.ap.load_wav(rir_file, sr=self.ap.sample_rate)
        rir = rir / np.sqrt(np.sum(rir**2))
        return signal.convolve(audio, rir, mode=self.rir_config["conv_mode"])[:audio_len]

    def apply_one(self, audio):
        noise_type = random.choice(self.global_noise_list)
        if noise_type == "RIR_AUG":
            return self.reverberate(audio)
20 """ voxceleb 1 & 2 """ 21 22 import hashlib 23 import os 24 import subprocess 25 import sys 26 import zipfile 27
                continue
            logging.info("Downloading %s to %s" % (url, zip_filepath))
            subprocess.call(
                "wget %s --user %s --password %s -O %s" % (url, USER["user"], USER["password"], zip_filepath),
                shell=True,
            )

            statinfo = os.stat(zip_filepath)
            logging.info("Successfully downloaded %s, size(bytes): %d" % (url, statinfo.st_size))

        # concatenate all parts into zip files
        if ".zip" not in zip_filepath:
            zip_filepath = "_".join(zip_filepath.split("_")[:-1])
            subprocess.call("cat %s* > %s.zip" % (zip_filepath, zip_filepath), shell=True)
            zip_filepath += ".zip"
        extract_path = zip_filepath.strip(".zip")

        # check zip file md5sum
        with open(zip_filepath, "rb") as f_zip:
            md5 = hashlib.md5(f_zip.read()).hexdigest()
            if md5 != MD5SUM[subset]:
                raise ValueError("md5sum of %s mismatch" % zip_filepath)

        with zipfile.ZipFile(zip_filepath, "r") as zfile:
            zfile.extractall(directory)
            extract_path_ori = os.path.join(directory, zfile.infolist()[0].filename)
            subprocess.call("mv %s %s" % (extract_path_ori, extract_path), shell=True)
    finally:
        # os.remove(zip_filepath)
        pass
    Return:
        int, the return code.
    """
    try:
        retcode = subprocess.call(cmd, shell=True)
        if retcode < 0:
            logging.info(f"Child was terminated by signal {retcode}")
    except OSError as e:
        Raises:
            AssertionError: when the parameters network is not defined
            AssertionError: transition probability is not between 0 and 1
        """
        assert self.ar_order > 0, "AR order must be greater than 0; it is an autoregressive model."
        assert (
            len(self.outputnet_size) >= 1
        ), f"Parameter Network must have at least one layer. Check the config file for parameter network. Provided: {self.parameternetwork}"
        assert (
            0 < self.flat_start_params["transition_p"] < 1
        ), f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
        Raises:
            AssertionError: when the parameters network is not defined
            AssertionError: transition probability is not between 0 and 1
        """
        assert self.ar_order > 0, "AR order must be greater than 0; it is an autoregressive model."
        assert (
            len(self.outputnet_size) >= 1
        ), f"Parameter Network must have at least one layer. Check the config file for parameter network. Provided: {self.parameternetwork}"
        assert (
            0 < self.flat_start_params["transition_p"] < 1
        ), f"Transition probability must be between 0 and 1. Provided: {self.flat_start_params['transition_p']}"
        )

    def check_values(self):
        if self.gradual_training:
            assert (
                self.gradual_training[0][1] == self.r
            ), f"[!] the first scheduled gradual training `r` must be equal to the model's `r` value. {self.gradual_training[0][1]} vs {self.r}"
        if self.model == "tacotron" and self.audio is not None:
            assert self.out_channels == (
                self.audio.fft_size // 2 + 1
            ), f"{self.out_channels} vs {self.audio.fft_size // 2 + 1}"
        if self.model == "tacotron2" and self.audio is not None:
            assert self.out_channels == self.audio.num_mels
        eval_split_size = min(eval_split_max_size, int(len(items) * eval_split_size))
    else:
        eval_split_size = int(len(items) * eval_split_size)

    assert (
        eval_split_size > 0
    ), " [!] You do not have enough samples for the evaluation set. You can work around this by setting the 'eval_split_size' parameter to a minimum of {}".format(
        1 / len(items)
    )
    np.random.seed(0)
    np.random.shuffle(items)
    if is_multi_speaker:
    if formatter is None:
        formatter = _get_formatter_by_name(formatter_name)
    # load train set
    meta_data_train = formatter(root_path, meta_file_train, ignored_speakers=ignored_speakers)
    assert len(meta_data_train) > 0, f" [!] No training samples found in {root_path}/{meta_file_train}"

    meta_data_train = add_extra_keys(meta_data_train, language, dataset_name)
        print(f"{indent}| > Number of instances : {len(self.samples)}")

    def load_wav(self, filename):
        waveform = self.ap.load_wav(filename)
        assert waveform.size > 0
        return waveform

    def get_phonemes(self, idx, text):
        out_dict = self.phoneme_dataset[idx]
        assert text == out_dict["text"], f"{text} != {out_dict['text']}"
        assert len(out_dict["token_ids"]) > 0
        return out_dict

    def get_f0(self, idx):
        out_dict = self.f0_dataset[idx]
        item = self.samples[idx]
        assert item["audio_unique_name"] == out_dict["audio_unique_name"]
        return out_dict

    def get_energy(self, idx):
        out_dict = self.energy_dataset[idx]
        item = self.samples[idx]
        assert item["audio_unique_name"] == out_dict["audio_unique_name"]
        return out_dict

    @staticmethod
        return idxs

    @staticmethod
    def create_buckets(samples, batch_group_size: int):
        assert batch_group_size > 0
        for i in range(len(samples) // batch_group_size):
            offset = i * batch_group_size
            end_offset = offset + batch_group_size
            if self.compute_linear_spec:
                linear = [self.ap.spectrogram(w).astype("float32") for w in batch["wav"]]
                linear = prepare_tensor(linear, self.outputs_per_step)
                linear = linear.transpose(0, 2, 1)
                assert mel.shape[1] == linear.shape[1]
                linear = torch.FloatTensor(linear).contiguous()

            # format waveforms

            # format F0
            if self.compute_f0:
                pitch = prepare_data(batch["pitch"])
                assert mel.shape[1] == pitch.shape[1], f"[!] {mel.shape} vs {pitch.shape}"
                pitch = torch.FloatTensor(pitch)[:, None, :].contiguous()  # B x 1 x T
            else:
                pitch = None
            # format energy
            if self.compute_energy:
                energy = prepare_data(batch["energy"])
                assert mel.shape[1] == energy.shape[1], f"[!] {mel.shape} vs {energy.shape}"
                energy = torch.FloatTensor(energy)[:, None, :].contiguous()  # B x 1 x T
            else:
                energy = None
            attns = [batch["attn"][idx].T for idx in ids_sorted_decreasing]
            for idx, attn in enumerate(attns):
                pad2 = mel.shape[1] - attn.shape[1]
                pad1 = token_ids.shape[1] - attn.shape[0]
                assert pad1 >= 0 and pad2 >= 0, f"[!] Negative padding - {pad1} and {pad2}"
                attn = np.pad(attn, [[0, pad1], [0, pad2]])
                attns[idx] = attn
            attns = prepare_tensor(attns, self.outputs_per_step)
    def __getitem__(self, idx):
        item = self.samples[idx]
        f0 = self.compute_or_load(item["audio_file"], string2filename(item["audio_unique_name"]))
        if self.normalize_f0:
            assert self.mean is not None and self.std is not None, " [!] Mean and STD are not available"
            f0 = self.normalize(f0)
        return {"audio_unique_name": item["audio_unique_name"], "f0": f0}
    def __getitem__(self, idx):
        item = self.samples[idx]
        energy = self.compute_or_load(item["audio_file"], string2filename(item["audio_unique_name"]))
        if self.normalize_energy:
            assert self.mean is not None and self.std is not None, " [!] Mean and STD are not available"
            energy = self.normalize(energy)
        return {"audio_unique_name": item["audio_unique_name"], "energy": energy}
import os
import re
import xml.etree.ElementTree as ET
from glob import glob
from pathlib import Path
from typing import List

import pandas as pd
            if len(line.split("|")) != num_cols:
                print(f" > Missing column in line {idx + 1} -> {line.strip()}")
    # load metadata
    metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|")
    assert all(x in metadata.columns for x in ["wav_filename", "transcript"])
    client_id = None if "client_id" in metadata.columns else "default"
    emotion_name = None if "emotion_name" in metadata.columns else "neutral"
    items = []
            if len(line.split("|")) != num_cols:
                print(f" > Missing column in line {idx + 1} -> {line.strip()}")
    # load metadata
    metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|")
    assert all(x in metadata.columns for x in ["audio_file", "text"])
    speaker_name = None if "speaker_name" in metadata.columns else "coqui"
    emotion_name = None if "emotion_name" in metadata.columns else "neutral"
    items = []
def sam_accenture(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
    """Normalizes the sam-accenture meta data file to TTS format
    https://github.com/Sam-Accenture-Non-Binary-Voice/non-binary-voice-files"""
    xml_file = os.path.join(root_path, "voice_over_recordings", meta_file)
    xml_root = ET.parse(xml_file).getroot()
    items = []
    speaker_name = "sam_accenture"
    for item in xml_root.findall("./fileid"):
351 "root_path": root_path, 352 } 353 ) 354 for item in items: 355 assert os.path.exists(item["audio_file"]), f" [!] wav files don't exist - {item['audio_file']}" 356 return items 357 358
    return _voxcel_x(root_path, meta_file, voxcel_idx="1")


def _voxcel_x(root_path, meta_file, voxcel_idx):
    assert voxcel_idx in ["1", "2"]
    expected_count = 148_000 if voxcel_idx == "1" else 1_000_000
    voxceleb_path = Path(root_path)
    cache_to = voxceleb_path / f"metafile_voxceleb{voxcel_idx}.csv"
            desc=f"Building VoxCeleb {voxcel_idx} Meta file ... this needs to be done only once.",
            total=expected_count,
        ):
            speaker_id = str(Path(path).parent.parent.stem)
            assert speaker_id.startswith("id")
            text = None  # VoxCel does not provide transcriptions, and they are not needed for training the SE
            meta_data.append(f"{text}|{path}|voxcel{voxcel_idx}_{speaker_id}\n")
            cnt += 1
    download_url: str = "https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt", model_path: str = ""
):
    if not os.path.isfile(model_path):
        print("Downloading HuBERT base model")
        urllib.request.urlretrieve(download_url, model_path)
        print("Downloaded HuBERT")
        return model_path
    return None

    Returns:
        np.ndarray: The generated semantic tokens.
    """
    assert isinstance(text, str)
    text = _normalize_whitespace(text)
    assert len(text.strip()) > 0
    if all(v is not None for v in history_prompt) or base is not None:
        if history_prompt is not None:
            semantic_history = history_prompt[0]
        if base is not None:
            semantic_history = base[0]
        assert (
            isinstance(semantic_history, np.ndarray)
            and len(semantic_history.shape) == 1
            and len(semantic_history) > 0
            and semantic_history.min() >= 0
            and semantic_history.max() <= model.config.SEMANTIC_VOCAB_SIZE - 1
        )
    else:
        semantic_history = None
    encoded_text = np.array(_tokenize(model.tokenizer, text)) + model.config.TEXT_ENCODING_OFFSET
        semantic_history = np.array([model.config.SEMANTIC_PAD_TOKEN] * 256)
    x = torch.from_numpy(
        np.hstack([encoded_text, semantic_history, np.array([model.config.SEMANTIC_INFER_TOKEN])]).astype(np.int64)
    )[None]
    assert x.shape[1] == 256 + 256 + 1
    with inference_mode():
        x = x.to(model.device)
        n_tot_steps = 768
                pbar.update(req_pbar_state - pbar_state)
            pbar_state = req_pbar_state
        pbar.close()
        out = x.detach().cpu().numpy().squeeze()[256 + 256 + 1 :]
    assert all(out >= 0) and all(out < model.config.SEMANTIC_VOCAB_SIZE)
    clear_cuda_cache()
    return out


def _flatten_codebooks(arr, offset_size):
    assert len(arr.shape) == 2
    arr = arr.copy()
    if offset_size is not None:
        for n in range(1, arr.shape[0]):
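# A minimal numpy sketch (illustration only, not from the library) of what the loop
# above is building toward (its body is not shown here), assuming each codebook row n
# is shifted by n * offset_size so the rows occupy disjoint token ranges before being
# flattened into one sequence.
import numpy as np

offset_size = 1024  # hypothetical codebook size
arr = np.array([[1, 2, 3], [4, 5, 6]])  # (n_codebooks, n_frames)
shifted = arr + np.arange(arr.shape[0])[:, None] * offset_size
# shifted[0] stays [1, 2, 3]; shifted[1] becomes [1028, 1029, 1030]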

    Returns:
        np.ndarray: The generated coarse audio codes.
    """
    assert (
        isinstance(x_semantic, np.ndarray)
        and len(x_semantic.shape) == 1
        and len(x_semantic) > 0
        and x_semantic.min() >= 0
        and x_semantic.max() <= model.config.SEMANTIC_VOCAB_SIZE - 1
    )
    assert 60 <= max_coarse_history <= 630
    assert max_coarse_history + sliding_window_len <= 1024 - 256
    semantic_to_coarse_ratio = (
        model.config.COARSE_RATE_HZ / model.config.SEMANTIC_RATE_HZ * model.config.N_COARSE_CODEBOOKS
    )
        x_coarse_history = x_history[1]
        if base is not None:
            x_semantic_history = base[0]
            x_coarse_history = base[1]
        assert (
            isinstance(x_semantic_history, np.ndarray)
            and len(x_semantic_history.shape) == 1
            and len(x_semantic_history) > 0
            and x_semantic_history.min() >= 0
            and x_semantic_history.max() <= model.config.SEMANTIC_VOCAB_SIZE - 1
            and isinstance(x_coarse_history, np.ndarray)
            and len(x_coarse_history.shape) == 2
            and x_coarse_history.shape[0] == model.config.N_COARSE_CODEBOOKS
            and x_coarse_history.shape[-1] >= 0
            and x_coarse_history.min() >= 0
            and x_coarse_history.max() <= model.config.CODEBOOK_SIZE - 1
            and (
                round(x_coarse_history.shape[-1] / len(x_semantic_history), 1)
                == round(semantic_to_coarse_ratio / model.config.N_COARSE_CODEBOOKS, 1)
            )
        )
        x_coarse_history = (
            _flatten_codebooks(x_coarse_history, model.config.CODEBOOK_SIZE) + model.config.SEMANTIC_VOCAB_SIZE
        )
            np.floor(len(x_semantic) * semantic_to_coarse_ratio / model.config.N_COARSE_CODEBOOKS)
            * model.config.N_COARSE_CODEBOOKS
        )
    )
    assert n_steps > 0 and n_steps % model.config.N_COARSE_CODEBOOKS == 0
    x_semantic = np.hstack([x_semantic_history, x_semantic]).astype(np.int32)
    x_coarse = x_coarse_history.astype(np.int32)
    base_semantic_idx = len(x_semantic_history)
        del x_in
        del x_semantic_in
    gen_coarse_arr = x_coarse_in.detach().cpu().numpy().squeeze()[len(x_coarse_history) :]
    del x_coarse_in
    assert len(gen_coarse_arr) == n_steps
    gen_coarse_audio_arr = (
        gen_coarse_arr.reshape(-1, model.config.N_COARSE_CODEBOOKS).T - model.config.SEMANTIC_VOCAB_SIZE
    )

    Returns:
        np.ndarray: The generated full audio codes.
    """
    assert (
        isinstance(x_coarse_gen, np.ndarray)
        and len(x_coarse_gen.shape) == 2
        and 1 <= x_coarse_gen.shape[0] <= model.config.N_FINE_CODEBOOKS - 1
        and x_coarse_gen.shape[1] > 0
        and x_coarse_gen.min() >= 0
        and x_coarse_gen.max() <= model.config.CODEBOOK_SIZE - 1
    )
    if all(v is not None for v in history_prompt) or base is not None:
        if history_prompt is not None:
            x_fine_history = history_prompt[2]
        if base is not None:
            x_fine_history = base[2]
        assert (
            isinstance(x_fine_history, np.ndarray)
            and len(x_fine_history.shape) == 2
            and x_fine_history.shape[0] == model.config.N_FINE_CODEBOOKS
            and x_fine_history.shape[1] >= 0
            and x_fine_history.min() >= 0
            and x_fine_history.max() <= model.config.CODEBOOK_SIZE - 1
        )
    else:
        x_fine_history = None
    n_coarse = x_coarse_gen.shape[0]
        del in_arr
    gen_fine_arr = gen_fine_arr[:, n_history:]
    if n_remove_from_end > 0:
        gen_fine_arr = gen_fine_arr[:, :-n_remove_from_end]
    assert gen_fine_arr.shape[-1] == x_coarse_gen.shape[-1]
    clear_cuda_cache()
    return gen_fine_arr
)


def _md5(fname):
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)


def _download(from_s3_path, to_local_path, CACHE_DIR):
    os.makedirs(CACHE_DIR, exist_ok=True)
    response = requests.get(from_s3_path, stream=True)
    total_size_in_bytes = int(response.headers.get("content-length", 0))
    block_size = 1024  # 1 Kibibyte
    progress_bar = tqdm.tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True)

class CausalSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads, but in a batch
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        # output projection

class GPT(nn.Module):
    def __init__(self, config):
        super().__init__()
        assert config.input_vocab_size is not None
        assert config.output_vocab_size is not None
        assert config.block_size is not None
        self.config = config

        self.transformer = nn.ModuleDict(
    def forward(self, idx, merge_context=False, past_kv=None, position_ids=None, use_cache=False):
        device = idx.device
        _, t = idx.size()
        if past_kv is not None:
            assert t == 1
            tok_emb = self.transformer.wte(idx)  # token embeddings of shape (b, t, n_embd)
        else:
            if merge_context:
                assert idx.shape[1] >= 256 + 256 + 1
                t = idx.shape[1] - 256
            else:
                assert (
                    t <= self.config.block_size
                ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"

            # forward the GPT model itself
            if merge_context:

        if position_ids is None:
            position_ids = torch.arange(past_length, t + past_length, dtype=torch.long, device=device)
            position_ids = position_ids.unsqueeze(0)  # shape (1, t)
        assert position_ids.shape == (1, t)

        pos_emb = self.transformer.wpe(position_ids)  # position embeddings of shape (1, t, n_embd)

class NonCausalSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads, but in a batch
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        # output projection

    def forward(self, pred_idx, idx):
        device = idx.device
        b, t, codes = idx.size()
        assert (
            t <= self.config.block_size
        ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
        assert pred_idx > 0, "cannot predict 0th codebook"
        assert codes == self.n_codes_total, (b, t, codes)
        pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)  # shape (1, t)

        # forward the GPT model itself
        d_model: int = 512,
        num_heads: int = 16,
    ):
        super().__init__()
        assert d_model % num_heads == 0, "d_model % num_heads should be zero."
        self.d_model = d_model
        self.d_head = int(d_model / num_heads)
        self.num_heads = num_heads
        use_weight_norm=False,
    ):
        super(ConvNorm, self).__init__()  # pylint: disable=super-with-arguments
        if padding is None:
            assert kernel_size % 2 == 1
            padding = int(dilation * (kernel_size - 1) / 2)
        self.kernel_size = kernel_size
        self.dilation = dilation
            (Tensor): the output sequence after performing local convolution. (batch, out_channels, in_length).
        """
        batch, _, in_length = x.shape
        batch, _, out_channels, kernel_size, kernel_length = kernel.shape
        assert in_length == (kernel_length * hop_size), "length of (x, kernel) is not matched"

        padding = dilation * int((kernel_size - 1) / 2)
        x = F.pad(x, (padding, padding), "constant", 0)  # (batch, in_channels, in_length + 2*padding)
from TTS.tts.layers.delightful_tts.conv_layers import ConvNorm


def initialize_embeddings(shape: Tuple[int]) -> torch.Tensor:
    assert len(shape) == 2, "Can only initialize 2-D embedding matrices ..."
    # Kaiming initialization
    return torch.randn(shape) * np.sqrt(2 / shape[1])
            )
        elif encoder_type.lower() == "residual_conv_bn":
            self.encoder = ResidualConv1dBNEncoder(in_hidden_channels, out_channels, in_hidden_channels, encoder_params)
        elif encoder_type.lower() == "fftransformer":
            assert (
                in_hidden_channels == out_channels
            ), "[!] must be `in_channels` == `out_channels` when encoder type is 'fftransformer'"
            # pylint: disable=unexpected-keyword-arg
            self.encoder = FFTransformerBlock(in_hidden_channels, **encoder_params)
        else:
    def __init__(
        self, in_channels, out_channels, hidden_channels, kernel_size, dilations, num_res_blocks=13, num_conv_blocks=2
    ):
        super().__init__()
        assert len(dilations) == num_res_blocks
        self.res_blocks = nn.ModuleList()
        for idx, dilation in enumerate(dilations):
            block = Conv1dBNBlock(

class TimeDepthSeparableConvBlock(nn.Module):
    def __init__(self, in_channels, hid_channels, out_channels, num_layers, kernel_size, bias=True):
        super().__init__()
        assert (kernel_size - 1) % 2 == 0
        assert num_layers > 1

        self.layers = nn.ModuleList()
        layer = TimeDepthSeparableConv(
        dropout_p=0,
        weight_norm=True,
    ):
        super().__init__()
        assert kernel_size % 2 == 1
        assert hidden_channels % 2 == 0
        self.in_channels = in_channels
        self.hidden_channels = hidden_channels
        self.kernel_size = kernel_size
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.num_layers = num_layers
        self.dropout_p = dropout_p
        assert num_layers > 1, " [!] number of layers should be > 1."
        assert kernel_size % 2 == 1, " [!] kernel size should be an odd number."

        self.conv_layers = nn.ModuleList()
        self.norm_layers = nn.ModuleList()
83 """ 84 85 def __init__(self, channels, num_splits=4, no_jacobian=False, **kwargs): # pylint: disable=unused-argument 86 super().__init__() 87 assert num_splits % 2 == 0 88 self.channels = channels 89 self.num_splits = num_splits 90 self.no_jacobian = no_jacobian
            - x: :math:`[B, C, T]`
            - x_mask: :math:`[B, 1, T]`
        """
        b, c, t = x.size()
        assert c % self.num_splits == 0
        if x_mask is None:
            x_mask = 1
            x_len = torch.ones((b,), dtype=x.dtype, device=x.device) * t
        proximal_bias=False,
        proximal_init=False,
    ):
        super().__init__()
        assert channels % num_heads == 0, " [!] channels should be divisible by num_heads."
        # class attributes
        self.channels = channels
        self.out_channels = out_channels
        # compute raw attention scores
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.k_channels)
        # relative positional encoding for scores
        if self.rel_attn_window_size is not None:
            assert t_s == t_t, "Relative attention is only available for self-attention."
            # get relative key embeddings
            key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
            rel_logits = self._matmul_with_relative_keys(query, key_relative_embeddings)
            scores_local = rel_logits / math.sqrt(self.k_channels)
            scores = scores + scores_local
        # proximal bias
        if self.proximal_bias:
            assert t_s == t_t, "Proximal bias is only available for self-attention."
            scores = scores + self._attn_proximity_bias(t_s).to(device=scores.device, dtype=scores.dtype)
        # attention score masking
        if mask is not None:

        self.transition_model = TransitionModel()
        self.emission_model = EmissionModel()

        assert ar_order > 0, f"AR order must be greater than 0; provided {ar_order}"

        self.ar_order = ar_order
        self.prenet = Prenet(
        for conv1d in self.conv1d_banks:
            out = conv1d(x)
            outs.append(out)
        x = torch.cat(outs, dim=1)
        assert x.size(1) == self.conv_bank_features * len(self.conv1d_banks)
        for conv1d in self.conv1d_projections:
            x = conv1d(x)
        x += inputs
23 """ 24 25 def __init__(self, in_channels, out_channels, kernel_size, activation=None): 26 super().__init__() 27 assert (kernel_size - 1) % 2 == 0 28 padding = (kernel_size - 1) // 2 29 self.convolution1d = nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding) 30 self.batch_normalization = nn.BatchNorm1d(out_channels, momentum=0.1, eps=1e-5)
    elif channels <= 64:
        groups = 16
    while channels % groups != 0:
        groups = int(groups / 2)
        assert groups > 2
    return GroupNorm32(groups, channels)
        :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs.
        :return: an [N x (H * C) x T] tensor after attention.
        """
        bs, width, length = qkv.shape
        assert width % (3 * self.n_heads) == 0
        ch = width // (3 * self.n_heads)
        q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1)
        scale = 1 / math.sqrt(math.sqrt(ch))
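# A minimal sketch (illustration only, not from the library) of the qkv split above with
# concrete, hypothetical sizes.
import torch

n_heads, ch, length, bs = 4, 32, 100, 2
qkv = torch.randn(bs, n_heads * 3 * ch, length)  # [N x (H * 3 * C) x T]
q, k, v = qkv.reshape(bs * n_heads, ch * 3, length).split(ch, dim=1)
# q, k, v each have shape (bs * n_heads, ch, length) == (8, 32, 100)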
        self.do_checkpoint = do_checkpoint
        if num_head_channels == -1:
            self.num_heads = num_heads
        else:
            assert (
                channels % num_head_channels == 0
            ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}"
            self.num_heads = channels // num_head_channels
        self.norm = normalization(channels)
        self.qkv = nn.Conv1d(channels, channels * 3, 1)
            pad = 2
            self.conv = nn.Conv1d(self.channels, self.out_channels, ksize, padding=pad)

    def forward(self, x):
        assert x.shape[1] == self.channels
        x = F.interpolate(x, scale_factor=self.factor, mode="nearest")
        if self.use_conv:
            x = self.conv(x)
        stride = factor
        if use_conv:
            self.op = nn.Conv1d(self.channels, self.out_channels, ksize, stride=stride, padding=pad)
        else:
            assert self.channels == self.out_channels
            self.op = nn.AvgPool1d(kernel_size=stride, stride=stride)

    def forward(self, x):
        assert x.shape[1] == self.channels
        return self.op(x)
        if (
            len(inp.shape) == 3
        ):  # Automatically squeeze out the channels dimension if it is present (assuming mono-audio)
            inp = inp.squeeze(1)
        assert len(inp.shape) == 2
        self.mel_stft = self.mel_stft.to(inp.device)
        mel = self.mel_stft(inp)
        # Perform dynamic range compression
        self.wrap = wrap

    def forward(self, x, *args, **kwargs):
        for k, v in kwargs.items():
            assert not (isinstance(v, torch.Tensor) and v.requires_grad)  # This would screw up checkpointing.
        partial = functools.partial(self.wrap, **kwargs)
        return partial(x, *args)
    elif audiopath[-4:] == ".mp3":
        audio, lsr = librosa.load(audiopath, sr=None)
        audio = torch.FloatTensor(audio)
    else:
        assert False, f"Unsupported audio format provided: {audiopath[-4:]}"

    # Remove any channel data.
    if len(audio.shape) > 1:
        if audio.shape[0] < 5:
            audio = audio[0]
        else:
            assert audio.shape[1] < 5
            audio = audio[:, 0]

    return audio, lsr
139 print("Cannot combine a random voice with a non-random voice. Just using a random voice.") 140 return None, None 141 clip, latent = load_voice(voice, extra_voice_dirs) 142 if latent is None: 143 assert ( 144 len(latents) == 0 145 ), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this." 146 clips.extend(clip) 147 elif clip is None: 148 assert (
144 len(latents) == 0 145 ), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this." 146 clips.extend(clip) 147 elif clip is None: 148 assert ( 149 len(clips) == 0 150 ), "Can only combine raw audio voices or latent voices, not both. Do it yourself if you want this." 151 latents.append(latent) 152 if len(latents) == 0: 153 return clips, None
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        assert self.cached_mel_emb is not None
        assert inputs_embeds is None  # Not supported by this inference model.
        assert labels is None  # Training not supported by this inference model.
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # Create embedding
        trunc_index = fake_inputs.shape[1]
        if input_tokens is None:
            inputs = fake_inputs
        else:
            assert (
                num_return_sequences % input_tokens.shape[0] == 0
            ), "The number of return sequences must be divisible by the number of input sequences"
            fake_inputs = fake_inputs.repeat(num_return_sequences, 1)
            input_tokens = input_tokens.repeat(num_return_sequences // input_tokens.shape[0], 1)
            inputs = torch.cat([fake_inputs, input_tokens], dim=1)
    for obj in (mean1, logvar1, mean2, logvar2):
        if isinstance(obj, th.Tensor):
            tensor = obj
            break
    assert tensor is not None, "at least one argument must be a Tensor"

    # Force variances to be Tensors. Broadcasting helps convert scalars to
    # Tensors, but it does not work for th.exp().
    :param means: the Gaussian mean Tensor.
    :param log_scales: the Gaussian log stddev Tensor.
    :return: a tensor like x of log probabilities (in nats).
    """
    assert x.shape == means.shape == log_scales.shape
    centered_x = x - means
    inv_stdv = th.exp(-log_scales)
    plus_in = inv_stdv * (centered_x + 1.0 / 255.0)
        x < -0.999,
        log_cdf_plus,
        th.where(x > 0.999, log_one_minus_cdf_min, th.log(cdf_delta.clamp(min=1e-12))),
    )
    assert log_probs.shape == x.shape
    return log_probs
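# A minimal scalar sketch (illustration only, not from the library) of the binning logic
# above: data in [-1, 1] quantized to 256 levels is scored by the probability mass of the
# bin [x - 1/255, x + 1/255] under N(mean, std^2), with the edge bins extended to
# -inf / +inf. An exact normal CDF is used here for clarity instead of the original's
# approximation.
import math


def discretized_gaussian_logprob(x, mean, std):
    def cdf(v):
        return 0.5 * (1.0 + math.erf((v - mean) / (std * math.sqrt(2.0))))

    if x < -0.999:  # left edge bin
        return math.log(max(cdf(x + 1.0 / 255.0), 1e-12))
    if x > 0.999:  # right edge bin
        return math.log(max(1.0 - cdf(x - 1.0 / 255.0), 1e-12))
    return math.log(max(cdf(x + 1.0 / 255.0) - cdf(x - 1.0 / 255.0), 1e-12))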

        # Use float64 for accuracy.
        betas = np.array(betas, dtype=np.float64)
        self.betas = betas
        assert len(betas.shape) == 1, "betas must be 1-D"
        assert (betas > 0).all() and (betas <= 1).all()

        self.num_timesteps = int(betas.shape[0])

        alphas = 1.0 - betas
        self.alphas_cumprod = np.cumprod(alphas, axis=0)
        self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1])
        self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0)
        assert self.alphas_cumprod_prev.shape == (self.num_timesteps,)

        # calculations for diffusion q(x_t | x_{t-1}) and others
        self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod)
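# A minimal numpy sketch (illustration only, not from the library) of the schedule
# quantities cached above, assuming a simple linear beta schedule: alpha_t = 1 - beta_t,
# alpha_bar_t = prod_{s<=t} alpha_s, and the forward marginal is
# q(x_t | x_0) = N(sqrt(alpha_bar_t) * x_0, (1 - alpha_bar_t) * I).
import numpy as np

betas = np.linspace(1e-4, 0.02, 1000)  # hypothetical linear schedule
alphas = 1.0 - betas
alphas_cumprod = np.cumprod(alphas)  # alpha_bar_t, as in self.alphas_cumprod above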
        :return: A noisy version of x_start.
        """
        if noise is None:
            noise = th.randn_like(x_start)
        assert noise.shape == x_start.shape
        return (
            _extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
            + _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise

        q(x_{t-1} | x_t, x_0)

        """
        assert x_start.shape == x_t.shape
        posterior_mean = (
            _extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start
            + _extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t
        )
        posterior_variance = _extract_into_tensor(self.posterior_variance, t, x_t.shape)
        posterior_log_variance_clipped = _extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape)
        assert (
            posterior_mean.shape[0]
            == posterior_variance.shape[0]
            == posterior_log_variance_clipped.shape[0]
            == x_start.shape[0]
        )
        return posterior_mean, posterior_variance, posterior_log_variance_clipped

    def p_mean_variance(self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None):
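# Note (illustration only, not from the library): q_posterior_mean_variance above returns
# the standard DDPM posterior q(x_{t-1} | x_t, x_0) = N(mu_tilde, beta_tilde * I), where
#   mu_tilde   = (sqrt(alpha_bar_{t-1}) * beta_t / (1 - alpha_bar_t)) * x_0
#              + (sqrt(alpha_t) * (1 - alpha_bar_{t-1}) / (1 - alpha_bar_t)) * x_t
#   beta_tilde = (1 - alpha_bar_{t-1}) / (1 - alpha_bar_t) * beta_t
# posterior_mean_coef1 / posterior_mean_coef2 cache those two coefficients.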
        if model_kwargs is None:
            model_kwargs = {}

        B, C = x.shape[:2]
        assert t.shape == (B,)
        model_output = model(x, self._scale_timesteps(t), **model_kwargs)
        if self.conditioning_free:
            model_output_no_conditioning = model(x, self._scale_timesteps(t), conditioning_free=True, **model_kwargs)

        if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]:
            assert model_output.shape == (B, C * 2, *x.shape[2:])
            model_output, model_var_values = th.split(model_output, C, dim=1)
            if self.conditioning_free:
                model_output_no_conditioning, _ = th.split(model_output_no_conditioning, C, dim=1)
            model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape)

        if self.conditioning_free:
            if self.ramp_conditioning_free:
                assert t.shape[0] == 1  # This should only be used in inference.
                cfk = self.conditioning_free_k * (1 - self._scale_timesteps(t)[0].item() / self.num_timesteps)
            else:
                cfk = self.conditioning_free_k
            model_mean, _, _ = self.q_posterior_mean_variance(x_start=pred_xstart, x_t=x, t=t)
        else:
            raise NotImplementedError(self.model_mean_type)

        assert model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape
        return {
            "mean": model_mean,
            "variance": model_variance,
391 "pred_xstart": pred_xstart, 392 } 393 394 def _predict_xstart_from_eps(self, x_t, t, eps): 395 assert x_t.shape == eps.shape 396 return ( 397 _extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t 398 - _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps
398 - _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps 399 ) 400 401 def _predict_xstart_from_xprev(self, x_t, t, xprev): 402 assert x_t.shape == xprev.shape 403 return ( # (xprev - coef2*x_t) / coef1 404 _extract_into_tensor(1.0 / self.posterior_mean_coef1, t, x_t.shape) * xprev 405 - _extract_into_tensor(self.posterior_mean_coef2 / self.posterior_mean_coef1, t, x_t.shape) * x_t
        device=None,  # ALL UNUSED
        model_kwargs=None,  # {'precomputed_aligned_embeddings': precomputed_embeddings},
        progress=False,  # unused as well
    ):
        assert isinstance(model_kwargs, dict)
        if device is None:
            device = next(model.parameters()).device
        s_in = noise.new_ones([noise.shape[0]])
        p_sample().
        """
        if device is None:
            device = next(model.parameters()).device
        assert isinstance(shape, (tuple, list))
        if noise is not None:
            img = noise
        else:
    ):
        """
        Sample x_{t+1} from the model using DDIM reverse ODE.
        """
        assert eta == 0.0, "Reverse ODE only for deterministic path"
        out = self.p_mean_variance(
            model,
            x,
        Same usage as p_sample_loop_progressive().
        """
        if device is None:
            device = next(model.parameters()).device
        assert isinstance(shape, (tuple, list))
        if noise is not None:
            img = noise
        else:

        decoder_nll = -discretized_gaussian_log_likelihood(
            x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]
        )
        assert decoder_nll.shape == x_start.shape
        decoder_nll = mean_flat(decoder_nll) / np.log(2.0)

        # At the first timestep return the decoder NLL,
                ModelVarType.LEARNED,
                ModelVarType.LEARNED_RANGE,
            ]:
                B, C = x_t.shape[:2]
                assert model_output.shape == (B, C * 2, *x_t.shape[2:])
                model_output, model_var_values = th.split(model_output, C, dim=1)
                # Learn the variance using the variational bound, but don't let
                # it affect our mean prediction.
                target = noise
                x_start_pred = self._predict_xstart_from_eps(x_t, t, model_output)
            else:
                raise NotImplementedError(self.model_mean_type)
            assert model_output.shape == target.shape == x_start.shape
            terms["mse"] = mean_flat((target - model_output) ** 2)
            terms["x_start_predicted"] = x_start_pred
            if "vb" in terms:
            noise = th.randn_like(x_start)
        x_t = self.q_sample(x_start, t, noise=noise)
        terms = {}
        if self.loss_type == LossType.KL or self.loss_type == LossType.RESCALED_KL:
            assert False  # not currently supported for this type of diffusion.
        elif self.loss_type == LossType.MSE or self.loss_type == LossType.RESCALED_MSE:
            model_outputs = model(x_t, x_start, self._scale_timesteps(t), **model_kwargs)
            terms.update({k: o for k, o in zip(model_output_keys, model_outputs)})
                ModelVarType.LEARNED,
                ModelVarType.LEARNED_RANGE,
            ]:
                B, C = x_t.shape[:2]
                assert model_output.shape == (B, C, 2, *x_t.shape[2:])
                model_output, model_var_values = model_output[:, :, 0], model_output[:, :, 1]
                # Learn the variance using the variational bound, but don't let
                # it affect our mean prediction.
                target = noise
                x_start_pred = self._predict_xstart_from_eps(x_t, t, model_output)
            else:
                raise NotImplementedError(self.model_mean_type)
            assert model_output.shape == target.shape == x_start.shape
            terms["mse"] = mean_flat((target - model_output) ** 2)
            terms["x_start_predicted"] = x_start_pred
            if "vb" in terms:
        :param precomputed_aligned_embeddings: Embeddings returned from self.timestep_independent()
        :param conditioning_free: When set, all conditioning inputs (including tokens and conditioning_input) will not be considered.
        :return: an [N x C x ...] Tensor of outputs.
        """
        assert precomputed_aligned_embeddings is not None or (
            aligned_conditioning is not None and conditioning_latent is not None
        )
        assert not (
            return_code_pred and precomputed_aligned_embeddings is not None
        )  # These two are mutually exclusive.

        unused_params = []
        if conditioning_free:
                self.training
                and self.layer_drop > 0
                and i != 0
                and i != (len(self.layers) - 1)
                and random.random() < self.layer_drop
            ):
                unused_params.extend(list(lyr.parameters()))
            else:
        if schedule == "discrete":
            if betas is not None:
                log_alphas = 0.5 * torch.log(1 - betas).cumsum(dim=0)
            else:
                assert alphas_cumprod is not None
                log_alphas = 0.5 * torch.log(alphas_cumprod)
            self.total_N = len(log_alphas)
            self.T = 1.0
355 """ 356 if guidance_type == "uncond": 357 return noise_pred_fn(x, t_continuous) 358 elif guidance_type == "classifier": 359 assert classifier_fn is not None 360 t_input = get_model_input_time(t_continuous) 361 cond_grad = cond_grad_fn(x, t_input) 362 sigma_t = noise_schedule.marginal_std(t_continuous)
            c_in = torch.cat([unconditional_condition, condition])
            noise_uncond, noise = noise_pred_fn(x_in, t_in, cond=c_in).chunk(2)
            return noise_uncond + guidance_scale * (noise - noise_uncond)

    assert model_type in ["noise", "x_start", "v", "score"]
    assert guidance_type in ["uncond", "classifier", "classifier-free"]
    return model_fn
        with deep language understanding. arXiv preprint arXiv:2205.11487, 2022b.
        """
        self.model = lambda x, t: model_fn(x, t.expand((x.shape[0])))
        self.noise_schedule = noise_schedule
        assert algorithm_type in ["dpmsolver", "dpmsolver++"]
        self.algorithm_type = algorithm_type
        if correcting_x0_fn == "dynamic_thresholding":
            self.correcting_x0_fn = self.dynamic_thresholding_fn
        For discrete-time DPMs, we use `t_start=1/N`, where `N` is the total time steps during training.
        """
        t_0 = 1.0 / self.noise_schedule.total_N if t_start is None else t_start
        t_T = self.noise_schedule.T if t_end is None else t_end
        assert (
            t_0 > 0 and t_T > 0
        ), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array"
        return self.sample(
            x,
            steps=steps,
1360 1361 """ 1362 t_0 = 1.0 / self.noise_schedule.total_N if t_end is None else t_end 1363 t_T = self.noise_schedule.T if t_start is None else t_start 1364 assert ( 1365 t_0 > 0 and t_T > 0 1366 ), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array" 1367 if return_intermediate: 1368 assert method in [ 1369 "multistep",
1364 assert ( 1365 t_0 > 0 and t_T > 0 1366 ), "Time range needs to be greater than 0. For discrete-time DPMs, it needs to be in [1 / N, 1], where N is the length of betas array" 1367 if return_intermediate: 1368 assert method in [ 1369 "multistep", 1370 "singlestep", 1371 "singlestep_fixed", 1372 ], "Cannot use adaptive solver when saving intermediate values" 1373 if self.correcting_xt_fn is not None: 1374 assert method in [ 1375 "multistep",
1370 "singlestep", 1371 "singlestep_fixed", 1372 ], "Cannot use adaptive solver when saving intermediate values" 1373 if self.correcting_xt_fn is not None: 1374 assert method in [ 1375 "multistep", 1376 "singlestep", 1377 "singlestep_fixed", 1378 ], "Cannot use adaptive solver when correcting_xt_fn is not None" 1379 device = x.device 1380 intermediates = [] 1381 with torch.no_grad():
                    rtol=rtol,
                    solver_type=solver_type,
                )
            elif method == "multistep":
                assert steps >= order
                timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device)
                assert timesteps.shape[0] - 1 == steps
                # Init the initial values.
                step = 0
                t = timesteps[step]
# classes
class SequentialSequence(nn.Module):
    def __init__(self, layers, args_route={}, layer_dropout=0.0):
        super().__init__()
        assert all(
            len(route) == len(layers) for route in args_route.values()
        ), "each argument route map must have the same depth as the number of sequential layers"
        self.layers = layers
        self.args_route = args_route
        self.layer_dropout = layer_dropout
        if os.path.exists(model_path):
            continue
        print(f"Downloading {model_name} from {url}...")
        with tqdm(unit="B", unit_scale=True, unit_divisor=1024, miniters=1) as t:
            request.urlretrieve(url, model_path, lambda nb, bs, fs, t=t: t.update(nb * bs - t.n))
        print("Done.")
            (Tensor): the output sequence after performing local convolution. (batch, out_channels, in_length).
        """
        batch, _, in_length = x.shape
        batch, _, out_channels, kernel_size, kernel_length = kernel.shape
        assert in_length == (kernel_length * hop_size), "length of (x, kernel) is not matched"

        padding = dilation * int((kernel_size - 1) / 2)
        x = F.pad(x, (padding, padding), "constant", 0)  # (batch, in_channels, in_length + 2*padding)
    print(c.shape)

    y = model(c, z)
    print(y.shape)
    assert y.shape == torch.Size([3, 1, 2560])

    pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(pytorch_total_params)
    Finally got to use my DP skills!
    """
    if record is None:
        record = {}
    assert skip_character not in s1, f"Found the skip character {skip_character} in the provided string, {s1}"
    if len(s1) == 0:
        return ""
    if len(s2) == 0:

        pop_till_you_win()
        if not (len(expected_tokens) == 0 and len(alignments) == len(expected_text)):
            torch.save([audio, expected_text], "alignment_debug.pth")
            assert False, (
                "Something went wrong with the alignment algorithm. I've dumped a file, 'alignment_debug.pth' to "
                "your current working directory. Please report this along with the file so it can get fixed."
            )

        # Now fix up alignments. Anything with -1 should be interpolated.
        alignments.append(orig_len)  # This'll get removed but makes the algorithm below more readable.
            return audio
        splitted = expected_text.split("[")
        fully_split = [splitted[0]]
        for spl in splitted[1:]:
            assert "]" in spl, 'Every "[" character must be paired with a "]" with no nesting.'
            fully_split.extend(spl.split("]"))

        # At this point, fully_split is a list of strings, with every other string being something that should be redacted.
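# A minimal sketch (illustration only, not from the library) of how the bracket splitting
# above behaves on a small, hypothetical string.
text = "keep this [redact me] and this [me too]"
splitted = text.split("[")
fully_split = [splitted[0]]
for spl in splitted[1:]:
    fully_split.extend(spl.split("]"))
# fully_split == ['keep this ', 'redact me', ' and this ', 'me too', '']
# i.e. the odd-indexed entries are exactly the bracketed spans to redact.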
        self.to_out = nn.Sequential(nn.Linear(v_dim, dim * 2), nn.GLU()) if on_attn else nn.Linear(v_dim, dim)

        self.rel_pos_bias = rel_pos_bias
        if rel_pos_bias:
            assert (
                rel_pos_num_buckets <= rel_pos_max_distance
            ), "number of relative position buckets must be less than the relative position max distance"
            self.rel_pos = RelativePositionBias(
                scale=dim_head**0.5,
                causal=causal,
            dots.masked_fill_(~input_mask, mask_value)
            del input_mask

        if exists(attn_mask):
            assert (
                2 <= attn_mask.ndim <= 4
            ), "attention mask must have at least 2 and at most 4 dimensions"
            if attn_mask.ndim == 2:
                attn_mask = rearrange(attn_mask, "i j -> () () i j")
            elif attn_mask.ndim == 3:

        rotary_emb_dim = max(default(rotary_emb_dim, dim_head // 2), 32)
        self.rotary_pos_emb = RotaryEmbedding(rotary_emb_dim) if rotary_pos_emb else None

        assert not (
            alibi_pos_bias and rel_pos_bias
        ), "you can only choose Alibi positional bias or T5 relative positional bias, not both"

        if alibi_pos_bias:
            alibi_num_heads = default(alibi_num_heads, heads)
            assert alibi_num_heads <= heads, "number of ALiBi heads must be less than or equal to the total number of heads"
            alibi_pos_klass = LearnedAlibiPositionalBias if alibi_learned or not causal else AlibiPositionalBias
            self.rel_pos = alibi_pos_klass(heads=alibi_num_heads, bidirectional=not causal)
        else:
            self.rel_pos = None

        assert not (not pre_norm and sandwich_norm), "sandwich norm cannot be used when not using prenorm"
        self.pre_norm = pre_norm
        self.sandwich_norm = sandwich_norm
        if exists(custom_layers):
            layer_types = custom_layers
        elif exists(par_ratio):
            par_depth = depth * len(default_block)
            assert 1 < par_ratio <= par_depth, "par ratio out of range"
            default_block = tuple(filter(not_equals("f"), default_block))
            par_attn = par_depth // par_ratio
            depth_cut = par_depth * 2 // 3  # 2 / 3 attention layer cutoff suggested by PAR paper
            par_width = (depth_cut + depth_cut // par_attn) // par_attn
            assert len(default_block) <= par_width, "default block is too large for par_ratio"
            par_block = default_block + ("f",) * (par_width - len(default_block))
            par_head = par_block * par_attn
            layer_types = par_head + ("f",) * (par_depth - len(par_head))
        elif exists(sandwich_coef):
            assert sandwich_coef > 0 and sandwich_coef <= depth, "sandwich coefficient should be positive and no greater than the depth"
            layer_types = ("a",) * sandwich_coef + default_block * (depth - sandwich_coef) + ("f",) * sandwich_coef
        else:
            layer_types = default_block * depth
        norm_scale_shift_inp=None,
        past_key_values=None,
        expected_seq_len=None,
    ):
        assert not (
            self.cross_attend ^ (exists(context) or exists(full_context))
        ), "context must be passed in if cross_attend is set to True"
        assert context is None or full_context is None, "only one of full_context or context can be provided"

        hiddens = []
        intermediates = []
955 956 rotary_pos_emb = None 957 if exists(self.rotary_pos_emb): 958 if not self.training and self.causal: 959 assert ( 960 expected_seq_len is not None 961 ), "To decode a transformer with rotary embeddings, you must specify an `expected_seq_len`" 962 elif expected_seq_len is None: 963 expected_seq_len = 0 964 seq_len = x.shape[1]
1050 1051 1052 class Encoder(AttentionLayers): 1053 def __init__(self, **kwargs): 1054 assert "causal" not in kwargs, "cannot set causality on encoder" 1055 super().__init__(causal=False, **kwargs) 1056 1057
1056 1057 1058 class Decoder(AttentionLayers): 1059 def __init__(self, **kwargs): 1060 assert "causal" not in kwargs, "cannot set causality on decoder" 1061 super().__init__(causal=True, **kwargs) 1062 1063
1068 1069 class ViTransformerWrapper(nn.Module): 1070 def __init__(self, *, image_size, patch_size, attn_layers, num_classes=None, dropout=0.0, emb_dropout=0.0): 1071 super().__init__() 1072 assert isinstance(attn_layers, Encoder), "attention layers must be an Encoder" 1073 assert image_size % patch_size == 0, "image dimensions must be divisible by the patch size" 1074 dim = attn_layers.dim 1075 num_patches = (image_size // patch_size) ** 2
1069 class ViTransformerWrapper(nn.Module): 1070 def __init__(self, *, image_size, patch_size, attn_layers, num_classes=None, dropout=0.0, emb_dropout=0.0): 1071 super().__init__() 1072 assert isinstance(attn_layers, Encoder), "attention layers must be an Encoder" 1073 assert image_size % patch_size == 0, "image dimensions must be divisible by the patch size" 1074 dim = attn_layers.dim 1075 num_patches = (image_size // patch_size) ** 2 1076 patch_dim = 3 * patch_size**2
1122 tie_embedding=False, 1123 use_pos_emb=True, 1124 ): 1125 super().__init__() 1126 assert isinstance(attn_layers, AttentionLayers), "attention layers must be one of Encoder or Decoder" 1127 1128 dim = attn_layers.dim 1129 emb_dim = default(emb_dim, dim)
1214 def __init__( 1215 self, *, max_seq_len, attn_layers, dim_in=None, dim_out=None, emb_dim=None, emb_dropout=0.0, use_pos_emb=True 1216 ): 1217 super().__init__() 1218 assert isinstance(attn_layers, AttentionLayers), "attention layers must be one of Encoder or Decoder" 1219 1220 dim = attn_layers.dim 1221
82 Shapes: 83 - x: :math:`[B, T]` 84 - x_length: :math:`[B]` 85 """ 86 assert x.shape[0] == x_lengths.shape[0] 87 x = self.emb(x) * math.sqrt(self.hidden_channels) # [b, t, h] 88 89 # concat the lang emb in embedding chars
111 dropout_p=0, 112 cond_channels=0, 113 mean_only=False, 114 ): 115 assert channels % 2 == 0, "channels should be divisible by 2" 116 super().__init__() 117 self.half_channels = channels // 2 118 self.mean_only = mean_only
239 x = self.proj(x) * x_mask 240 241 if not reverse: 242 flows = self.flows 243 assert dr is not None 244 245 # condition encoder duration 246 h = self.post_pre(dr)
164 ) 165 c = -input_delta * (inputs - input_cumheights) 166 167 discriminant = b.pow(2) - 4 * a * c 168 assert (discriminant >= 0).all() 169 170 root = (2 * c) / (-b - torch.sqrt(discriminant)) 171 outputs = root * input_bin_widths + input_cumwidths
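The root computation above is the 2c / (-b - sqrt(b^2 - 4ac)) form of the quadratic formula; a quick check with illustrative coefficients (not taken from the spline code) that it agrees with the textbook root:

import torch

# Illustrative coefficients for a*x^2 + b*x + c = 0 (roots 1.0 and 0.5).
a, b, c = torch.tensor(2.0), torch.tensor(-3.0), torch.tensor(1.0)
discriminant = b.pow(2) - 4 * a * c
assert (discriminant >= 0).all()

root_stable = (2 * c) / (-b - torch.sqrt(discriminant))    # form used above
root_textbook = (-b + torch.sqrt(discriminant)) / (2 * a)  # standard quadratic formula
assert torch.allclose(root_stable, root_textbook)          # both give 1.0 here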
185 186 class UpsampledConv(nn.Module): 187 def __init__(self, conv, *args, **kwargs): 188 super().__init__() 189 assert "stride" in kwargs.keys() 190 self.stride = kwargs["stride"] 191 del kwargs["stride"] 192 self.conv = conv(*args, **kwargs)
230 self.discrete_loss = DiscretizationLoss( 231 num_tokens, 2, 1 / (num_tokens * 2), discretization_loss_averaging_steps 232 ) 233 234 assert positional_dims > 0 and positional_dims < 3 # This VAE only supports 1d and 2d inputs for now. 235 if positional_dims == 2: 236 conv = nn.Conv2d 237 conv_transpose = nn.ConvTranspose2d
245 act = nn.ReLU 246 elif activation == "silu": 247 act = nn.SiLU 248 else: 249 raise NotImplementedError() 250 251 enc_layers = [] 252 dec_layers = []
30 31 def forward(self, x): 32 sl = x.shape[1] 33 if self.relative: 34 start = random.randint(sl, self.seq_len) - sl 35 return self.emb(torch.arange(start, start + sl, device=x.device)) 36 else: 37 return self.emb(torch.arange(0, sl, device=x.device))
341 for i in range(prompt_codes.shape[0]): 342 if lengths[i] < prompt_len: 343 start = 0 344 else: 345 start = random.randint(0, lengths[i] - prompt_len) 346 prompt = prompt_codes[:, start : start + prompt_len] 347 348 # add start and stop tokens
395 If return_latent is specified, loss & logits are not computed or returned. Only the predicted latents are returned. 396 """ 397 # ❗ FIXIT 398 if self.max_conditioning_inputs == 0: 399 assert cond_mels is None, " ❗ cond_mels is not None, but max_conditioning_inputs == 0" 400 401 max_text_len = text_lengths.max() 402 code_lengths = torch.ceil(wav_lengths / self.code_stride_len).long() + 3
426 if max_mel_len > audio_codes.shape[-1]: 427 audio_codes = F.pad(audio_codes, (0, max_mel_len - audio_codes.shape[-1])) 428 429 # 💖 Lovely assertions 430 assert ( 431 max_mel_len <= audio_codes.shape[-1] 432 ), f" ❗ max_mel_len ({max_mel_len}) > audio_codes.shape[-1] ({audio_codes.shape[-1]})" 433 assert ( 434 max_text_len <= text_inputs.shape[-1] 435 ), f" ❗ max_text_len ({max_text_len}) > text_inputs.shape[-1] ({text_inputs.shape[-1]})"
429 # 💖 Lovely assertions 430 assert ( 431 max_mel_len <= audio_codes.shape[-1] 432 ), f" ❗ max_mel_len ({max_mel_len}) > audio_codes.shape[-1] ({audio_codes.shape[-1]})" 433 assert ( 434 max_text_len <= text_inputs.shape[-1] 435 ), f" ❗ max_text_len ({max_text_len}) > text_inputs.shape[-1] ({text_inputs.shape[-1]})" 436 437 # Append stop token to text inputs 438 text_inputs = F.pad(text_inputs[:, :max_text_len], (0, 1), value=self.stop_text_token)
533 for idx, l in enumerate(code_lengths): 534 mel_targets[idx, l + 1 :] = -1 535 536 # check that the stop token is in every row of mel_targets 537 assert (mel_targets == self.stop_audio_token).sum() >= mel_targets.shape[ 538 0 539 ], f" ❗ mel_targets does not contain stop token ({self.stop_audio_token}) in every row." 540 541 # ignore the loss for the segment used for conditioning 542 # coin flip for the segment to be ignored
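A tiny self-contained illustration of the stop-token assertion above; the token value and targets are made up and not taken from the real model:

import torch

stop_audio_token = 0          # made-up value for illustration
mel_targets = torch.tensor([
    [5, 7, 0, -1],            # -1 marks positions already set to the ignore index
    [3, 0, -1, -1],
    [9, 2, 4, 0],
])
# at least one stop token per row => total count >= number of rows
assert (mel_targets == stop_audio_token).sum() >= mel_targets.shape[0]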
68 output_attentions=None, 69 output_hidden_states=None, 70 return_dict=None, 71 ): 72 assert self.cached_prefix_emb is not None 73 assert inputs_embeds is None # Not supported by this inference model. 74 assert labels is None # Training not supported by this inference model. 75 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
69 output_hidden_states=None, 70 return_dict=None, 71 ): 72 assert self.cached_prefix_emb is not None 73 assert inputs_embeds is None # Not supported by this inference model. 74 assert labels is None # Training not supported by this inference model. 75 return_dict = return_dict if return_dict is not None else self.config.use_return_dict 76
70 return_dict=None, 71 ): 72 assert self.cached_prefix_emb is not None 73 assert inputs_embeds is None # Not supported by this inference model. 74 assert labels is None # Training not supported by this inference model. 75 return_dict = return_dict if return_dict is not None else self.config.use_return_dict 76 77 # assert len(past_key_values) + len(input_ids) == attention_mask.shape[1]
330 state = torch.load(checkpoint_path, map_location=torch.device("cpu")) 331 self.load_state_dict(state["model"]) 332 if eval: 333 self.eval() 334 assert not self.training 335 self.remove_weight_norm() 336 337
411 self.coefficient = coefficient 412 self.register_buffer("filter", torch.FloatTensor([-self.coefficient, 1.0]).unsqueeze(0).unsqueeze(0)) 413 414 def forward(self, x): 415 assert len(x.size()) == 2 416 417 x = torch.nn.functional.pad(x.unsqueeze(1), (1, 0), "reflect") 418 return torch.nn.functional.conv1d(x, self.filter).squeeze(1)
604 criterion = criterion.cuda() 605 606 if eval: 607 self.eval() 608 assert not self.training 609 610 if not eval: 611 return criterion, state["step"]
727 728 self.load_state_dict(state) 729 if eval: 730 self.eval() 731 assert not self.training 732 self.waveform_decoder.remove_weight_norm()
29 elif channels <= 64: 30 groups = 16 31 while channels % groups != 0: 32 groups = int(groups / 2) 33 assert groups > 2 34 return GroupNorm32(groups, channels) 35 36
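The normalization helper above keeps halving the group count until it divides the channel count, and insists the result stays above 2. A standalone sketch of that loop with illustrative channel counts (the helper name below is hypothetical):

def pick_groups(channels: int, start: int = 16) -> int:
    # halve the group count until it divides the channel count
    groups = start
    while channels % groups != 0:
        groups = int(groups / 2)
    assert groups > 2
    return groups

assert pick_groups(48) == 16   # 48 % 16 == 0
assert pick_groups(40) == 8    # 40 % 16 != 0, but 40 % 8 == 0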
52 :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. 53 :return: an [N x (H * C) x T] tensor after attention. 54 """ 55 bs, width, length = qkv.shape 56 assert width % (3 * self.n_heads) == 0 57 ch = width // (3 * self.n_heads) 58 q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) 59 scale = 1 / math.sqrt(math.sqrt(ch))
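A shape-only sketch of the reshape/split above, with illustrative sizes (batch 2, 4 heads, 8 channels per head, 16 time steps; none of these come from the model configs):

import torch

bs, n_heads, ch, length = 2, 4, 8, 16
qkv = torch.randn(bs, n_heads * 3 * ch, length)   # [N, H*3*C, T] = [2, 96, 16]

width = qkv.shape[1]
assert width % (3 * n_heads) == 0
q, k, v = qkv.reshape(bs * n_heads, ch * 3, length).split(ch, dim=1)
assert q.shape == k.shape == v.shape == (bs * n_heads, ch, length)   # [8, 8, 16]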
85 self.do_activation = do_activation 86 if num_head_channels == -1: 87 self.num_heads = num_heads 88 else: 89 assert ( 90 channels % num_head_channels == 0 91 ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" 92 self.num_heads = channels // num_head_channels 93 self.norm = normalization(channels) 94 self.qkv = conv_nd(1, channels, out_channels * 3, 1)
43 self.causal = causal 44 self.register_buffer("mask", None, persistent=False) 45 46 self.use_flash = use_flash 47 assert not ( 48 use_flash and version.parse(torch.__version__) < version.parse("2.0.0") 49 ), "in order to use flash attention, you must be using pytorch 2.0 or above" 50 51 # determine efficient attention configs for cuda and cpu 52 self.config = namedtuple("EfficientAttentionConfig", ["enable_flash", "enable_math", "enable_mem_efficient"])
180 181 if not self.cond: 182 return out 183 184 assert exists(cond) 185 gamma, beta = self.to_gamma_beta(cond).chunk(2, dim=-1) 186 gamma, beta = map(lambda t: rearrange(t, "b d -> b 1 d"), (gamma, beta)) 187 return out * gamma + beta
193 (kernel_size,) = self.kernel_size 194 (dilation,) = self.dilation 195 (stride,) = self.stride 196 197 assert stride == 1 198 self.causal_padding = dilation * (kernel_size - 1) 199 200 def forward(self, x):
530 try: 531 text = re.sub(_currency_re["GBP"], lambda m: _expand_currency(m, lang, "GBP"), text) 532 text = re.sub(_currency_re["USD"], lambda m: _expand_currency(m, lang, "USD"), text) 533 text = re.sub(_currency_re["EUR"], lambda m: _expand_currency(m, lang, "EUR"), text) 534 except: 535 pass 536 if lang != "tr": 537 text = re.sub(_decimal_number_re, lambda m: _expand_decimal_point(m, lang), text) 538 text = re.sub(_ordinal_re[lang], lambda m: _expand_ordinal(m, lang), text)
763 ("이것은 1 번째 테스트입니다", "이것은 첫 번째 테스트입니다", "ko"), 764 ] 765 for a, b, lang in test_cases: 766 out = expand_numbers_multilingual(a, lang=lang) 767 assert out == b, f"'{out}' vs '{b}'" 768 769 770 def test_abbreviations_multilingual():
806 ] 807 808 for a, b, lang in test_cases: 809 out = expand_abbreviations_multilingual(a, lang=lang) 810 assert out == b, f"'{out}' vs '{b}'" 811 812 813 def test_symbols_multilingual():
833 ] 834 835 for a, b, lang in test_cases: 836 out = expand_symbols_multilingual(a, lang=lang) 837 assert out == b, f"'{out}' vs '{b}'" 838 839 840 if __name__ == "__main__":
15 """Returns a dictionary of samples keyed by language.""" 16 samples_by_col = {} 17 for sample in samples: 18 col_val = sample[col] 19 assert isinstance(col_val, str) 20 if col_val not in samples_by_col: 21 samples_by_col[col_val] = [] 22 samples_by_col[col_val].append(sample)
28 # for eval, use a mid-length sample when possible to make results more reproducible 29 if is_eval: 30 sample_length = int((min_sample_length + max_sample_length) / 2) 31 else: 32 sample_length = random.randint(min_sample_length, max_sample_length) 33 gap = rel_clip.shape[-1] - sample_length 34 if gap < 0: 35 sample_length = rel_clip.shape[-1] // 2
38 # for eval, always start from position 0 to make results more reproducible 39 if is_eval: 40 rand_start = 0 41 else: 42 rand_start = random.randint(0, gap) 43 44 rand_end = rand_start + sample_length 45 rel_clip = rel_clip[:, rand_start:rand_end]
61 self.sample_rate = sample_rate 62 self.max_wav_len = model_args.max_wav_length 63 self.max_text_len = model_args.max_text_length 64 self.use_masking_gt_prompt_approach = model_args.gpt_use_masking_gt_prompt_approach 65 assert self.max_wav_len is not None and self.max_text_len is not None 66 67 self.samples = samples 68 if not is_eval:
81 new_samples = [] 82 for sample in self.samples: 83 try: 84 tseq, _, wav, _, _, _ = self.load_item(sample) 85 except: 86 continue 87 # Basically, this audio file is nonexistent or too long to be supported by the dataset. 88 if ( 89 wav is None
97 98 def get_text(self, text, lang): 99 tokens = self.tokenizer.encode(text, lang) 100 tokens = torch.IntTensor(tokens) 101 assert not torch.any(tokens == 1), f"UNK token found in {text} -> {self.tokenizer.decode(tokens)}" 102 # The stop token should always be sacred. 103 assert not torch.any(tokens == 0), f"Stop token found in {text}" 104 return tokens
99 tokens = self.tokenizer.encode(text, lang) 100 tokens = torch.IntTensor(tokens) 101 assert not torch.any(tokens == 1), f"UNK token found in {text} -> {self.tokenizer.decode(tokens)}" 102 # The stop token should always be sacred. 103 assert not torch.any(tokens == 0), f"Stop token found in {text}" 104 return tokens 105 106 def load_item(self, sample):
140 sample = self.samples[index] 141 sample_id = str(index) 142 else: 143 # select a random language 144 lang = random.choice(list(self.samples.keys())) 145 # select random sample 146 index = random.randint(0, len(self.samples[lang]) - 1) 147 sample = self.samples[lang][index]
142 else: 143 # select a random language 144 lang = random.choice(list(self.samples.keys())) 145 # select random sample 146 index = random.randint(0, len(self.samples[lang]) - 1) 147 sample = self.samples[lang][index] 148 # a unique id for each sample to deal with failures 149 sample_id = lang + "_" + str(index)
56 57 def callback_clearml_load_save(operation_type, model_info): 58 # returning None skips the file upload/log; returning model_info continues with the log/upload 59 # you can also change the upload destination file name via model_info.upload_filename, or check the local file size with Path(model_info.local_model_path).stat().st_size 60 assert operation_type in ("load", "save") 61 # print(operation_type, model_info.__dict__) 62 63 if "similarities.pth" in model_info.__dict__["local_model_path"]:
475 config, 476 checkpoint_path, 477 eval=False, 478 strict=True, 479 cache_storage="/tmp/tts_cache", 480 target_protocol="s3", 481 target_options={"anon": True}, 482 ): # pylint: disable=unused-argument, disable=W0201, disable=W0102, redefined-builtin 483 """Load the model checkpoint and setup for training or inference""" 484 485 state = self.xtts.get_compatible_checkpoint_state_dict(checkpoint_path) 486 487 # load the model weights 488 self.xtts.load_state_dict(state, strict=strict) 489
489 490 if eval: 491 self.xtts.gpt.init_gpt_for_inference(kv_cache=self.args.kv_cache, use_deepspeed=False) 492 self.eval() 493 assert not self.training 494 495 @staticmethod 496 def init_from_config(config: "GPTTrainerConfig", samples: Union[List[List], List[Dict]] = None):
1168 ndone = 0 1169 with open(args.ifile, "r", encoding="utf8") as istream, open(args.ofile, "w+", encoding="utf8") as ostream: 1170 if args.format == "tsv": 1171 reader = csv.DictReader(istream, delimiter="\t") 1172 assert "TEXT" in reader.fieldnames 1173 print("\t".join(reader.fieldnames), file=ostream) 1174 1175 for item in reader:
402 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) 403 self.load_state_dict(state["model"]) 404 if eval: 405 self.eval() 406 assert not self.training 407 408 def get_criterion(self): 409 from TTS.tts.layers.losses import AlignTTSLoss # pylint: disable=import-outside-toplevel
116 self.decoder.set_r(config.r) 117 if eval: 118 self.eval() 119 print(f" > Model's reduction rate `r` is set to: {self.decoder.r}") 120 assert not self.training 121 122 def get_criterion(self) -> nn.Module: 123 """Get the model criterion used in training."""
205 # by cutting off from the largest duration indices. 206 extra_frames = dur.sum() - mel_lengths[idx] 207 largest_idxs = torch.argsort(-dur)[:extra_frames] 208 dur[largest_idxs] -= 1 209 assert ( 210 dur.sum() == mel_lengths[idx] 211 ), f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}" 212 durations[idx, : text_lengths[idx]] = dur 213 214 # set stop targets wrt reduction factor
95 return torch.ceil(lens / stride).int() 96 97 98 def initialize_embeddings(shape: Tuple[int]) -> torch.Tensor: 99 assert len(shape) == 2, "Can only initialize 2-D embedding matrices ..." 100 return torch.randn(shape) * np.sqrt(2 / shape[1]) 101 102
135 """ 136 x, sr = torchaudio.load( 137 file_path, 138 ) 139 assert (x > 1).sum() + (x < -1).sum() == 0 140 return x, sr 141 142
1546 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu")) 1547 self.load_state_dict(state["model"]) 1548 if eval: 1549 self.eval() 1550 assert not self.training 1551 1552 def get_state_dict(self): 1553 """Custom state dict of the model with all the necessary components for inference."""
833 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) 834 self.load_state_dict(state["model"]) 835 if eval: 836 self.eval() 837 assert not self.training 838 839 def get_criterion(self): 840 from TTS.tts.layers.losses import ForwardTTSLoss # pylint: disable=import-outside-toplevel
121 self.embedded_speaker_dim = ( 122 config.d_vector_dim if "d_vector_dim" in config and config.d_vector_dim is not None else 512 123 ) 124 if self.speaker_manager is not None: 125 assert ( 126 config.d_vector_dim == self.speaker_manager.embedding_dim 127 ), " [!] d-vector dimension mismatch b/w config and speaker manager." 128 # init speaker embedding layer 129 if config.use_speaker_embedding and not config.use_d_vector_file: 130 print(" > Init speaker_embedding layer.")
526 self.load_state_dict(state["model"]) 527 if eval: 528 self.eval() 529 self.store_inverse() 530 assert not self.training 531 532 @staticmethod 533 def get_criterion():
257 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu")) 258 self.load_state_dict(state["model"]) 259 if eval: 260 self.eval() 261 assert not self.training 262 263 def on_init_start(self, trainer): 264 """If the current dataset does not have normalisation statistics and initialisation transition_probability it computes them otherwise loads."""
273 self.load_state_dict(state["model"]) 274 if eval: 275 self.eval() 276 self.decoder.store_inverse() 277 assert not self.training 278 279 def on_init_start(self, trainer): 280 """If the current dataset does not have normalisation statistics and initialisation transition_probability it computes them otherwise loads."""
79 gap = clip.shape[-1] - cond_length 80 if gap < 0: 81 clip = F.pad(clip, pad=(0, abs(gap))) 82 elif gap > 0: 83 rand_start = random.randint(0, gap) 84 clip = clip[:, rand_start : rand_start + cond_length] 85 mel_clip = TorchMelSpectrogram(**kwargs)(clip.unsqueeze(0)).squeeze(0) 86 return mel_clip.unsqueeze(0).to(device)
418 0 - latents will be generated as in original tortoise, using ~4.27s from each voice sample, averaging latent across all samples 419 1 - latents will be generated using (almost) entire voice samples, averaged across all the ~4.27s chunks 420 2 - latents will be generated using (almost) entire voice samples, averaged per voice sample 421 """ 422 assert latent_averaging_mode in [ 423 0, 424 1, 425 2, 426 ], "latent_averaging mode has to be one of (0, 1, 2)" 427 428 with torch.no_grad(): 429 voice_samples = [[v.to(self.device) for v in ls] for ls in voice_samples]
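A purely illustrative sketch of what the three averaging modes in the docstring above mean; the chunk latents and dimensions are made up and this is not the actual Tortoise implementation:

import torch

# chunks_per_sample[i]: one latent per ~4.27 s chunk of voice sample i (made-up shapes)
chunks_per_sample = [torch.randn(3, 1024), torch.randn(5, 1024)]

latent_averaging_mode = 1
if latent_averaging_mode == 0:
    # first chunk of each sample, averaged across samples
    latent = torch.stack([chunks[0] for chunks in chunks_per_sample]).mean(0)
elif latent_averaging_mode == 1:
    # all chunks of all samples pooled together, then averaged
    latent = torch.cat(chunks_per_sample, dim=0).mean(0)
elif latent_averaging_mode == 2:
    # one averaged latent per voice sample
    latent = [chunks.mean(0) for chunks in chunks_per_sample]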
670 deterministic_seed = deterministic_state(seed=use_deterministic_seed) 671 672 text_tokens = torch.IntTensor(self.tokenizer.encode(text)).unsqueeze(0).to(self.device) 673 text_tokens = F.pad(text_tokens, (0, 1)) # This may not be necessary. 674 assert ( 675 text_tokens.shape[-1] < 400 676 ), "Too much text provided. Break the text up into separate segments and re-try inference." 677 678 if voice_samples is not None: 679 (
70 Return Shapes: 71 - x: :math:`[1, T]` 72 """ 73 x, sr = torchaudio.load(file_path) 74 assert (x > 1).sum() + (x < -1).sum() == 0 75 return x, sr 76 77
241 dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight) 242 if multi_dict is not None: 243 # check if all keys are in the multi_dict 244 for k in multi_dict: 245 assert k in unique_attr_names, f"{k} not in {unique_attr_names}" 246 # scale weights 247 multiplier_samples = np.array([multi_dict.get(item[attr_name], 1.0) for item in items]) 248 dataset_samples_weight *= multiplier_samples
1138 x, x_mask, g=g if self.args.condition_dp_on_speaker else None, lang_emb=lang_emb 1139 ) 1140 w = torch.exp(logw) * x_mask * self.length_scale 1141 else: 1142 assert durations.shape[-1] == x.shape[-1] 1143 w = durations.unsqueeze(0) 1144 1145 w_ceil = torch.ceil(w)
1209 y_lengths (Tensor): Length of each reference spectrogram. Tensor of shape [B] 1210 speaker_cond_src (Tensor): Reference speaker ID. Tensor of shape [B,] 1211 speaker_cond_tgt (Tensor): Target speaker ID. Tensor of shape [B,] 1212 """ 1213 assert self.num_speakers > 0, "num_speakers has to be larger than 0." 1214 # speaker embedding 1215 if self.args.use_speaker_embedding and not self.args.use_d_vector_file: 1216 g_src = self.emb_g(torch.from_numpy((np.array(speaker_cond_src))).unsqueeze(0)).unsqueeze(-1)
1524 fmax=ac.mel_fmax, 1525 ) 1526 1527 if self.args.encoder_sample_rate: 1528 assert batch["spec"].shape[2] == int( 1529 batch["mel"].shape[2] / self.interpolate_factor 1530 ), f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}" 1531 else: 1532 assert batch["spec"].shape[2] == batch["mel"].shape[2], f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}" 1533
1528 assert batch["spec"].shape[2] == int( 1529 batch["mel"].shape[2] / self.interpolate_factor 1530 ), f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}" 1531 else: 1532 assert batch["spec"].shape[2] == batch["mel"].shape[2], f"{batch['spec'].shape[2]}, {batch['mel'].shape[2]}" 1533 1534 # compute spectrogram frame lengths 1535 batch["spec_lens"] = (batch["spec"].shape[2] * batch["waveform_rel_lens"]).int()
1535 batch["spec_lens"] = (batch["spec"].shape[2] * batch["waveform_rel_lens"]).int() 1536 batch["mel_lens"] = (batch["mel"].shape[2] * batch["waveform_rel_lens"]).int() 1537 1538 if self.args.encoder_sample_rate: 1539 assert (batch["spec_lens"] - (batch["mel_lens"] / self.interpolate_factor).int()).sum() == 0 1540 else: 1541 assert (batch["spec_lens"] - batch["mel_lens"]).sum() == 0 1542
1537 1538 if self.args.encoder_sample_rate: 1539 assert (batch["spec_lens"] - (batch["mel_lens"] / self.interpolate_factor).int()).sum() == 0 1540 else: 1541 assert (batch["spec_lens"] - batch["mel_lens"]).sum() == 0 1542 1543 # zero the padding frames 1544 batch["spec"] = batch["spec"] * sequence_mask(batch["spec_lens"]).unsqueeze(1)
1721 self.load_state_dict(state["model"], strict=strict) 1722 1723 if eval: 1724 self.eval() 1725 assert not self.training 1726 1727 def load_fairseq_checkpoint( 1728 self, config, checkpoint_dir, eval=False, strict=True
1765 new_chk = rehash_fairseq_vits_checkpoint(checkpoint_file) 1766 self.load_state_dict(new_chk, strict=strict) 1767 if eval: 1768 self.eval() 1769 assert not self.training 1770 1771 @staticmethod 1772 def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]] = None, verbose=True):
1781 1782 upsample_rate = torch.prod(torch.as_tensor(config.model_args.upsample_rates_decoder)).item() 1783 1784 if not config.model_args.encoder_sample_rate: 1785 assert ( 1786 upsample_rate == config.audio.hop_length 1787 ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {config.audio.hop_length}" 1788 else: 1789 encoder_to_vocoder_upsampling_factor = config.audio.sample_rate / config.model_args.encoder_sample_rate 1790 effective_hop_length = config.audio.hop_length * encoder_to_vocoder_upsampling_factor
1787 ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {config.audio.hop_length}" 1788 else: 1789 encoder_to_vocoder_upsampling_factor = config.audio.sample_rate / config.model_args.encoder_sample_rate 1790 effective_hop_length = config.audio.hop_length * encoder_to_vocoder_upsampling_factor 1791 assert ( 1792 upsample_rate == effective_hop_length 1793 ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {effective_hop_length}" 1794 1795 ap = AudioProcessor.init_from_config(config, verbose=verbose) 1796 tokenizer, new_config = TTSTokenizer.init_from_config(config)
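A quick numeric check of the two branches above, with typical illustrative values (not read from any shipped config): decoder upsample rates [8, 8, 2, 2] multiply to 256 and must match the hop length, and halving the encoder sample rate doubles the effective hop length the decoder has to cover.

import torch

upsample_rates_decoder = [8, 8, 2, 2]                          # illustrative
hop_length, sample_rate, encoder_sample_rate = 256, 22050, 11025

upsample_rate = torch.prod(torch.as_tensor(upsample_rates_decoder)).item()
assert upsample_rate == hop_length                             # 8 * 8 * 2 * 2 == 256

effective_hop_length = hop_length * (sample_rate / encoder_sample_rate)
assert effective_hop_length == 512.0
# in the encoder_sample_rate branch the decoder's upsample rates would have to multiply to 512 to pass the check above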
394 `text_input` as text token IDs after tokenizer, `voice_samples` as samples used for cloning, `conditioning_latents` 395 as latents used at inference. 396 397 """ 398 assert ( 399 ("zh-cn" if language == "zh" else language) in self.config.languages 400 ), f" ❗ Language {language} is not supported. Supported languages are {self.config.languages}" 401 # Use generally found best tuning knobs for generation. 402 settings = { 403 "temperature": config.temperature,
532 for sent in text: 533 sent = sent.strip().lower() 534 text_tokens = torch.IntTensor(self.tokenizer.encode(sent, lang=language)).unsqueeze(0).to(self.device) 535 536 assert ( 537 text_tokens.shape[-1] < self.args.gpt_max_text_tokens 538 ), f" ❗ XTTS can only generate text with a maximum of {self.args.gpt_max_text_tokens} tokens." 539 540 with torch.no_grad(): 541 gpt_codes = self.gpt.generate(
640 for sent in text: 641 sent = sent.strip().lower() 642 text_tokens = torch.IntTensor(self.tokenizer.encode(sent, lang=language)).unsqueeze(0).to(self.device) 643 644 assert ( 645 text_tokens.shape[-1] < self.args.gpt_max_text_tokens 646 ), f" ❗ XTTS can only generate text with a maximum of {self.args.gpt_max_text_tokens} tokens." 647 648 fake_inputs = self.gpt.compute_embeddings( 649 gpt_cond_latent.to(self.device),
5 6 7 def _pad_data(x, length): 8 _pad = 0 9 assert x.ndim == 1 10 return np.pad(x, (0, length - x.shape[0]), mode="constant", constant_values=_pad) 11 12
16 17 18 def _pad_tensor(x, length): 19 _pad = 0.0 20 assert x.ndim == 2 21 x = np.pad(x, [[0, 0], [0, length - x.shape[1]]], mode="constant", constant_values=_pad) 22 return x 23
39 40 Returns: 41 np.ndarray: Padded stop target array. 42 """ 43 assert x.ndim == 1 44 return np.pad(x, (0, length - x.shape[0]), mode="constant", constant_values=pad_val) 45 46
111 if let_short_samples: 112 _x_lenghts[len_diff < 0] = segment_size 113 len_diff = _x_lenghts - segment_size 114 else: 115 assert all( 116 len_diff > 0 117 ), f" [!] At least one sample is shorter than the segment size ({segment_size}). \n {_x_lenghts}" 118 segment_indices = (torch.rand([B]).type_as(x) * (len_diff + 1)).long() 119 ret = segment(x, segment_indices, segment_size, pad_short=pad_short) 120 return ret, segment_indices
86 Returns: 87 np.ndarray: embedding. 88 """ 89 if self.name_to_id: 90 return self.name_to_id[random.choices(list(self.name_to_id.keys()))[0]] 91 92 return None 93
288 embeddings = self.get_embeddings_by_name(idx) 289 if num_samples is None: 290 embeddings = np.stack(embeddings).mean(0) 291 else: 292 assert len(embeddings) >= num_samples, f" [!] {idx} has number of samples < {num_samples}" 293 if randomize: 294 embeddings = np.stack(random.choices(embeddings, k=num_samples)).mean(0) 295 else:
290 embeddings = np.stack(embeddings).mean(0) 291 else: 292 assert len(embeddings) >= num_samples, f" [!] {idx} has number of samples < {num_samples}" 293 if randomize: 294 embeddings = np.stack(random.choices(embeddings, k=num_samples)).mean(0) 295 else: 296 embeddings = np.stack(embeddings[:num_samples]).mean(0) 297 return embeddings
304 Returns: 305 np.ndarray: embedding. 306 """ 307 if self.embeddings: 308 return self.embeddings[random.choices(list(self.embeddings.keys()))[0]]["embedding"] 309 310 return None 311
179 speaker_manager.load_embeddings_from_file(speakers_file) 180 elif not c.use_d_vector_file: # restore speaker manager with speaker ID file. 181 speaker_ids_from_data = speaker_manager.name_to_id 182 speaker_manager.load_ids_from_file(speakers_file) 183 assert all( 184 speaker in speaker_manager.name_to_id for speaker in speaker_ids_from_data 185 ), " [!] You cannot introduce new speakers to a pre-trained model." 186 elif c.use_d_vector_file and c.d_vector_file: 187 # new speaker manager with external speaker embeddings. 188 speaker_manager.load_embeddings_from_file(c.d_vector_file)
43 44 x = tensors[0] 45 46 for t in tensors: 47 assert torch.is_tensor(t), f"Expected torch.Tensor, got {type(t)}" 48 assert t.device == x.device, f"Expected tensors to be on {x.device}, got {t.device}" 49 50 if size_range is None:
44 x = tensors[0] 45 46 for t in tensors: 47 assert torch.is_tensor(t), f"Expected torch.Tensor, got {type(t)}" 48 assert t.device == x.device, f"Expected tensors to be on {x.device}, got {t.device}" 49 50 if size_range is None: 51 assert t.size() == x.size(), f"Expected tensors with same size, got {t.size()} and {x.size()}"
47 assert torch.is_tensor(t), f"Expected torch.Tensor, got {type(t)}" 48 assert t.device == x.device, f"Expected tensors to be on {x.device}, got {t.device}" 49 50 if size_range is None: 51 assert t.size() == x.size(), f"Expected tensors with same size, got {t.size()} and {x.size()}" 52 else: 53 assert ( 54 t.size()[size_range[0] : size_range[1]] == x.size()[size_range[0] : size_range[1]]
49 50 if size_range is None: 51 assert t.size() == x.size(), f"Expected tensors with same size, got {t.size()} and {x.size()}" 52 else: 53 assert ( 54 t.size()[size_range[0] : size_range[1]] == x.size()[size_range[0] : size_range[1]] 55 ), f"Expected tensors with same size at given dimensions, got {t.size()} and {x.size()}" 56 57 if dim_range[0] == dim_range[1]: 58 assert t.dim() == dim_range[0], f"Expected number of dimensions to be {dim_range[0]}, got {t.dim()}"
54 t.size()[size_range[0] : size_range[1]] == x.size()[size_range[0] : size_range[1]] 55 ), f"Expected tensors with same size at given dimensions, got {t.size()} and {x.size()}" 56 57 if dim_range[0] == dim_range[1]: 58 assert t.dim() == dim_range[0], f"Expected number of dimensions to be {dim_range[0]}, got {t.dim()}" 59 elif dim_range[0] < dim_range[1]: 60 assert ( 61 dim_range[0] <= t.dim() <= dim_range[1]
56 57 if dim_range[0] == dim_range[1]: 58 assert t.dim() == dim_range[0], f"Expected number of dimensions to be {dim_range[0]}, got {t.dim()}" 59 elif dim_range[0] < dim_range[1]: 60 assert ( 61 dim_range[0] <= t.dim() <= dim_range[1] 62 ), f"Expected number of dimensions to be between {dim_range[0]} and {dim_range[1]}, got {t.dim()}" 63 64 if data_range[0] < data_range[1]: 65 assert data_range[0] <= t.min(), f"Expected values to be greater or equal to {data_range[0]}, got {t.min()}"
61 dim_range[0] <= t.dim() <= dim_range[1] 62 ), f"Expected number of dimensions to be between {dim_range[0]} and {dim_range[1]}, got {t.dim()}" 63 64 if data_range[0] < data_range[1]: 65 assert data_range[0] <= t.min(), f"Expected values to be greater or equal to {data_range[0]}, got {t.min()}" 66 assert t.max() <= data_range[1], f"Expected values to be lower or equal to {data_range[1]}, got {t.max()}" 67 68
62 ), f"Expected number of dimensions to be between {dim_range[0]} and {dim_range[1]}, got {t.dim()}" 63 64 if data_range[0] < data_range[1]: 65 assert data_range[0] <= t.min(), f"Expected values to be greater or equal to {data_range[0]}, got {t.min()}" 66 assert t.max() <= data_range[1], f"Expected values to be lower or equal to {data_range[1]}, got {t.max()}" 67 68 69 def gaussian_filter(kernel_size: int, sigma: float) -> torch.Tensor:
124 IEEE Transactions on Image Processing, 13, 600-612. 125 https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf, 126 DOI: `10.1109/TIP.2003.819861` 127 """ 128 assert kernel_size % 2 == 1, f"Kernel size must be odd, got [{kernel_size}]" 129 _validate_input([x, y], dim_range=(4, 5), data_range=(0, data_range)) 130 131 x = x / float(data_range)
228 self.kernel_size = kernel_size 229 230 # This check might look redundant because kernel size is checked within the ssim function anyway. 231 # However, this check allows us to fail fast when the loss is being initialised, before training has started. 232 assert kernel_size % 2 == 1, f"Kernel size must be odd, got [{kernel_size}]" 233 self.kernel_sigma = kernel_sigma 234 self.k1 = k1 235 self.k2 = k2
185 186 language_name = None 187 if language_id is not None: 188 language = [k for k, v in model.language_manager.name_to_id.items() if v == language_id] 189 assert len(language) == 1, "language_id must be a valid language" 190 language_name = language[0] 191 192 # convert text to sequence of token IDs
290 _vocab = [self._pad] + _vocab if self._pad is not None and len(self._pad) > 0 else _vocab 291 self.vocab = _vocab + list(self._punctuations) 292 if self.is_unique: 293 duplicates = {x for x in self.vocab if self.vocab.count(x) > 1} 294 assert ( 295 len(self.vocab) == len(self._char_to_id) == len(self._id_to_char) 296 ), f" [!] There are duplicate characters in the character set. {duplicates}" 297 298 def char_to_id(self, char: str) -> int: 299 try:
1 import logging 2 import re 3 import subprocess 4 from typing import Dict, List 5 6 from packaging.version import Version 7 8 from TTS.tts.utils.text.phonemizers.base import BasePhonemizer
20 espeak_version_pattern = re.compile(r"text-to-speech:\s(?P<version>\d+\.\d+(\.\d+)?)") 21 22 23 def get_espeak_version(): 24 output = subprocess.getoutput("espeak --version") 25 match = espeak_version_pattern.search(output) 26 27 return match.group("version")
27 return match.group("version") 28 29 30 def get_espeakng_version(): 31 output = subprocess.getoutput("espeak-ng --version") 32 return output.split()[3] 33 34
54 ] 55 cmd.extend(args) 56 logging.debug("espeakng: executing %s", repr(cmd)) 57 58 with subprocess.Popen( 59 cmd, 60 stdout=subprocess.PIPE, 61 stderr=subprocess.STDOUT, 62 ) as p: 63 res = iter(p.stdout.readline, b"") 64 if not sync: 65 p.stdout.close()
25 Returns: 26 np.ndarray: melspectrogram basis. 27 """ 28 if mel_fmax is not None: 29 assert mel_fmax <= sample_rate // 2 30 assert mel_fmax - mel_fmin > 0 31 return librosa.filters.mel(sr=sample_rate, n_fft=fft_size, n_mels=num_mels, fmin=mel_fmin, fmax=mel_fmax) 32
26 np.ndarray: melspectrogram basis. 27 """ 28 if mel_fmax is not None: 29 assert mel_fmax <= sample_rate // 2 30 assert mel_fmax - mel_fmin > 0 31 return librosa.filters.mel(sr=sample_rate, n_fft=fft_size, n_mels=num_mels, fmin=mel_fmin, fmax=mel_fmax) 32 33
39 Returns: 40 Tuple[int, int]: hop length and window length for STFT. 41 """ 42 factor = frame_length_ms / frame_shift_ms 43 assert (factor).is_integer(), " [!] frame_shift_ms should divide frame_length_ms" 44 win_length = int(frame_length_ms / 1000.0 * sample_rate) 45 hop_length = int(win_length / float(factor)) 46 return win_length, hop_length
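A worked example of the computation above with illustrative values: at sample_rate=22050, frame_length_ms=50 and frame_shift_ms=12.5 give factor 4.0, win_length 1102 and hop_length 275.

sample_rate, frame_length_ms, frame_shift_ms = 22050, 50.0, 12.5  # illustrative values

factor = frame_length_ms / frame_shift_ms                  # 4.0, must be a whole number
assert factor.is_integer()
win_length = int(frame_length_ms / 1000.0 * sample_rate)   # int(1102.5) -> 1102
hop_length = int(win_length / float(factor))               # int(275.5)  -> 275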
68 69 Returns: 70 np.ndarray: Decibels spectrogram. 71 """ 72 assert (x < 0).sum() == 0, " [!] Input values must be non-negative." 73 return gain * _log(np.maximum(1e-8, x), base) 74 75
128 129 130 def mel_to_spec(*, mel: np.ndarray = None, mel_basis: np.ndarray = None, **kwargs) -> np.ndarray: 131 """Convert a melspectrogram to full scale spectrogram.""" 132 assert (mel < 0).sum() == 0, " [!] Input values must be non-negative." 133 inv_mel_basis = np.linalg.pinv(mel_basis) 134 return np.maximum(1e-10, np.dot(inv_mel_basis, mel)) 135
276 >>> ap = AudioProcessor(**conf) 277 >>> wav = ap.load_wav(WAV_FILE, sr=ap.sample_rate)[:5 * ap.sample_rate] 278 >>> pitch = ap.compute_f0(wav) 279 """ 280 assert pitch_fmax is not None, " [!] Set `pitch_fmax` before calling `compute_f0`." 281 assert pitch_fmin is not None, " [!] Set `pitch_fmin` before calling `compute_f0`." 282 283 f0, voiced_mask, _ = pyin(
277 >>> wav = ap.load_wav(WAV_FILE, sr=ap.sample_rate)[:5 * ap.sample_rate] 278 >>> pitch = ap.compute_f0(wav) 279 """ 280 assert pitch_fmax is not None, " [!] Set `pitch_fmax` before calling `compute_f0`." 281 assert pitch_fmin is not None, " [!] Set `pitch_fmin` before calling `compute_f0`." 282 283 f0, voiced_mask, _ = pyin( 284 y=x.astype(np.double),
399 400 Returns: 401 np.ndarray: RMS normalized waveform. 402 """ 403 assert -99 <= db_level <= 0, " [!] db_level should be between -99 and 0" 404 wav = rms_norm(wav=x, db_level=db_level) 405 return wav 406
222 else: 223 # use stft parameters from config file 224 self.hop_length = hop_length 225 self.win_length = win_length 226 assert min_level_db != 0.0, " [!] min_level_db is 0" 227 assert ( 228 self.win_length <= self.fft_size 229 ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}"
223 # use stft parameters from config file 224 self.hop_length = hop_length 225 self.win_length = win_length 226 assert min_level_db != 0.0, " [!] min_level_db is 0" 227 assert ( 228 self.win_length <= self.fft_size 229 ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}" 230 members = vars(self) 231 if verbose: 232 print(" > Setting up Audio Processor...")
357 for key in stats_config.keys(): 358 if key in skip_parameters: 359 continue 360 if key not in ["sample_rate", "trim_db"]: 361 assert ( 362 stats_config[key] == self.__dict__[key] 363 ), f" [!] Audio param {key} does not match the value used for computing mean-var stats. {stats_config[key]} vs {self.__dict__[key]}" 364 return mel_mean, mel_std, linear_mean, linear_std, stats_config 365 366 # pylint: disable=attribute-defined-outside-init
10 return rt 11 12 13 def init_distributed(rank, num_gpus, group_name, dist_backend, dist_url): 14 assert torch.cuda.is_available(), "Distributed mode requires CUDA." 15 16 # Set cuda device so everything is done on the right GPU. 17 torch.cuda.set_device(rank % torch.cuda.device_count())
26 """ 27 28 # If we already have the whole file, there is no need to download it again 29 req = urllib.request.Request(url, method="HEAD") 30 with urllib.request.urlopen(req) as response: 31 url_size = int(response.info().get("Content-Length", -1)) 32 if url_size == start_byte: 33 return
35 req = urllib.request.Request(url) 36 if start_byte: 37 req.headers["Range"] = "bytes={}-".format(start_byte) 38 39 with urllib.request.urlopen(req) as upointer, tqdm( 40 unit="B", 41 unit_scale=True, 42 unit_divisor=1024,
75 resume (bool, optional): Enable resuming download (Default: ``False``). 76 """ 77 78 req = urllib.request.Request(url, method="HEAD") 79 req_info = urllib.request.urlopen(req).info() # pylint: disable=consider-using-with 80 81 # Detect filename 82 filename = filename or req_info.get_filename() or os.path.basename(url)
120 121 if hash_type == "sha256": 122 hash_func = hashlib.sha256() 123 elif hash_type == "md5": 124 hash_func = hashlib.md5() 125 else: 126 raise ValueError(f"Unsupported hash_type: {hash_type}") 127
3 import importlib 4 import logging 5 import os 6 import re 7 import subprocess 8 import sys 9 from pathlib import Path 10 from typing import Dict
30 31 32 def get_git_branch(): 33 try: 34 out = subprocess.check_output(["git", "branch"]).decode("utf8") 35 current = next(line for line in out.split("\n") if line.startswith("*")) 36 current = current.replace("* ", "") 37 except subprocess.CalledProcessError:
49 # except: 50 # raise RuntimeError( 51 # " !! Commit before training to get the commit hash.") 52 try: 53 commit = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode().strip() 54 # Not copying .git folder into docker container 55 except (subprocess.CalledProcessError, FileNotFoundError): 56 commit = "0000000"
1 import os 2 import pickle as pickle_tts 3 from typing import Any, Callable, Dict, Union 4 5 import fsspec 6 import torch 7 8 from TTS.utils.generic_utils import get_user_data_dir
402 # ToDo: we need a better way to handle it 403 if "xtts" in model_name: 404 try: 405 self.check_if_configs_are_equal(model_name, model_item, output_path) 406 except: 407 pass 408 else: 409 print(f" > {model_name} is already downloaded.") 410 else:
527 @staticmethod 528 def _download_zip_file(file_url, output_folder, progress_bar): 529 """Download the github releases""" 530 # download the file 531 r = requests.get(file_url, stream=True) 532 # extract the file 533 try: 534 total_size_in_bytes = int(r.headers.get("content-length", 0))
541 if progress_bar: 542 ModelManager.tqdm_progress.update(len(data)) 543 file.write(data) 544 with zipfile.ZipFile(temp_zip_name) as z: 545 z.extractall(output_folder) 546 os.remove(temp_zip_name) # delete zip after extract 547 except zipfile.BadZipFile: 548 print(f" > Error: Bad zip file - {file_url}")
562 @staticmethod 563 def _download_tar_file(file_url, output_folder, progress_bar): 564 """Download the github releases""" 565 # download the file 566 r = requests.get(file_url, stream=True) 567 # extract the file 568 try: 569 total_size_in_bytes = int(r.headers.get("content-length", 0))
576 if progress_bar: 577 ModelManager.tqdm_progress.update(len(data)) 578 file.write(data) 579 with tarfile.open(temp_tar_name) as t: 580 t.extractall(output_folder) 581 tar_names = t.getnames() 582 os.remove(temp_tar_name) # delete tar after extract 583 except tarfile.ReadError:
596 def _download_model_files(file_urls, output_folder, progress_bar): 597 """Download the github releases""" 598 for file_url in file_urls: 599 # download the file 600 r = requests.get(file_url, stream=True) 601 # build the destination path from the file name 602 base_filename = file_url.split("/")[-1] 603 temp_zip_name = os.path.join(output_folder, base_filename)
48 drop_last=False, 49 label_key="class_name", 50 ): 51 super().__init__(dataset_items) 52 assert ( 53 batch_size % (num_classes_in_batch * num_gpus) == 0 54 ), "Batch size must be divisible by number of classes times the number of data parallel devices (if enabled)." 55 56 label_indices = {} 57 for idx, item in enumerate(dataset_items):
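A quick arithmetic check of the divisibility condition above, with made-up numbers: a batch of 32 drawn from 8 classes on 2 GPUs passes because 32 % (8 * 2) == 0, while a batch of 24 would fail.

def batch_is_valid(batch_size: int, num_classes_in_batch: int, num_gpus: int) -> bool:
    # mirrors the assertion above
    return batch_size % (num_classes_in_batch * num_gpus) == 0

assert batch_is_valid(32, 8, 2)        # 32 % 16 == 0
assert not batch_is_valid(24, 8, 2)    # 24 % 16 == 8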
86 self.seg = self._get_segmenter("en") 87 self.use_cuda = use_cuda 88 self.voice_dir = voice_dir 89 if self.use_cuda: 90 assert torch.cuda.is_available(), "CUDA is not available on this machine." 91 92 if tts_checkpoint: 93 self._load_tts(tts_checkpoint, tts_config_path, use_cuda)
193 # by cutting off from the largest duration indices. 194 extra_frames = dur.sum() - mel_lengths[idx] 195 largest_idxs = torch.argsort(-dur)[:extra_frames] 196 dur[largest_idxs] -= 1 197 assert ( 198 dur.sum() == mel_lengths[idx] 199 ), f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}" 200 durations[idx, : text_lengths[idx]] = dur 201 202 # set stop targets wrt reduction factor
34 self.out_channels = out_channels 35 self.kernel_size = kernel_size 36 self.n_layers = n_layers 37 self.p_dropout = p_dropout 38 assert n_layers > 1, "Number of layers should be larger than 1." 39 40 self.conv_layers = nn.ModuleList() 41 self.norm_layers = nn.ModuleList()
103 104 class WN(torch.nn.Module): 105 def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0, p_dropout=0): 106 super(WN, self).__init__() 107 assert kernel_size % 2 == 1 108 self.hidden_channels = hidden_channels 109 self.kernel_size = (kernel_size,) 110 self.dilation_rate = dilation_rate
346 p_dropout=0, 347 gin_channels=0, 348 mean_only=False, 349 ): 350 assert channels % 2 == 0, "channels should be divisible by 2" 351 super().__init__() 352 self.channels = channels 353 self.hidden_channels = hidden_channels
85 :return: the waveform slices and mel spectrogram slices as lists of array slices. Index 86 respectively the waveform and the mel spectrogram with these slices to obtain the partial 87 utterances. 88 """ 89 assert 0 < min_coverage <= 1 90 91 # Compute how many frames separate two partial utterances 92 samples_per_frame = int((sampling_rate * mel_window_step / 1000))
91 # Compute how many frames separate two partial utterances 92 samples_per_frame = int((sampling_rate * mel_window_step / 1000)) 93 n_frames = int(np.ceil((n_samples + 1) / samples_per_frame)) 94 frame_step = int(np.round((sampling_rate / rate) / samples_per_frame)) 95 assert 0 < frame_step, "The rate is too high" 96 assert frame_step <= partials_n_frames, "The rate is too low; it should be at least %f" % ( 97 sampling_rate / (samples_per_frame * partials_n_frames) 98 )
92 samples_per_frame = int((sampling_rate * mel_window_step / 1000)) 93 n_frames = int(np.ceil((n_samples + 1) / samples_per_frame)) 94 frame_step = int(np.round((sampling_rate / rate) / samples_per_frame)) 95 assert 0 < frame_step, "The rate is too high" 96 assert frame_step <= partials_n_frames, "The rate is too low; it should be at least %f" % ( 97 sampling_rate / (samples_per_frame * partials_n_frames) 98 ) 99 100 # Compute the slices 101 wav_slices, mel_slices = [], []
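A worked example of the frame arithmetic above with illustrative values (16 kHz audio, 10 ms mel window step, rate of 1.3 partials per second; partials_n_frames assumed to be 160): samples_per_frame comes out to 160 and frame_step rounds to 77.

import numpy as np

sampling_rate, mel_window_step, rate, partials_n_frames = 16000, 10, 1.3, 160  # illustrative

samples_per_frame = int((sampling_rate * mel_window_step / 1000))       # 160 samples per frame
frame_step = int(np.round((sampling_rate / rate) / samples_per_frame))  # round(76.92...) -> 77
assert 0 < frame_step <= partials_n_frames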
20 21 output_path = os.path.join(output_path, "WavLM-Large.pt") 22 if not os.path.exists(output_path): 23 print(f" > Downloading WavLM model to {output_path} ...") 24 urllib.request.urlretrieve(model_uri, output_path) 25 26 checkpoint = torch.load(output_path, map_location=torch.device(device)) 27 cfg = WavLMConfig(checkpoint["cfg"])
218 if p <= 0: 219 return module 220 221 # supported modules 222 assert isinstance(module, (nn.Linear, nn.Embedding, nn.Conv2d)) 223 224 # test whether module.weight has the right sizes wrt block_size 225 is_conv = module.weight.ndim == 4
225 is_conv = module.weight.ndim == 4 226 227 # 2D matrix 228 if not is_conv: 229 assert module.weight.size(1) % block_size == 0, "Input features must be a multiple of block sizes" 230 231 # 4D matrix 232 else:
231 # 4D matrix 232 else: 233 # 1x1 convolutions 234 if module.kernel_size == (1, 1): 235 assert module.in_channels % block_size == 0, "Input channels must be a multiple of block sizes" 236 # regular convolutions 237 else: 238 k = module.kernel_size[0] * module.kernel_size[1]
235 assert module.in_channels % block_size == 0, "Input channels must be a multiple of block sizes" 236 # regular convolutions 237 else: 238 k = module.kernel_size[0] * module.kernel_size[1] 239 assert k % block_size == 0, "Kernel size must be a multiple of block size" 240 241 def _forward_pre_hook(mod, input): 242 # no noise for evaluation
323 324 self.head_dim = embed_dim // num_heads 325 self.q_head_dim = self.head_dim 326 self.k_head_dim = self.head_dim 327 assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads" 328 self.scaling = self.head_dim**-0.5 329 330 self.self_attention = self_attention
329 330 self.self_attention = self_attention 331 self.encoder_decoder_attention = encoder_decoder_attention 332 333 assert not self.self_attention or self.qkv_same_dim, ( 334 "Self-attention requires query, key and " "value to be of the same size" 335 ) 336 337 k_bias = True 338 if rescale_init:
458 is_tpu = query.device.type == "xla" 459 460 tgt_len, bsz, embed_dim = query.size() 461 src_len = tgt_len 462 assert embed_dim == self.embed_dim 463 assert list(query.size()) == [tgt_len, bsz, embed_dim] 464 if key is not None: 465 src_len, key_bsz, _ = key.size()
459 460 tgt_len, bsz, embed_dim = query.size() 461 src_len = tgt_len 462 assert embed_dim == self.embed_dim 463 assert list(query.size()) == [tgt_len, bsz, embed_dim] 464 if key is not None: 465 src_len, key_bsz, _ = key.size() 466 if not torch.jit.is_scripting():
463 assert list(query.size()) == [tgt_len, bsz, embed_dim] 464 if key is not None: 465 src_len, key_bsz, _ = key.size() 466 if not torch.jit.is_scripting(): 467 assert key_bsz == bsz 468 assert value is not None 469 assert (src_len, bsz) == value.shape[:2] 470
464 if key is not None: 465 src_len, key_bsz, _ = key.size() 466 if not torch.jit.is_scripting(): 467 assert key_bsz == bsz 468 assert value is not None 469 assert (src_len, bsz) == value.shape[:2] 470 471 if self.has_relative_attention_bias and position_bias is None:
465 src_len, key_bsz, _ = key.size() 466 if not torch.jit.is_scripting(): 467 assert key_bsz == bsz 468 assert value is not None 469 assert (src_len, bsz) == value.shape[:2] 470 471 if self.has_relative_attention_bias and position_bias is None: 472 position_bias = self.compute_bias(tgt_len, src_len)
480 # treats bias in linear module as method. 481 and not torch.jit.is_scripting() 482 and self.q_head_dim == self.head_dim 483 ): 484 assert key is not None and value is not None 485 assert attn_mask is None 486 487 attn_mask_rel_pos = None
481 and not torch.jit.is_scripting() 482 and self.q_head_dim == self.head_dim 483 ): 484 assert key is not None and value is not None 485 assert attn_mask is None 486 487 attn_mask_rel_pos = None 488 if position_bias is not None:
536 if saved_state is not None and "prev_key" in saved_state: 537 # previous time steps are cached - no need to recompute 538 # key and value if they are static 539 if static_kv: 540 assert self.encoder_decoder_attention and not self.self_attention 541 key = value = None 542 else: 543 saved_state = None
549 elif self.encoder_decoder_attention: 550 # encoder-decoder attention 551 q = self.q_proj(query) 552 if key is None: 553 assert value is None 554 k = v = None 555 else: 556 k = self.k_proj(key)
556 k = self.k_proj(key) 557 v = self.v_proj(key) 558 559 else: 560 assert key is not None and value is not None 561 q = self.q_proj(query) 562 k = self.k_proj(key) 563 v = self.v_proj(value)
563 v = self.v_proj(value) 564 q *= self.scaling 565 566 if self.bias_k is not None: 567 assert self.bias_v is not None 568 k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) 569 v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) 570 if attn_mask is not None:
587 if saved_state is not None: 588 # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) 589 if "prev_key" in saved_state: 590 _prev_key = saved_state["prev_key"] 591 assert _prev_key is not None 592 prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) 593 if static_kv: 594 k = prev_key
592 prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) 593 if static_kv: 594 k = prev_key 595 else: 596 assert k is not None 597 k = torch.cat([prev_key, k], dim=1) 598 src_len = k.size(1) 599 if "prev_value" in saved_state:
597 k = torch.cat([prev_key, k], dim=1) 598 src_len = k.size(1) 599 if "prev_value" in saved_state: 600 _prev_value = saved_state["prev_value"] 601 assert _prev_value is not None 602 prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) 603 if static_kv: 604 v = prev_value
602 prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) 603 if static_kv: 604 v = prev_value 605 else: 606 assert v is not None 607 v = torch.cat([prev_value, v], dim=1) 608 prev_key_padding_mask: Optional[Tensor] = None 609 if "prev_key_padding_mask" in saved_state:
607 v = torch.cat([prev_value, v], dim=1) 608 prev_key_padding_mask: Optional[Tensor] = None 609 if "prev_key_padding_mask" in saved_state: 610 prev_key_padding_mask = saved_state["prev_key_padding_mask"] 611 assert k is not None and v is not None 612 key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( 613 key_padding_mask=key_padding_mask, 614 prev_key_padding_mask=prev_key_padding_mask,
620 saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) 621 saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim) 622 saved_state["prev_key_padding_mask"] = key_padding_mask 623 # In this branch incremental_state is never None 624 assert incremental_state is not None 625 incremental_state = self._set_input_buffer(incremental_state, saved_state) 626 assert k is not None 627 assert k.size(1) == src_len
622 saved_state["prev_key_padding_mask"] = key_padding_mask 623 # In this branch incremental_state is never None 624 assert incremental_state is not None 625 incremental_state = self._set_input_buffer(incremental_state, saved_state) 626 assert k is not None 627 assert k.size(1) == src_len 628 629 # This is part of a workaround to get around fork/join parallelism
623 # In this branch incremental_state is never None 624 assert incremental_state is not None 625 incremental_state = self._set_input_buffer(incremental_state, saved_state) 626 assert k is not None 627 assert k.size(1) == src_len 628 629 # This is part of a workaround to get around fork/join parallelism 630 # not supporting Optional types.
631 if key_padding_mask is not None and key_padding_mask.dim() == 0: 632 key_padding_mask = None 633 634 if key_padding_mask is not None: 635 assert key_padding_mask.size(0) == bsz 636 assert key_padding_mask.size(1) == src_len 637 638 if self.add_zero_attn:
632 key_padding_mask = None 633 634 if key_padding_mask is not None: 635 assert key_padding_mask.size(0) == bsz 636 assert key_padding_mask.size(1) == src_len 637 638 if self.add_zero_attn: 639 assert v is not None
635 assert key_padding_mask.size(0) == bsz 636 assert key_padding_mask.size(1) == src_len 637 638 if self.add_zero_attn: 639 assert v is not None 640 src_len += 1 641 k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) 642 v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
653 654 attn_weights = torch.bmm(q, k.transpose(1, 2)) 655 attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) 656 657 assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] 658 659 if attn_mask is not None: 660 attn_mask = attn_mask.unsqueeze(0)
694 attn_weights_float = F.softmax(attn_weights, dim=-1) 695 attn_weights = attn_weights_float.type_as(attn_weights) 696 attn_probs = self.dropout_module(attn_weights) 697 698 assert v is not None 699 attn = torch.bmm(attn_probs, v) 700 assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] 701 attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
696 attn_probs = self.dropout_module(attn_weights) 697 698 assert v is not None 699 attn = torch.bmm(attn_probs, v) 700 assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] 701 attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) 702 attn = self.out_proj(attn) 703 attn_weights: Optional[Tensor] = None
223 super().__init__() 224 logger.info(f"WavLM Config: {cfg.__dict__}") 225 226 self.cfg = cfg 227 feature_enc_layers = eval(cfg.conv_feature_layers) 228 self.embed = feature_enc_layers[-1][0] 229 230 self.feature_extractor = ConvFeatureExtractionModel(
370 conv_type: str = "default", 371 ): 372 super().__init__() 373 374 assert mode in {"default", "layer_norm"} 375 376 def block( 377 n_in,
386 conv = nn.Conv1d(n_in, n_out, k, stride=stride, bias=conv_bias) 387 nn.init.kaiming_normal_(conv.weight) 388 return conv 389 390 assert not (is_layer_norm and is_group_norm), "layer norm and group norm are exclusive" 391 392 if is_layer_norm: 393 return nn.Sequential(
414 if self.conv_type == "default": 415 in_d = 1 416 self.conv_layers = nn.ModuleList() 417 for i, cl in enumerate(conv_layers): 418 assert len(cl) == 3, "invalid conv definition: " + str(cl) 419 (dim, k, stride) = cl 420 421 self.conv_layers.append(
433 elif self.conv_type == "conv2d": 434 in_d = 1 435 self.conv_layers = nn.ModuleList() 436 for i, cl in enumerate(conv_layers): 437 assert len(cl) == 3 438 (dim, k, stride) = cl 439 440 self.conv_layers.append(torch.nn.Conv2d(in_d, dim, k, stride))
444 in_d = 1 445 idim = 80 446 self.conv_layers = nn.ModuleList() 447 for i, cl in enumerate(conv_layers): 448 assert len(cl) == 3 449 (dim, k, stride) = cl 450 self.conv_layers.append(torch.nn.Conv2d(in_d, dim, k, stride, padding=1)) 451 self.conv_layers.append(torch.nn.LayerNorm([dim, idim]))
44 self.use_cache = use_cache 45 self.use_noise_augment = use_noise_augment 46 self.verbose = verbose 47 48 assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len." 49 self.feat_frame_len = seq_len // hop_len + (2 * conv_pad) 50 51 # map G and D instances
130 131 # correct the audio length wrt padding applied in stft 132 audio = np.pad(audio, (0, self.hop_len), mode="edge") 133 audio = audio[: mel.shape[-1] * self.hop_len] 134 assert ( 135 mel.shape[-1] * self.hop_len == audio.shape[-1] 136 ), f" [!] {mel.shape[-1] * self.hop_len} vs {audio.shape[-1]}" 137 138 audio = torch.from_numpy(audio).float().unsqueeze(0) 139 mel = torch.from_numpy(mel).float().squeeze(0)
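A small numeric illustration of the length correction above, with made-up sizes: after edge-padding by one hop and trimming, the audio is exactly mel.shape[-1] * hop_len samples long.

import numpy as np

hop_len, n_frames = 256, 100                         # made-up sizes
audio = np.random.randn(n_frames * hop_len - 40)     # a little shorter than n_frames * hop_len

audio = np.pad(audio, (0, hop_len), mode="edge")     # pad one hop at the end
audio = audio[: n_frames * hop_len]                  # trim to an exact multiple of hop_len
assert n_frames * hop_len == audio.shape[-1]         # 25600 == 25600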
139 mel = torch.from_numpy(mel).float().squeeze(0) 140 141 if self.return_segments: 142 max_mel_start = mel.shape[1] - self.feat_frame_len 143 mel_start = random.randint(0, max_mel_start) 144 mel_end = mel_start + self.feat_frame_len 145 mel = mel[:, mel_start:mel_end] 146
49 50 51 def load_wav_data(data_path, eval_split_size, file_ext="wav"): 52 wav_paths = find_wav_files(data_path, file_ext=file_ext) 53 assert len(wav_paths) > 0, f" [!] {data_path} is empty." 54 np.random.seed(0) 55 np.random.shuffle(wav_paths) 56 return wav_paths[:eval_split_size], wav_paths[eval_split_size:]
62 63 wav_paths.sort(key=lambda x: Path(x).stem) 64 feat_paths.sort(key=lambda x: Path(x).stem) 65 66 assert len(wav_paths) == len(feat_paths), f" [!] {len(wav_paths)} vs {feat_paths}" 67 for wav, feat in zip(wav_paths, feat_paths): 68 wav_name = Path(wav).stem 69 feat_name = Path(feat).stem
66 assert len(wav_paths) == len(feat_paths), f" [!] {len(wav_paths)} vs {feat_paths}" 67 for wav, feat in zip(wav_paths, feat_paths): 68 wav_name = Path(wav).stem 69 feat_name = Path(feat).stem 70 assert wav_name == feat_name 71 72 items = list(zip(wav_paths, feat_paths)) 73 np.random.seed(0)
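A minimal sketch of the wav/feature pairing check above, using made-up paths; the idea is that both lists must line up one-to-one after sorting by file stem:

from pathlib import Path

wav_paths = ["data/wavs/a.wav", "data/wavs/b.wav"]
feat_paths = ["data/feats/b.npy", "data/feats/a.npy"]

wav_paths.sort(key=lambda x: Path(x).stem)
feat_paths.sort(key=lambda x: Path(x).stem)

assert len(wav_paths) == len(feat_paths), f" [!] {len(wav_paths)} vs {len(feat_paths)}"
for wav, feat in zip(wav_paths, feat_paths):
    # every waveform must have a feature file with the same stem
    assert Path(wav).stem == Path(feat).stem

items = list(zip(wav_paths, feat_paths))
print(items)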
43 self.use_noise_augment = use_noise_augment 44 self.verbose = verbose 45 46 if return_segments: 47 assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len." 48 self.feat_frame_len = seq_len // hop_len + (2 * conv_pad) 49 50 # cache acoustic features
104 if audio.shape[-1] < self.seq_len + self.pad_short: 105 audio = np.pad( 106 audio, (0, self.seq_len + self.pad_short - len(audio)), mode="constant", constant_values=0.0 107 ) 108 assert ( 109 audio.shape[-1] >= self.seq_len + self.pad_short 110 ), f"{audio.shape[-1]} vs {self.seq_len + self.pad_short}" 111 112 # correct the audio length wrt hop length 113 p = (audio.shape[-1] // self.hop_len + 1) * self.hop_len - audio.shape[-1]
117 self.cache[idx] = audio 118 119 if self.return_segments: 120 max_start = len(audio) - self.seq_len 121 start = random.randint(0, max_start) 122 end = start + self.seq_len 123 audio = audio[start:end] 124
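A minimal sketch of the hop-alignment padding and the random fixed-length crop used in the GAN dataset above; the array length, hop length and segment length are toy values:

import random
import numpy as np

hop_len, seq_len = 256, 8192
audio = np.random.randn(22050).astype(np.float32)

# pad up to the next multiple of hop_len, mirroring the excerpt's arithmetic
p = (audio.shape[-1] // hop_len + 1) * hop_len - audio.shape[-1]
audio = np.pad(audio, (0, p), mode="edge")

# random crop of exactly seq_len samples
max_start = len(audio) - seq_len
start = random.randint(0, max_start)
segment = audio[start:start + seq_len]
assert segment.shape[0] == seq_len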
27 self.is_training = is_training 28 self.verbose = verbose 29 self.return_segments = return_segments 30 31 assert self.seq_len % self.hop_len == 0 32 33 def __len__(self): 34 return len(self.item_list)
225 """ 226 227 def __init__(self, C): 228 super().__init__() 229 assert not ( 230 C.use_mse_gan_loss and C.use_hinge_gan_loss 231 ), " [!] Cannot use HingeGANLoss and MSEGANLoss together." 232 233 self.use_stft_loss = C.use_stft_loss if "use_stft_loss" in C else False 234 self.use_subband_stft_loss = C.use_subband_stft_loss if "use_subband_stft_loss" in C else False
254 self.hinge_loss = HingeGLoss() 255 if C.use_feat_match_loss: 256 self.feat_match_loss = MelganFeatureLoss() 257 if C.use_l1_spec_loss: 258 assert C.audio["sample_rate"] == C.l1_spec_loss_params["sample_rate"] 259 self.l1_spec_loss = L1SpecLoss(**C.l1_spec_loss_params) 260 261 def forward(
312 """Like ```GeneratorLoss```""" 313 314 def __init__(self, C): 315 super().__init__() 316 assert not ( 317 C.use_mse_gan_loss and C.use_hinge_gan_loss 318 ), " [!] Cannot use HingeGANLoss and MSEGANLoss together." 319 320 self.use_mse_gan_loss = C.use_mse_gan_loss 321 self.use_hinge_gan_loss = C.use_hinge_gan_loss
174 """ 175 batch, _, in_length = x.shape 176 batch, _, out_channels, kernel_size, kernel_length = kernel.shape 177 178 assert in_length == ( 179 kernel_length * hop_size 180 ), f"length of (x, kernel) is not matched, {in_length} vs {kernel_length * hop_size}" 181 182 padding = dilation * int((kernel_size - 1) / 2) 183 x = F.pad(x, (padding, padding), "constant", 0) # (batch, in_channels, in_length + 2*padding)
6 class ResidualStack(nn.Module): 7 def __init__(self, channels, num_res_blocks, kernel_size): 8 super().__init__() 9 10 assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd." 11 base_padding = (kernel_size - 1) // 2 12 13 self.blocks = nn.ModuleList()
22 # no future time stamps available 23 if use_causal_conv: 24 padding = (kernel_size - 1) * dilation 25 else: 26 assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." 27 padding = (kernel_size - 1) // 2 * dilation 28 self.use_causal_conv = use_causal_conv 29
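A worked instance of the odd-kernel "same" padding rule that recurs in these blocks, with illustrative values: for stride 1, a padding of (kernel_size - 1) // 2 * dilation keeps the output length equal to the input length.

kernel_size, dilation = 7, 2
assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size."
padding = (kernel_size - 1) // 2 * dilation
# output_len = input_len + 2 * padding - dilation * (kernel_size - 1) = input_len
print(padding)  # 6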
60 xa, xb = x.split(x.size(splitdim) // 2, dim=splitdim) 61 62 # local conditioning 63 if c is not None: 64 assert self.conv1x1_aux is not None 65 c = self.conv1x1_aux(c) 66 ca, cb = c.split(c.size(splitdim) // 2, dim=splitdim) 67 xa, xb = xa + ca, xb + cb
36 stretch = Stretch2d(scale, 1, interpolate_mode) 37 self.up_layers += [stretch] 38 39 # conv layer 40 assert (freq_axis_kernel_size - 1) % 2 == 0, "Not support even number freq axis kernel size." 41 freq_axis_padding = (freq_axis_kernel_size - 1) // 2 42 kernel_size = (freq_axis_kernel_size, scale * 2 + 1) 43 if use_causal_conv:
73 74 class UBlock(nn.Module): 75 def __init__(self, input_size, hidden_size, factor, dilation): 76 super().__init__() 77 assert isinstance(dilation, (list, tuple)) 78 assert len(dilation) == 4 79 80 self.factor = factor
74 class UBlock(nn.Module): 75 def __init__(self, input_size, hidden_size, factor, dilation): 76 super().__init__() 77 assert isinstance(dilation, (list, tuple)) 78 assert len(dilation) == 4 79 80 self.factor = factor 81 self.res_block = Conv1d(input_size, hidden_size, 1)
296 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) 297 self.load_state_dict(state["model"]) 298 if eval: 299 self.eval() 300 assert not self.training 301 self.remove_weight_norm()
19 ): 20 super().__init__() 21 22 # assert model parameters 23 assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number." 24 25 # setup additional model parameters 26 base_padding = (proj_kernel - 1) // 2
90 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) 91 self.load_state_dict(state["model"]) 92 if eval: 93 self.eval() 94 assert not self.training 95 self.remove_weight_norm()
27 nonlinear_activation_params={"negative_slope": 0.2}, 28 bias=True, 29 ): 30 super().__init__() 31 assert (kernel_size - 1) % 2 == 0, " [!] does not support even number kernel size." 32 assert dilation_factor > 0, " [!] dilation factor must be > 0." 33 self.conv_layers = nn.ModuleList() 34 conv_in_channels = in_channels
28 bias=True, 29 ): 30 super().__init__() 31 assert (kernel_size - 1) % 2 == 0, " [!] does not support even number kernel size." 32 assert dilation_factor > 0, " [!] dilation factor must be > 0." 33 self.conv_layers = nn.ModuleList() 34 conv_in_channels = in_channels 35 for i in range(num_layers - 1):
101 nonlinear_activation="LeakyReLU", 102 nonlinear_activation_params={"negative_slope": 0.2}, 103 ): 104 super().__init__() 105 assert (kernel_size - 1) % 2 == 0, "Not support even number kernel size." 106 107 self.in_channels = in_channels 108 self.out_channels = out_channels
111 self.kernel_size = kernel_size 112 self.res_factor = math.sqrt(1.0 / num_layers) 113 114 # check the number of num_layers and stacks 115 assert num_layers % stacks == 0 116 layers_per_stack = num_layers // stacks 117 118 # define first convolution
46 self.inference_padding = inference_padding 47 self.use_weight_norm = use_weight_norm 48 49 # check the number of layers and stacks 50 assert num_res_blocks % stacks == 0 51 layers_per_stack = num_res_blocks // stacks 52 53 # define first convolution
97 98 # perform upsampling 99 if c is not None and self.upsample_net is not None: 100 c = self.upsample_net(c) 101 assert ( 102 c.shape[-1] == x.shape[-1] 103 ), f" [!] Upsampling scale does not match the expected output. {c.shape} vs {x.shape}" 104 105 # encode to hidden representation 106 x = self.first_conv(x)
142 self.apply(_apply_weight_norm) 143 144 @staticmethod 145 def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x): 146 assert layers % stacks == 0 147 layers_per_cycle = layers // stacks 148 dilations = [dilation(i % layers_per_cycle) for i in range(layers)] 149 return (kernel_size - 1) * sum(dilations) + 1
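A worked example of the receptive-field formula above, with common WaveNet-style settings (illustrative, not necessarily the repo defaults):

def receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2 ** x):
    assert layers % stacks == 0
    layers_per_cycle = layers // stacks
    dilations = [dilation(i % layers_per_cycle) for i in range(layers)]
    return (kernel_size - 1) * sum(dilations) + 1

# 30 layers = 3 cycles of dilations 1, 2, ..., 512 -> (3 - 1) * 3 * 1023 + 1 = 6139 samples
print(receptive_field_size(layers=30, stacks=3, kernel_size=3))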
158 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) 159 self.load_state_dict(state["model"]) 160 if eval: 161 self.eval() 162 assert not self.training 163 if self.use_weight_norm: 164 self.remove_weight_norm()
63 class ConditionalDiscriminator(nn.Module): 64 def __init__(self, in_channels, cond_channels, downsample_factors=(2, 2, 2), out_channels=(128, 256)): 65 super().__init__() 66 67 assert len(downsample_factors) == len(out_channels) + 1 68 69 self.in_channels = in_channels 70 self.cond_channels = cond_channels
154 self.base_window_size = self.hop_length * 2 155 self.ks = [ws // self.base_window_size for ws in window_sizes] 156 157 # check arguments 158 assert len(cond_disc_downsample_factors) == len(cond_disc_out_channels) == len(window_sizes) 159 for ws in window_sizes: 160 assert ws % hop_length == 0 161
156 157 # check arguments 158 assert len(cond_disc_downsample_factors) == len(cond_disc_out_channels) == len(window_sizes) 159 for ws in window_sizes: 160 assert ws % hop_length == 0 161 162 for idx, cf in enumerate(cond_disc_downsample_factors): 163 assert np.prod(cf) == hop_length // self.ks[idx]
159 for ws in window_sizes: 160 assert ws % hop_length == 0 161 162 for idx, cf in enumerate(cond_disc_downsample_factors): 163 assert np.prod(cf) == hop_length // self.ks[idx] 164 165 # define layers 166 self.unconditional_discriminators = nn.ModuleList([])
131 self.apply(_apply_weight_norm) 132 133 @staticmethod 134 def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x): 135 assert layers % stacks == 0 136 layers_per_cycle = layers // stacks 137 dilations = [dilation(i % layers_per_cycle) for i in range(layers)] 138 return (kernel_size - 1) * sum(dilations) + 1
224 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) 225 self.load_state_dict(state["model"]) 226 if eval: 227 self.eval() 228 assert not self.training 229 if self.config.model_params.use_weight_norm: 230 self.remove_weight_norm() 231 betas = np.linspace(
237 self.ap = AudioProcessor(**config.audio.to_dict()) 238 self.aux_dims = self.args.res_out_dims // 4 239 240 if self.args.use_upsample_net: 241 assert ( 242 np.cumproduct(self.args.upsample_factors)[-1] == config.audio.hop_length 243 ), " [!] upsample scales needs to be equal to hop_length" 244 self.upsample = UpsampleNetwork( 245 self.args.feat_dims, 246 self.args.upsample_factors,
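A toy check of the constraint asserted above: the product of the upsample factors must equal the hop length. The numbers are illustrative; np.prod gives the same value as the last element of np.cumproduct.

import numpy as np

upsample_factors = (4, 8, 8)
hop_length = 256
# 4 * 8 * 8 == 256, so the upsample network expands one feature frame to one hop of samples
assert np.prod(upsample_factors) == hop_length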
546 state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) 547 self.load_state_dict(state["model"]) 548 if eval: 549 self.eval() 550 assert not self.training 551 552 def train_step(self, batch: Dict, criterion: Dict) -> Tuple[Dict, Dict]: 553 mels = batch["input"]
6 from torch.distributions.normal import Normal 7 8 9 def gaussian_loss(y_hat, y, log_std_min=-7.0): 10 assert y_hat.dim() == 3 11 assert y_hat.size(2) == 2 12 mean = y_hat[:, :, :1] 13 log_std = torch.clamp(y_hat[:, :, 1:], min=log_std_min)
7 8 9 def gaussian_loss(y_hat, y, log_std_min=-7.0): 10 assert y_hat.dim() == 3 11 assert y_hat.size(2) == 2 12 mean = y_hat[:, :, :1] 13 log_std = torch.clamp(y_hat[:, :, 1:], min=log_std_min) 14 # TODO: replace with pytorch dist
16 return log_probs.squeeze().mean() 17 18 19 def sample_from_gaussian(y_hat, log_std_min=-7.0, scale_factor=1.0): 20 assert y_hat.size(2) == 2 21 mean = y_hat[:, :, :1] 22 log_std = torch.clamp(y_hat[:, :, 1:], min=log_std_min) 23 dist = Normal(
43 def discretized_mix_logistic_loss(y_hat, y, num_classes=65536, log_scale_min=None, reduce=True): 44 if log_scale_min is None: 45 log_scale_min = float(np.log(1e-14)) 46 y_hat = y_hat.permute(0, 2, 1) 47 assert y_hat.dim() == 3 48 assert y_hat.size(1) % 3 == 0 49 nr_mix = y_hat.size(1) // 3 50
44 if log_scale_min is None: 45 log_scale_min = float(np.log(1e-14)) 46 y_hat = y_hat.permute(0, 2, 1) 47 assert y_hat.dim() == 3 48 assert y_hat.size(1) % 3 == 0 49 nr_mix = y_hat.size(1) // 3 50 51 # (B x T x C)
119 Tensor: sample in range of [-1, 1]. 120 """ 121 if log_scale_min is None: 122 log_scale_min = float(np.log(1e-14)) 123 assert y.size(1) % 3 == 0 124 nr_mix = y.size(1) // 3 125 126 # B x T x C
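A toy illustration of the channel layout asserted above: every logistic mixture component needs a weight, a mean and a log-scale channel, so the output channel count must be divisible by three.

num_mixtures = 10            # illustrative value
channels = 3 * num_mixtures  # 30 output channels

assert channels % 3 == 0
nr_mix = channels // 3
# channel slices: [0:nr_mix] mixture logits, [nr_mix:2*nr_mix] means, [2*nr_mix:] log-scales
print(nr_mix)  # 10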
4 import time 5 import folder_paths 6 from scipy.io import wavfile 7 from scipy.io.wavfile import write 8 import subprocess 9 import sounddevice 10 import numpy as np 11 import torchaudio
1 import os 2 import subprocess 3 4 class DeepFuze: 5 def __init__(self): 6 self.video_path = "" 7 self.audio_path = "" 8 self.output_path = ""
56 if device=="cuda": 57 command.extend(['--execution-providers',"cuda"]) 58 elif device=="mps": 59 command.extend(['--execution-providers',"coreml"]) 60 result = subprocess.run(command,stdout=subprocess.PIPE) 61 print("Output:", result.stdout) 62 print("Errors:", result.stderr) 63 print(f"Lipsynced video saved at {output_path}")
1 import os 2 import subprocess 3 import ssl 4 import urllib.request 5 from typing import List 6 from functools import lru_cache 7 from tqdm import tqdm 8
22 initial_size = get_file_size(download_file_path) 23 download_size = get_download_size(url) 24 if initial_size < download_size: 25 with tqdm(total = download_size, initial = initial_size, desc = wording.get('downloading'), unit = 'B', unit_scale = True, unit_divisor = 1024, ascii = ' =', disable = deepfuze.globals.log_level in [ 'warn', 'error' ]) as progress: 26 subprocess.Popen([ 'curl', '--create-dirs', '--silent', '--insecure', '--location', '--continue-at', '-', '--output', download_file_path, url ]) 27 current_size = initial_size 28 while current_size < download_size: 29 if is_file(download_file_path):
36 37 @lru_cache(maxsize = None) 38 def get_download_size(url : str) -> int: 39 try: 40 response = urllib.request.urlopen(url, timeout = 10) 41 return int(response.getheader('Content-Length')) 42 except (OSError, ValueError): 43 return 0
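A hedged usage sketch of a Content-Length probe like get_download_size above; the URL is a placeholder, and a missing header is treated as size 0 (the extra TypeError catch is an assumption, not part of the excerpt):

import urllib.request

def content_length(url: str) -> int:
    try:
        response = urllib.request.urlopen(url, timeout=10)
        return int(response.getheader('Content-Length'))
    except (OSError, ValueError, TypeError):
        # TypeError covers a response without a Content-Length header
        return 0

print(content_length('https://example.com/file.bin'))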
1 from typing import List, Any 2 from functools import lru_cache 3 import subprocess 4 import xml.etree.ElementTree as ElementTree 5 import onnxruntime 6 7 from deepfuze.typing import ExecutionDevice, ValueAndUnit 8
56 57 58 def run_nvidia_smi() -> subprocess.Popen[bytes]: 59 commands = [ 'nvidia-smi', '--query', '--xml-format' ] 60 return subprocess.Popen(commands, stdout = subprocess.PIPE) 61 62 63 @lru_cache(maxsize = None)
68 def detect_execution_devices() -> List[ExecutionDevice]: 69 execution_devices : List[ExecutionDevice] = [] 70 try: 71 output, _ = run_nvidia_smi().communicate() 72 root_element = ElementTree.fromstring(output) 73 except Exception: 74 root_element = ElementTree.Element('xml') 75
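A minimal sketch of the nvidia-smi XML parsing shown above; the XML string here is a fabricated stand-in for `nvidia-smi --query --xml-format` output, so any tag names beyond gpu/product_name are assumptions:

import xml.etree.ElementTree as ElementTree

output = b"<nvidia_smi_log><gpu><product_name>Example GPU</product_name></gpu></nvidia_smi_log>"

try:
    root_element = ElementTree.fromstring(output)
except Exception:
    # fall back to an empty element, as in the excerpt
    root_element = ElementTree.Element('xml')

for gpu in root_element.findall('gpu'):
    print(gpu.findtext('product_name'))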
28 FACE_STORE['static_faces'] = {} 29 30 31 def create_frame_hash(vision_frame : VisionFrame) -> Optional[str]: 32 return hashlib.sha1(vision_frame.tobytes()).hexdigest() if numpy.any(vision_frame) else None 33 34 35 def get_reference_faces() -> Optional[FaceSet]:
1 from typing import List, Optional 2 import os 3 import subprocess 4 import filetype 5 6 import deepfuze.globals 7 from deepfuze import logger, process_manager 8 from deepfuze.typing import OutputVideoPreset, Fps, AudioBuffer
12 13 def run_ffmpeg(args : List[str]) -> bool: 14 commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error' ] 15 commands.extend(args) 16 process = subprocess.Popen(commands, stderr = subprocess.PIPE, stdout = subprocess.PIPE) 17 18 while process_manager.is_processing(): 19 try:
27 28 def open_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]: 29 commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'quiet' ] 30 commands.extend(args) 31 return subprocess.Popen(commands, stdin = subprocess.PIPE, stdout = subprocess.PIPE) 32 33 34 def log_debug(process : subprocess.Popen[bytes]) -> None:
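A hedged sketch of invoking ffmpeg the way run_ffmpeg above does, but as a one-shot call without the process-manager loop; it assumes ffmpeg is on PATH and that input.mp4 exists (both are placeholders):

import subprocess

def run_ffmpeg_once(args):
    commands = ['ffmpeg', '-hide_banner', '-loglevel', 'error']
    commands.extend(args)
    completed = subprocess.run(commands, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if completed.returncode != 0:
        print(completed.stderr.decode(errors='ignore'))
    return completed.returncode == 0

# example: extract the audio track of a (hypothetical) input video
run_ffmpeg_once(['-i', 'input.mp4', '-vn', '-y', 'output.wav'])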
1 from typing import Dict, Tuple 2 import sys 3 import os 4 import tempfile 5 import subprocess 6 import inquirer 7 from argparse import ArgumentParser, HelpFormatter 8
56 if answers: 57 onnxruntime = answers['onnxruntime'] 58 onnxruntime_name, onnxruntime_version = ONNXRUNTIMES[onnxruntime] 59 60 subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--force-reinstall' ]) 61 if onnxruntime == 'rocm-5.4.2' or onnxruntime == 'rocm-5.6': 62 if python_id in [ 'cp39', 'cp310', 'cp311' ]: 63 rocm_version = onnxruntime.replace('-', '')
64 rocm_version = rocm_version.replace('.', '') 65 wheel_name = 'onnxruntime_training-' + onnxruntime_version + '+' + rocm_version + '-' + python_id + '-' + python_id + '-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' 66 wheel_path = os.path.join(tempfile.gettempdir(), wheel_name) 67 wheel_url = 'https://download.onnxruntime.ai/' + wheel_name 68 subprocess.call([ 'curl', '--silent', '--location', '--continue-at', '-', '--output', wheel_path, wheel_url ]) 69 subprocess.call([ 'pip', 'uninstall', wheel_path, '-y', '-q' ]) 70 subprocess.call([ 'pip', 'install', wheel_path, '--force-reinstall' ]) 71 os.remove(wheel_path)
65 wheel_name = 'onnxruntime_training-' + onnxruntime_version + '+' + rocm_version + '-' + python_id + '-' + python_id + '-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' 66 wheel_path = os.path.join(tempfile.gettempdir(), wheel_name) 67 wheel_url = 'https://download.onnxruntime.ai/' + wheel_name 68 subprocess.call([ 'curl', '--silent', '--location', '--continue-at', '-', '--output', wheel_path, wheel_url ]) 69 subprocess.call([ 'pip', 'uninstall', wheel_path, '-y', '-q' ]) 70 subprocess.call([ 'pip', 'install', wheel_path, '--force-reinstall' ]) 71 os.remove(wheel_path) 72 else:
66 wheel_path = os.path.join(tempfile.gettempdir(), wheel_name) 67 wheel_url = 'https://download.onnxruntime.ai/' + wheel_name 68 subprocess.call([ 'curl', '--silent', '--location', '--continue-at', '-', '--output', wheel_path, wheel_url ]) 69 subprocess.call([ 'pip', 'uninstall', wheel_path, '-y', '-q' ]) 70 subprocess.call([ 'pip', 'install', wheel_path, '--force-reinstall' ]) 71 os.remove(wheel_path) 72 else: 73 subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ])
69 subprocess.call([ 'pip', 'uninstall', wheel_path, '-y', '-q' ]) 70 subprocess.call([ 'pip', 'install', wheel_path, '--force-reinstall' ]) 71 os.remove(wheel_path) 72 else: 73 subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ]) 74 if onnxruntime == 'cuda-12.2': 75 subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--extra-index-url', 'https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple', '--force-reinstall' ]) 76 else:
71 os.remove(wheel_path) 72 else: 73 subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ]) 74 if onnxruntime == 'cuda-12.2': 75 subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--extra-index-url', 'https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple', '--force-reinstall' ]) 76 else: 77 subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--force-reinstall' ])
73 subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ]) 74 if onnxruntime == 'cuda-12.2': 75 subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--extra-index-url', 'https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple', '--force-reinstall' ]) 76 else: 77 subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--force-reinstall' ])
10 def normalize_output_path(target_path : Optional[str], output_path : Optional[str]) -> Optional[str]: 11 if target_path and output_path: 12 target_name, target_extension = os.path.splitext(os.path.basename(target_path)) 13 if is_directory(output_path): 14 output_hash = hashlib.sha1(str(deepfuze.globals.__dict__).encode('utf-8')).hexdigest()[:8] 15 output_name = target_name + '-' + output_hash 16 return os.path.join(output_path, output_name + target_extension) 17 output_name, output_extension = os.path.splitext(os.path.basename(output_path))
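A toy sketch of the output-name hashing idea above: derive a short hash from the current settings so runs with different options written into the same directory do not collide. The settings dict and paths are made up for illustration.

import hashlib
import os

settings = {'frame_processors': ['face_swapper'], 'execution_providers': ['cuda']}  # hypothetical globals
target_path = '/videos/target.mp4'
output_dir = '/outputs'

target_name, target_extension = os.path.splitext(os.path.basename(target_path))
output_hash = hashlib.sha1(str(settings).encode('utf-8')).hexdigest()[:8]
print(os.path.join(output_dir, target_name + '-' + output_hash + target_extension))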
1 from typing import Optional, Generator, Deque 2 import os 3 import subprocess 4 import cv2 5 import gradio 6 from time import sleep 7 from concurrent.futures import ThreadPoolExecutor 8 from collections import deque
1 #!/usr/bin/env python3 2 3 import os 4 import subprocess 5 6 os.environ['PIP_BREAK_SYSTEM_PACKAGES'] = '1' 7 subprocess.call([ 'pip', 'install', 'inquirer', '-q' ]) 8
3 import os 4 import subprocess 5 6 os.environ['PIP_BREAK_SYSTEM_PACKAGES'] = '1' 7 subprocess.call([ 'pip', 'install', 'inquirer', '-q' ]) 8 9 from deepfuze import installer 10
1 import os 2 import sys 3 import json 4 import subprocess 5 import numpy as np 6 import re 7 import cv2 8 import time
34 audio_dir = os.path.join(folder_paths.get_input_directory(),"audio") 35 36 try: 37 os.makedirs(result_dir) 38 except: pass 39 try: 40 os.makedirs(audio_dir) 41 except: pass
37 os.makedirs(result_dir) 38 except: pass 39 try: 40 os.makedirs(audio_dir) 41 except: pass 42 audio_extensions = ['mp3', 'mp4', 'wav', 'ogg'] 43 44
207 with open(video_format_path, 'r') as stream: 208 video_format = json.load(stream) 209 for w in gen_format_widgets(video_format): 210 print(w[0][0]) 211 assert(w[0][0] in kwargs) 212 if len(w[0]) > 3: 213 w[0] = Template(w[0][3]).substitute(val=kwargs[w[0][0]]) 214 else:
243 with open(metadata_path, "w") as f: 244 f.write(";FFMETADATA1\n") 245 f.write(metadata) 246 m_args = args[:1] + ["-i", metadata_path] + args[1:] + ["-metadata", "creation_time=now"] 247 with subprocess.Popen(m_args + [file_path], stderr=subprocess.PIPE, 248 stdin=subprocess.PIPE, env=env) as proc: 249 try: 250 while frame_data is not None: 251 proc.stdin.write(frame_data)
266 #Res was not set 267 print(err.decode("utf-8"), end="", file=sys.stderr) 268 print("An error occurred when saving with metadata") 269 if res != b'': 270 with subprocess.Popen(args + [file_path], stderr=subprocess.PIPE, 271 stdin=subprocess.PIPE, env=env) as proc: 272 try: 273 while frame_data is not None: 274 proc.stdin.write(frame_data)
286 print(res.decode("utf-8"), end="", file=sys.stderr) 287 288 def gifski_process(args, video_format, file_path, env): 289 frame_data = yield 290 with subprocess.Popen(args + video_format['main_pass'] + ['-f', 'yuv4mpegpipe', '-'], 291 stderr=subprocess.PIPE, stdin=subprocess.PIPE, 292 stdout=subprocess.PIPE, env=env) as procff: 293 with subprocess.Popen([gifski_path] + video_format['gifski_pass'] 294 + ['-q', '-o', file_path, '-'], stderr=subprocess.PIPE, 295 stdin=procff.stdout, stdout=subprocess.PIPE,
289 frame_data = yield 290 with subprocess.Popen(args + video_format['main_pass'] + ['-f', 'yuv4mpegpipe', '-'], 291 stderr=subprocess.PIPE, stdin=subprocess.PIPE, 292 stdout=subprocess.PIPE, env=env) as procff: 293 with subprocess.Popen([gifski_path] + video_format['gifski_pass'] 294 + ['-q', '-o', file_path, '-'], stderr=subprocess.PIPE, 295 stdin=procff.stdout, stdout=subprocess.PIPE, 296 env=env) as procgs: 297 try: 298 while frame_data is not None: 299 procff.stdin.write(frame_data)
777 images = [b''.join(images)] 778 os.makedirs(folder_paths.get_temp_directory(), exist_ok=True) 779 pre_pass_args = args[:13] + video_format['pre_pass'] 780 try: 781 subprocess.run(pre_pass_args, input=images[0], env=env, 782 capture_output=True, check=True) 783 except subprocess.CalledProcessError as e: 784 raise Exception("An error occurred in the ffmpeg prepass:\n" \ 785 + e.stderr.decode("utf-8"))
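A hedged sketch of the check=True / CalledProcessError pattern used for the ffmpeg prepass above, with a harmless placeholder command; it assumes ffmpeg is installed and on PATH:

import subprocess

try:
    subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
except subprocess.CalledProcessError as e:
    raise Exception("An error occurred in the ffmpeg prepass:\n" + e.stderr.decode("utf-8"))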
860 elif device=="mps": 861 command.extend(['--execution-providers',"coreml"]) 862 print(command) 863 if platform == "win32": 864 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 865 else: 866 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 867 # audio_file = os.path.join(audio_dir,str(time.time()).replace(".","")+".wav")
862 print(command) 863 if platform == "win32": 864 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 865 else: 866 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 867 # audio_file = os.path.join(audio_dir,str(time.time()).replace(".","")+".wav") 868 # subprocess.run(["ffmpeg","-i",faceswap_filename, audio_file, '-y']) 869 # ffmpeg -i sample.avi -q:a 0 -map a sample.mp3
887 elif device=="mps": 888 command.extend(['--execution-providers',"coreml"]) 889 print(command) 890 if platform == "win32": 891 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 892 else: 893 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 894
889 print(command) 890 if platform == "win32": 891 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 892 else: 893 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 894 895 if frame_enhancer!="None": 896 command = [
911 command.extend(['--execution-providers',"cuda"]) 912 elif device=="mps": 913 command.extend(['--execution-providers',"coreml"]) 914 if platform == "win32": 915 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 916 else: 917 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 918 # temp_file = "/".join(faceswap_filename.split("/")[:-1]) + "_"+faceswap_filename.split("/")[-1]
913 command.extend(['--execution-providers',"coreml"]) 914 if platform == "win32": 915 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 916 else: 917 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 918 # temp_file = "/".join(faceswap_filename.split("/")[:-1]) + "_"+faceswap_filename.split("/")[-1] 919 # subprocess.run(["ffmpeg","-i",faceswap_filename,"-i",audio_file,"-c","copy","-map","0:v:0","-map","1:a:0",temp_file,'-y']) 920 # faceswap_filename = temp_file
922 print(result.stderr) 923 if audio: 924 audio_file = os.path.join(audio_dir,str(time.time()).replace(".","")+".wav") 925 torchaudio.save(audio_file,audio["waveform"][0],audio["sample_rate"]) 926 subprocess.run(f"ffmpeg -i {faceswap_filename} -i {audio_file} -c copy {faceswap_filename.replace('.mp4','_.mp4')} -y".split()) 927 return load_video_cv(faceswap_filename.replace('.mp4','_.mp4'),0,'Disabled',512,512,0,0,1) 928 return load_video_cv(faceswap_filename,0,'Disabled',512,512,0,0,1) 929
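The excerpt above builds the audio-mux command by splitting an f-string, which breaks if any path contains spaces. A hedged alternative sketch that passes an argument list instead (file names are placeholders):

import subprocess

video_in = 'faceswap_output.mp4'
audio_in = 'voice.wav'
video_out = 'faceswap_output_.mp4'

# copy both streams without re-encoding and overwrite the output if it exists
subprocess.run(['ffmpeg', '-i', video_in, '-i', audio_in, '-c', 'copy', video_out, '-y'])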
1196 images = [b''.join(images)] 1197 os.makedirs(folder_paths.get_temp_directory(), exist_ok=True) 1198 pre_pass_args = args[:13] + video_format['pre_pass'] 1199 try: 1200 subprocess.run(pre_pass_args, input=images[0], env=env, 1201 capture_output=True, check=True) 1202 except subprocess.CalledProcessError as e: 1203 raise Exception("An error occurred in the ffmpeg prepass:\n" \ 1204 + e.stderr.decode("utf-8"))
1271 elif device=="mps": 1272 command.extend(['--execution-providers',"coreml"]) 1273 print(command) 1274 if platform == "win32": 1275 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1276 else: 1277 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1278 # print(result.stdout.splitlines()[-1])
1273 print(command) 1274 if platform == "win32": 1275 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1276 else: 1277 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1278 # print(result.stdout.splitlines()[-1]) 1279 if enhancer!="None": 1280 command = [
1295 elif device=="mps": 1296 command.extend(['--execution-providers',"coreml"]) 1297 print(command) 1298 if platform == "win32": 1299 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1300 else: 1301 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1302 filename = enhanced_filename
1297 print(command) 1298 if platform == "win32": 1299 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1300 else: 1301 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1302 filename = enhanced_filename 1303 1304 if frame_enhancer!="None":
1320 command.extend(['--execution-providers',"cuda"]) 1321 elif device=="mps": 1322 command.extend(['--execution-providers',"coreml"]) 1323 if platform == "win32": 1324 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1325 else: 1326 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1327 temp_file = enhanced_filename.replace(".mp4","_.mp4") # "/".join(enhanced_filename.split("/")[:-1]) + "_"+enhanced_filename.split("/")[-1]
1322 command.extend(['--execution-providers',"coreml"]) 1323 if platform == "win32": 1324 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1325 else: 1326 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1327 temp_file = enhanced_filename.replace(".mp4","_.mp4") # "/".join(enhanced_filename.split("/")[:-1]) + "_"+enhanced_filename.split("/")[-1] 1328 subprocess.run(["ffmpeg","-i",enhanced_filename,"-i",audio_file,"-c","copy","-map","0:v:0","-map","1:a:0",temp_file,'-y']) 1329 filename = temp_file
1324 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1325 else: 1326 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1327 temp_file = enhanced_filename.replace(".mp4","_.mp4") # "/".join(enhanced_filename.split("/")[:-1]) + "_"+enhanced_filename.split("/")[-1] 1328 subprocess.run(["ffmpeg","-i",enhanced_filename,"-i",audio_file,"-c","copy","-map","0:v:0","-map","1:a:0",temp_file,'-y']) 1329 filename = temp_file 1330 1331 print(result.stderr)
1427 '--output_file', file_path, 1428 '--device', device 1429 ] 1430 if platform == "win32": 1431 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1432 else: 1433 result = subprocess.run(command, cwd="custom_nodes/ComfyUI-DeepFuze",capture_output=True, text=True) 1434
1429 ] 1430 if platform == "win32": 1431 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1432 else: 1433 result = subprocess.run(command, cwd="custom_nodes/ComfyUI-DeepFuze",capture_output=True, text=True) 1434 1435 print("stdout:", result.stdout) 1436 print("stderr:", result.stderr)
1494 '--headless' 1495 ] 1496 print(command) 1497 if platform == "win32": 1498 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1499 else: 1500 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1501 print(result.stdout)
1496 print(command) 1497 if platform == "win32": 1498 result = subprocess.run(command,cwd="ComfyUI/custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1499 else: 1500 result = subprocess.run(command,cwd="custom_nodes/ComfyUI-DeepFuze",stdout=subprocess.PIPE) 1501 print(result.stdout) 1502 results.append({ 1503 "filename": "_"+file,
1 import subprocess 2 import pytest 3 4 from deepfuze.audio import get_audio_frame, read_static_audio 5 from deepfuze.download import conditional_download 6 7 8 @pytest.fixture(scope = 'module', autouse = True)
10 conditional_download('../../models/facefusion/examples', 11 [ 12 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3' 13 ]) 14 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.wav' ]) 15 16 17 def test_get_audio_frame() -> None:
14 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.wav' ]) 15 16 17 def test_get_audio_frame() -> None: 18 assert get_audio_frame('../../models/facefusion/examples/source.mp3', 25) is not None 19 assert get_audio_frame('../../models/facefusion/examples/source.wav', 25) is not None 20 assert get_audio_frame('invalid', 25) is None 21
15 16 17 def test_get_audio_frame() -> None: 18 assert get_audio_frame('../../models/facefusion/examples/source.mp3', 25) is not None 19 assert get_audio_frame('../../models/facefusion/examples/source.wav', 25) is not None 20 assert get_audio_frame('invalid', 25) is None 21 22
16 17 def test_get_audio_frame() -> None: 18 assert get_audio_frame('../../models/facefusion/examples/source.mp3', 25) is not None 19 assert get_audio_frame('../../models/facefusion/examples/source.wav', 25) is not None 20 assert get_audio_frame('invalid', 25) is None 21 22 23 def test_read_static_audio() -> None:
20 assert get_audio_frame('invalid', 25) is None 21 22 23 def test_read_static_audio() -> None: 24 assert len(read_static_audio('../../models/facefusion/examples/source.mp3', 25)) == 280 25 assert len(read_static_audio('../../models/facefusion/examples/source.wav', 25)) == 280 26 assert read_static_audio('invalid', 25) is None
21 22 23 def test_read_static_audio() -> None: 24 assert len(read_static_audio('../../models/facefusion/examples/source.mp3', 25)) == 280 25 assert len(read_static_audio('../../models/facefusion/examples/source.wav', 25)) == 280 26 assert read_static_audio('invalid', 25) is None
22 23 def test_read_static_audio() -> None: 24 assert len(read_static_audio('../../models/facefusion/examples/source.mp3', 25)) == 280 25 assert len(read_static_audio('../../models/facefusion/examples/source.wav', 25)) == 280 26 assert read_static_audio('invalid', 25) is None
1 import subprocess 2 import sys 3 import pytest 4 5 from deepfuze.download import conditional_download 6 7 8 @pytest.fixture(scope = 'module', autouse = True)
11 [ 12 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', 13 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' 14 ]) 15 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ]) 16 17 18 def test_debug_face_to_image() -> None:
16 17 18 def test_debug_face_to_image() -> None: 19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_debug_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode()
18 def test_debug_face_to_image() -> None: 19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_debug_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode() 24 25
19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_debug_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode() 24 25 26 def test_debug_face_to_video() -> None:
24 25 26 def test_debug_face_to_video() -> None: 27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_debug_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
26 def test_debug_face_to_video() -> None: 27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_debug_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_debug_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
1 import subprocess 2 import sys 3 import pytest 4 5 from deepfuze.download import conditional_download 6 7 8 @pytest.fixture(scope = 'module', autouse = True)
11 [ 12 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', 13 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' 14 ]) 15 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ]) 16 17 18 def test_enhance_face_to_image() -> None:
16 17 18 def test_enhance_face_to_image() -> None: 19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_enhance_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode()
18 def test_enhance_face_to_image() -> None: 19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_enhance_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode() 24 25
19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_enhance_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode() 24 25 26 def test_enhance_face_to_video() -> None:
24 25 26 def test_enhance_face_to_video() -> None: 27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_enhance_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
26 def test_enhance_face_to_video() -> None: 27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_enhance_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode() 32
27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_enhance_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode() 32
1 import subprocess 2 import sys 3 import pytest 4 5 from deepfuze.download import conditional_download 6 7 8 @pytest.fixture(scope = 'module', autouse = True)
11 [ 12 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', 13 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' 14 ]) 15 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ]) 16 17 18 def test_swap_face_to_image() -> None:
16 17 18 def test_swap_face_to_image() -> None: 19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '../../models/facefusion/examples/source.jpg', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_swap_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode()
18 def test_swap_face_to_image() -> None: 19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '../../models/facefusion/examples/source.jpg', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_swap_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode() 24 25
19 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '../../models/facefusion/examples/source.jpg', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_swap_face_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode() 24 25 26 def test_swap_face_to_video() -> None:
24 25 26 def test_swap_face_to_video() -> None: 27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '../../models/facefusion/examples/source.jpg', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_swap_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
26 def test_swap_face_to_video() -> None: 27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '../../models/facefusion/examples/source.jpg', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_swap_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
27 commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '../../models/facefusion/examples/source.jpg', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_swap_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
1 import subprocess 2 import sys 3 import pytest 4 5 from deepfuze.download import conditional_download 6 7 8 @pytest.fixture(scope = 'module', autouse = True)
11 [ 12 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', 13 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' 14 ]) 15 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '-vf', 'hue=s=0', '../../models/facefusion/examples/target-240p-0sat.jpg' ]) 16 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'hue=s=0', '../../models/facefusion/examples/target-240p-0sat.mp4' ]) 17 18
12 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', 13 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' 14 ]) 15 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '-vf', 'hue=s=0', '../../models/facefusion/examples/target-240p-0sat.jpg' ]) 16 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'hue=s=0', '../../models/facefusion/examples/target-240p-0sat.mp4' ]) 17 18 19 def test_colorize_frame_to_image() -> None:
17 18 19 def test_colorize_frame_to_image() -> None: 20 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '../../models/facefusion/examples/target-240p-0sat.jpg', '-o', '../../models/facefusion/examples/test_colorize_frame_to_image.jpg', '--headless' ] 21 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 22 23 assert run.returncode == 0 24 assert 'image succeed' in run.stdout.decode()
19 def test_colorize_frame_to_image() -> None: 20 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '../../models/facefusion/examples/target-240p-0sat.jpg', '-o', '../../models/facefusion/examples/test_colorize_frame_to_image.jpg', '--headless' ] 21 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 22 23 assert run.returncode == 0 24 assert 'image succeed' in run.stdout.decode() 25 26
20 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '../../models/facefusion/examples/target-240p-0sat.jpg', '-o', '../../models/facefusion/examples/test_colorize_frame_to_image.jpg', '--headless' ] 21 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 22 23 assert run.returncode == 0 24 assert 'image succeed' in run.stdout.decode() 25 26 27 def test_colorize_frame_to_video() -> None:
25 26 27 def test_colorize_frame_to_video() -> None: 28 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '../../models/facefusion/examples/target-240p-0sat.mp4', '-o', '../../models/facefusion/examples/test_colorize_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 29 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 30 31 assert run.returncode == 0 32 assert 'video succeed' in run.stdout.decode()
27 def test_colorize_frame_to_video() -> None: 28 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '../../models/facefusion/examples/target-240p-0sat.mp4', '-o', '../../models/facefusion/examples/test_colorize_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 29 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 30 31 assert run.returncode == 0 32 assert 'video succeed' in run.stdout.decode()
28 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_colorizer', '-t', '../../models/facefusion/examples/target-240p-0sat.mp4', '-o', '../../models/facefusion/examples/test_colorize_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 29 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 30 31 assert run.returncode == 0 32 assert 'video succeed' in run.stdout.decode()
1 import subprocess 2 import sys 3 import pytest 4 5 from deepfuze.download import conditional_download 6 7 8 @pytest.fixture(scope = 'module', autouse = True)
11 [ 12 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', 13 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' 14 ]) 15 subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ]) 16 17 18 def test_enhance_frame_to_image() -> None:
16 17 18 def test_enhance_frame_to_image() -> None: 19 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_enhance_frame_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode()
18 def test_enhance_frame_to_image() -> None: 19 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_enhance_frame_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode() 24 25
19 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_enhance_frame_to_image.jpg', '--headless' ] 20 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 21 22 assert run.returncode == 0 23 assert 'image succeed' in run.stdout.decode() 24 25 26 def test_enhance_frame_to_video() -> None:
24 25 26 def test_enhance_frame_to_video() -> None: 27 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_enhance_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
26 def test_enhance_frame_to_video() -> None: 27 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_enhance_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
27 commands = [ sys.executable, 'run.py', '--frame-processors', 'frame_enhancer', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_enhance_frame_to_video.mp4', '--trim-frame-end', '10', '--headless' ] 28 run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) 29 30 assert run.returncode == 0 31 assert 'video succeed' in run.stdout.decode()
import subprocess
import sys
import pytest

from deepfuze.download import conditional_download


@pytest.fixture(scope = 'module', autouse = True)
def before_all() -> None:  # fixture name not shown in this excerpt; assumed
    conditional_download('../../models/facefusion/examples',
    [
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3',
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4'
    ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ])


def test_sync_lip_to_image() -> None:
    commands = [ sys.executable, 'run.py', '--frame-processors', 'lip_syncer', '-s', '../../models/facefusion/examples/source.mp3', '-t', '../../models/facefusion/examples/target-240p.jpg', '-o', '../../models/facefusion/examples/test_sync_lip_to_image.jpg', '--headless' ]
    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)

    assert run.returncode == 0
    assert 'image succeed' in run.stdout.decode()


def test_sync_lip_to_video() -> None:
    commands = [ sys.executable, 'run.py', '--frame-processors', 'lip_syncer', '-s', '../../models/facefusion/examples/source.mp3', '-t', '../../models/facefusion/examples/target-240p.mp4', '-o', '../../models/facefusion/examples/test_sync_lip_to_video.mp4', '--trim-frame-end', '10', '--headless' ]
    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)

    assert run.returncode == 0
    assert 'video succeed' in run.stdout.decode()

from deepfuze.common_helper import create_metavar, create_int_range, create_float_range


def test_create_metavar() -> None:
    assert create_metavar([ 1, 2, 3, 4, 5 ]) == '[1-5]'


def test_create_int_range() -> None:
    assert create_int_range(0, 2, 1) == [ 0, 1, 2 ]
    assert create_float_range(0, 1, 1) == [ 0, 1 ]


def test_create_float_range() -> None:
    assert create_float_range(0.0, 1.0, 0.5) == [ 0.0, 0.5, 1.0 ]
    assert create_float_range(0.0, 1.0, 0.05) == [ 0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.0 ]
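
# A minimal sketch of range helpers consistent with the assertions above; an illustration
# only, not the actual deepfuze.common_helper implementation.
from typing import List


def sketch_create_int_range(start : int, stop : int, step : int) -> List[int]:
    # inclusive integer range: (0, 2, 1) -> [0, 1, 2]
    return list(range(start, stop + step, step))


def sketch_create_float_range(start : float, stop : float, step : float) -> List[float]:
    # inclusive float range with rounding to avoid accumulation drift:
    # (0.0, 1.0, 0.05) -> [0.0, 0.05, ..., 1.0]
    steps = int(round((stop - start) / step))
    return [ round(start + index * step, 2) for index in range(steps + 1) ]


def sketch_create_metavar(ranges : List[int]) -> str:
    # e.g. [1, 2, 3, 4, 5] -> '[1-5]', suitable for an argparse metavar string
    return '[' + str(ranges[0]) + '-' + str(ranges[-1]) + ']'
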
    })  # end of the fixture that seeds the test config; its opening lines are not part of this excerpt


def test_get_str_value() -> None:
    assert config.get_str_value('str.valid') == 'a'
    assert config.get_str_value('str.unset', 'b') == 'b'
    assert config.get_str_value('str.unset') is None
    assert config.get_str_value('str.invalid') is None


def test_get_int_value() -> None:
    assert config.get_int_value('int.valid') == 1
    assert config.get_int_value('int.unset', '1') == 1
    assert config.get_int_value('int.unset') is None
    assert config.get_int_value('int.invalid') is None


def test_get_float_value() -> None:
    assert config.get_float_value('float.valid') == 1.0
    assert config.get_float_value('float.unset', '1.0') == 1.0
    assert config.get_float_value('float.unset') is None
    assert config.get_float_value('float.invalid') is None


def test_get_bool_value() -> None:
    assert config.get_bool_value('bool.valid') is True
    assert config.get_bool_value('bool.unset', 'False') is False
    assert config.get_bool_value('bool.unset') is None
    assert config.get_bool_value('bool.invalid') is None


def test_get_str_list() -> None:
    assert config.get_str_list('str_list.valid') == [ 'a', 'b', 'c' ]
    assert config.get_str_list('str_list.unset', 'c b a') == [ 'c', 'b', 'a' ]
    assert config.get_str_list('str_list.unset') is None
    assert config.get_str_list('str_list.invalid') is None


def test_get_int_list() -> None:
    assert config.get_int_list('int_list.valid') == [ 1, 2, 3 ]
    assert config.get_int_list('int_list.unset', '3 2 1') == [ 3, 2, 1 ]
    assert config.get_int_list('int_list.unset') is None
    assert config.get_int_list('int_list.invalid') is None


def test_get_float_list() -> None:
    assert config.get_float_list('float_list.valid') == [ 1.0, 2.0, 3.0 ]
    assert config.get_float_list('float_list.unset', '3.0 2.0 1.0') == [ 3.0, 2.0, 1.0 ]
    assert config.get_float_list('float_list.unset') is None
    assert config.get_float_list('float_list.invalid') is None
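
# The assertions above imply a config organised into sections ([str], [int], [float],
# [bool], [str_list], [int_list], [float_list]), each with a parseable 'valid' key and an
# unparseable 'invalid' key, while '.unset' keys are simply absent. A sketch of such a
# fixture using configparser follows; the layout is inferred from the tests, not taken
# from the actual suite.
from configparser import ConfigParser


def sketch_build_test_config() -> ConfigParser:
    config_parser = ConfigParser()
    config_parser.read_dict(
    {
        'str': { 'valid': 'a', 'invalid': '' },
        'int': { 'valid': '1', 'invalid': '' },
        'float': { 'valid': '1.0', 'invalid': '' },
        'bool': { 'valid': 'True', 'invalid': '' },
        'str_list': { 'valid': 'a b c', 'invalid': '' },
        'int_list': { 'valid': '1 2 3', 'invalid': '' },
        'float_list': { 'valid': '1.0 2.0 3.0', 'invalid': '' }
    })
    return config_parser
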
    ])  # end of the fixture that downloads the example targets; its opening lines are not part of this excerpt


def test_get_download_size() -> None:
    assert get_download_size('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4') == 191675
    assert get_download_size('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-360p.mp4') == 370732
    assert get_download_size('invalid') == 0


def test_is_download_done() -> None:
    assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', '../../models/facefusion/examples/target-240p.mp4') is True
    assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', 'invalid') is False
    assert is_download_done('invalid', 'invalid') is False
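
# Sketch of download helpers consistent with the assertions above: the remote size comes
# from a HEAD-style request and a download counts as done once the local file size matches
# it. Illustration only, not the deepfuze.download implementation.
import os
import urllib.request


def sketch_get_download_size(url : str) -> int:
    try:
        request = urllib.request.Request(url, method = 'HEAD')
        with urllib.request.urlopen(request, timeout = 10) as response:
            return int(response.headers.get('Content-Length', 0))
    except Exception:
        return 0


def sketch_is_download_done(url : str, file_path : str) -> bool:
    if os.path.isfile(file_path):
        return sketch_get_download_size(url) == os.path.getsize(file_path)
    return False
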
from deepfuze.execution import encode_execution_providers, decode_execution_providers, has_execution_provider, apply_execution_provider_options


def test_encode_execution_providers() -> None:
    assert encode_execution_providers([ 'CPUExecutionProvider' ]) == [ 'cpu' ]


def test_decode_execution_providers() -> None:
    assert decode_execution_providers([ 'cpu' ]) == [ 'CPUExecutionProvider' ]


def test_has_execution_provider() -> None:
    assert has_execution_provider('CPUExecutionProvider') is True
    assert has_execution_provider('InvalidExecutionProvider') is False


def test_multiple_execution_providers() -> None:
    # the opening of the expected value is reconstructed; only its tail appears in this excerpt
    execution_provider_with_options = [
        'CPUExecutionProvider',
        ('CUDAExecutionProvider',
        {
            'device_id': '1',
            'cudnn_conv_algo_search': 'DEFAULT'
        })
    ]
    assert apply_execution_provider_options('1', [ 'CPUExecutionProvider', 'CUDAExecutionProvider' ]) == execution_provider_with_options
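
# Sketch of the provider name mapping the first two tests above exercise
# ('CPUExecutionProvider' <-> 'cpu'), assuming onnxruntime is available. Illustration only,
# not the deepfuze.execution module.
from typing import List

import onnxruntime


def sketch_encode_execution_providers(execution_providers : List[str]) -> List[str]:
    # strip the 'ExecutionProvider' suffix and lowercase: 'CUDAExecutionProvider' -> 'cuda'
    return [ execution_provider.replace('ExecutionProvider', '').lower() for execution_provider in execution_providers ]


def sketch_decode_execution_providers(execution_providers : List[str]) -> List[str]:
    # map short names back onto the providers onnxruntime reports as available
    available_providers = onnxruntime.get_available_providers()
    return [ provider for provider, short_name in zip(available_providers, sketch_encode_execution_providers(available_providers)) if short_name in execution_providers ]
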
import subprocess
import pytest

import deepfuze.globals
from deepfuze.download import conditional_download
from deepfuze.face_analyser import pre_check, clear_face_analyser, get_one_face
from deepfuze.typing import Face
from deepfuze.vision import read_static_image


@pytest.fixture(scope = 'module', autouse = True)
def before_all() -> None:  # fixture name not shown in this excerpt; assumed
    conditional_download('../../models/facefusion/examples',
    [
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg'
    ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.jpg', '-vf', 'crop=iw*0.8:ih*0.8', '../../models/facefusion/examples/source-80crop.jpg' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.jpg', '-vf', 'crop=iw*0.7:ih*0.7', '../../models/facefusion/examples/source-70crop.jpg' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.jpg', '-vf', 'crop=iw*0.6:ih*0.6', '../../models/facefusion/examples/source-60crop.jpg' ])


@pytest.fixture(autouse = True)
def before_each() -> None:  # fixture name and body not shown in this excerpt
    ...


# Four detector tests share the tail below; the per-test setup (detector configuration and
# the source_paths list) is not part of this excerpt, and only the last three test names
# are visible.

def test_get_one_face_with_scrfd() -> None:
    ...  # detector setup omitted in this excerpt
    for source_path in source_paths:
        source_frame = read_static_image(source_path)
        face = get_one_face(source_frame)

        assert isinstance(face, Face)


def test_get_one_face_with_yoloface() -> None:
    ...  # detector setup omitted in this excerpt
    for source_path in source_paths:
        source_frame = read_static_image(source_path)
        face = get_one_face(source_frame)

        assert isinstance(face, Face)


def test_get_one_face_with_yunet() -> None:
    ...  # detector setup omitted in this excerpt
    for source_path in source_paths:
        source_frame = read_static_image(source_path)
        face = get_one_face(source_frame)

        assert isinstance(face, Face)
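
# Sketch of the per-detector setup that the excerpt omits, reusing the imports at the top
# of the face analyser tests above. The attribute names on deepfuze.globals and the
# detector size are assumptions for illustration, not taken from the suite.
def sketch_detector_test(detector_model : str) -> None:
    deepfuze.globals.face_detector_model = detector_model  # assumed attribute name
    deepfuze.globals.face_detector_size = '640x640'  # assumed attribute name and value

    if pre_check():
        source_paths = [
            '../../models/facefusion/examples/source.jpg',
            '../../models/facefusion/examples/source-80crop.jpg',
            '../../models/facefusion/examples/source-70crop.jpg',
            '../../models/facefusion/examples/source-60crop.jpg'
        ]
        for source_path in source_paths:
            source_frame = read_static_image(source_path)
            face = get_one_face(source_frame)

            assert isinstance(face, Face)
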
import glob
import subprocess
import pytest

import deepfuze.globals
from deepfuze import process_manager
from deepfuze.filesystem import get_temp_directory_path, create_temp, clear_temp
from deepfuze.download import conditional_download
from deepfuze.ffmpeg import extract_frames, read_audio_buffer  # import not shown in this excerpt; assumed from the calls below


@pytest.fixture(scope = 'module', autouse = True)
def before_all() -> None:  # fixture name not shown in this excerpt; assumed
    conditional_download('../../models/facefusion/examples',
    [
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3',
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4'
    ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.wav' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=25', '../../models/facefusion/examples/target-240p-25fps.mp4' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=30', '../../models/facefusion/examples/target-240p-30fps.mp4' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=60', '../../models/facefusion/examples/target-240p-60fps.mp4' ])


@pytest.fixture(scope = 'function', autouse = True)
def before_each() -> None:  # fixture name and body not shown in this excerpt
    ...


# Several extract_frames tests share the tails below; their names, fps settings and the
# data_provider of (target_path, expected frame_total) pairs are not part of this excerpt.
# One test checks a fixed total of 324 frames at '452x240', the others compare against the
# data_provider at '452x240' and '426x240' respectively.

    for target_path in target_paths:
        temp_directory_path = get_temp_directory_path(target_path)
        create_temp(target_path)

        assert extract_frames(target_path, '452x240', 30.0) is True
        assert len(glob.glob1(temp_directory_path, '*.jpg')) == 324

        clear_temp(target_path)

    for target_path, frame_total in data_provider:
        temp_directory_path = get_temp_directory_path(target_path)
        create_temp(target_path)

        assert extract_frames(target_path, '452x240', 30.0) is True
        assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total

        clear_temp(target_path)

    for target_path, frame_total in data_provider:
        temp_directory_path = get_temp_directory_path(target_path)
        create_temp(target_path)

        assert extract_frames(target_path, '426x240', 30.0) is True
        assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total

        clear_temp(target_path)


def test_read_audio_buffer() -> None:
    assert isinstance(read_audio_buffer('../../models/facefusion/examples/source.mp3', 1, 1), bytes)
    assert isinstance(read_audio_buffer('../../models/facefusion/examples/source.wav', 1, 1), bytes)
    assert read_audio_buffer('../../models/facefusion/examples/invalid.mp3', 1, 1) is None
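
# Sketch of reading a raw audio buffer through ffmpeg, consistent with the test above
# (bytes on success, None for a missing file). Illustration only; the meaning of the two
# numeric arguments of the real read_audio_buffer is not visible in this excerpt, so they
# are treated here as sample rate and channel count.
import os
import subprocess
from typing import Optional


def sketch_read_audio_buffer(audio_path : str, sample_rate : int, channel_total : int) -> Optional[bytes]:
    if not os.path.isfile(audio_path):
        return None
    commands = [ 'ffmpeg', '-i', audio_path, '-f', 's16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ]
    run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.DEVNULL)
    if run.returncode == 0:
        return run.stdout
    return None
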
    shutil.copyfile('../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/söurce.jpg')  # end of the fixture; its opening lines are not part of this excerpt


def test_get_file_size() -> None:
    assert get_file_size('../../models/facefusion/examples/source.jpg') > 0
    assert get_file_size('invalid') == 0


def test_is_file() -> None:
    assert is_file('../../models/facefusion/examples/source.jpg') is True
    assert is_file('../../models/facefusion/examples') is False
    assert is_file('invalid') is False


def test_is_directory() -> None:
    assert is_directory('../../models/facefusion/examples') is True
    assert is_directory('../../models/facefusion/examples/source.jpg') is False
    assert is_directory('invalid') is False


def test_is_audio() -> None:
    assert is_audio('../../models/facefusion/examples/source.mp3') is True
    assert is_audio('../../models/facefusion/examples/source.jpg') is False
    assert is_audio('invalid') is False


def test_has_audio() -> None:
    assert has_audio([ '../../models/facefusion/examples/source.mp3' ]) is True
    assert has_audio([ '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.jpg' ]) is True
    assert has_audio([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.jpg' ]) is False
    assert has_audio([ 'invalid' ]) is False


def test_is_image() -> None:
    assert is_image('../../models/facefusion/examples/source.jpg') is True
    assert is_image('../../models/facefusion/examples/target-240p.mp4') is False
    assert is_image('invalid') is False


def test_has_image() -> None:
    assert has_image([ '../../models/facefusion/examples/source.jpg' ]) is True
    assert has_image([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.mp3' ]) is True
    assert has_image([ '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.mp3' ]) is False
    assert has_image([ 'invalid' ]) is False


def test_is_video() -> None:
    assert is_video('../../models/facefusion/examples/target-240p.mp4') is True
    assert is_video('../../models/facefusion/examples/source.jpg') is False
    assert is_video('invalid') is False


def test_filter_audio_paths() -> None:
    assert filter_audio_paths([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.mp3' ]) == [ '../../models/facefusion/examples/source.mp3' ]
    assert filter_audio_paths([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.jpg' ]) == []
    assert filter_audio_paths([ 'invalid' ]) == []


def test_filter_image_paths() -> None:
    assert filter_image_paths([ '../../models/facefusion/examples/source.jpg', '../../models/facefusion/examples/source.mp3' ]) == [ '../../models/facefusion/examples/source.jpg' ]
    assert filter_image_paths([ '../../models/facefusion/examples/source.mp3', '../../models/facefusion/examples/source.mp3' ]) == []
    assert filter_image_paths([ 'invalid' ]) == []
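
# Sketch of the path filters consistent with the assertions above, assuming is_audio and
# is_image come from deepfuze.filesystem as the surrounding tests suggest. Illustration only.
from typing import List

from deepfuze.filesystem import is_audio, is_image


def sketch_filter_audio_paths(paths : List[str]) -> List[str]:
    return [ path for path in paths if is_audio(path) ]


def sketch_filter_image_paths(paths : List[str]) -> List[str]:
    return [ path for path in paths if is_image(path) ]
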
def test_list_directory() -> None:
    assert list_directory('../../models/facefusion/examples')
    assert list_directory('../../models/facefusion/examples/source.jpg') is None
    assert list_directory('invalid') is None


def test_sanitize_path_for_windows() -> None:
    if is_windows():
        assert sanitize_path_for_windows('../../models/facefusion/examples/söurce.jpg') == 'ASSETS~1/examples/SURCE~1.JPG'
        assert sanitize_path_for_windows('invalid') is None

from deepfuze.common_helper import is_linux, is_macos  # import not shown in this excerpt; assumed from the calls below
from deepfuze.memory import limit_system_memory


def test_limit_system_memory() -> None:
    assert limit_system_memory(4) is True
    if is_linux() or is_macos():
        assert limit_system_memory(1024) is False
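
# Sketch of a system-memory cap consistent with the test above: apply an address-space
# limit expressed in gigabytes and report whether the OS accepted it. Illustration only;
# why the suite expects a 1024 GB request to fail on Linux/macOS is not visible here.
import resource  # POSIX only


def sketch_limit_system_memory(limit_in_gb : int) -> bool:
    limit_in_bytes = limit_in_gb * 1024 ** 3
    try:
        resource.setrlimit(resource.RLIMIT_DATA, (limit_in_bytes, limit_in_bytes))
        return True
    except (OSError, ValueError):
        return False
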
def test_normalize_output_path() -> None:
    if is_linux() or is_macos():
        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples/target-240p.mp4') == '../../models/facefusion/examples/target-240p.mp4'
        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples').startswith('../../models/facefusion/examples/target-240p')
        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples').endswith('.mp4')
        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples/output.mp4') == '../../models/facefusion/examples/output.mp4'
        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/examples/invalid') is None
        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', '../../models/facefusion/invalid/output.mp4') is None
        assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', 'invalid') is None
    assert normalize_output_path('../../models/facefusion/examples/target-240p.mp4', None) is None
    assert normalize_output_path(None, '../../models/facefusion/examples/output.mp4') is None


def test_normalize_padding() -> None:
    assert normalize_padding([ 0, 0, 0, 0 ]) == (0, 0, 0, 0)
    assert normalize_padding([ 1 ]) == (1, 1, 1, 1)
    assert normalize_padding([ 1, 2 ]) == (1, 2, 1, 2)
    assert normalize_padding([ 1, 2, 3 ]) == (1, 2, 3, 2)
    assert normalize_padding(None) is None
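
# Sketch of a CSS-style padding normalizer consistent with the assertions above
# (1 value -> all sides, 2 -> vertical/horizontal, 3 -> top/horizontal/bottom).
# Illustration only, not the deepfuze implementation.
from typing import List, Optional, Tuple


def sketch_normalize_padding(padding : Optional[List[int]]) -> Optional[Tuple[int, int, int, int]]:
    if not padding:
        return None
    if len(padding) == 1:
        return (padding[0], padding[0], padding[0], padding[0])
    if len(padding) == 2:
        return (padding[0], padding[1], padding[0], padding[1])
    if len(padding) == 3:
        return (padding[0], padding[1], padding[2], padding[1])
    return (padding[0], padding[1], padding[2], padding[3])
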
23 assert normalize_padding(None) is None 24 25 26 def test_normalize_fps() -> None: 27 assert normalize_fps(0.0) == 1.0 28 assert normalize_fps(25.0) == 25.0 29 assert normalize_fps(61.0) == 60.0 30 assert normalize_fps(None) is None
24 25 26 def test_normalize_fps() -> None: 27 assert normalize_fps(0.0) == 1.0 28 assert normalize_fps(25.0) == 25.0 29 assert normalize_fps(61.0) == 60.0 30 assert normalize_fps(None) is None
25 26 def test_normalize_fps() -> None: 27 assert normalize_fps(0.0) == 1.0 28 assert normalize_fps(25.0) == 25.0 29 assert normalize_fps(61.0) == 60.0 30 assert normalize_fps(None) is None
26 def test_normalize_fps() -> None: 27 assert normalize_fps(0.0) == 1.0 28 assert normalize_fps(25.0) == 25.0 29 assert normalize_fps(61.0) == 60.0 30 assert normalize_fps(None) is None
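The helpers under test are not part of this excerpt. A minimal sketch consistent with the assertions above might look like the following; the CSS-style padding expansion and the 1.0-60.0 fps clamp are inferences from the expected values, not the project's actual implementation.

from typing import List, Optional, Tuple

Padding = Tuple[int, int, int, int]


def normalize_padding(padding : Optional[List[int]]) -> Optional[Padding]:
    # expand a 1-4 element list CSS-style: top, right, bottom, left
    if padding and len(padding) == 1:
        return padding[0], padding[0], padding[0], padding[0]
    if padding and len(padding) == 2:
        return padding[0], padding[1], padding[0], padding[1]
    if padding and len(padding) == 3:
        return padding[0], padding[1], padding[2], padding[1]
    if padding and len(padding) == 4:
        return padding[0], padding[1], padding[2], padding[3]
    return None


def normalize_fps(fps : Optional[float]) -> Optional[float]:
    # clamp to the 1-60 fps range the assertions expect
    if fps is not None:
        return max(1.0, min(fps, 60.0))
    return None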
def test_start() -> None:
    set_process_state('pending')
    start()

    assert is_processing()


def test_stop() -> None:
    set_process_state('processing')
    stop()

    assert is_stopping()


def test_end() -> None:
    set_process_state('processing')
    end()

    assert is_pending()
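The process-state helpers exercised here are not included in this excerpt. A minimal sketch consistent with the three tests, assuming a single module-level state variable and the state names used above, could be:

from typing import Literal

ProcessState = Literal['pending', 'processing', 'stopping']

PROCESS_STATE : ProcessState = 'pending'


def set_process_state(process_state : ProcessState) -> None:
    global PROCESS_STATE
    PROCESS_STATE = process_state


def is_pending() -> bool:
    return PROCESS_STATE == 'pending'


def is_processing() -> bool:
    return PROCESS_STATE == 'processing'


def is_stopping() -> bool:
    return PROCESS_STATE == 'stopping'


def start() -> None:
    set_process_state('processing')


def stop() -> None:
    set_process_state('stopping')


def end() -> None:
    set_process_state('pending')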
import subprocess
import pytest

from deepfuze.download import conditional_download
from deepfuze.vision import detect_image_resolution, restrict_image_resolution, create_image_resolutions, get_video_frame, count_video_frame_total, detect_video_fps, restrict_video_fps, detect_video_resolution, restrict_video_resolution, create_video_resolutions, normalize_resolution, pack_resolution, unpack_resolution


@pytest.fixture(scope = 'module', autouse = True)
# (the fixture definition and the opening of the download call are missing from this excerpt)
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg',
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4',
        'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1080p.mp4'
    ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-240p.jpg' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-1080p.mp4', '-vframes', '1', '../../models/facefusion/examples/target-1080p.jpg' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vframes', '1', '-vf', 'transpose=0', '../../models/facefusion/examples/target-240p-90deg.jpg' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-1080p.mp4', '-vframes', '1', '-vf', 'transpose=0', '../../models/facefusion/examples/target-1080p-90deg.jpg' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=25', '../../models/facefusion/examples/target-240p-25fps.mp4' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=30', '../../models/facefusion/examples/target-240p-30fps.mp4' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'fps=60', '../../models/facefusion/examples/target-240p-60fps.mp4' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-240p.mp4', '-vf', 'transpose=0', '../../models/facefusion/examples/target-240p-90deg.mp4' ])
    subprocess.run([ 'ffmpeg', '-i', '../../models/facefusion/examples/target-1080p.mp4', '-vf', 'transpose=0', '../../models/facefusion/examples/target-1080p-90deg.mp4' ])


def test_detect_image_resolution() -> None:
    assert detect_image_resolution('../../models/facefusion/examples/target-240p.jpg') == (426, 226)
    assert detect_image_resolution('../../models/facefusion/examples/target-240p-90deg.jpg') == (226, 426)
    assert detect_image_resolution('../../models/facefusion/examples/target-1080p.jpg') == (2048, 1080)
    assert detect_image_resolution('../../models/facefusion/examples/target-1080p-90deg.jpg') == (1080, 2048)
    assert detect_image_resolution('invalid') is None


def test_restrict_image_resolution() -> None:
    assert restrict_image_resolution('../../models/facefusion/examples/target-1080p.jpg', (426, 226)) == (426, 226)
    assert restrict_image_resolution('../../models/facefusion/examples/target-1080p.jpg', (2048, 1080)) == (2048, 1080)
    assert restrict_image_resolution('../../models/facefusion/examples/target-1080p.jpg', (4096, 2160)) == (2048, 1080)


def test_create_image_resolutions() -> None:
    assert create_image_resolutions((426, 226)) == [ '106x56', '212x112', '320x170', '426x226', '640x340', '852x452', '1064x564', '1278x678', '1492x792', '1704x904' ]
    assert create_image_resolutions((226, 426)) == [ '56x106', '112x212', '170x320', '226x426', '340x640', '452x852', '564x1064', '678x1278', '792x1492', '904x1704' ]
    assert create_image_resolutions((2048, 1080)) == [ '512x270', '1024x540', '1536x810', '2048x1080', '3072x1620', '4096x2160', '5120x2700', '6144x3240', '7168x3780', '8192x4320' ]
    assert create_image_resolutions((1080, 2048)) == [ '270x512', '540x1024', '810x1536', '1080x2048', '1620x3072', '2160x4096', '2700x5120', '3240x6144', '3780x7168', '4320x8192' ]
    assert create_image_resolutions(None) == []


def test_get_video_frame() -> None:
    assert get_video_frame('../../models/facefusion/examples/target-240p-25fps.mp4') is not None
    assert get_video_frame('invalid') is None


def test_count_video_frame_total() -> None:
    assert count_video_frame_total('../../models/facefusion/examples/target-240p-25fps.mp4') == 270
    assert count_video_frame_total('../../models/facefusion/examples/target-240p-30fps.mp4') == 324
    assert count_video_frame_total('../../models/facefusion/examples/target-240p-60fps.mp4') == 648
    assert count_video_frame_total('invalid') == 0


def test_detect_video_fps() -> None:
    assert detect_video_fps('../../models/facefusion/examples/target-240p-25fps.mp4') == 25.0
    assert detect_video_fps('../../models/facefusion/examples/target-240p-30fps.mp4') == 30.0
    assert detect_video_fps('../../models/facefusion/examples/target-240p-60fps.mp4') == 60.0
    assert detect_video_fps('invalid') is None


def test_restrict_video_fps() -> None:
    assert restrict_video_fps('../../models/facefusion/examples/target-1080p.mp4', 20.0) == 20.0
    assert restrict_video_fps('../../models/facefusion/examples/target-1080p.mp4', 25.0) == 25.0
    assert restrict_video_fps('../../models/facefusion/examples/target-1080p.mp4', 60.0) == 25.0


def test_detect_video_resolution() -> None:
    assert detect_video_resolution('../../models/facefusion/examples/target-240p.mp4') == (426, 226)
    assert detect_video_resolution('../../models/facefusion/examples/target-240p-90deg.mp4') == (226, 426)
    assert detect_video_resolution('../../models/facefusion/examples/target-1080p.mp4') == (2048, 1080)
    assert detect_video_resolution('../../models/facefusion/examples/target-1080p-90deg.mp4') == (1080, 2048)
    assert detect_video_resolution('invalid') is None


def test_restrict_video_resolution() -> None:
    assert restrict_video_resolution('../../models/facefusion/examples/target-1080p.mp4', (426, 226)) == (426, 226)
    assert restrict_video_resolution('../../models/facefusion/examples/target-1080p.mp4', (2048, 1080)) == (2048, 1080)
    assert restrict_video_resolution('../../models/facefusion/examples/target-1080p.mp4', (4096, 2160)) == (2048, 1080)


def test_create_video_resolutions() -> None:
    assert create_video_resolutions((426, 226)) == [ '426x226', '452x240', '678x360', '904x480', '1018x540', '1358x720', '2036x1080', '2714x1440', '4072x2160', '8144x4320' ]
    assert create_video_resolutions((226, 426)) == [ '226x426', '240x452', '360x678', '480x904', '540x1018', '720x1358', '1080x2036', '1440x2714', '2160x4072', '4320x8144' ]
    assert create_video_resolutions((2048, 1080)) == [ '456x240', '682x360', '910x480', '1024x540', '1366x720', '2048x1080', '2730x1440', '4096x2160', '8192x4320' ]
    assert create_video_resolutions((1080, 2048)) == [ '240x456', '360x682', '480x910', '540x1024', '720x1366', '1080x2048', '1440x2730', '2160x4096', '4320x8192' ]
    assert create_video_resolutions(None) == []


def test_normalize_resolution() -> None:
    assert normalize_resolution((2.5, 2.5)) == (2, 2)
    assert normalize_resolution((3.0, 3.0)) == (4, 4)
    assert normalize_resolution((6.5, 6.5)) == (6, 6)


def test_pack_resolution() -> None:
    assert pack_resolution((1, 1)) == '0x0'
    assert pack_resolution((2, 2)) == '2x2'


def test_unpack_resolution() -> None:
    assert unpack_resolution('0x0') == (0, 0)
    assert unpack_resolution('2x2') == (2, 2)
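The expected values pin the small resolution helpers down fairly tightly. A minimal sketch that satisfies the assertions above, relying on Python's round-half-to-even behaviour to force each dimension to the nearest even integer, might look like this; it is an inference from the tests, not the project's actual code.

from typing import Tuple

Resolution = Tuple[int, int]


def normalize_resolution(resolution : Tuple[float, float]) -> Resolution:
    width, height = resolution
    # round each dimension to the nearest multiple of 2;
    # round() sends halves to the even choice, so (2.5, 2.5) -> (2, 2) and (3.0, 3.0) -> (4, 4)
    return round(width / 2) * 2, round(height / 2) * 2


def pack_resolution(resolution : Tuple[float, float]) -> str:
    # normalize first, which is why (1, 1) packs to '0x0'
    width, height = normalize_resolution(resolution)
    return str(width) + 'x' + str(height)


def unpack_resolution(resolution : str) -> Resolution:
    width, height = map(int, resolution.split('x'))
    return width, height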
from deepfuze import wording


def test_get() -> None:
    assert wording.get('python_not_supported')
    assert wording.get('help.source')
    assert wording.get('invalid') is None
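The 'help.source' key suggests wording.get walks a nested dictionary using dot-separated segments and returns None as soon as a segment is missing. A minimal sketch under that assumption follows; the WORDING contents shown are placeholders, not the module's real strings.

from typing import Any, Dict, Optional

# placeholder structure; the real module ships a much larger WORDING dictionary
WORDING : Dict[str, Any] =\
{
    'python_not_supported': 'Python version is not supported, upgrade to {version} or higher',
    'help':
    {
        'source': 'choose single or multiple source images or audios'
    }
}


def get(key : str) -> Optional[str]:
    # walk dot-separated segments; bail out with None when one is missing
    value : Any = WORDING
    for segment in key.split('.'):
        if isinstance(value, dict) and segment in value:
            value = value[segment]
        else:
            return None
    return value if isinstance(value, str) else None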
import hashlib
import os
from typing import Iterable
import shutil
import subprocess
import re
from collections.abc import Mapping
import torch
DIMMAX = 8192

def ffmpeg_suitability(path):
    try:
        version = subprocess.run([path, "-version"], check=True,
                                 capture_output=True).stdout.decode("utf-8")
    except:
        return 0
    score = 0
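The excerpt stops right after the score is initialised; presumably the rest of the function keeps scoring the version string and the caller picks the candidate binary with the highest score. A hedged usage sketch, with an illustrative candidate list, might be:

import shutil

# hypothetical candidates; any binary that cannot report a version scores 0 and loses
candidates = [shutil.which("ffmpeg") or "ffmpeg", "/opt/ffmpeg/bin/ffmpeg"]
best_ffmpeg = max(candidates, key=ffmpeg_suitability)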
        prompt_queue.put((number, prompt_id, prompt, extra_data, outputs_to_execute))

requeue_guard = [None, 0, 0, {}]
def requeue_workflow(requeue_required=(-1,True)):
    assert(len(prompt_queue.currently_running) == 1)
    global requeue_guard
    (run_number, _, prompt, _, _) = next(iter(prompt_queue.currently_running.values()))
    if requeue_guard[0] != run_number:
    if duration > 0:
        args += ["-t", str(duration)]
    try:
        #TODO: scan for sample rate and maintain
        res = subprocess.run(args + ["-f", "f32le", "-"],
                             capture_output=True, check=True)
        audio = torch.frombuffer(bytearray(res.stdout), dtype=torch.float32)
    except subprocess.CalledProcessError as e:
        audio = torch.zeros(1,2)
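The raw f32le stream ffmpeg writes to stdout interleaves samples per channel, so the flat buffer still needs to be shaped before use. A hedged sketch of that step, assuming the (not shown) ffmpeg arguments requested stereo output:

# assuming the args included "-ac", "2": drop any trailing partial frame,
# then reshape the interleaved L/R samples into (channels, samples)
channels = 2
audio = audio[: audio.numel() - audio.numel() % channels]
audio = audio.reshape(-1, channels).transpose(0, 1)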