diff --git a/comfyui-audio/comfyui_audiocraft/nodes.py b/comfyui-audio/comfyui_audiocraft/nodes.py index 0fcf3c6..4377cb6 100644 --- a/comfyui-audio/comfyui_audiocraft/nodes.py +++ b/comfyui-audio/comfyui_audiocraft/nodes.py @@ -85,7 +85,9 @@ class MusicGenLongNode: from audiocraft.data.audio import audio_write torch.manual_seed(seed) - print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {segment_duration}s Segmenten") + import math + total_segments = math.ceil(total_duration / segment_duration) + print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {total_segments} Segmenten à {segment_duration}s") mg = MusicGen.get_pretrained(model) sample_rate = mg.sample_rate @@ -94,7 +96,7 @@ class MusicGenLongNode: # Erstes Segment gen_params = dict(use_sampling=True, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k, top_p=0.0) mg.set_generation_params(duration=segment_duration, **gen_params) - print(f"[MusicGenLong] Segment 1 / {int(total_duration / segment_duration) + 1}") + print(f"[MusicGenLong] Segment 1 / {total_segments}") first = mg.generate([prompt]) segments = [first[0].cpu()] generated = segment_duration @@ -114,12 +116,13 @@ class MusicGenLongNode: generated += next_dur seg_num += 1 - # Crossfade und zusammenfügen + # Crossfade und zusammenfügen (funktioniert für Mono und Stereo) crossfade_samples = context_samples result = segments[0] + num_channels = result.shape[0] for seg in segments[1:]: - fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0) - fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0) + fade_out = torch.linspace(1.0, 0.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1) + fade_in = torch.linspace(0.0, 1.0, crossfade_samples).unsqueeze(0).expand(num_channels, -1) overlap = result[:, -crossfade_samples:] * fade_out + seg[:, :crossfade_samples] * fade_in result = torch.cat([result[:, :-crossfade_samples], overlap, seg[:, crossfade_samples:]], dim=1) @@ -162,6 +165,7 @@ class AudioGenNode: ag = AudioGen.get_pretrained("facebook/audiogen-medium") ag.set_generation_params( duration=duration, + use_sampling=True, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k, diff --git a/comfyui-audio/workflow_music.json b/comfyui-audio/workflow_music.json index bf4eca9..49638db 100644 --- a/comfyui-audio/workflow_music.json +++ b/comfyui-audio/workflow_music.json @@ -86,10 +86,10 @@ { "id": 2, "type": "AudioUpsample", - "pos": [580, 100], + "pos": [580, 620], "size": {"0": 300, "1": 120}, "flags": {}, - "order": 1, + "order": 4, "mode": 0, "inputs": [ {"name": "audio_file", "type": "AUDIO_PATH", "link": 1} @@ -103,10 +103,10 @@ { "id": 3, "type": "AudioPreview", - "pos": [930, 100], + "pos": [930, 620], "size": {"0": 250, "1": 80}, "flags": {}, - "order": 2, + "order": 5, "mode": 0, "inputs": [ {"name": "audio_file", "type": "AUDIO_PATH", "link": 2}