class MusicGenLongNode:
    """Generate long music tracks in segments joined by crossfades.

    The first segment is generated from the text prompt alone.  Every further
    segment is produced with ``generate_continuation``, using the tail of the
    previous segment as audio context, and is then crossfaded over that tail
    so the transitions are seamless.
    """

    # Tails shorter than this are not worth a separate continuation call: the
    # model would have (almost) nothing left to generate after the context.
    _MIN_NEW_AUDIO_SECONDS = 0.1

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node's input widgets for ComfyUI."""
        return {
            "required": {
                "prompt": ("STRING", {"multiline": True, "default": "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious ambient"}),
                "total_duration": ("FLOAT", {"default": 90.0, "min": 10.0, "max": 300.0, "step": 5.0}),
                "segment_duration": ("FLOAT", {"default": 30.0, "min": 10.0, "max": 30.0, "step": 5.0}),
                "context_seconds": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
                "temperature": ("FLOAT", {"default": 0.85, "min": 0.1, "max": 2.0, "step": 0.05}),
                "cfg_coef": ("FLOAT", {"default": 5.0, "min": 1.0, "max": 10.0, "step": 0.5}),
                "top_k": ("INT", {"default": 150, "min": 1, "max": 1000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate_long"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _plan_segment_durations(total_duration, segment_duration, context_seconds):
        """Return the generation duration (seconds) of every segment.

        The first entry is a plain generation; each later entry is a
        continuation whose duration *includes* ``context_seconds`` of overlap
        with the previous segment.  After crossfading, the durations therefore
        add up to ``total_duration`` (minus a tail shorter than
        ``_MIN_NEW_AUDIO_SECONDS``, which is dropped).

        Raises:
            ValueError: if a duration is not positive, or if continuations are
                needed but ``context_seconds`` leaves no room for new audio
                inside a segment.
        """
        if total_duration <= 0 or segment_duration <= 0:
            raise ValueError("total_duration and segment_duration must be positive")

        overlap = max(float(context_seconds), 0.0)
        # Never generate more than requested for a short track.
        first = min(segment_duration, total_duration)
        durations = [first]
        covered = first

        while total_duration - covered >= MusicGenLongNode._MIN_NEW_AUDIO_SECONDS:
            if overlap >= segment_duration:
                raise ValueError(
                    f"context_seconds ({context_seconds}) must be smaller than "
                    f"segment_duration ({segment_duration}) to continue a track"
                )
            remaining = total_duration - covered
            # The continuation re-renders `overlap` seconds of context before
            # producing new audio, so ask for that much extra.
            next_duration = min(segment_duration, remaining + overlap)
            durations.append(next_duration)
            covered += next_duration - overlap

        return durations

    @staticmethod
    def _crossfade_concat(segments, crossfade_samples):
        """Join ``[channels, samples]`` tensors with linear crossfades.

        The fade length is clamped to the shorter of the two neighbours, and a
        non-positive length degrades to plain concatenation (a ``[:-0]`` slice
        would otherwise silently drop everything generated so far).
        """
        result = segments[0]
        for seg in segments[1:]:
            fade_len = min(crossfade_samples, result.shape[1], seg.shape[1])
            if fade_len <= 0:
                result = torch.cat([result, seg], dim=1)
                continue
            fade_out = torch.linspace(1.0, 0.0, fade_len).unsqueeze(0)
            fade_in = torch.linspace(0.0, 1.0, fade_len).unsqueeze(0)
            overlap = result[:, -fade_len:] * fade_out + seg[:, :fade_len] * fade_in
            result = torch.cat([result[:, :-fade_len], overlap, seg[:, fade_len:]], dim=1)
        return result

    def generate_long(self, prompt, total_duration, segment_duration, context_seconds, model, seed, temperature, cfg_coef, top_k):
        """Generate ``total_duration`` seconds of music and write it as WAV.

        Args:
            prompt: Text description passed to every segment.
            total_duration: Target length of the final track in seconds.
            segment_duration: Maximum length of one generation call in seconds.
            context_seconds: Audio context fed to each continuation; also the
                crossfade length between neighbouring segments.
            model: Name of the pretrained MusicGen checkpoint.
            seed: Seed passed to ``torch.manual_seed``.
            temperature, cfg_coef, top_k: Sampling parameters forwarded to
                ``set_generation_params``.

        Returns:
            One-element tuple with the path of the written ``.wav`` file.

        Raises:
            ValueError: if the durations cannot be planned (see
                ``_plan_segment_durations``).
        """
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        # Validate and plan before loading the model so bad input fails fast.
        durations = self._plan_segment_durations(total_duration, segment_duration, context_seconds)

        torch.manual_seed(seed)
        print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {segment_duration}s Segmenten")

        mg = MusicGen.get_pretrained(model)
        sample_rate = mg.sample_rate
        context_samples = int(context_seconds * sample_rate)

        segments = []
        for seg_num, duration in enumerate(durations, start=1):
            mg.set_generation_params(duration=duration, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k)
            if not segments:
                # First segment: text prompt only.
                print(f"[MusicGenLong] Segment 1 / {len(durations)}")
                audio = mg.generate([prompt])
            else:
                # Use the last context_seconds of the previous segment as context.
                # The continuation starts with this context and it counts
                # towards `duration`, which is why the plan adds the overlap.
                context = segments[-1][:, -context_samples:]
                print(f"[MusicGenLong] Segment {seg_num} ({duration}s)")
                audio = mg.generate_continuation(context, sample_rate, [prompt])
            segments.append(audio[0].cpu())

        # Crossfade over the shared context region and join everything.
        result = self._crossfade_concat(segments, context_samples)

        output_dir = "/app/ComfyUI/output/audio"
        os.makedirs(output_dir, exist_ok=True)
        filename = f"musicgen_long_{int(time.time())}"
        out_path = os.path.join(output_dir, filename)
        audio_write(out_path, result, sample_rate, strategy="loudness")
        print(f"[MusicGenLong] Fertig: {out_path}.wav ({result.shape[1]/sample_rate:.1f}s)")
        return (out_path + ".wav",)
MusicGenLongNode, + "AudioGen": AudioGenNode, + "AudioUpsample": AudioUpsampleNode, + "AudioPreview": AudioPreviewNode, } NODE_DISPLAY_NAME_MAPPINGS = { - "MusicGen": "MusicGen (Musik)", - "AudioGen": "AudioGen (Sound Effects)", - "AudioUpsample": "Audio Upsample (Qualität)", - "AudioPreview": "Audio Preview", + "MusicGen": "MusicGen (Musik)", + "MusicGenLong": "MusicGen Long (Lange Musik)", + "AudioGen": "AudioGen (Sound Effects)", + "AudioUpsample": "Audio Upsample (Qualität)", + "AudioPreview": "Audio Preview", } diff --git a/comfyui-audio/workflow_music.json b/comfyui-audio/workflow_music.json index 029c393..bf4eca9 100644 --- a/comfyui-audio/workflow_music.json +++ b/comfyui-audio/workflow_music.json @@ -1,14 +1,71 @@ { - "last_node_id": 3, - "last_link_id": 2, + "last_node_id": 6, + "last_link_id": 4, "nodes": [ + { + "id": 4, + "type": "MusicGenLong", + "pos": [50, 100], + "size": {"0": 480, "1": 420}, + "flags": {}, + "order": 0, + "mode": 0, + "inputs": [], + "outputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0} + ], + "properties": {"Node name for S&R": "MusicGenLong"}, + "widgets_values": [ + "dark fantasy orchestral music, deep cello strings, french horn melody, slow war drums, mysterious medieval atmosphere, epic cinematic, consistent dark tone", + 90.0, + 30.0, + 5.0, + "facebook/musicgen-stereo-medium", + 0, + 0.85, + 5.0, + 150 + ] + }, + { + "id": 5, + "type": "AudioUpsample", + "pos": [580, 100], + "size": {"0": 300, "1": 120}, + "flags": {}, + "order": 1, + "mode": 0, + "inputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "link": 3} + ], + "outputs": [ + {"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0} + ], + "properties": {"Node name for S&R": "AudioUpsample"}, + "widgets_values": [48000] + }, + { + "id": 6, + "type": "AudioPreview", + "pos": [930, 100], + "size": {"0": 250, "1": 80}, + "flags": {}, + "order": 2, + "mode": 0, + "inputs": [ + {"name": "audio_file", "type": 
"AUDIO_PATH", "link": 4} + ], + "outputs": [], + "properties": {"Node name for S&R": "AudioPreview"}, + "widgets_values": [] + }, { "id": 1, "type": "MusicGen", - "pos": [50, 100], + "pos": [50, 620], "size": {"0": 480, "1": 370}, "flags": {}, - "order": 0, + "order": 3, "mode": 0, "inputs": [], "outputs": [ @@ -16,14 +73,14 @@ ], "properties": {"Node name for S&R": "MusicGen"}, "widgets_values": [ - "epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM", + "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious medieval atmosphere, epic cinematic", 30.0, "facebook/musicgen-stereo-medium", 0, - 0.9, + 0.85, 5.0, - 250, - 18.0 + 150, + 8.0 ] }, { @@ -61,19 +118,25 @@ ], "links": [ [1, 1, 0, 2, 0, "AUDIO_PATH"], - [2, 2, 0, 3, 0, "AUDIO_PATH"] + [2, 2, 0, 3, 0, "AUDIO_PATH"], + [3, 4, 0, 5, 0, "AUDIO_PATH"], + [4, 5, 0, 6, 0, "AUDIO_PATH"] ], "groups": [ { - "title": "Musik Pipeline: MusicGen → Upsample 48kHz → Preview", - "bounding": [30, 65, 1180, 180], + "title": "Lange Musik (MusicGenLong) → empfohlen für 60-120s", + "bounding": [30, 65, 1180, 500], "color": "#3d2a5a", "font_size": 14 + }, + { + "title": "Kurze Musik (MusicGen) → für 10-30s Clips", + "bounding": [30, 585, 1180, 460], + "color": "#2a2a5a", + "font_size": 14 } ], "config": {}, - "extra": { - "notes": "Tipps:\n- temperature 0.8-0.9 = konsistenter Sound\n- cfg_coef 5.0 = mehr am Prompt orientiert\n- extend_stride 18 = keine Lücken bei langen Tracks\n- stereo-large für beste Qualität (6GB VRAM)" - }, + "extra": {}, "version": 0.4 }