ComfyUI: MusicGenLong Node fuer nahtlose lange Musik

- MusicGenLong: generiert beliebig lange Musik via generate_continuation
- Segmente werden mit Crossfade zusammengefügt, kein Qualitätsverlust
- workflow_music.json: MusicGenLong oben (60-120s), MusicGen unten (10-30s)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 15:30:03 +01:00 · 2026-03-21 15:30:03 +01:00 · ee9636684d
commit ee9636684d
parent a3c870127a
2 changed files with 162 additions and 22 deletions
--- a/comfyui-audio/comfyui_audiocraft/nodes.py
+++ b/comfyui-audio/comfyui_audiocraft/nodes.py
@ -54,6 +54,81 @@ class MusicGenNode:
        return (out_path + ".wav",)
 class MusicGenLongNode:
    """Generate long music in segments chained via ``generate_continuation``.

    The first segment comes from a plain text-conditioned ``generate`` call.
    Every further segment is a continuation that is prompted with the last
    ``context_seconds`` of the previous segment, and neighbouring segments are
    blended with a linear crossfade over that shared region.
    """

    # Remainders shorter than this many seconds are not worth another model
    # call; it is also the minimum amount of new audio a continuation must add.
    _MIN_NEW_SECONDS = 0.1

    @classmethod
    def INPUT_TYPES(cls):
        """Return the widget/input specification for this node."""
        return {
            "required": {
                "prompt":           ("STRING", {"multiline": True, "default": "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious ambient"}),
                "total_duration":   ("FLOAT",  {"default": 90.0,  "min": 10.0, "max": 300.0, "step": 5.0}),
                "segment_duration": ("FLOAT",  {"default": 30.0,  "min": 10.0, "max": 30.0,  "step": 5.0}),
                "context_seconds":  ("FLOAT",  {"default": 5.0,   "min": 1.0,  "max": 10.0,  "step": 0.5}),
                "model":            (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
                "seed":             ("INT",    {"default": 0,     "min": 0,    "max": 2**32 - 1}),
                "temperature":      ("FLOAT",  {"default": 0.85,  "min": 0.1,  "max": 2.0,   "step": 0.05}),
                "cfg_coef":         ("FLOAT",  {"default": 5.0,   "min": 1.0,  "max": 10.0,  "step": 0.5}),
                "top_k":            ("INT",    {"default": 150,   "min": 1,    "max": 1000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "generate_long"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @classmethod
    def _plan_durations(cls, total_duration, segment_duration, context_seconds):
        """Return the ``duration`` to request for each model call, in order.

        The first entry is the plain generation. Every later entry is a
        continuation whose output begins with ``context_seconds`` of prompt
        audio that the crossfade folds back into the previous segment, so it
        contributes only ``duration - context_seconds`` of new audio.

        Raises:
            ValueError: if continuations are needed but ``context_seconds``
                leaves (almost) no room for new audio inside a segment.
        """
        # Never generate more than requested when the target is shorter
        # than a single segment.
        first = min(segment_duration, total_duration)
        durations = [first]
        generated = first
        if total_duration - generated < cls._MIN_NEW_SECONDS:
            return durations

        if segment_duration - context_seconds < cls._MIN_NEW_SECONDS:
            raise ValueError(
                f"context_seconds ({context_seconds}) must be smaller than "
                f"segment_duration ({segment_duration})"
            )

        while total_duration - generated >= cls._MIN_NEW_SECONDS:
            remaining = total_duration - generated
            # Request the missing audio plus the overlapping context, capped
            # at the configured segment length.
            next_dur = min(segment_duration, remaining + context_seconds)
            durations.append(next_dur)
            generated += next_dur - context_seconds
        return durations

    @staticmethod
    def _crossfade_concat(segments, crossfade_samples):
        """Join ``[channels, samples]`` tensors with a linear crossfade.

        The overlap is clamped to the shorter of the two neighbours so that a
        short segment cannot cause a shape mismatch.
        """
        result = segments[0]
        ramps = {}  # overlap length -> (fade_out, fade_in), built once per length
        for seg in segments[1:]:
            n = min(crossfade_samples, result.shape[1], seg.shape[1])
            if n <= 0:
                result = torch.cat([result, seg], dim=1)
                continue
            if n not in ramps:
                ramps[n] = (
                    torch.linspace(1.0, 0.0, n).unsqueeze(0),
                    torch.linspace(0.0, 1.0, n).unsqueeze(0),
                )
            fade_out, fade_in = ramps[n]
            overlap = result[:, -n:] * fade_out + seg[:, :n] * fade_in
            result = torch.cat([result[:, :-n], overlap, seg[:, n:]], dim=1)
        return result

    def generate_long(self, prompt, total_duration, segment_duration, context_seconds, model, seed, temperature, cfg_coef, top_k):
        """Generate roughly ``total_duration`` seconds of music and write a WAV.

        Args:
            prompt: Text description used for every segment.
            total_duration: Target length of the final track in seconds.
            segment_duration: Maximum length of one model call in seconds.
            context_seconds: Audio from the end of the previous segment that
                prompts the next one; also the crossfade length.
            model: Pretrained MusicGen checkpoint name.
            seed: Seed passed to ``torch.manual_seed``.
            temperature, cfg_coef, top_k: Sampling parameters forwarded to
                ``set_generation_params``.

        Returns:
            A 1-tuple with the path of the written ``.wav`` file.

        Raises:
            ValueError: see ``_plan_durations``.
        """
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

        # Validate and plan before the (expensive) model load.
        durations = self._plan_durations(total_duration, segment_duration, context_seconds)

        torch.manual_seed(seed)
        print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {segment_duration}s Segmenten")
        mg = MusicGen.get_pretrained(model)
        sample_rate = mg.sample_rate
        context_samples = int(context_seconds * sample_rate)

        # First segment: text-only generation.
        mg.set_generation_params(duration=durations[0], temperature=temperature, cfg_coef=cfg_coef, top_k=top_k)
        print(f"[MusicGenLong] Segment 1 / {len(durations)}")
        first = mg.generate([prompt])
        segments = [first[0].cpu()]

        # Remaining segments: continue from the tail of the previous one.
        for seg_num, next_dur in enumerate(durations[1:], start=2):
            mg.set_generation_params(duration=next_dur, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k)
            context = segments[-1][:, -context_samples:]
            print(f"[MusicGenLong] Segment {seg_num} ({next_dur}s)")
            cont = mg.generate_continuation(context, sample_rate, [prompt])
            segments.append(cont[0].cpu())

        # Blend the shared context region of neighbouring segments.
        result = self._crossfade_concat(segments, context_samples)

        output_dir = "/app/ComfyUI/output/audio"
        os.makedirs(output_dir, exist_ok=True)
        filename = f"musicgen_long_{int(time.time())}"
        # audio_write receives the path without extension; ".wav" is appended
        # for the log line and the returned path.
        out_path = os.path.join(output_dir, filename)
        audio_write(out_path, result, sample_rate, strategy="loudness")
        print(f"[MusicGenLong] Fertig: {out_path}.wav ({result.shape[1]/sample_rate:.1f}s)")
        return (out_path + ".wav",)
 class AudioGenNode:
    @classmethod
    def INPUT_TYPES(cls):
@ -145,15 +220,17 @@ class AudioPreviewNode:
 NODE_CLASS_MAPPINGS = {
-    "MusicGen":       MusicGenNode,
+    "MusicGen":        MusicGenNode,
-    "AudioGen":       AudioGenNode,
+    "MusicGenLong":    MusicGenLongNode,
-    "AudioUpsample":  AudioUpsampleNode,
+    "AudioGen":        AudioGenNode,
-    "AudioPreview":   AudioPreviewNode,
+    "AudioUpsample":   AudioUpsampleNode,
    "AudioPreview":    AudioPreviewNode,
 }
 NODE_DISPLAY_NAME_MAPPINGS = {
-    "MusicGen":       "MusicGen (Musik)",
+    "MusicGen":        "MusicGen (Musik)",
-    "AudioGen":       "AudioGen (Sound Effects)",
+    "MusicGenLong":    "MusicGen Long (Lange Musik)",
-    "AudioUpsample":  "Audio Upsample (Qualität)",
+    "AudioGen":        "AudioGen (Sound Effects)",
-    "AudioPreview":   "Audio Preview",
+    "AudioUpsample":   "Audio Upsample (Qualität)",
    "AudioPreview":    "Audio Preview",
 }
--- a/comfyui-audio/workflow_music.json
+++ b/comfyui-audio/workflow_music.json
@ -1,14 +1,71 @@
 {
-  "last_node_id": 3,
+  "last_node_id": 6,
-  "last_link_id": 2,
+  "last_link_id": 4,
  "nodes": [
    {
      "id": 4,
      "type": "MusicGenLong",
      "pos": [50, 100],
      "size": {"0": 480, "1": 420},
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
        {"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "MusicGenLong"},
      "widgets_values": [
        "dark fantasy orchestral music, deep cello strings, french horn melody, slow war drums, mysterious medieval atmosphere, epic cinematic, consistent dark tone",
        90.0,
        30.0,
        5.0,
        "facebook/musicgen-stereo-medium",
        0,
        0.85,
        5.0,
        150
      ]
    },
    {
      "id": 5,
      "type": "AudioUpsample",
      "pos": [580, 100],
      "size": {"0": 300, "1": 120},
      "flags": {},
      "order": 1,
      "mode": 0,
      "inputs": [
        {"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
      ],
      "outputs": [
        {"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "AudioUpsample"},
      "widgets_values": [48000]
    },
    {
      "id": 6,
      "type": "AudioPreview",
      "pos": [930, 100],
      "size": {"0": 250, "1": 80},
      "flags": {},
      "order": 2,
      "mode": 0,
      "inputs": [
        {"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
      ],
      "outputs": [],
      "properties": {"Node name for S&R": "AudioPreview"},
      "widgets_values": []
    },
    {
      "id": 1,
      "type": "MusicGen",
-      "pos": [50, 100],
+      "pos": [50, 620],
      "size": {"0": 480, "1": 370},
      "flags": {},
-      "order": 0,
+      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
@ -16,14 +73,14 @@
      ],
      "properties": {"Node name for S&R": "MusicGen"},
      "widgets_values": [
-        "epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
+        "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious medieval atmosphere, epic cinematic",
        30.0,
        "facebook/musicgen-stereo-medium",
        0,
-        0.9,
+        0.85,
        5.0,
-        250,
+        150,
-        18.0
+        8.0
      ]
    },
    {
@ -61,19 +118,25 @@
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO_PATH"],
-    [2, 2, 0, 3, 0, "AUDIO_PATH"]
+    [2, 2, 0, 3, 0, "AUDIO_PATH"],
    [3, 4, 0, 5, 0, "AUDIO_PATH"],
    [4, 5, 0, 6, 0, "AUDIO_PATH"]
  ],
  "groups": [
    {
-      "title": "Musik Pipeline: MusicGen → Upsample 48kHz → Preview",
+      "title": "Lange Musik (MusicGenLong) → empfohlen für 60-120s",
-      "bounding": [30, 65, 1180, 180],
+      "bounding": [30, 65, 1180, 500],
      "color": "#3d2a5a",
      "font_size": 14
    },
    {
      "title": "Kurze Musik (MusicGen) → für 10-30s Clips",
      "bounding": [30, 585, 1180, 460],
      "color": "#2a2a5a",
      "font_size": 14
    }
  ],
  "config": {},
-  "extra": {
+  "extra": {},
    "notes": "Tipps:\n- temperature 0.8-0.9 = konsistenter Sound\n- cfg_coef 5.0 = mehr am Prompt orientiert\n- extend_stride 18 = keine Lücken bei langen Tracks\n- stereo-large für beste Qualität (6GB VRAM)"
  },
  "version": 0.4
 }