ComfyUI: MusicGenLong Node fuer nahtlose lange Musik

- MusicGenLong: generiert beliebig lange Musik via generate_continuation
- Segmente mit Crossfade zusammengefuegt, kein Qualitaetsverlust
- workflow_music.json: MusicGenLong oben (60-120s), MusicGen unten (10-30s)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 15:30:03 +01:00 · 2026-03-21 15:30:03 +01:00 · ee9636684d
commit ee9636684d
parent a3c870127a
2 changed files with 162 additions and 22 deletions
--- a/comfyui-audio/comfyui_audiocraft/nodes.py
+++ b/comfyui-audio/comfyui_audiocraft/nodes.py
@ -54,6 +54,81 @@ class MusicGenNode:
        return (out_path + ".wav",)


+class MusicGenLongNode:
+    """ComfyUI node that generates long music as a chain of MusicGen segments.
+
+    The first segment is generated from the text prompt alone. Every further
+    segment is produced with ``generate_continuation``, primed with the last
+    ``context_seconds`` of the previous segment, and the pieces are joined
+    with a linear crossfade over that shared context.
+    """
+
+    # A remaining tail shorter than this (seconds) is not worth another model
+    # call; it also keeps float round-off from scheduling an empty segment.
+    _MIN_NEW_SECONDS = 0.1
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        """Describe the node's input widgets (types, defaults and ranges)."""
+        return {
+            "required": {
+                "prompt":           ("STRING", {"multiline": True, "default": "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious ambient"}),
+                "total_duration":   ("FLOAT",  {"default": 90.0,  "min": 10.0, "max": 300.0, "step": 5.0}),
+                "segment_duration": ("FLOAT",  {"default": 30.0,  "min": 10.0, "max": 30.0,  "step": 5.0}),
+                "context_seconds":  ("FLOAT",  {"default": 5.0,   "min": 1.0,  "max": 10.0,  "step": 0.5}),
+                "model":            (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
+                "seed":             ("INT",    {"default": 0,     "min": 0,    "max": 2**32 - 1}),
+                "temperature":      ("FLOAT",  {"default": 0.85,  "min": 0.1,  "max": 2.0,   "step": 0.05}),
+                "cfg_coef":         ("FLOAT",  {"default": 5.0,   "min": 1.0,  "max": 10.0,  "step": 0.5}),
+                "top_k":            ("INT",    {"default": 150,   "min": 1,    "max": 1000}),
+            }
+        }
+
+    RETURN_TYPES = ("AUDIO_PATH",)
+    RETURN_NAMES = ("audio_file",)
+    FUNCTION = "generate_long"
+    CATEGORY = "AudioCraft"
+    OUTPUT_NODE = True
+
+    @classmethod
+    def _plan_segments(cls, total_duration, segment_duration, context_seconds):
+        """Return the generation duration (seconds) of every segment.
+
+        The first entry is a plain generation. Each later entry is a
+        continuation whose duration includes the ``context_seconds`` that are
+        fed back in as prompt and later consumed by the crossfade, so it
+        contributes only ``duration - context_seconds`` of new audio.
+
+        Raises:
+            ValueError: if a continuation is required but the context is not
+                shorter than a segment (no new audio could ever be produced).
+        """
+        # Never generate more than requested when one segment already suffices.
+        durations = [min(segment_duration, total_duration)]
+        covered = durations[0]
+
+        if total_duration - covered > cls._MIN_NEW_SECONDS and context_seconds >= segment_duration:
+            raise ValueError(
+                f"context_seconds ({context_seconds}) must be smaller than "
+                f"segment_duration ({segment_duration})"
+            )
+
+        while total_duration - covered > cls._MIN_NEW_SECONDS:
+            remaining = total_duration - covered
+            # Ask for the missing audio plus the context that gets re-fed.
+            duration = min(segment_duration, remaining + context_seconds)
+            durations.append(duration)
+            covered += duration - context_seconds
+        return durations
+
+    @staticmethod
+    def _crossfade_concat(segments, crossfade_samples):
+        """Join ``segments`` along dim 1 with a linear crossfade at every seam.
+
+        The overlap is clamped to the shorter of the two neighbours so a short
+        segment cannot cause a shape mismatch.
+        """
+        result = segments[0]
+        for seg in segments[1:]:
+            overlap_len = min(crossfade_samples, result.shape[1], seg.shape[1])
+            if overlap_len <= 0:
+                # Nothing to blend; note that ``x[:, :-0]`` would be empty.
+                result = torch.cat([result, seg], dim=1)
+                continue
+            fade_in = torch.linspace(0.0, 1.0, overlap_len, dtype=result.dtype).unsqueeze(0)
+            overlap = result[:, -overlap_len:] * (1.0 - fade_in) + seg[:, :overlap_len] * fade_in
+            result = torch.cat([result[:, :-overlap_len], overlap, seg[:, overlap_len:]], dim=1)
+        return result
+
+    def generate_long(self, prompt, total_duration, segment_duration, context_seconds, model, seed, temperature, cfg_coef, top_k):
+        """Generate ``total_duration`` seconds of music and write it as WAV.
+
+        Args:
+            prompt: Text description used for every segment.
+            total_duration: Requested length of the final track in seconds.
+            segment_duration: Maximum length of one model call in seconds.
+            context_seconds: Tail of the previous segment (seconds) used as
+                audio prompt for the next one and as crossfade length.
+            model: Name passed to ``MusicGen.get_pretrained``.
+            seed: Seed passed to ``torch.manual_seed``.
+            temperature, cfg_coef, top_k: Sampling parameters forwarded to
+                ``set_generation_params``.
+
+        Returns:
+            One-element tuple with the path of the written ``.wav`` file.
+
+        Raises:
+            ValueError: if ``context_seconds`` is not smaller than
+                ``segment_duration`` while more than one segment is needed.
+        """
+        from audiocraft.models import MusicGen
+        from audiocraft.data.audio import audio_write
+
+        # Plan first so invalid parameters fail before the model is loaded.
+        durations = self._plan_segments(total_duration, segment_duration, context_seconds)
+
+        torch.manual_seed(seed)
+        print(f"[MusicGenLong] Model: {model}, Ziel: {total_duration}s in {segment_duration}s Segmenten")
+
+        mg = MusicGen.get_pretrained(model)
+        sample_rate = mg.sample_rate
+        context_samples = int(context_seconds * sample_rate)
+
+        segments = []
+        for seg_num, duration in enumerate(durations, start=1):
+            mg.set_generation_params(duration=duration, temperature=temperature, cfg_coef=cfg_coef, top_k=top_k)
+            if not segments:
+                # First segment: text prompt only.
+                print(f"[MusicGenLong] Segment 1 / {len(durations)}")
+                audio = mg.generate([prompt])
+            else:
+                # Use the last context_seconds of the previous segment as audio prompt.
+                # NOTE(review): this relies on generate_continuation returning the
+                # prompt audio followed by the new audio, with `duration` covering
+                # both - confirm against the installed audiocraft version.
+                context = segments[-1][:, -context_samples:]
+                print(f"[MusicGenLong] Segment {seg_num} ({duration}s)")
+                audio = mg.generate_continuation(context, sample_rate, [prompt])
+            segments.append(audio[0].cpu())
+
+        # Crossfade over the shared context and concatenate.
+        result = self._crossfade_concat(segments, context_samples)
+        # Never return more audio than was requested.
+        result = result[:, :int(total_duration * sample_rate)]
+
+        output_dir = "/app/ComfyUI/output/audio"
+        os.makedirs(output_dir, exist_ok=True)
+        filename = f"musicgen_long_{int(time.time())}"
+        out_path = os.path.join(output_dir, filename)
+        audio_write(out_path, result, sample_rate, strategy="loudness")
+        print(f"[MusicGenLong] Fertig: {out_path}.wav ({result.shape[1]/sample_rate:.1f}s)")
+        return (out_path + ".wav",)
+
+
 class AudioGenNode:
    @classmethod
    def INPUT_TYPES(cls):
@ -146,6 +221,7 @@ class AudioPreviewNode:

 NODE_CLASS_MAPPINGS = {
    "MusicGen":        MusicGenNode,
+    "MusicGenLong":    MusicGenLongNode,
    "AudioGen":        AudioGenNode,
    "AudioUpsample":   AudioUpsampleNode,
    "AudioPreview":    AudioPreviewNode,
@ -153,6 +229,7 @@ NODE_CLASS_MAPPINGS = {

 NODE_DISPLAY_NAME_MAPPINGS = {
    "MusicGen":        "MusicGen (Musik)",
+    "MusicGenLong":    "MusicGen Long (Lange Musik)",
    "AudioGen":        "AudioGen (Sound Effects)",
    "AudioUpsample":   "Audio Upsample (Qualität)",
    "AudioPreview":    "Audio Preview",
--- a/comfyui-audio/workflow_music.json
+++ b/comfyui-audio/workflow_music.json
@ -1,14 +1,71 @@
 {
-  "last_node_id": 3,
-  "last_link_id": 2,
+  "last_node_id": 6,
+  "last_link_id": 4,
  "nodes": [
+    {
+      "id": 4,
+      "type": "MusicGenLong",
+      "pos": [50, 100],
+      "size": {"0": 480, "1": 420},
+      "flags": {},
+      "order": 0,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
+      ],
+      "properties": {"Node name for S&R": "MusicGenLong"},
+      "widgets_values": [
+        "dark fantasy orchestral music, deep cello strings, french horn melody, slow war drums, mysterious medieval atmosphere, epic cinematic, consistent dark tone",
+        90.0,
+        30.0,
+        5.0,
+        "facebook/musicgen-stereo-medium",
+        0,
+        0.85,
+        5.0,
+        150
+      ]
+    },
+    {
+      "id": 5,
+      "type": "AudioUpsample",
+      "pos": [580, 100],
+      "size": {"0": 300, "1": 120},
+      "flags": {},
+      "order": 1,
+      "mode": 0,
+      "inputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
+      ],
+      "outputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
+      ],
+      "properties": {"Node name for S&R": "AudioUpsample"},
+      "widgets_values": [48000]
+    },
+    {
+      "id": 6,
+      "type": "AudioPreview",
+      "pos": [930, 100],
+      "size": {"0": 250, "1": 80},
+      "flags": {},
+      "order": 2,
+      "mode": 0,
+      "inputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
+      ],
+      "outputs": [],
+      "properties": {"Node name for S&R": "AudioPreview"},
+      "widgets_values": []
+    },
    {
      "id": 1,
      "type": "MusicGen",
-      "pos": [50, 100],
+      "pos": [50, 620],
      "size": {"0": 480, "1": 370},
      "flags": {},
-      "order": 0,
+      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
@ -16,14 +73,14 @@
      ],
      "properties": {"Node name for S&R": "MusicGen"},
      "widgets_values": [
-        "epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
+        "dark fantasy orchestral music, deep cello, french horn, slow war drums, mysterious medieval atmosphere, epic cinematic",
        30.0,
        "facebook/musicgen-stereo-medium",
        0,
-        0.9,
+        0.85,
        5.0,
-        250,
-        18.0
+        150,
+        8.0
      ]
    },
    {
@ -61,19 +118,25 @@
  ],
  "links": [
    [1, 1, 0, 2, 0, "AUDIO_PATH"],
-    [2, 2, 0, 3, 0, "AUDIO_PATH"]
+    [2, 2, 0, 3, 0, "AUDIO_PATH"],
+    [3, 4, 0, 5, 0, "AUDIO_PATH"],
+    [4, 5, 0, 6, 0, "AUDIO_PATH"]
  ],
  "groups": [
    {
-      "title": "Musik Pipeline: MusicGen → Upsample 48kHz → Preview",
-      "bounding": [30, 65, 1180, 180],
+      "title": "Lange Musik (MusicGenLong) → empfohlen für 60-120s",
+      "bounding": [30, 65, 1180, 500],
      "color": "#3d2a5a",
      "font_size": 14
+    },
+    {
+      "title": "Kurze Musik (MusicGen) → für 10-30s Clips",
+      "bounding": [30, 585, 1180, 460],
+      "color": "#2a2a5a",
+      "font_size": 14
    }
  ],
  "config": {},
-  "extra": {
-    "notes": "Tipps:\n- temperature 0.8-0.9 = konsistenter Sound\n- cfg_coef 5.0 = mehr am Prompt orientiert\n- extend_stride 18 = keine Lücken bei langen Tracks\n- stereo-large für beste Qualität (6GB VRAM)"
-  },
+  "extra": {},
  "version": 0.4
 }