ComfyUI AudioCraft: verbesserte Nodes + Workflow Template

- nodes.py: temperature, cfg_coef, top_k, extend_stride Parameter
- AudioUpsample Node: 16kHz/32kHz -> 48kHz für bessere Qualität
- AudioPreview Node: Vorschau direkt in ComfyUI UI
- workflow_game_audio.json: Template mit Musik + SFX Pipeline
- Standardmodell: musicgen-stereo-medium (besserer Sound)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 14:22:45 +01:00 · 2026-03-21 14:22:45 +01:00 · 4feff43758
commit 4feff43758
parent b28b4ddc85
2 changed files with 164 additions and 62 deletions
--- a/comfyui-audio/comfyui_audiocraft/nodes.py
+++ b/comfyui-audio/comfyui_audiocraft/nodes.py
@ -9,10 +9,14 @@ class MusicGenNode:
    def INPUT_TYPES(cls):
        return {
            "required": {
-                "prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
+                "prompt":        ("STRING",  {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
-                "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 60.0, "step": 1.0}),
+                "duration":      ("FLOAT",   {"default": 10.0, "min": 1.0,  "max": 120.0, "step": 1.0}),
-                "model": (["facebook/musicgen-small", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-stereo-medium"],),
+                "model":         (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
-                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
+                "seed":          ("INT",     {"default": 0,    "min": 0,    "max": 2**32 - 1}),
                "temperature":   ("FLOAT",   {"default": 1.0,  "min": 0.1,  "max": 2.0,   "step": 0.05}),
                "cfg_coef":      ("FLOAT",   {"default": 3.0,  "min": 1.0,  "max": 10.0,  "step": 0.5}),
                "top_k":         ("INT",     {"default": 250,  "min": 1,    "max": 1000}),
                "extend_stride": ("FLOAT",   {"default": 18.0, "min": 1.0,  "max": 30.0,  "step": 1.0}),
            }
        }
@ -22,7 +26,7 @@ class MusicGenNode:
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True
-    def generate(self, prompt, duration, model, seed):
+    def generate(self, prompt, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write
@ -30,7 +34,13 @@ class MusicGenNode:
        print(f"[MusicGen] Loading model: {model}")
        mg = MusicGen.get_pretrained(model)
-        mg.set_generation_params(duration=duration)
+        mg.set_generation_params(
            duration=duration,
            temperature=temperature,
            cfg_coef=cfg_coef,
            top_k=top_k,
            extend_stride=extend_stride,
        )
        print(f"[MusicGen] Generating: {prompt}")
        wav = mg.generate([prompt])
@ -49,9 +59,12 @@ class AudioGenNode:
    def INPUT_TYPES(cls):
        return {
            "required": {
-                "prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
+                "prompt":      ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
-                "duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
+                "duration":    ("FLOAT",  {"default": 3.0,  "min": 0.5,  "max": 30.0, "step": 0.5}),
-                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
+                "seed":        ("INT",    {"default": 0,    "min": 0,    "max": 2**32 - 1}),
                "temperature": ("FLOAT",  {"default": 1.0,  "min": 0.1,  "max": 2.0,  "step": 0.05}),
                "cfg_coef":    ("FLOAT",  {"default": 3.0,  "min": 1.0,  "max": 10.0, "step": 0.5}),
                "top_k":       ("INT",    {"default": 250,  "min": 1,    "max": 1000}),
            }
        }
@ -61,7 +74,7 @@ class AudioGenNode:
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True
-    def generate(self, prompt, duration, seed):
+    def generate(self, prompt, duration, seed, temperature, cfg_coef, top_k):
        from audiocraft.models import AudioGen
        from audiocraft.data.audio import audio_write
@ -69,7 +82,12 @@ class AudioGenNode:
        print(f"[AudioGen] Loading model...")
        ag = AudioGen.get_pretrained("facebook/audiogen-medium")
-        ag.set_generation_params(duration=duration)
+        ag.set_generation_params(
            duration=duration,
            temperature=temperature,
            cfg_coef=cfg_coef,
            top_k=top_k,
        )
        print(f"[AudioGen] Generating: {prompt}")
        wav = ag.generate([prompt])
@ -83,12 +101,59 @@ class AudioGenNode:
        return (out_path + ".wav",)
 class AudioUpsampleNode:
    """Upsampling via torchaudio - verbessert Qualität von 16kHz/32kHz auf 48kHz"""
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "audio_file":    ("AUDIO_PATH",),
                "target_sr":     ([48000, 44100], {"default": 48000}),
            }
        }
    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "upsample"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True
    def upsample(self, audio_file, target_sr):
        """Resample an audio file to ``target_sr`` and save it beside the input.

        The file is always written, even when the source already has the
        requested sample rate (in that case the samples are saved unchanged).

        Args:
            audio_file: Path of the audio file to load.
            target_sr: Sample rate in Hz to resample to.

        Returns:
            A 1-tuple holding the path of the written file: the input path
            with ``_<target_sr>hz`` inserted before its extension.
        """
        # Imported lazily, matching the function-scope imports used by the
        # generator nodes in this file.
        import os

        import torchaudio

        wav, sr = torchaudio.load(audio_file)
        if sr != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            wav = resampler(wav)
        # Split off only the final extension. A plain str.replace(".wav", ...)
        # rewrites every ".wav" occurrence in the path and, for inputs that do
        # not end in lowercase ".wav", yields the input path itself, so the
        # save below would overwrite the source file.
        root, ext = os.path.splitext(audio_file)
        out_path = f"{root}_{target_sr}hz{ext or '.wav'}"
        torchaudio.save(out_path, wav, target_sr)
        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
        return (out_path,)
 class AudioPreviewNode:
    """Zeigt Audiodatei in der ComfyUI-UI an"""
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {"audio_file": ("AUDIO_PATH",)}}
    RETURN_TYPES = ()
    FUNCTION = "preview"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True
    def preview(self, audio_file):
        """Build the UI payload that references the given audio file.

        Args:
            audio_file: Path of the audio file; only its base name is used.

        Returns:
            A dict of the form ``{"ui": {"audio": [entry]}}`` where ``entry``
            carries the file's base name, the subfolder ``"audio"`` and the
            type ``"output"``.
        """
        # NOTE(review): the subfolder is fixed to "audio" regardless of the
        # directory part of audio_file - confirm the producing nodes write there.
        entry = {
            "filename": os.path.basename(audio_file),
            "subfolder": "audio",
            "type": "output",
        }
        return {"ui": {"audio": [entry]}}
 NODE_CLASS_MAPPINGS = {
-    "MusicGen": MusicGenNode,
+    "MusicGen":       MusicGenNode,
-    "AudioGen": AudioGenNode,
+    "AudioGen":       AudioGenNode,
    "AudioUpsample":  AudioUpsampleNode,
    "AudioPreview":   AudioPreviewNode,
 }
 NODE_DISPLAY_NAME_MAPPINGS = {
-    "MusicGen": "MusicGen (Musik)",
+    "MusicGen":       "MusicGen (Musik)",
-    "AudioGen": "AudioGen (Sound Effects)",
+    "AudioGen":       "AudioGen (Sound Effects)",
    "AudioUpsample":  "Audio Upsample (Qualität)",
    "AudioPreview":   "Audio Preview",
 }
--- a/comfyui-audio/workflow_game_audio.json
+++ b/comfyui-audio/workflow_game_audio.json
@ -1,98 +1,135 @@
 {
-  "last_node_id": 4,
+  "last_node_id": 6,
-  "last_link_id": 0,
+  "last_link_id": 4,
  "nodes": [
    {
      "id": 1,
      "type": "MusicGen",
-      "pos": [100, 100],
+      "pos": [50, 100],
-      "size": {"0": 400, "1": 200},
+      "size": {"0": 420, "1": 370},
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
-        {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [1], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "MusicGen"},
      "widgets_values": [
-        "dark dungeon ambience, slow, mysterious, atmospheric, medieval",
+        "epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
-        10.0,
+        30.0,
-        "facebook/musicgen-small",
+        "facebook/musicgen-stereo-medium",
-        0
+        0,
        0.9,
        5.0,
        250,
        18.0
      ]
    },
    {
      "id": 2,
-      "type": "MusicGen",
+      "type": "AudioUpsample",
-      "pos": [100, 380],
+      "pos": [520, 100],
-      "size": {"0": 400, "1": 200},
+      "size": {"0": 300, "1": 120},
      "flags": {},
      "order": 1,
      "mode": 0,
-      "inputs": [],
+      "inputs": [
-      "outputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
        {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
      ],
-      "properties": {"Node name for S&R": "MusicGen"},
+      "outputs": [
-      "widgets_values": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [2], "slot_index": 0}
-        "epic battle music, drums, dark fantasy, intense, orchestral",
+      ],
-        15.0,
+      "properties": {"Node name for S&R": "AudioUpsample"},
-        "facebook/musicgen-small",
+      "widgets_values": [48000]
        42
      ]
    },
    {
      "id": 3,
-      "type": "AudioGen",
+      "type": "AudioPreview",
-      "pos": [100, 660],
+      "pos": [870, 100],
-      "size": {"0": 400, "1": 160},
+      "size": {"0": 250, "1": 80},
      "flags": {},
      "order": 2,
      "mode": 0,
-      "inputs": [],
+      "inputs": [
-      "outputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "link": 2}
        {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
      ],
-      "properties": {"Node name for S&R": "AudioGen"},
+      "outputs": [],
-      "widgets_values": [
+      "properties": {"Node name for S&R": "AudioPreview"},
-        "sword slash metal clang sound effect",
+      "widgets_values": []
        3.0,
        0
      ]
    },
    {
      "id": 4,
      "type": "AudioGen",
-      "pos": [100, 880],
+      "pos": [50, 560],
-      "size": {"0": 400, "1": 160},
+      "size": {"0": 420, "1": 330},
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
-        {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "AudioGen"},
      "widgets_values": [
-        "footsteps on stone dungeon floor",
+        "fast sword swing whoosh attack sound effect combat",
-        3.0,
+        2.0,
-        1
+        0,
        1.0,
        4.0,
        250
      ]
    },
    {
      "id": 5,
      "type": "AudioUpsample",
      "pos": [520, 560],
      "size": {"0": 300, "1": 120},
      "flags": {},
      "order": 4,
      "mode": 0,
      "inputs": [
        {"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
      ],
      "outputs": [
        {"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "AudioUpsample"},
      "widgets_values": [48000]
    },
    {
      "id": 6,
      "type": "AudioPreview",
      "pos": [870, 560],
      "size": {"0": 250, "1": 80},
      "flags": {},
      "order": 5,
      "mode": 0,
      "inputs": [
        {"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
      ],
      "outputs": [],
      "properties": {"Node name for S&R": "AudioPreview"},
      "widgets_values": []
    }
  ],
-  "links": [],
+  "links": [
    [1, 1, 0, 2, 0, "AUDIO_PATH"],
    [2, 2, 0, 3, 0, "AUDIO_PATH"],
    [3, 4, 0, 5, 0, "AUDIO_PATH"],
    [4, 5, 0, 6, 0, "AUDIO_PATH"]
  ],
  "groups": [
    {
-      "title": "Musik",
+      "title": "Musik (MusicGen → Upsample → Preview)",
-      "bounding": [80, 75, 440, 480],
+      "bounding": [30, 65, 1120, 420],
-      "color": "#8B0000",
+      "color": "#3d2a5a",
      "font_size": 14
    },
    {
-      "title": "Sound Effects",
+      "title": "Sound Effects (AudioGen → Upsample → Preview)",
-      "bounding": [80, 635, 440, 430],
+      "bounding": [30, 525, 1120, 390],
-      "color": "#1a3a4a",
+      "color": "#1a3a2a",
      "font_size": 14
    }
  ],