ComfyUI AudioCraft: verbesserte Nodes + Workflow Template

- nodes.py: temperature, cfg_coef, top_k, extend_stride Parameter
- AudioUpsample Node: 16kHz/32kHz -> 48kHz fuer bessere Qualitaet
- AudioPreview Node: Vorschau direkt in ComfyUI UI
- workflow_game_audio.json: Template mit Musik + SFX Pipeline
- Standardmodell: musicgen-stereo-medium (besserer Sound)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 14:22:45 +01:00 · 2026-03-21 14:22:45 +01:00 · 4feff43758
commit 4feff43758
parent b28b4ddc85
2 changed files with 164 additions and 62 deletions
--- a/comfyui-audio/comfyui_audiocraft/nodes.py
+++ b/comfyui-audio/comfyui_audiocraft/nodes.py
@ -9,10 +9,14 @@ class MusicGenNode:
    def INPUT_TYPES(cls):
        return {
            "required": {
-                "prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
-                "duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 60.0, "step": 1.0}),
-                "model": (["facebook/musicgen-small", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-stereo-medium"],),
-                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
+                "prompt":        ("STRING",  {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
+                "duration":      ("FLOAT",   {"default": 10.0, "min": 1.0,  "max": 120.0, "step": 1.0}),
+                "model":         (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
+                "seed":          ("INT",     {"default": 0,    "min": 0,    "max": 2**32 - 1}),
+                "temperature":   ("FLOAT",   {"default": 1.0,  "min": 0.1,  "max": 2.0,   "step": 0.05}),
+                "cfg_coef":      ("FLOAT",   {"default": 3.0,  "min": 1.0,  "max": 10.0,  "step": 0.5}),
+                "top_k":         ("INT",     {"default": 250,  "min": 1,    "max": 1000}),
+                "extend_stride": ("FLOAT",   {"default": 18.0, "min": 1.0,  "max": 30.0,  "step": 1.0}),
            }
        }

@ -22,7 +26,7 @@ class MusicGenNode:
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

-    def generate(self, prompt, duration, model, seed):
+    def generate(self, prompt, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
        from audiocraft.models import MusicGen
        from audiocraft.data.audio import audio_write

@ -30,7 +34,13 @@ class MusicGenNode:

        print(f"[MusicGen] Loading model: {model}")
        mg = MusicGen.get_pretrained(model)
-        mg.set_generation_params(duration=duration)
+        mg.set_generation_params(
+            duration=duration,
+            temperature=temperature,
+            cfg_coef=cfg_coef,
+            top_k=top_k,
+            extend_stride=extend_stride,
+        )

        print(f"[MusicGen] Generating: {prompt}")
        wav = mg.generate([prompt])
@ -49,9 +59,12 @@ class AudioGenNode:
    def INPUT_TYPES(cls):
        return {
            "required": {
-                "prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
-                "duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
-                "seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
+                "prompt":      ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
+                "duration":    ("FLOAT",  {"default": 3.0,  "min": 0.5,  "max": 30.0, "step": 0.5}),
+                "seed":        ("INT",    {"default": 0,    "min": 0,    "max": 2**32 - 1}),
+                "temperature": ("FLOAT",  {"default": 1.0,  "min": 0.1,  "max": 2.0,  "step": 0.05}),
+                "cfg_coef":    ("FLOAT",  {"default": 3.0,  "min": 1.0,  "max": 10.0, "step": 0.5}),
+                "top_k":       ("INT",    {"default": 250,  "min": 1,    "max": 1000}),
            }
        }

@ -61,7 +74,7 @@ class AudioGenNode:
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

-    def generate(self, prompt, duration, seed):
+    def generate(self, prompt, duration, seed, temperature, cfg_coef, top_k):
        from audiocraft.models import AudioGen
        from audiocraft.data.audio import audio_write

@ -69,7 +82,12 @@ class AudioGenNode:

        print(f"[AudioGen] Loading model...")
        ag = AudioGen.get_pretrained("facebook/audiogen-medium")
-        ag.set_generation_params(duration=duration)
+        ag.set_generation_params(
+            duration=duration,
+            temperature=temperature,
+            cfg_coef=cfg_coef,
+            top_k=top_k,
+        )

        print(f"[AudioGen] Generating: {prompt}")
        wav = ag.generate([prompt])
@ -83,12 +101,59 @@ class AudioGenNode:
        return (out_path + ".wav",)


+class AudioUpsampleNode:
+    """Upsampling via torchaudio - verbessert Qualität von 16kHz/32kHz auf 48kHz"""
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "audio_file":    ("AUDIO_PATH",),
+                "target_sr":     ([48000, 44100], {"default": 48000}),
+            }
+        }
+
+    RETURN_TYPES = ("AUDIO_PATH",)
+    RETURN_NAMES = ("audio_file",)
+    FUNCTION = "upsample"
+    CATEGORY = "AudioCraft"
+    OUTPUT_NODE = True
+
+    def upsample(self, audio_file, target_sr):
+        wav, sr = torchaudio.load(audio_file)
+        if sr != target_sr:
+            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
+            wav = resampler(wav)
+        out_path = audio_file.replace(".wav", f"_{target_sr}hz.wav")
+        torchaudio.save(out_path, wav, target_sr)
+        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
+        return (out_path,)
+
+
+class AudioPreviewNode:
+    """Zeigt Audiodatei in der ComfyUI-UI an"""
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {"required": {"audio_file": ("AUDIO_PATH",)}}
+
+    RETURN_TYPES = ()
+    FUNCTION = "preview"
+    CATEGORY = "AudioCraft"
+    OUTPUT_NODE = True
+
+    def preview(self, audio_file):
+        return {"ui": {"audio": [{"filename": os.path.basename(audio_file), "subfolder": "audio", "type": "output"}]}}
+
+
 NODE_CLASS_MAPPINGS = {
-    "MusicGen": MusicGenNode,
-    "AudioGen": AudioGenNode,
+    "MusicGen":       MusicGenNode,
+    "AudioGen":       AudioGenNode,
+    "AudioUpsample":  AudioUpsampleNode,
+    "AudioPreview":   AudioPreviewNode,
 }

 NODE_DISPLAY_NAME_MAPPINGS = {
-    "MusicGen": "MusicGen (Musik)",
-    "AudioGen": "AudioGen (Sound Effects)",
+    "MusicGen":       "MusicGen (Musik)",
+    "AudioGen":       "AudioGen (Sound Effects)",
+    "AudioUpsample":  "Audio Upsample (Qualität)",
+    "AudioPreview":   "Audio Preview",
 }
--- a/comfyui-audio/workflow_game_audio.json
+++ b/comfyui-audio/workflow_game_audio.json
@ -1,98 +1,135 @@
 {
-  "last_node_id": 4,
-  "last_link_id": 0,
+  "last_node_id": 6,
+  "last_link_id": 4,
  "nodes": [
    {
      "id": 1,
      "type": "MusicGen",
-      "pos": [100, 100],
-      "size": {"0": 400, "1": 200},
+      "pos": [50, 100],
+      "size": {"0": 420, "1": 370},
      "flags": {},
      "order": 0,
      "mode": 0,
      "inputs": [],
      "outputs": [
-        {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [1], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "MusicGen"},
      "widgets_values": [
-        "dark dungeon ambience, slow, mysterious, atmospheric, medieval",
-        10.0,
-        "facebook/musicgen-small",
-        0
+        "epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
+        30.0,
+        "facebook/musicgen-stereo-medium",
+        0,
+        0.9,
+        5.0,
+        250,
+        18.0
      ]
    },
    {
      "id": 2,
-      "type": "MusicGen",
-      "pos": [100, 380],
-      "size": {"0": 400, "1": 200},
+      "type": "AudioUpsample",
+      "pos": [520, 100],
+      "size": {"0": 300, "1": 120},
      "flags": {},
      "order": 1,
      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
+      "inputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
      ],
-      "properties": {"Node name for S&R": "MusicGen"},
-      "widgets_values": [
-        "epic battle music, drums, dark fantasy, intense, orchestral",
-        15.0,
-        "facebook/musicgen-small",
-        42
-      ]
+      "outputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [2], "slot_index": 0}
+      ],
+      "properties": {"Node name for S&R": "AudioUpsample"},
+      "widgets_values": [48000]
    },
    {
      "id": 3,
-      "type": "AudioGen",
-      "pos": [100, 660],
-      "size": {"0": 400, "1": 160},
+      "type": "AudioPreview",
+      "pos": [870, 100],
+      "size": {"0": 250, "1": 80},
      "flags": {},
      "order": 2,
      "mode": 0,
-      "inputs": [],
-      "outputs": [
-        {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
+      "inputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "link": 2}
      ],
-      "properties": {"Node name for S&R": "AudioGen"},
-      "widgets_values": [
-        "sword slash metal clang sound effect",
-        3.0,
-        0
-      ]
+      "outputs": [],
+      "properties": {"Node name for S&R": "AudioPreview"},
+      "widgets_values": []
    },
    {
      "id": 4,
      "type": "AudioGen",
-      "pos": [100, 880],
-      "size": {"0": 400, "1": 160},
+      "pos": [50, 560],
+      "size": {"0": 420, "1": 330},
      "flags": {},
      "order": 3,
      "mode": 0,
      "inputs": [],
      "outputs": [
-        {"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
      ],
      "properties": {"Node name for S&R": "AudioGen"},
      "widgets_values": [
-        "footsteps on stone dungeon floor",
-        3.0,
-        1
+        "fast sword swing whoosh attack sound effect combat",
+        2.0,
+        0,
+        1.0,
+        4.0,
+        250
      ]
+    },
+    {
+      "id": 5,
+      "type": "AudioUpsample",
+      "pos": [520, 560],
+      "size": {"0": 300, "1": 120},
+      "flags": {},
+      "order": 4,
+      "mode": 0,
+      "inputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
+      ],
+      "outputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
+      ],
+      "properties": {"Node name for S&R": "AudioUpsample"},
+      "widgets_values": [48000]
+    },
+    {
+      "id": 6,
+      "type": "AudioPreview",
+      "pos": [870, 560],
+      "size": {"0": 250, "1": 80},
+      "flags": {},
+      "order": 5,
+      "mode": 0,
+      "inputs": [
+        {"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
+      ],
+      "outputs": [],
+      "properties": {"Node name for S&R": "AudioPreview"},
+      "widgets_values": []
    }
  ],
-  "links": [],
+  "links": [
+    [1, 1, 0, 2, 0, "AUDIO_PATH"],
+    [2, 2, 0, 3, 0, "AUDIO_PATH"],
+    [3, 4, 0, 5, 0, "AUDIO_PATH"],
+    [4, 5, 0, 6, 0, "AUDIO_PATH"]
+  ],
  "groups": [
    {
-      "title": "Musik",
-      "bounding": [80, 75, 440, 480],
-      "color": "#8B0000",
+      "title": "Musik (MusicGen → Upsample → Preview)",
+      "bounding": [30, 65, 1120, 420],
+      "color": "#3d2a5a",
      "font_size": 14
    },
    {
-      "title": "Sound Effects",
-      "bounding": [80, 635, 440, 430],
-      "color": "#1a3a4a",
+      "title": "Sound Effects (AudioGen → Upsample → Preview)",
+      "bounding": [30, 525, 1120, 390],
+      "color": "#1a3a2a",
      "font_size": 14
    }
  ],