ComfyUI AudioCraft: verbesserte Nodes + Workflow Template

- nodes.py: temperature, cfg_coef, top_k, extend_stride Parameter
- AudioUpsample Node: 16kHz/32kHz -> 48kHz (oder 44,1kHz) für bessere Qualität
- AudioPreview Node: Vorschau direkt in ComfyUI UI
- workflow_game_audio.json: Template mit Musik + SFX Pipeline
- Standardmodell: musicgen-stereo-medium (besserer Sound)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Andre 2026-03-21 14:22:45 +01:00
parent b28b4ddc85
commit 4feff43758
2 changed files with 164 additions and 62 deletions

View file

@ -9,10 +9,14 @@ class MusicGenNode:
def INPUT_TYPES(cls):
return {
"required": {
"prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
"duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 60.0, "step": 1.0}),
"model": (["facebook/musicgen-small", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-stereo-medium"],),
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
"prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
"duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 120.0, "step": 1.0}),
"model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
"temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
"cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}),
"top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
"extend_stride": ("FLOAT", {"default": 18.0, "min": 1.0, "max": 30.0, "step": 1.0}),
}
}
@ -22,7 +26,7 @@ class MusicGenNode:
CATEGORY = "AudioCraft"
OUTPUT_NODE = True
def generate(self, prompt, duration, model, seed):
def generate(self, prompt, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
@ -30,7 +34,13 @@ class MusicGenNode:
print(f"[MusicGen] Loading model: {model}")
mg = MusicGen.get_pretrained(model)
mg.set_generation_params(duration=duration)
mg.set_generation_params(
duration=duration,
temperature=temperature,
cfg_coef=cfg_coef,
top_k=top_k,
extend_stride=extend_stride,
)
print(f"[MusicGen] Generating: {prompt}")
wav = mg.generate([prompt])
@ -49,9 +59,12 @@ class AudioGenNode:
def INPUT_TYPES(cls):
return {
"required": {
"prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
"duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
"prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
"duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
"temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
"cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}),
"top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
}
}
@ -61,7 +74,7 @@ class AudioGenNode:
CATEGORY = "AudioCraft"
OUTPUT_NODE = True
def generate(self, prompt, duration, seed):
def generate(self, prompt, duration, seed, temperature, cfg_coef, top_k):
from audiocraft.models import AudioGen
from audiocraft.data.audio import audio_write
@ -69,7 +82,12 @@ class AudioGenNode:
print(f"[AudioGen] Loading model...")
ag = AudioGen.get_pretrained("facebook/audiogen-medium")
ag.set_generation_params(duration=duration)
ag.set_generation_params(
duration=duration,
temperature=temperature,
cfg_coef=cfg_coef,
top_k=top_k,
)
print(f"[AudioGen] Generating: {prompt}")
wav = ag.generate([prompt])
@ -83,12 +101,59 @@ class AudioGenNode:
return (out_path + ".wav",)
class AudioUpsampleNode:
    """Upsample an audio file via torchaudio.

    Improves quality by lifting 16 kHz / 32 kHz audio to 48 kHz (or
    44.1 kHz). The result is written to a new file next to the input whose
    name carries a ``_<target_sr>hz`` suffix; the input file itself is never
    overwritten.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: the source path and the target rate."""
        return {
            "required": {
                "audio_file": ("AUDIO_PATH",),
                "target_sr": ([48000, 44100], {"default": 48000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "upsample"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _resampled_path(audio_file, target_sr):
        """Return the output path for *audio_file* resampled to *target_sr*.

        Only the final extension is touched, so a ".wav" appearing in a
        directory name is left alone and any extension (or letter case)
        still yields a path distinct from the input. A missing extension
        falls back to ".wav".
        """
        import os

        root, ext = os.path.splitext(audio_file)
        return f"{root}_{target_sr}hz{ext or '.wav'}"

    def upsample(self, audio_file, target_sr):
        """Resample *audio_file* to *target_sr* and save it as a new file.

        Args:
            audio_file: Path of the audio file to load.
            target_sr: Desired sample rate in Hz.

        Returns:
            A one-element tuple holding the path of the written file.
        """
        wav, sr = torchaudio.load(audio_file)
        # Skip the resampler when the file already has the requested rate;
        # the audio is still written out under the suffixed name.
        if sr != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            wav = resampler(wav)
        out_path = self._resampled_path(audio_file, target_sr)
        torchaudio.save(out_path, wav, target_sr)
        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
        return (out_path,)
class AudioPreviewNode:
    """Show an audio file in the ComfyUI UI."""

    RETURN_TYPES = ()
    FUNCTION = "preview"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the single required input: the path of the audio file."""
        required_inputs = {"audio_file": ("AUDIO_PATH",)}
        return {"required": required_inputs}

    def preview(self, audio_file):
        """Build the UI payload describing *audio_file*.

        Only the base name of the path is reported; the subfolder and type
        are fixed to "audio" and "output".
        """
        entry = {
            "filename": os.path.basename(audio_file),
            "subfolder": "audio",
            "type": "output",
        }
        return {"ui": {"audio": [entry]}}
# Node type name -> implementing class. Each key is listed exactly once;
# repeated keys in a dict literal are silently collapsed (last one wins).
NODE_CLASS_MAPPINGS = {
    "MusicGen": MusicGenNode,
    "AudioGen": AudioGenNode,
    "AudioUpsample": AudioUpsampleNode,
    "AudioPreview": AudioPreviewNode,
}
# Node type name -> human-readable label. Each key is listed exactly once;
# repeated keys in a dict literal are silently collapsed (last one wins).
NODE_DISPLAY_NAME_MAPPINGS = {
    "MusicGen": "MusicGen (Musik)",
    "AudioGen": "AudioGen (Sound Effects)",
    "AudioUpsample": "Audio Upsample (Qualität)",
    "AudioPreview": "Audio Preview",
}

View file

@ -1,98 +1,135 @@
{
"last_node_id": 4,
"last_link_id": 0,
"last_node_id": 6,
"last_link_id": 4,
"nodes": [
{
"id": 1,
"type": "MusicGen",
"pos": [100, 100],
"size": {"0": 400, "1": 200},
"pos": [50, 100],
"size": {"0": 420, "1": 370},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
{"name": "audio_file", "type": "AUDIO_PATH", "links": [1], "slot_index": 0}
],
"properties": {"Node name for S&R": "MusicGen"},
"widgets_values": [
"dark dungeon ambience, slow, mysterious, atmospheric, medieval",
10.0,
"facebook/musicgen-small",
0
"epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
30.0,
"facebook/musicgen-stereo-medium",
0,
0.9,
5.0,
250,
18.0
]
},
{
"id": 2,
"type": "MusicGen",
"pos": [100, 380],
"size": {"0": 400, "1": 200},
"type": "AudioUpsample",
"pos": [520, 100],
"size": {"0": 300, "1": 120},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
],
"properties": {"Node name for S&R": "MusicGen"},
"widgets_values": [
"epic battle music, drums, dark fantasy, intense, orchestral",
15.0,
"facebook/musicgen-small",
42
]
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [2], "slot_index": 0}
],
"properties": {"Node name for S&R": "AudioUpsample"},
"widgets_values": [48000]
},
{
"id": 3,
"type": "AudioGen",
"pos": [100, 660],
"size": {"0": 400, "1": 160},
"type": "AudioPreview",
"pos": [870, 100],
"size": {"0": 250, "1": 80},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 2}
],
"properties": {"Node name for S&R": "AudioGen"},
"widgets_values": [
"sword slash metal clang sound effect",
3.0,
0
]
"outputs": [],
"properties": {"Node name for S&R": "AudioPreview"},
"widgets_values": []
},
{
"id": 4,
"type": "AudioGen",
"pos": [100, 880],
"size": {"0": 400, "1": 160},
"pos": [50, 560],
"size": {"0": 420, "1": 330},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
{"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
],
"properties": {"Node name for S&R": "AudioGen"},
"widgets_values": [
"footsteps on stone dungeon floor",
3.0,
1
"fast sword swing whoosh attack sound effect combat",
2.0,
0,
1.0,
4.0,
250
]
},
{
"id": 5,
"type": "AudioUpsample",
"pos": [520, 560],
"size": {"0": 300, "1": 120},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
],
"outputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
],
"properties": {"Node name for S&R": "AudioUpsample"},
"widgets_values": [48000]
},
{
"id": 6,
"type": "AudioPreview",
"pos": [870, 560],
"size": {"0": 250, "1": 80},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
],
"outputs": [],
"properties": {"Node name for S&R": "AudioPreview"},
"widgets_values": []
}
],
"links": [],
"links": [
[1, 1, 0, 2, 0, "AUDIO_PATH"],
[2, 2, 0, 3, 0, "AUDIO_PATH"],
[3, 4, 0, 5, 0, "AUDIO_PATH"],
[4, 5, 0, 6, 0, "AUDIO_PATH"]
],
"groups": [
{
"title": "Musik",
"bounding": [80, 75, 440, 480],
"color": "#8B0000",
"title": "Musik (MusicGen → Upsample → Preview)",
"bounding": [30, 65, 1120, 420],
"color": "#3d2a5a",
"font_size": 14
},
{
"title": "Sound Effects",
"bounding": [80, 635, 440, 430],
"color": "#1a3a4a",
"title": "Sound Effects (AudioGen → Upsample → Preview)",
"bounding": [30, 525, 1120, 390],
"color": "#1a3a2a",
"font_size": 14
}
],