ComfyUI AudioCraft: verbesserte Nodes + Workflow Template
- nodes.py: temperature, cfg_coef, top_k, extend_stride Parameter
- AudioUpsample Node: 16kHz/32kHz -> 48kHz für bessere Qualität
- AudioPreview Node: Vorschau direkt in ComfyUI UI
- workflow_game_audio.json: Template mit Musik + SFX Pipeline
- Standardmodell: musicgen-stereo-medium (besserer Sound)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
b28b4ddc85
commit
4feff43758
2 changed files with 164 additions and 62 deletions
|
|
@ -10,9 +10,13 @@ class MusicGenNode:
|
|||
return {
|
||||
"required": {
|
||||
"prompt": ("STRING", {"multiline": True, "default": "dark dungeon ambience, slow, mysterious"}),
|
||||
"duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 60.0, "step": 1.0}),
|
||||
"model": (["facebook/musicgen-small", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-stereo-medium"],),
|
||||
"duration": ("FLOAT", {"default": 10.0, "min": 1.0, "max": 120.0, "step": 1.0}),
|
||||
"model": (["facebook/musicgen-stereo-medium", "facebook/musicgen-stereo-large", "facebook/musicgen-medium", "facebook/musicgen-large", "facebook/musicgen-small"],),
|
||||
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
|
||||
"temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
|
||||
"cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}),
|
||||
"top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
|
||||
"extend_stride": ("FLOAT", {"default": 18.0, "min": 1.0, "max": 30.0, "step": 1.0}),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -22,7 +26,7 @@ class MusicGenNode:
|
|||
CATEGORY = "AudioCraft"
|
||||
OUTPUT_NODE = True
|
||||
|
||||
def generate(self, prompt, duration, model, seed):
|
||||
def generate(self, prompt, duration, model, seed, temperature, cfg_coef, top_k, extend_stride):
|
||||
from audiocraft.models import MusicGen
|
||||
from audiocraft.data.audio import audio_write
|
||||
|
||||
|
|
@ -30,7 +34,13 @@ class MusicGenNode:
|
|||
|
||||
print(f"[MusicGen] Loading model: {model}")
|
||||
mg = MusicGen.get_pretrained(model)
|
||||
mg.set_generation_params(duration=duration)
|
||||
mg.set_generation_params(
|
||||
duration=duration,
|
||||
temperature=temperature,
|
||||
cfg_coef=cfg_coef,
|
||||
top_k=top_k,
|
||||
extend_stride=extend_stride,
|
||||
)
|
||||
|
||||
print(f"[MusicGen] Generating: {prompt}")
|
||||
wav = mg.generate([prompt])
|
||||
|
|
@ -52,6 +62,9 @@ class AudioGenNode:
|
|||
"prompt": ("STRING", {"multiline": True, "default": "sword clash metal sound effect"}),
|
||||
"duration": ("FLOAT", {"default": 3.0, "min": 0.5, "max": 30.0, "step": 0.5}),
|
||||
"seed": ("INT", {"default": 0, "min": 0, "max": 2**32 - 1}),
|
||||
"temperature": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 2.0, "step": 0.05}),
|
||||
"cfg_coef": ("FLOAT", {"default": 3.0, "min": 1.0, "max": 10.0, "step": 0.5}),
|
||||
"top_k": ("INT", {"default": 250, "min": 1, "max": 1000}),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -61,7 +74,7 @@ class AudioGenNode:
|
|||
CATEGORY = "AudioCraft"
|
||||
OUTPUT_NODE = True
|
||||
|
||||
def generate(self, prompt, duration, seed):
|
||||
def generate(self, prompt, duration, seed, temperature, cfg_coef, top_k):
|
||||
from audiocraft.models import AudioGen
|
||||
from audiocraft.data.audio import audio_write
|
||||
|
||||
|
|
@ -69,7 +82,12 @@ class AudioGenNode:
|
|||
|
||||
print(f"[AudioGen] Loading model...")
|
||||
ag = AudioGen.get_pretrained("facebook/audiogen-medium")
|
||||
ag.set_generation_params(duration=duration)
|
||||
ag.set_generation_params(
|
||||
duration=duration,
|
||||
temperature=temperature,
|
||||
cfg_coef=cfg_coef,
|
||||
top_k=top_k,
|
||||
)
|
||||
|
||||
print(f"[AudioGen] Generating: {prompt}")
|
||||
wav = ag.generate([prompt])
|
||||
|
|
@ -83,12 +101,59 @@ class AudioGenNode:
|
|||
return (out_path + ".wav",)
|
||||
|
||||
|
||||
class AudioUpsampleNode:
    """Resample an audio file to a higher sample rate using torchaudio.

    Meant to lift 16 kHz / 32 kHz audio to 48 kHz (or 44.1 kHz). The
    resampled audio is written to a new file next to the input and the
    new path is returned.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the node inputs: source audio path and target sample rate."""
        return {
            "required": {
                "audio_file": ("AUDIO_PATH",),
                "target_sr": ([48000, 44100], {"default": 48000}),
            }
        }

    RETURN_TYPES = ("AUDIO_PATH",)
    RETURN_NAMES = ("audio_file",)
    FUNCTION = "upsample"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @staticmethod
    def _resampled_path(audio_file, target_sr):
        """Return the output path: ``<stem>_<target_sr>hz<ext>``.

        Only the final extension is touched, so a ".wav" occurring in a
        directory name is left alone, and the result always differs from
        the input path (the source file is never overwritten). A path
        without an extension gets ".wav" so the writer can infer a format.
        """
        import os

        stem, ext = os.path.splitext(audio_file)
        return f"{stem}_{target_sr}hz{ext or '.wav'}"

    def upsample(self, audio_file, target_sr):
        """Resample ``audio_file`` to ``target_sr`` and save the result.

        Args:
            audio_file: Path of the audio file to load.
            target_sr: Desired sample rate in Hz.

        Returns:
            A 1-tuple holding the path of the written file.
        """
        # Lazy import: keeps module import cheap and does not rely on a
        # file-level torchaudio import.
        import torchaudio

        wav, sr = torchaudio.load(audio_file)
        if sr != target_sr:
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            wav = resampler(wav)
        # A copy is written even when the rate already matches, so that
        # downstream nodes always receive the "_<rate>hz" file.
        out_path = self._resampled_path(audio_file, target_sr)
        torchaudio.save(out_path, wav, target_sr)
        print(f"[Upsample] {sr}Hz → {target_sr}Hz: {out_path}")
        return (out_path,)
|
||||
|
||||
|
||||
class AudioPreviewNode:
    """Show an audio file in the ComfyUI interface."""

    RETURN_TYPES = ()
    FUNCTION = "preview"
    CATEGORY = "AudioCraft"
    OUTPUT_NODE = True

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the single required input: the audio file path."""
        required = {"audio_file": ("AUDIO_PATH",)}
        return {"required": required}

    def preview(self, audio_file):
        """Build the UI payload describing ``audio_file``.

        Only the file's base name is reported; the subfolder ("audio")
        and type ("output") are fixed values.
        """
        entry = {
            "filename": os.path.basename(audio_file),
            "subfolder": "audio",
            "type": "output",
        }
        return {"ui": {"audio": [entry]}}
|
||||
|
||||
|
||||
# Node identifier -> implementing class.
NODE_CLASS_MAPPINGS = {
    "MusicGen": MusicGenNode,
    "AudioGen": AudioGenNode,
    "AudioUpsample": AudioUpsampleNode,
    "AudioPreview": AudioPreviewNode,
}

# Node identifier -> human-readable label (same keys as above).
NODE_DISPLAY_NAME_MAPPINGS = {
    "MusicGen": "MusicGen (Musik)",
    "AudioGen": "AudioGen (Sound Effects)",
    "AudioUpsample": "Audio Upsample (Qualität)",
    "AudioPreview": "Audio Preview",
}
|
||||
|
|
|
|||
|
|
@ -1,98 +1,135 @@
|
|||
{
|
||||
"last_node_id": 4,
|
||||
"last_link_id": 0,
|
||||
"last_node_id": 6,
|
||||
"last_link_id": 4,
|
||||
"nodes": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "MusicGen",
|
||||
"pos": [100, 100],
|
||||
"size": {"0": 400, "1": 200},
|
||||
"pos": [50, 100],
|
||||
"size": {"0": 420, "1": 370},
|
||||
"flags": {},
|
||||
"order": 0,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [1], "slot_index": 0}
|
||||
],
|
||||
"properties": {"Node name for S&R": "MusicGen"},
|
||||
"widgets_values": [
|
||||
"dark dungeon ambience, slow, mysterious, atmospheric, medieval",
|
||||
10.0,
|
||||
"facebook/musicgen-small",
|
||||
0
|
||||
"epic dark fantasy orchestral RPG main menu theme, war drums, deep brass horns, sweeping strings, ancient mysterious atmosphere, cinematic, 80 BPM",
|
||||
30.0,
|
||||
"facebook/musicgen-stereo-medium",
|
||||
0,
|
||||
0.9,
|
||||
5.0,
|
||||
250,
|
||||
18.0
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "MusicGen",
|
||||
"pos": [100, 380],
|
||||
"size": {"0": 400, "1": 200},
|
||||
"type": "AudioUpsample",
|
||||
"pos": [520, 100],
|
||||
"size": {"0": 300, "1": 120},
|
||||
"flags": {},
|
||||
"order": 1,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
|
||||
"inputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "link": 1}
|
||||
],
|
||||
"properties": {"Node name for S&R": "MusicGen"},
|
||||
"widgets_values": [
|
||||
"epic battle music, drums, dark fantasy, intense, orchestral",
|
||||
15.0,
|
||||
"facebook/musicgen-small",
|
||||
42
|
||||
]
|
||||
"outputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [2], "slot_index": 0}
|
||||
],
|
||||
"properties": {"Node name for S&R": "AudioUpsample"},
|
||||
"widgets_values": [48000]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "AudioGen",
|
||||
"pos": [100, 660],
|
||||
"size": {"0": 400, "1": 160},
|
||||
"type": "AudioPreview",
|
||||
"pos": [870, 100],
|
||||
"size": {"0": 250, "1": 80},
|
||||
"flags": {},
|
||||
"order": 2,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
|
||||
"inputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "link": 2}
|
||||
],
|
||||
"properties": {"Node name for S&R": "AudioGen"},
|
||||
"widgets_values": [
|
||||
"sword slash metal clang sound effect",
|
||||
3.0,
|
||||
0
|
||||
]
|
||||
"outputs": [],
|
||||
"properties": {"Node name for S&R": "AudioPreview"},
|
||||
"widgets_values": []
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "AudioGen",
|
||||
"pos": [100, 880],
|
||||
"size": {"0": 400, "1": 160},
|
||||
"pos": [50, 560],
|
||||
"size": {"0": 420, "1": 330},
|
||||
"flags": {},
|
||||
"order": 3,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [], "slot_index": 0}
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [3], "slot_index": 0}
|
||||
],
|
||||
"properties": {"Node name for S&R": "AudioGen"},
|
||||
"widgets_values": [
|
||||
"footsteps on stone dungeon floor",
|
||||
3.0,
|
||||
1
|
||||
"fast sword swing whoosh attack sound effect combat",
|
||||
2.0,
|
||||
0,
|
||||
1.0,
|
||||
4.0,
|
||||
250
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "AudioUpsample",
|
||||
"pos": [520, 560],
|
||||
"size": {"0": 300, "1": 120},
|
||||
"flags": {},
|
||||
"order": 4,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "link": 3}
|
||||
],
|
||||
"outputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "links": [4], "slot_index": 0}
|
||||
],
|
||||
"properties": {"Node name for S&R": "AudioUpsample"},
|
||||
"widgets_values": [48000]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "AudioPreview",
|
||||
"pos": [870, 560],
|
||||
"size": {"0": 250, "1": 80},
|
||||
"flags": {},
|
||||
"order": 5,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{"name": "audio_file", "type": "AUDIO_PATH", "link": 4}
|
||||
],
|
||||
"outputs": [],
|
||||
"properties": {"Node name for S&R": "AudioPreview"},
|
||||
"widgets_values": []
|
||||
}
|
||||
],
|
||||
"links": [],
|
||||
"links": [
|
||||
[1, 1, 0, 2, 0, "AUDIO_PATH"],
|
||||
[2, 2, 0, 3, 0, "AUDIO_PATH"],
|
||||
[3, 4, 0, 5, 0, "AUDIO_PATH"],
|
||||
[4, 5, 0, 6, 0, "AUDIO_PATH"]
|
||||
],
|
||||
"groups": [
|
||||
{
|
||||
"title": "Musik",
|
||||
"bounding": [80, 75, 440, 480],
|
||||
"color": "#8B0000",
|
||||
"title": "Musik (MusicGen → Upsample → Preview)",
|
||||
"bounding": [30, 65, 1120, 420],
|
||||
"color": "#3d2a5a",
|
||||
"font_size": 14
|
||||
},
|
||||
{
|
||||
"title": "Sound Effects",
|
||||
"bounding": [80, 635, 440, 430],
|
||||
"color": "#1a3a4a",
|
||||
"title": "Sound Effects (AudioGen → Upsample → Preview)",
|
||||
"bounding": [30, 525, 1120, 390],
|
||||
"color": "#1a3a2a",
|
||||
"font_size": 14
|
||||
}
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue