-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
445c92e
commit a23e9eb
Showing
2 changed files
with
240 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
import Replicate from 'replicate'; | ||
|
||
// API token for Replicate, read from the environment. The non-null assertion
// only silences the type checker; nothing here verifies the variable is set.
const replicateApiToken = process.env.REPLICATE_API_TOKEN!;

// Single Replicate client shared by every request handled by this module.
const replicate = new Replicate({ auth: replicateApiToken });

// NOTE(review): presumably the Next.js route segment option that caps this
// handler's execution time, in seconds — confirm against the deployment target.
export const maxDuration = 300;
|
||
/** Replicate model version asked to describe the image and suggest matching music. */
const LLAVA_VERSION =
  'yorickvp/llava-13b:e272157381e2a3bf12df3a8edd1f38d1dbd736bbb7437277c8b34175f8fce358';

/** Replicate model version that turns the suggested music description into audio. */
const MUSICGEN_VERSION =
  'meta/musicgen:b05b1dff1d8c6dc63d14b0cdb42135378dcb87f6373b0d3d341ede46e59e2b38';

/** Bounds (seconds) for the requested clip; they mirror the limits the upload form applies. */
const MIN_MUSIC_SECONDS = 3;
const MAX_MUSIC_SECONDS = 30;
const DEFAULT_MUSIC_SECONDS = 10;

/** Splits the LLaVA answer into its "Description:" and "Music:" parts. */
const LLAVA_RESPONSE_PATTERN = /Description:\s*(.*?)\s*Music:\s*(.*)/s;

/** Few-shot prompt sent to LLaVA together with the uploaded image. */
const LLAVA_PROMPT = [
  'Describe what kind of music this image invokes. Give a brief few word description of the image, then comment on the composition of musical elements to recreate this image through music.',
  'Example responses:',
  'Description: Sunrise illuminating a mountain range, with rays of light breaking through clouds, creating a scene of awe and grandeur.',
  'Music: Edo25 major G melodies that sound triumphant and cinematic, leading up to a crescendo that resolves in a 9th harmonic, beginning with a gentle, mysterious introduction that builds into an epic, sweeping climax.',
  'Description: A cozy, dimly lit room with a warm ambience, filled with soft shadows and a sense of quiet introspection.',
  'Music: A jazz piece in B flat minor with a smooth saxophone solo, featuring complex rhythms and a moody, reflective atmosphere, starting with a soft, contemplative melody that evolves into an expressive, passionate finale.',
  'Description: A bustling, neon-lit metropolis at night, alive with vibrant energy and a sense of futuristic progress.',
  'Music: A techno track in A minor, characterized by fast-paced electronic beats, a pulsating bassline, and futuristic synth melodies, opening with a high-energy rhythm that climaxes in a whirlwind of electronic ecstasy.',
  'Description: Urban streets at dusk, vibrant with street art and a pulse of lively, youthful energy.',
  'Music: A rap beat in D minor, with heavy bass, crisp snare hits, and a catchy, repetitive melody suitable for dynamic flow, begins with a bold, assertive introduction that leads into a rhythmically complex and compelling outro.',
  'Description: A peaceful beach with gentle waves, clear skies, and a sense of serene joy and relaxation.',
  'Music: A reggae tune in E major, with a relaxed tempo, characteristic off-beat rhythms, and a laid-back, feel-good vibe, starts with a soothing, cheerful melody that gradually builds into a joyful, uplifting chorus.',
  'Description: An electrifying rock concert, filled with intense energy, dramatic lighting, and a crowd caught up in the excitement.',
  'Music: A heavy metal track in F sharp minor, driven by aggressive guitar riffs, fast drumming, and powerful, energetic vocals, opens with an intense, thunderous intro that crescendos into a fiery, explosive climax.',
  'Description: A serene, mist-covered forest at dawn, bathed in a gentle, ethereal light that creates a sense of calm and wonder.',
  'Music: An ambient piece in A flat major, featuring slow, ethereal synth pads, creating a calm, dreamy soundscape, begins with a delicate, otherworldly sound that slowly unfolds into a serene, peaceful conclusion.',
  'Description: A lively party scene, bursting with color and energy, where people are lost in the moment of celebration and dance.',
  'Music: An electronic dance music (EDM) anthem in B major, with a catchy hook, upbeat tempo, and an infectious rhythm designed for dance floors, starts with a vibrant, exhilarating beat that builds to a euphoric, dance-inducing peak.',
].join('\n');

/** Builds a JSON response with the given status and a JSON content type. */
function jsonResponse(body: unknown, status: number): Response {
  return new Response(JSON.stringify(body), {
    status,
    headers: { 'Content-Type': 'application/json' },
  });
}

/** Parses the `length` form field into whole seconds within the allowed range. */
function clampMusicLength(raw: FormDataEntryValue | null): number {
  // Number('') is 0, so blank input is treated as "not provided" instead.
  const parsed =
    typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : Number.NaN;
  if (!Number.isFinite(parsed)) {
    return DEFAULT_MUSIC_SECONDS;
  }
  return Math.min(
    MAX_MUSIC_SECONDS,
    Math.max(MIN_MUSIC_SECONDS, Math.trunc(parsed)),
  );
}

/** Extracts a serializable message; an Error would otherwise stringify to `{}`. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Generates music inspired by an uploaded image.
 *
 * Expects multipart form data with:
 * - `img`: the image file to describe;
 * - `length`: desired clip length in seconds (clamped to 3-30, default 10).
 *
 * The image is sent to LLaVA as a base64 data URI; the "Music:" part of its
 * answer becomes the prompt for MusicGen.
 *
 * @returns 200 with `{ llavaResponse: { description, prompt }, audio }` on
 *   success, 400 with `{ error }` when the form or image is missing, and 500
 *   with `{ error }` when a model call fails or its answer cannot be parsed.
 */
export async function POST(request: Request): Promise<Response> {
  let form: FormData;
  try {
    form = await request.formData();
  } catch {
    return jsonResponse(
      { error: 'Request body must be multipart form data.' },
      400,
    );
  }

  const imgEntry = form.get('img');
  if (imgEntry === null || typeof imgEntry === 'string') {
    return jsonResponse(
      { error: 'Missing image upload in form field "img".' },
      400,
    );
  }
  const musicLength = clampMusicLength(form.get('length'));

  try {
    const imgBase64 = Buffer.from(await imgEntry.arrayBuffer()).toString(
      'base64',
    );
    const imgUri = `data:${imgEntry.type};base64,${imgBase64}`;

    const llavaOutput: unknown = await replicate.run(LLAVA_VERSION, {
      input: { image: imgUri, prompt: LLAVA_PROMPT },
    });
    // The answer is joined when it arrives as an array of text chunks.
    const llavaPrediction = Array.isArray(llavaOutput)
      ? llavaOutput.join('')
      : String(llavaOutput);

    console.log(llavaPrediction);

    const match = llavaPrediction.match(LLAVA_RESPONSE_PATTERN);
    if (!match) {
      throw new Error('No match');
    }
    const [, description, prompt] = match;

    const musicGen = await replicate.run(MUSICGEN_VERSION, {
      input: {
        classifier_free_guidance: 10,
        model_version: 'stereo-melody-large',
        prompt,
        duration: musicLength,
      },
    });

    return jsonResponse(
      { llavaResponse: { description, prompt }, audio: musicGen },
      200,
    );
  } catch (error: unknown) {
    console.error(error);
    return jsonResponse({ error: describeError(error) }, 500);
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
'use client'; | ||
|
||
import { useEffect, useRef, useState } from 'react'; | ||
|
||
/** Allowed clip length in seconds, and the value used when input is invalid. */
const MIN_MUSIC_SECONDS = 3;
const MAX_MUSIC_SECONDS = 30;
const DEFAULT_MUSIC_SECONDS = 10;

/** Text returned by the API: the image description and the derived music prompt. */
type LlavaResponse = { description: string; prompt: string };

/** Successful payload of `POST /api/image-to-music`. */
type MusicResult = { audio: string; llavaResponse: LlavaResponse };

/** Runtime check that a parsed JSON body has the shape of a successful result. */
function isMusicResult(data: unknown): data is MusicResult {
  if (typeof data !== 'object' || data === null) return false;
  const { audio, llavaResponse } = data as Record<string, unknown>;
  if (typeof audio !== 'string') return false;
  if (typeof llavaResponse !== 'object' || llavaResponse === null) return false;
  const { description, prompt } = llavaResponse as Record<string, unknown>;
  return typeof description === 'string' && typeof prompt === 'string';
}

/**
 * Image-to-music demo: the user picks an image and a clip length, the image is
 * posted to `/api/image-to-music`, and the returned description, music prompt
 * and audio are shown next to the image preview.
 *
 * A default image is fetched on mount so the demo can run without an upload.
 * While audio is present the main button acts as "Reset".
 */
export default function ImageToMusic() {
  const [img, setImg] = useState<File | null>(null);
  const [previewUrl, setPreviewUrl] = useState<string | null>(null);
  const [llavaResponse, setLlavaResponse] = useState<LlavaResponse>({
    description: '',
    prompt: '',
  });
  const [audio, setAudio] = useState<string>('');
  const [musicLength, setMusicLength] = useState<string>(
    String(DEFAULT_MUSIC_SECONDS),
  );
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const audioRef = useRef<HTMLAudioElement>(null);

  // Load the bundled default image once on mount.
  useEffect(() => {
    const fetchImage = async () => {
      try {
        const response = await fetch('/apocalyptic_car.png');
        const blob = await response.blob();
        // NOTE(review): the asset path ends in .png but the file is labelled
        // webp here — confirm which format the asset actually is.
        const file = new File([blob], 'default_image.webp', {
          type: 'image/webp',
        });
        setImg(file);
      } catch (fetchError) {
        console.error('Error fetching default image:', fetchError);
      }
    };

    void fetchImage();
  }, []);

  // Create one object URL per selected image and revoke it when the image
  // changes or the component unmounts; creating it during render would
  // allocate a new URL on every render and never release it.
  useEffect(() => {
    if (!img) {
      setPreviewUrl(null);
      return undefined;
    }
    const url = URL.createObjectURL(img);
    setPreviewUrl(url);
    return () => URL.revokeObjectURL(url);
  }, [img]);

  // Start playback as soon as a new audio source is set.
  useEffect(() => {
    const player = audioRef.current;
    if (!audio || !player) return;
    player.load();
    // play() returns a promise that rejects when playback cannot start.
    player.play().catch((playError: unknown) => {
      console.error('Error starting playback:', playError);
    });
  }, [audio]);

  const handleImageUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
    if (audio) {
      setAudio('');
    }
    const file = e.target.files?.[0];
    if (file) {
      setImg(file);
    }
  };

  const createMusic = async (): Promise<void> => {
    if (loading) return;
    // With a result on screen the button is labelled "Reset".
    if (audio) {
      setAudio('');
      return;
    }
    if (!img) {
      setError('Please choose an image first.');
      return;
    }

    setLoading(true);
    setError(null);
    try {
      const formData = new FormData();
      formData.append('img', img);
      formData.append('length', musicLength);
      const response = await fetch('/api/image-to-music', {
        method: 'POST',
        body: formData,
      });
      const data: unknown = await response.json();
      if (!response.ok || !isMusicResult(data)) {
        throw new Error(`Unexpected response (status ${response.status})`);
      }
      setLlavaResponse(data.llavaResponse);
      setAudio(data.audio);
    } catch (requestError) {
      console.error('Error creating music:', requestError);
      setError('Could not create music. Please try again.');
    } finally {
      // Always leave the loading state, even when the request fails.
      setLoading(false);
    }
  };

  // On blur, snap the typed value into the allowed range; empty or
  // non-numeric input falls back to the default instead of becoming "NaN".
  const handleMusicLength = (e: React.ChangeEvent<HTMLInputElement>) => {
    const parsed = Number.parseInt(e.target.value, 10);
    const clamped = Number.isNaN(parsed)
      ? DEFAULT_MUSIC_SECONDS
      : Math.max(MIN_MUSIC_SECONDS, Math.min(MAX_MUSIC_SECONDS, parsed));
    setMusicLength(String(clamped));
  };

  return (
    <div className="stretch mx-auto flex w-5/6 flex-col items-center justify-center space-y-4 pb-10 md:w-3/4 lg:w-2/3">
      <div className="flex items-center justify-center">
        <label
          htmlFor="customFileUpload"
          className="mr-4 flex w-full cursor-pointer items-center rounded-lg border-2 border-dashed py-1 pl-2 md:w-1/2"
        >
          <span id="pdfLabel" className="mr-2 whitespace-nowrap">
            Upload Image
          </span>
          <input
            type="file"
            onChange={handleImageUpload}
            disabled={loading}
            aria-labelledby="pdfLabel"
            accept="image/*"
            id="customFileUpload"
            className="hidden"
          />
          {img && (
            <div className="line-clamp-2 pr-2 text-gray-600">{img.name}</div>
          )}
        </label>
        <>
          <span>Length (sec): </span>
          <input
            min={MIN_MUSIC_SECONDS}
            max={MAX_MUSIC_SECONDS}
            value={musicLength}
            onChange={(e) => setMusicLength(e.target.value)}
            onBlur={handleMusicLength}
            type="number"
            className="ml-1 h-8 rounded border pl-1"
          />
        </>
      </div>
      <div className="flex w-full items-center justify-center space-x-4">
        {previewUrl && (
          <img
            src={previewUrl}
            alt="Preview"
            className={`mx-auto w-1/2${loading ? ' blur-sm' : ''}`}
          />
        )}
        {audio && (
          <div className="w-1/2 space-y-3 px-6">
            <p>
              <b>Image description: </b>
              {llavaResponse.description}
            </p>
            <p>
              <b>Inspired music: </b>
              {llavaResponse.prompt}
            </p>
            <audio ref={audioRef} controls src={audio} className="w-full" />
          </div>
        )}
      </div>
      {error && (
        <p role="alert" className="text-red-600">
          {error}
        </p>
      )}
      <button
        onClick={() => void createMusic()}
        disabled={loading}
        className={`${
          loading ? 'bg-black text-white ' : ''
        }rounded-md border border-black px-3 py-1 font-medium`}
      >
        {audio
          ? 'Reset'
          : loading
            ? 'Loading, 10-30 secs...'
            : 'Create theme music'}
      </button>
    </div>
  );
}