starting new stack

stackwiseai · Jan 22, 2024 · a23e9eb · a23e9eb
1 parent 445c92e
commit a23e9eb
Show file tree

Hide file tree

Showing 2 changed files with 240 additions and 0 deletions.
diff --git a/ui/app/api/website-to-music/route.ts b/ui/app/api/website-to-music/route.ts
@@ -0,0 +1,87 @@
+import Replicate from 'replicate';
+
+const replicate = new Replicate({ // Module-level Replicate API client shared by the POST handler in this file.
+  auth: process.env.REPLICATE_API_TOKEN!, // `!` only silences the type checker; the value is still undefined at runtime if the env var is unset.
+});
+
+export const maxDuration = 300; // NOTE(review): presumably the Next.js route segment config for max execution time, in seconds — confirm.
+
+/** Replicate model used to describe the image and suggest matching music. */
+const LLAVA_VERSION =
+  'yorickvp/llava-13b:e272157381e2a3bf12df3a8edd1f38d1dbd736bbb7437277c8b34175f8fce358';
+
+/** Replicate model used to turn the suggested music prompt into audio. */
+const MUSICGEN_VERSION =
+  'meta/musicgen:b05b1dff1d8c6dc63d14b0cdb42135378dcb87f6373b0d3d341ede46e59e2b38';
+
+/** Splits the LLaVA answer into its "Description:" and "Music:" parts. */
+const LLAVA_OUTPUT_PATTERN = /Description:\s*(.*?)\s*Music:\s*(.*)/s;
+
+/** Few-shot prompt asking LLaVA for a "Description: … Music: …" answer. */
+const LLAVA_PROMPT = `Describe what kind of music this image invokes. Give a brief few word description of the image, then comment on the composition of musical elements to recreate this image through music.
+Example responses:
+Description: Sunrise illuminating a mountain range, with rays of light breaking through clouds, creating a scene of awe and grandeur.
+Music: Edo25 major G melodies that sound triumphant and cinematic, leading up to a crescendo that resolves in a 9th harmonic, beginning with a gentle, mysterious introduction that builds into an epic, sweeping climax.
+
+Description: A cozy, dimly lit room with a warm ambience, filled with soft shadows and a sense of quiet introspection.
+Music: A jazz piece in B flat minor with a smooth saxophone solo, featuring complex rhythms and a moody, reflective atmosphere, starting with a soft, contemplative melody that evolves into an expressive, passionate finale.
+
+Description: A bustling, neon-lit metropolis at night, alive with vibrant energy and a sense of futuristic progress.
+Music: A techno track in A minor, characterized by fast-paced electronic beats, a pulsating bassline, and futuristic synth melodies, opening with a high-energy rhythm that climaxes in a whirlwind of electronic ecstasy.
+
+Description: Urban streets at dusk, vibrant with street art and a pulse of lively, youthful energy.
+Music: A rap beat in D minor, with heavy bass, crisp snare hits, and a catchy, repetitive melody suitable for dynamic flow, begins with a bold, assertive introduction that leads into a rhythmically complex and compelling outro.
+
+Description: A peaceful beach with gentle waves, clear skies, and a sense of serene joy and relaxation.
+Music: A reggae tune in E major, with a relaxed tempo, characteristic off-beat rhythms, and a laid-back, feel-good vibe, starts with a soothing, cheerful melody that gradually builds into a joyful, uplifting chorus.
+
+Description: An electrifying rock concert, filled with intense energy, dramatic lighting, and a crowd caught up in the excitement.
+Music: A heavy metal track in F sharp minor, driven by aggressive guitar riffs, fast drumming, and powerful, energetic vocals, opens with an intense, thunderous intro that crescendos into a fiery, explosive climax.
+
+Description: A serene, mist-covered forest at dawn, bathed in a gentle, ethereal light that creates a sense of calm and wonder.
+Music: An ambient piece in A flat major, featuring slow, ethereal synth pads, creating a calm, dreamy soundscape, begins with a delicate, otherworldly sound that slowly unfolds into a serene, peaceful conclusion.
+
+Description: A lively party scene, bursting with color and energy, where people are lost in the moment of celebration and dance.
+Music: An electronic dance music (EDM) anthem in B major, with a catchy hook, upbeat tempo, and an infectious rhythm designed for dance floors, starts with a vibrant, exhilarating beat that builds to a euphoric, dance-inducing peak.`;
+
+/** Builds a `Response` whose body is `body` serialized as JSON. */
+function jsonResponse(body: unknown, status: number): Response {
+  return new Response(JSON.stringify(body), {
+    status,
+    headers: { 'Content-Type': 'application/json' },
+  });
+}
+
+/**
+ * Generates music inspired by an uploaded image.
+ *
+ * Expects multipart form data with:
+ * - `img`: the image file, forwarded to LLaVA as a base64 data URI;
+ * - `length`: the requested clip duration, forwarded to MusicGen as `duration`.
+ *
+ * @param request - Incoming request carrying the form data.
+ * @returns 200 with `{ llavaResponse: { description, prompt }, audio }` on
+ *   success, 400 with `{ error }` when the form data is unusable, or 500 with
+ *   `{ error }` when a model call or the output parsing fails.
+ */
+export async function POST(request: Request): Promise<Response> {
+  let form: FormData;
+  try {
+    form = await request.formData();
+  } catch (error) {
+    console.log(error);
+    return jsonResponse({ error: 'Request body must be form data' }, 400);
+  }
+
+  // `FormData.get` yields a string for plain fields and null when absent;
+  // only a file entry can be read as binary data.
+  const imgFile = form.get('img');
+  if (imgFile === null || typeof imgFile === 'string') {
+    return jsonResponse({ error: 'Missing "img" file upload' }, 400);
+  }
+
+  // `Number(null)` is 0 and `Number('abc')` is NaN; reject both before
+  // spending a model call on an unusable duration.
+  const musicLength = Number(form.get('length'));
+  if (!Number.isFinite(musicLength) || musicLength <= 0) {
+    return jsonResponse({ error: '"length" must be a positive number' }, 400);
+  }
+
+  try {
+    const imgBase64 = Buffer.from(await imgFile.arrayBuffer()).toString(
+      'base64',
+    );
+    const imgUri = `data:${imgFile.type};base64,${imgBase64}`;
+
+    const llavaOutput: unknown = await replicate.run(LLAVA_VERSION, {
+      input: { image: imgUri, prompt: LLAVA_PROMPT },
+    });
+    if (!Array.isArray(llavaOutput)) {
+      throw new Error('Unexpected LLaVA output');
+    }
+    // The output arrives as an array of text chunks; stitch them together.
+    const llavaPrediction = llavaOutput.join('');
+
+    console.log(llavaPrediction);
+
+    const match = llavaPrediction.match(LLAVA_OUTPUT_PATTERN);
+    if (!match) {
+      throw new Error('No match');
+    }
+    const description = match[1] ?? '';
+    const musicPrompt = match[2] ?? '';
+
+    const musicGen = await replicate.run(MUSICGEN_VERSION, {
+      input: {
+        classifier_free_guidance: 10,
+        model_version: 'stereo-melody-large',
+        prompt: musicPrompt,
+        duration: musicLength,
+      },
+    });
+
+    return jsonResponse(
+      {
+        llavaResponse: { description, prompt: musicPrompt },
+        audio: musicGen,
+      },
+      200,
+    );
+  } catch (error) {
+    console.log(error);
+    // An `Error` instance serializes to `{}` under JSON.stringify, so send
+    // its message instead of the object itself.
+    const message = error instanceof Error ? error.message : String(error);
+    return jsonResponse({ error: message }, 500);
+  }
+}
diff --git a/ui/app/components/stacks/website-to-music.tsx b/ui/app/components/stacks/website-to-music.tsx
@@ -0,0 +1,153 @@
+'use client';
+
+import { useEffect, useRef, useState } from 'react';
+
+/** Shortest clip length, in seconds, the length field accepts. */
+const MIN_MUSIC_LENGTH = 3;
+/** Longest clip length, in seconds, the length field accepts. */
+const MAX_MUSIC_LENGTH = 30;
+/** Clip length, in seconds, used initially and when the field is not a number. */
+const DEFAULT_MUSIC_LENGTH = 10;
+
+/** Shape of the JSON body read from the music generation endpoint. */
+type MusicResponse = {
+  audio?: unknown;
+  llavaResponse?: { description?: unknown; prompt?: unknown };
+  error?: unknown;
+};
+
+/**
+ * Parses the raw length field and clamps it to the supported range,
+ * falling back to the default when the text is not a number.
+ */
+function clampMusicLength(raw: string): number {
+  const parsed = parseInt(raw, 10);
+  if (Number.isNaN(parsed)) {
+    return DEFAULT_MUSIC_LENGTH;
+  }
+  return Math.max(MIN_MUSIC_LENGTH, Math.min(MAX_MUSIC_LENGTH, parsed));
+}
+
+/**
+ * Lets the user pick an image and a clip length, posts both to the music
+ * generation endpoint, and then shows the returned description, music prompt
+ * and an audio player. A default image is loaded on mount.
+ */
+export default function ImageToMusic() {
+  const [img, setImg] = useState<File | null>(null);
+  const [previewUrl, setPreviewUrl] = useState('');
+  const [llavaResponse, setLlavaResponse] = useState<{
+    description: string;
+    prompt: string;
+  }>({ description: '', prompt: '' });
+  const [audio, setAudio] = useState('');
+  const [musicLength, setMusicLength] = useState(String(DEFAULT_MUSIC_LENGTH));
+  const [loading, setLoading] = useState(false);
+  const [errorMessage, setErrorMessage] = useState('');
+  const audioRef = useRef<HTMLAudioElement>(null);
+
+  // Load the bundled default image once on mount.
+  useEffect(() => {
+    let cancelled = false;
+
+    const fetchImage = async () => {
+      try {
+        const response = await fetch('/apocalyptic_car.png');
+        if (!response.ok) {
+          throw new Error(`Unexpected status ${response.status}`);
+        }
+        const blob = await response.blob();
+        // NOTE(review): a .png asset is labelled as WebP here — confirm
+        // whether the name/type should follow the fetched file instead.
+        const file = new File([blob], 'default_image.webp', {
+          type: 'image/webp',
+        });
+        // Never replace an image the user picked while this was in flight.
+        if (!cancelled) {
+          setImg((current) => current ?? file);
+        }
+      } catch (error) {
+        console.error('Error fetching default image:', error);
+      }
+    };
+
+    void fetchImage();
+    return () => {
+      cancelled = true;
+    };
+  }, []);
+
+  // Create one object URL per selected image and revoke it when the image
+  // changes or the component unmounts, instead of leaking one per render.
+  useEffect(() => {
+    if (!img) {
+      setPreviewUrl('');
+      return undefined;
+    }
+    const url = URL.createObjectURL(img);
+    setPreviewUrl(url);
+    return () => {
+      URL.revokeObjectURL(url);
+    };
+  }, [img]);
+
+  // Start playback as soon as a new track is available.
+  useEffect(() => {
+    const player = audioRef.current;
+    if (!audio || !player) {
+      return;
+    }
+    player.load();
+    // play() rejects when the browser blocks autoplay; the visible controls
+    // still let the user start the track manually.
+    player.play().catch((error: unknown) => {
+      console.error('Error starting playback:', error);
+    });
+  }, [audio]);
+
+  /** Stores the newly selected file and clears any previous result. */
+  const handleImageUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
+    if (audio) {
+      setAudio('');
+    }
+    const selected = e.target.files?.[0];
+    if (selected) {
+      setImg(selected);
+    }
+  };
+
+  /** Normalizes the length field to a number within the supported range. */
+  const handleMusicLength = (e: React.FocusEvent<HTMLInputElement>) => {
+    setMusicLength(String(clampMusicLength(e.target.value)));
+  };
+
+  /**
+   * Button handler: resets the view when a track is showing, otherwise
+   * requests a new track for the current image and length.
+   */
+  const createMusic = async () => {
+    if (loading) return;
+    if (audio) {
+      setAudio('');
+      return;
+    }
+    if (!img) {
+      setErrorMessage('Please choose an image first.');
+      return;
+    }
+
+    setLoading(true);
+    setErrorMessage('');
+    try {
+      const formData = new FormData();
+      formData.append('img', img);
+      formData.append('length', String(clampMusicLength(musicLength)));
+      // NOTE(review): this file lives under the "website-to-music" stack but
+      // posts to the image-to-music endpoint — confirm that is intended.
+      const response = await fetch('/api/image-to-music', {
+        method: 'POST',
+        body: formData,
+      });
+      const data = (await response.json()) as MusicResponse | null;
+      const track = data?.audio;
+      if (!response.ok || typeof track !== 'string') {
+        throw new Error(
+          typeof data?.error === 'string'
+            ? data.error
+            : `Request failed with status ${response.status}`,
+        );
+      }
+      setLlavaResponse({
+        description: String(data?.llavaResponse?.description ?? ''),
+        prompt: String(data?.llavaResponse?.prompt ?? ''),
+      });
+      setAudio(track);
+    } catch (error) {
+      console.error('Error creating music:', error);
+      setErrorMessage(
+        error instanceof Error ? error.message : 'Something went wrong.',
+      );
+    } finally {
+      // Always leave the loading state, even when the request failed.
+      setLoading(false);
+    }
+  };
+
+  return (
+    <div className="stretch mx-auto flex w-5/6 flex-col items-center justify-center space-y-4 pb-10 md:w-3/4 lg:w-2/3">
+      <div className="flex items-center justify-center">
+        <label
+          htmlFor="customFileUpload"
+          className="mr-4 flex w-full cursor-pointer items-center rounded-lg border-2 border-dashed py-1 pl-2 md:w-1/2"
+        >
+          <span id="pdfLabel" className="mr-2 whitespace-nowrap">
+            Upload Image
+          </span>
+          <input
+            type="file"
+            onChange={handleImageUpload}
+            disabled={loading}
+            aria-labelledby="pdfLabel"
+            accept="image/*"
+            id="customFileUpload"
+            className="hidden"
+          />
+          {img && (
+            <div className="line-clamp-2 pr-2 text-gray-600">{img.name}</div>
+          )}
+        </label>
+        <span>Length (sec): </span>
+        <input
+          min={MIN_MUSIC_LENGTH}
+          max={MAX_MUSIC_LENGTH}
+          value={musicLength}
+          onChange={(e) => setMusicLength(e.target.value)}
+          onBlur={handleMusicLength}
+          type="number"
+          className="ml-1 h-8 rounded border pl-1"
+        />
+      </div>
+      <div className="flex w-full items-center justify-center space-x-4">
+        {img && previewUrl && (
+          <img
+            src={previewUrl}
+            alt="Preview"
+            className={`mx-auto w-1/2 ${loading ? 'blur-sm' : ''}`}
+          />
+        )}
+        {audio && (
+          <div className="w-1/2 space-y-3 px-6">
+            <p>
+              <b>Image description: </b>
+              {llavaResponse.description}
+            </p>
+            <p>
+              <b>Inspired music: </b>
+              {llavaResponse.prompt}
+            </p>
+            <audio ref={audioRef} controls src={audio} className="w-full" />
+          </div>
+        )}
+      </div>
+      {errorMessage && (
+        <p role="alert" className="text-red-600">
+          {errorMessage}
+        </p>
+      )}
+      <button
+        onClick={createMusic}
+        disabled={loading}
+        className={`${
+          loading ? 'bg-black text-white' : ''
+        } rounded-md border border-black px-3 py-1 font-medium`}
+      >
+        {audio
+          ? 'Reset'
+          : loading
+            ? 'Loading, 10-30 secs...'
+            : 'Create theme music'}
+      </button>
+    </div>
+  );
+}