Commit
Merge pull request #126 from VK-RED/generate-a-cover-image-and-subtitle-on-uploading-a-video

Create stack generate-a-cover-image-and-subtitle-on-uploading-a-video
SilenNaihin authored Jan 17, 2024
2 parents 41dbc98 + 5666417 commit 185b86a
Showing 13 changed files with 1,823 additions and 420 deletions.
202 changes: 202 additions & 0 deletions ui/app/api/cover-image-and-subtitle/route.ts
@@ -0,0 +1,202 @@
import { spawn } from 'child_process';
import fs from 'fs';
import { Readable } from 'stream';
import OpenAI from 'openai';
import Replicate from 'replicate';
import { v4 as uuidv4 } from 'uuid';

// initial config
const ffmpegPath = './node_modules/ffmpeg-static/ffmpeg';

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN as string,
});

// types
enum createAudioFileStatus {
SUCCESS = 'Audio has been extracted successfully.',
FAILED = 'An error occurred while extracting the audio.',
}

type createAudioFileOutput = {
status: createAudioFileStatus;
audioName: string;
};

// Generate random audio name using UUID for uniqueness
const createAudioName = (): string => {
return uuidv4();
};

const createAudioFile = (
videoStream: Readable,
): Promise<createAudioFileOutput> => {
// Generate the base name without extension
const baseAudioName = createAudioName();
const audioName = `${baseAudioName}.mp3`; // Append the .mp3 extension

return new Promise((resolve, reject) => {
const ffmpegProcess = spawn(ffmpegPath, [
'-i',
'pipe:0', // Read input from stdin
'-vn', // No video
'-acodec',
'mp3', // Convert to mp3
'-f',
'mp3', // Output format as mp3
'pipe:1', // Write output to stdout
]);

// Pipe video stream to ffmpeg's stdin
videoStream.pipe(ffmpegProcess.stdin);

// Handle output stream
let audioBuffer = Buffer.alloc(0);
ffmpegProcess.stdout.on('data', (chunk) => {
audioBuffer = Buffer.concat([audioBuffer, chunk]);
});

ffmpegProcess.on('close', (code) => {
if (code !== 0) {
reject({
status: createAudioFileStatus.FAILED,
audioName: baseAudioName,
});
} else {
// Write the audio buffer to file with the correct file name
fs.writeFileSync(audioName, audioBuffer);
console.log('Audio extraction finished');
resolve({
status: createAudioFileStatus.SUCCESS,
audioName: baseAudioName,
});
}
});

ffmpegProcess.stderr.on('data', (data) => {
console.error(`ffmpeg stderr: ${data}`);
});

ffmpegProcess.on('error', (err) => {
console.error('Failed to start ffmpeg process:', err);
reject({
status: createAudioFileStatus.FAILED,
audioName: baseAudioName,
});
});
});
};

export async function POST(req: Request) {
try {
const form = await req.formData();
const videoFile = form.get('video') as Blob;

// Convert the uploaded Blob into a Buffer
const videoBuffer = Buffer.from(await videoFile.arrayBuffer());

// Create a readable stream from the video buffer
const videoStream = new Readable();
videoStream.push(videoBuffer);
videoStream.push(null);

// Extract the audio from the video stream and write it to a temporary file
const res = await createAudioFile(videoStream);

if (res.status === createAudioFileStatus.FAILED) {
if (fs.existsSync(`./${res.audioName}.mp3`)) {
fs.unlinkSync(`./${res.audioName}.mp3`);
}

return Response.json({ message: res });
}

const audioName = res.audioName;
const audioBase64 = fs.readFileSync(`./${audioName}.mp3`, 'base64');
const audioUri = `data:audio/mp3;base64,${audioBase64}`;

// Send the audio to Replicate and get back the subtitles and the full transcript
const output = await replicate.run(
'm1guelpf/whisper-subtitles:7f686e243a96c7f6f0f481bcef24d688a1369ed3983cea348d1f43b879615766',
{
input: {
format: 'vtt',
audio_path: audioUri,
model_name: 'base',
},
},
);

if (!output || !('text' in output) || !('subtitles' in output)) {
return Response.json({
message: 'An error occurred while fetching the subtitles.',
});
}

console.log('Subtitles generated successfully.');

// Send the transcript to OpenAI and get back a short summary

const prompt = output.text as string;
const subtitle = output.subtitles as string;

const completion = await openai.chat.completions.create({
model: 'gpt-3.5-turbo',
messages: [
{
role: 'system',
content: `You are a concise text summarizer. You will be given a large paragraph and must summarize it in 10-15 words, no more. Give the summary directly; do not use filler like "Okay" or "Sure", and do not mention the paragraph, its author, or the speaker. Here is the paragraph: ${prompt}.`,
},
],
});

const summarizedText = completion.choices[0]?.message?.content;

if (!summarizedText) {
return Response.json({ message: 'The summarizer returned no text.' });
}

console.log('Generated short description.');

// Send the short summary to Replicate to generate a cover image
const imgArr = await replicate.run(
'stability-ai/stable-diffusion:ac732df83cea7fff18b8472768c88ad041fa750ff7682a21affe81863cbe77e4',
{
input: {
prompt: `Generate a YouTube thumbnail for the following content. It should be scenic with no text on it; make sure it ABSOLUTELY does not have any text embedded in it. Understand the following prompt and generate a high-quality image without any text, just a good thumbnail: ${summarizedText}`,
width: 1024,
height: 576,
scheduler: 'K_EULER',
},
},
);

if (!imgArr || !imgArr[0]) {
return Response.json({
message: 'An error occurred during image generation.',
});
}

console.log('Image generated successfully.');
const imgUrl = imgArr[0] as string;

// Remove the temporary audio file from the server
if (fs.existsSync(`./${res.audioName}.mp3`)) {
fs.unlinkSync(`./${res.audioName}.mp3`);
}

return Response.json({
message: 'Subtitles, summary, and cover image generated successfully.',
subtitle,
imgUrl,
summarizedText,
});
} catch (error) {
console.error(error);
return Response.error();
}
}
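For reference, a minimal client-side sketch of how this route can be called. The `uploadVideo` helper name is hypothetical; the endpoint path, the 'video' form field, and the response fields mirror the handler above, and the stack component in the next file follows the same pattern.

// Hypothetical client helper (illustration only, not part of this commit).
// Posts a video File as multipart form data and reads back the generated assets.
async function uploadVideo(video: File) {
  const formData = new FormData();
  formData.append('video', video); // must match form.get('video') in route.ts

  const response = await fetch('/api/cover-image-and-subtitle', {
    method: 'POST',
    body: formData,
  });

  // On success the route responds with { message, subtitle, imgUrl, summarizedText }.
  const { subtitle, imgUrl, summarizedText } = await response.json();
  return { subtitle, imgUrl, summarizedText };
}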
176 changes: 176 additions & 0 deletions ui/app/components/stacks/cover-image-and-subtitle.tsx
@@ -0,0 +1,176 @@
'use client';

import { useState } from 'react';

export const GenerateImageAndSubtitle = () => {
const [video, setVideo] = useState<File | null>(null);
const [loading, setLoading] = useState(false);
const [summary, setSummary] = useState('');
const [subtitle, setSubtitle] = useState('');
const [imgUrl, setImgUrl] = useState('');
const [isDropdownOpen, setIsDropdownOpen] = useState(false);

const handleVideoUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
if (subtitle) {
setSubtitle('');
}
if (imgUrl) {
setImgUrl('');
}
if (e.target.files && e.target.files[0]) {
setVideo(e.target.files[0]);
}
};

const getSubtitle = async () => {
try {
if (loading || !video) {
return;
}

if (subtitle || imgUrl) {
setSubtitle('');
setImgUrl('');
return;
}

setLoading(true);
const formData = new FormData();
if (!video) return;
formData.append('video', video);

const response = await fetch('/api/cover-image-and-subtitle', {
method: 'POST',
body: formData,
});

const data = await response.json();
setSubtitle(data.subtitle);
setImgUrl(data.imgUrl);
setSummary(data.summarizedText);
} catch (error) {
console.error(error);
} finally {
// Ensure the loading state is cleared even if the request fails
setLoading(false);
}
};

const downloadSubtitle = () => {
if (!subtitle) {
console.error('Subtitle not present.');
return;
}

const blob = new Blob([subtitle], { type: 'text/vtt' });
const link = document.createElement('a');
link.href = URL.createObjectURL(blob);
link.download = 'subtitle.vtt';

document.body.appendChild(link);
link.click();

// Clean up the link
document.body.removeChild(link);
};

return (
<div className="stretch mx-auto flex w-5/6 flex-col items-center justify-center space-y-4 pb-10 md:w-3/4 lg:w-2/3">
<div className="mt-2 flex items-center justify-center">
<label
htmlFor="customFileUpload"
className="flex w-full cursor-pointer items-center rounded-lg border-2 border-dashed py-1 pl-2 "
>
<span id="pdfLabel" className="mr-2 whitespace-nowrap">
Upload Video
</span>
<input
type="file"
onChange={handleVideoUpload}
disabled={loading}
aria-labelledby="pdfLabel"
accept="video/*"
id="customFileUpload"
className="hidden"
/>
{video && (
<div className="line-clamp-2 pr-2 text-gray-600">{video.name}</div>
)}
</label>
</div>
<div className="flex w-full flex-col items-center justify-center sm:flex-row sm:space-x-4">
{video && (
<video
src={URL.createObjectURL(video)}
className={`mx-auto w-full sm:w-1/2 ${loading && 'blur-sm'}`}
controls
/>
)}
{subtitle && (
<div className="mt-3 flex w-full flex-col items-center justify-center space-y-3 px-3 sm:mt-0 sm:w-1/2">
<div className="text-sm">{imgUrl && <img src={imgUrl} />}</div>
{summary && (
<div className="mb-3 text-sm">
<span className="font-medium">Summary of video:</span> {summary}
</div>
)}
<div className="flex w-full flex-col justify-center space-y-2">
<button
className="mx-auto rounded-md border border-black px-3 py-1 font-medium xl:w-1/2"
onClick={downloadSubtitle}
>
Download Subtitles
</button>
<div
className="mx-auto flex cursor-pointer items-center justify-center space-x-2"
onClick={() => setIsDropdownOpen(!isDropdownOpen)}
>
<svg
className={`h-4 w-4 ${isDropdownOpen ? 'rotate-180' : ''}`}
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
xmlns="http://www.w3.org/2000/svg"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth={2}
d="M19 9l-7 7-7-7"
/>
</svg>
<span className="text-center text-sm font-medium">
{isDropdownOpen ? 'Hide Subtitles' : 'Show Subtitles'}
</span>
</div>

<div
className="w-full"
style={{
maxHeight: isDropdownOpen ? '500px' : '0',
overflow: 'hidden',
transition: 'max-height 0.3s ease-in-out',
}}
>
{isDropdownOpen && (
<div className="bg-gray-100 p-2 text-sm">
{subtitle || 'No Subtitles'}
</div>
)}
</div>
</div>
</div>
)}
</div>
<button
onClick={getSubtitle}
disabled={loading}
className={`${
loading && 'bg-black text-white'
} rounded-md border border-black px-3 py-1 font-medium`}
>
{subtitle ? 'Reset' : loading ? 'Loading, 2-3 mins...' : 'Create'}
</button>
</div>
);
};

export default GenerateImageAndSubtitle;
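A minimal sketch of how the component above could be rendered from an app-router page; the page path in the comment is illustrative and not part of this commit.

// ui/app/cover-image-and-subtitle/page.tsx (hypothetical path, illustration only)
import GenerateImageAndSubtitle from '../components/stacks/cover-image-and-subtitle';

export default function Page() {
  // The stack is a client component ('use client'), so a server-rendered
  // page can simply return it.
  return <GenerateImageAndSubtitle />;
}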
1 change: 1 addition & 0 deletions ui/app/components/stacks/use-openai-assistant.tsx
@@ -10,6 +10,7 @@ const roleToColorMap: Record<Message['role'], string> = {
function: 'blue',
assistant: 'green',
data: 'orange',
tool: '',
};

export default function Chat() {
1 change: 1 addition & 0 deletions ui/next.config.js
@@ -1,4 +1,5 @@
/** @type {import('next').NextConfig} */

const nextConfig = {
async redirects() {
return [