Commit
Merge pull request #126 from VK-RED/generate-a-cover-image-and-subtitle-on-uploading-a-video

Create stack generate-a-cover-image-and-subtitle-on-uploading-a-video
SilenNaihin authored Jan 17, 2024
2 parents 41dbc98 + 5666417 commit 185b86a
Showing 13 changed files with 1,823 additions and 420 deletions.
202 changes: 202 additions & 0 deletions ui/app/api/cover-image-and-subtitle/route.ts
@@ -0,0 +1,202 @@
import { spawn } from 'child_process';
import fs from 'fs';
import { Readable } from 'stream';
import OpenAI from 'openai';
import Replicate from 'replicate';
import { v4 as uuidv4 } from 'uuid';

// initial config
const ffmpegPath = './node_modules/ffmpeg-static/ffmpeg';

const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});

const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN as string,
});

// types
enum createAudioFileStatus {
SUCCESS = 'Audio has been extracted successfully.',
FAILED = 'An error occurred while extracting the audio.',
}

type createAudioFileOutput = {
status: createAudioFileStatus;
audioName: string;
};

// Generate random audio name using UUID for uniqueness
const createAudioName = (): string => {
return uuidv4();
};

const createAudioFile = (
videoStream: Readable,
): Promise<createAudioFileOutput> => {
// Generate the base name without extension
const baseAudioName = createAudioName();
const audioName = `${baseAudioName}.mp3`; // Append the .mp3 extension

return new Promise((resolve, reject) => {
const ffmpegProcess = spawn(ffmpegPath, [
'-i',
'pipe:0', // Read input from stdin
'-vn', // No video
'-acodec',
'mp3', // Convert to mp3
'-f',
'mp3', // Output format as mp3
'pipe:1', // Write output to stdout
]);

// Pipe video stream to ffmpeg's stdin
videoStream.pipe(ffmpegProcess.stdin);

// Handle output stream
let audioBuffer = Buffer.alloc(0);
ffmpegProcess.stdout.on('data', (chunk) => {
audioBuffer = Buffer.concat([audioBuffer, chunk]);
});

ffmpegProcess.on('close', (code) => {
if (code !== 0) {
reject({
status: createAudioFileStatus.FAILED,
audioName: baseAudioName,
});
} else {
// Write the audio buffer to file with the correct file name
fs.writeFileSync(audioName, audioBuffer);
console.log('Audio extraction finished');
resolve({
status: createAudioFileStatus.SUCCESS,
audioName: baseAudioName,
});
}
});

ffmpegProcess.stderr.on('data', (data) => {
console.error(`ffmpeg stderr: ${data}`);
});

ffmpegProcess.on('error', (err) => {
console.error('Failed to start ffmpeg process:', err);
reject({
status: createAudioFileStatus.FAILED,
audioName: baseAudioName,
});
});
});
};

export async function POST(req: Request) {
try {
const form = await req.formData();
const videoFile = form.get('video') as Blob;

// Convert the uploaded Blob into a Buffer
const videoBuffer = Buffer.from(await videoFile.arrayBuffer());

// Create a readable stream from the video buffer
const videoStream = new Readable();
videoStream.push(videoBuffer);
videoStream.push(null);

// Extract the audio from the video stream and write it to a temporary file
const res = await createAudioFile(videoStream);

if (res.status === createAudioFileStatus.FAILED) {
if (fs.existsSync(`./${res.audioName}.mp3`)) {
fs.unlinkSync(`./${res.audioName}.mp3`);
}

return Response.json({ message: res });
}

const audioName = res.audioName;
const audioBase64 = fs.readFileSync(`./${audioName}.mp3`, 'base64');
const audioUri = `data:audio/mp3;base64,${audioBase64}`;

// Send the audio to Replicate and get back the subtitles and the full transcript
const output = await replicate.run(
'm1guelpf/whisper-subtitles:7f686e243a96c7f6f0f481bcef24d688a1369ed3983cea348d1f43b879615766',
{
input: {
format: 'vtt',
audio_path: audioUri,
model_name: 'base',
},
},
);

if (!output || !('text' in output) || !('subtitles' in output)) {
return Response.json({
message: 'An error occurred while fetching the subtitles.',
});
}

console.log('Subtitles generated successfully.');

// Send the transcript to OpenAI and get back a short summary

const prompt = output.text as string;
const subtitle = output.subtitles as string;

const completion = await openai.chat.completions.create({
model: 'gpt-3.5-turbo',
messages: [
{
role: 'system',
content: `You are a concise text summarizer. You will be given a large paragraph and must summarize it in 10-15 words, no more. Give the summary directly; do not use filler like "Okay" or "Sure", and do not mention the paragraph, its author, or the speaker. Here is the paragraph: ${prompt}.`,
},
],
});

const summarizedText = completion.choices[0]?.message?.content;

if (!summarizedText) {
return Response.json({ message: 'The summarizer returned no text.' });
}

console.log('Generated short description.');

// Send the short summary to Replicate to generate a cover image
const imgArr = await replicate.run(
'stability-ai/stable-diffusion:ac732df83cea7fff18b8472768c88ad041fa750ff7682a21affe81863cbe77e4',
{
input: {
prompt: `Generate a YouTube thumbnail for the following content. It should be scenic with no text on it; make sure it ABSOLUTELY does not have any text embedded in it. Understand the following prompt and generate a high-quality image without any text, just a good thumbnail: ${summarizedText}`,
width: 1024,
height: 576,
scheduler: 'K_EULER',
},
},
);

if (!imgArr || !imgArr[0]) {
return Response.json({
message: 'An error occurred during image generation.',
});
}

console.log('Image generated successfully.');
const imgUrl = imgArr[0] as string;

// Remove the temporary audio file from the server
if (fs.existsSync(`./${res.audioName}.mp3`)) {
fs.unlinkSync(`./${res.audioName}.mp3`);
}

return Response.json({
message: 'Subtitles, summary, and cover image generated successfully.',
subtitle,
imgUrl,
summarizedText,
});
} catch (error) {
console.error(error);
return Response.error();
}
}
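For reference, a minimal client-side sketch of how this route can be called. The `uploadVideo` helper name is hypothetical; the endpoint path, the 'video' form field, and the response fields mirror the handler above, and the stack component in the next file follows the same pattern.

// Hypothetical client helper (illustration only, not part of this commit).
// Posts a video File as multipart form data and reads back the generated assets.
async function uploadVideo(video: File) {
  const formData = new FormData();
  formData.append('video', video); // must match form.get('video') in route.ts

  const response = await fetch('/api/cover-image-and-subtitle', {
    method: 'POST',
    body: formData,
  });

  // On success the route responds with { message, subtitle, imgUrl, summarizedText }.
  const { subtitle, imgUrl, summarizedText } = await response.json();
  return { subtitle, imgUrl, summarizedText };
}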
176 changes: 176 additions & 0 deletions ui/app/components/stacks/cover-image-and-subtitle.tsx
@@ -0,0 +1,176 @@
'use client';

import { useState } from 'react';

export const GenerateImageAndSubtitle = () => {
const [video, setVideo] = useState<File | null>(null);
const [loading, setLoading] = useState(false);
const [summary, setSummary] = useState('');
const [subtitle, setSubtitle] = useState('');
const [imgUrl, setImgUrl] = useState('');
const [isDropdownOpen, setIsDropdownOpen] = useState(false);

const handleVideoUpload = (e: React.ChangeEvent<HTMLInputElement>) => {
if (subtitle) {
setSubtitle('');
}
if (imgUrl) {
setImgUrl('');
}
if (e.target.files && e.target.files[0]) {
setVideo(e.target.files[0]);
}
};

const getSubtitle = async () => {
try {
if (loading || !video) {
return;
}

if (subtitle || imgUrl) {
setSubtitle('');
setImgUrl('');
return;
}

setLoading(true);
const formData = new FormData();
if (!video) return;
formData.append('video', video);

const response = await fetch('/api/cover-image-and-subtitle', {
method: 'POST',
body: formData,
});

const data = await response.json();
setSubtitle(data.subtitle);
setImgUrl(data.imgUrl);
setSummary(data.summarizedText);
} catch (error) {
console.error(error);
} finally {
// Ensure the loading state is cleared even if the request fails
setLoading(false);
}
};

const downloadSubtitle = () => {
if (!subtitle) {
console.error('Subtitle not present.');
return;
}

const blob = new Blob([subtitle], { type: 'text/vtt' });
const link = document.createElement('a');
link.href = URL.createObjectURL(blob);
link.download = 'subtitle.vtt';

document.body.appendChild(link);
link.click();

// Clean up the link
document.body.removeChild(link);
};

return (
<div className="stretch mx-auto flex w-5/6 flex-col items-center justify-center space-y-4 pb-10 md:w-3/4 lg:w-2/3">
<div className="mt-2 flex items-center justify-center">
<label
htmlFor="customFileUpload"
className="flex w-full cursor-pointer items-center rounded-lg border-2 border-dashed py-1 pl-2 "
>
<span id="pdfLabel" className="mr-2 whitespace-nowrap">
Upload Video
</span>
<input
type="file"
onChange={handleVideoUpload}
disabled={loading}
aria-labelledby="pdfLabel"
accept="video/*"
id="customFileUpload"
className="hidden"
/>
{video && (
<div className="line-clamp-2 pr-2 text-gray-600">{video.name}</div>
)}
</label>
</div>
<div className="flex w-full flex-col items-center justify-center sm:flex-row sm:space-x-4">
{video && (
<video
src={URL.createObjectURL(video)}
className={`mx-auto w-full sm:w-1/2 ${loading && 'blur-sm'}`}
controls
/>
)}
{subtitle && (
<div className="mt-3 flex w-full flex-col items-center justify-center space-y-3 px-3 sm:mt-0 sm:w-1/2">
<div className="text-sm">{imgUrl && <img src={imgUrl} />}</div>
{summary && (
<div className="mb-3 text-sm">
<span className="font-medium">Summary of video:</span> {summary}
</div>
)}
<div className="flex w-full flex-col justify-center space-y-2">
<button
className="mx-auto rounded-md border border-black px-3 py-1 font-medium xl:w-1/2"
onClick={downloadSubtitle}
>
Download Subtitles
</button>
<div
className="mx-auto flex cursor-pointer items-center justify-center space-x-2"
onClick={() => setIsDropdownOpen(!isDropdownOpen)}
>
<svg
className={`h-4 w-4 ${isDropdownOpen ? 'rotate-180' : ''}`}
fill="none"
stroke="currentColor"
viewBox="0 0 24 24"
xmlns="http://www.w3.org/2000/svg"
>
<path
strokeLinecap="round"
strokeLinejoin="round"
strokeWidth={2}
d="M19 9l-7 7-7-7"
/>
</svg>
<span className="text-center text-sm font-medium">
{isDropdownOpen ? 'Hide Subtitles' : 'Show Subtitles'}
</span>
</div>

<div
className="w-full"
style={{
maxHeight: isDropdownOpen ? '500px' : '0',
overflow: 'hidden',
transition: 'max-height 0.3s ease-in-out',
}}
>
{isDropdownOpen && (
<div className="bg-gray-100 p-2 text-sm">
{subtitle || 'No Subtitles'}
</div>
)}
</div>
</div>
</div>
)}
</div>
<button
onClick={getSubtitle}
disabled={loading}
className={`${
loading && 'bg-black text-white'
} rounded-md border border-black px-3 py-1 font-medium`}
>
{subtitle ? 'Reset' : loading ? 'Loading, 2-3 mins...' : 'Create'}
</button>
</div>
);
};

export default GenerateImageAndSubtitle;
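A minimal sketch of how the component above could be rendered from an app-router page; the page path in the comment is illustrative and not part of this commit.

// ui/app/cover-image-and-subtitle/page.tsx (hypothetical path, illustration only)
import GenerateImageAndSubtitle from '../components/stacks/cover-image-and-subtitle';

export default function Page() {
  // The stack is a client component ('use client'), so a server-rendered
  // page can simply return it.
  return <GenerateImageAndSubtitle />;
}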
1 change: 1 addition & 0 deletions ui/app/components/stacks/use-openai-assistant.tsx
@@ -10,6 +10,7 @@ const roleToColorMap: Record<Message['role'], string> = {
function: 'blue',
assistant: 'green',
data: 'orange',
tool: '',
};

export default function Chat() {
1 change: 1 addition & 0 deletions ui/next.config.js
@@ -1,4 +1,5 @@
/** @type {import('next').NextConfig} */

const nextConfig = {
async redirects() {
return [