-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
paraphrasing_with_transformers_pythoncode.py
67 lines (49 loc) · 2.37 KB
/
paraphrasing_with_transformers_pythoncode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- coding: utf-8 -*-
"""Paraphrasing-with-Transformers_PythonCode.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1bPfvSF7bJqDfw9ZMgfIZPd1Bk-fW7AJY
"""
# Notebook-only setup step: the original IPython shell escape ("!pip ...") is
# not valid syntax in a plain .py file. Install the dependencies from a shell
# before running this script:
#   pip install transformers sentencepiece
# NOTE(review): the star import is kept as-is; the script relies on it for the
# Pegasus* and Auto* classes used below.
from transformers import *

# Hugging Face checkpoints covered by this tutorial, in the order they are used.
model_names = [
    "tuner007/pegasus_paraphrase",
    "Vamsi/T5_Paraphrase_Paws",
    "prithivida/parrot_paraphraser_on_T5",  # Parrot
]

# Start with the Pegasus paraphraser, loaded through its dedicated model and
# fast-tokenizer classes.
model = PegasusForConditionalGeneration.from_pretrained("tuner007/pegasus_paraphrase")
tokenizer = PegasusTokenizerFast.from_pretrained("tuner007/pegasus_paraphrase")
def get_paraphrased_sentences(
    model,
    tokenizer,
    sentence,
    num_return_sequences=5,
    num_beams=5,
    **generate_kwargs,
):
    """Return paraphrase candidates for ``sentence`` produced by ``model``.

    Args:
        model: Object exposing ``generate(**inputs, ...)``.
        tokenizer: Callable that encodes a list of texts into the keyword
            inputs expected by ``model.generate`` and that also provides
            ``batch_decode``.
        sentence: The text to paraphrase; it is encoded as a one-item batch.
        num_return_sequences: Number of candidates requested from
            ``model.generate``.
        num_beams: Beam width forwarded to ``model.generate``.
        **generate_kwargs: Any additional keyword arguments, forwarded
            unchanged to ``model.generate``.

    Returns:
        Whatever ``tokenizer.batch_decode`` returns for the generated
        sequences, decoded with ``skip_special_tokens=True``.
    """
    # Encode the sentence as a single-element batch of token IDs.
    inputs = tokenizer(
        [sentence],
        truncation=True,
        padding="longest",
        return_tensors="pt",
    )
    # Generate the candidate sequences.
    outputs = model.generate(
        **inputs,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        **generate_kwargs,
    )
    # Turn the generated token IDs back into text.
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)
# Example input; it is reused further down as the first Parrot phrase.
sentence = "Learning is the process of acquiring new understanding, knowledge, behaviors, skills, values, attitudes, and preferences."

# Pegasus: request 10 candidates with a beam width of 10 for each input.
get_paraphrased_sentences(
    model,
    tokenizer,
    sentence,
    num_return_sequences=10,
    num_beams=10,
)
get_paraphrased_sentences(
    model,
    tokenizer,
    "To paraphrase a source, you have to rewrite a passage without changing the meaning of the original text.",
    num_return_sequences=10,
    num_beams=10,
)

# Switch to the T5 checkpoint, loaded through the generic Auto* classes.
tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")

# The input is prefixed with "paraphrase: " for this checkpoint; the default
# candidate count and beam width of the helper are used here.
get_paraphrased_sentences(
    model,
    tokenizer,
    "paraphrase: " + "One of the best ways to learn is to teach what you've already learned",
)
# Notebook-only setup step: the original IPython shell escape ("!pip ...") is
# not valid syntax in a plain .py file. Install Parrot from a shell before
# running this part of the script:
#   pip install git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git
from parrot import Parrot

# Build the Parrot paraphraser with no arguments, i.e. the library's defaults.
parrot = Parrot()
# Inputs for the Parrot demo; the first one reuses the sentence defined above.
phrases = [
    sentence,
    "One of the best ways to learn is to teach what you've already learned",
    "Paraphrasing is the process of coming up with someone else's ideas in your own words",
]

# Print each input phrase between separator lines, followed by every
# paraphrase Parrot produced for it.
for phrase in phrases:
    print("-"*100)
    print("Input_phrase: ", phrase)
    print("-"*100)
    paraphrases = parrot.augment(input_phrase=phrase)
    # augment() can yield a falsy result (presumably None or an empty list
    # when nothing was generated -- confirm against the Parrot docs); in that
    # case only the header is printed.
    if paraphrases:
        for paraphrase in paraphrases:
            print(paraphrase)