semantic_coding.py
import os

import numpy as np
import torch
from diffusers import UnCLIPPipeline, DiffusionPipeline
from lora_diffusion.cli_lora_pti import train
from PIL import Image


def semantic_karlo(prompt, output_dir, num_initial_image, bg_preprocess, seed):
    pipe = UnCLIPPipeline.from_pretrained("kakaobrain/karlo-v1-alpha", torch_dtype=torch.float16)
    pipe = pipe.to('cuda')
    view_prompt = ["front view of ", "overhead view of ", "side view of ", "back view of "]

    if bg_preprocess:
        # Please refer to the code at https://github.com/Ir1d/image-background-remove-tool.
        from carvekit.api.high import HiInterface
        interface = HiInterface(object_type="object",
                                batch_size_seg=5,
                                batch_size_matting=1,
                                device='cuda' if torch.cuda.is_available() else 'cpu',
                                seg_mask_size=640,  # Use 640 for Tracer B7 and 320 for U2Net
                                matting_mask_size=2048,
                                trimap_prob_threshold=231,
                                trimap_dilation=30,
                                trimap_erosion_iters=5,
                                fp16=False)

    generator = torch.Generator(device='cuda').manual_seed(seed)
    os.makedirs(output_dir, exist_ok=True)
    for i in range(num_initial_image):
        # Only the first (front) view gets an explicit white-background hint.
        t = ", white background"
        if i == 0:
            prompt_ = f"{view_prompt[i % 4]}{prompt}{t}"
        else:
            prompt_ = f"{view_prompt[i % 4]}{prompt}"
        image = pipe(prompt_, generator=generator).images[0]
        fn = f"instance{i}.png"

        if bg_preprocess:
            # Motivated by NeuralLift-360 (removing bg) and Zero-1-to-3 (removing bg and object-centering).
            # NOTE: This option was added during the code organization process.
            # The results reported in the paper were obtained with the [bg_preprocess: False] setting.
            img_without_background = interface([image])
            # Use the alpha channel of carvekit's RGBA output as the foreground mask.
            mask = np.array(img_without_background[0])[:, :, -1] > 127
            image = np.array(image)
            image[~mask] = [255, 255, 255]
            # Optional object-centering crop (disabled):
            # import cv2
            # x, y, w, h = cv2.boundingRect(mask.astype(np.uint8))
            # image = image[y:y+h, x:x+w, :]
            image = Image.fromarray(image)
        image.save(os.path.join(output_dir, fn))
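

# Example usage (a minimal sketch; the prompt, path, and seed below are
# illustrative placeholders, not values taken from this module):
#
#   semantic_karlo("a teddy bear", "./exp/initial_image",
#                  num_initial_image=4, bg_preprocess=False, seed=0)
#
# With num_initial_image=4, this writes instance0.png ... instance3.png,
# cycling through the four entries of view_prompt (front/overhead/side/back).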


def semantic_sd(prompt, output_dir, num_initial_image, bg_preprocess, seed):
    pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    pipe = pipe.to('cuda')
    view_prompt = ["front view of ", "overhead view of ", "side view of ", "back view of "]

    if bg_preprocess:
        # Please refer to the code at https://github.com/Ir1d/image-background-remove-tool.
        from carvekit.api.high import HiInterface
        interface = HiInterface(object_type="object",
                                batch_size_seg=5,
                                batch_size_matting=1,
                                device='cuda' if torch.cuda.is_available() else 'cpu',
                                seg_mask_size=640,  # Use 640 for Tracer B7 and 320 for U2Net
                                matting_mask_size=2048,
                                trimap_prob_threshold=231,
                                trimap_dilation=30,
                                trimap_erosion_iters=5,
                                fp16=False)

    generator = torch.Generator(device='cuda').manual_seed(seed)
    os.makedirs(output_dir, exist_ok=True)
    for i in range(num_initial_image):
        # Only the first (front) view gets an explicit white-background hint.
        t = ", white background"
        if i == 0:
            prompt_ = f"{view_prompt[i % 4]}{prompt}{t}"
        else:
            prompt_ = f"{view_prompt[i % 4]}{prompt}"
        image = pipe(prompt_, generator=generator).images[0]
        fn = f"instance{i}.png"

        if bg_preprocess:
            # Motivated by NeuralLift-360 (removing bg) and Zero-1-to-3 (removing bg and object-centering).
            # NOTE: This option was added during the code organization process.
            # The results reported in the paper were obtained with the [bg_preprocess: False] setting.
            img_without_background = interface([image])
            # Use the alpha channel of carvekit's RGBA output as the foreground mask.
            mask = np.array(img_without_background[0])[:, :, -1] > 127
            image = np.array(image)
            image[~mask] = [255, 255, 255]
            # Optional object-centering crop (disabled):
            # import cv2
            # x, y, w, h = cv2.boundingRect(mask.astype(np.uint8))
            # image = image[y:y+h, x:x+w, :]
            image = Image.fromarray(image)
        image.save(os.path.join(output_dir, fn))
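

# Usage mirrors semantic_karlo above, swapping Karlo for Stable Diffusion v1.5
# (illustrative call, same placeholder arguments):
#
#   semantic_sd("a teddy bear", "./exp/initial_image",
#               num_initial_image=4, bg_preprocess=False, seed=0)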


def semantic_coding(exp_dir, cfgs, sd, initial):
    ti_step = cfgs.pop('ti_step')
    pt_step = cfgs.pop('pt_step')
    # semantic_model = cfgs.pop('semantic_model')
    prompt = cfgs['sd']['prompt']

    instance_dir = os.path.join(exp_dir, 'initial_image')
    weight_dir = os.path.join(exp_dir, 'lora')
    if initial == "":
        initial = None

    train(pretrained_model_name_or_path='runwayml/stable-diffusion-v1-5',
          instance_data_dir=instance_dir,
          output_dir=weight_dir,
          gradient_checkpointing=True,
          scale_lr=True,
          lora_rank=1,
          cached_latents=False,
          save_steps=ti_step,
          max_train_steps_ti=ti_step,
          max_train_steps_tuning=pt_step,
          use_template="object",
          lr_warmup_steps=0,
          lr_warmup_steps_lora=100,
          placeholder_tokens="<0>",
          initializer_tokens=initial,
          continue_inversion=True,
          continue_inversion_lr=1e-4,
          device="cuda:0",
          )
    if initial is not None:
        sd.prompt = prompt.replace(initial, '<0>')
    else:
        sd.prompt = "a <0>"
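

# End-to-end driver sketch (hypothetical: the config values, directory layout,
# and the SimpleNamespace stand-in for the `sd` object are illustrative
# assumptions, not part of the original pipeline):
if __name__ == "__main__":
    from types import SimpleNamespace

    exp_dir = "./exp"
    cfgs = {
        'ti_step': 500,   # textual-inversion steps (illustrative value)
        'pt_step': 500,   # pivotal-tuning steps (illustrative value)
        'sd': {'prompt': "a teddy bear"},
    }
    # semantic_coding only needs an object with a writable `.prompt` attribute.
    sd = SimpleNamespace(prompt=cfgs['sd']['prompt'])

    # 1) Generate the initial multi-view images (semantic_sd works the same way).
    semantic_karlo(cfgs['sd']['prompt'], os.path.join(exp_dir, 'initial_image'),
                   num_initial_image=4, bg_preprocess=False, seed=0)
    # 2) Train LoRA + textual inversion; sd.prompt is rewritten around the <0> token.
    semantic_coding(exp_dir, cfgs, sd, initial="teddy bear")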