Changes to be committed:

GoooHi · GoooHi · commit ae9dba93066a · 2023-05-19T15:33:41.000+08:00
modified:   ppdiffusers/examples/controlnet/README.md
	new file:   ppdiffusers/examples/controlnet/annotator/shuffle/__init__.py
	modified:   ppdiffusers/examples/controlnet/annotator/util.py
	renamed:    ppdiffusers/examples/controlnet/gradio_mlsd2image.py -> ppdiffusers/examples/controlnet/gradio_shuffle2image.py
diff --git a/ppdiffusers/examples/controlnet/README.md b/ppdiffusers/examples/controlnet/README.md
@@ -75,12 +75,12 @@ python gradio_ip2p2image.py
 ```
 ![image](https://github.com/Submerge-Gu/Images/raw/main/4.png)
 
-## MLSD to Image
-(ControlNet V1.1) 在原基础上进行升级，采用HoughLine检测图片作为控制条件
+## Shuffle to Image
+(ControlNet V1.1) Shuffle打乱图像进行重构。
 ```
-python gradio_mlsd2image.py
+python gradio_shuffle2image.py
 ```
-![image](https://github.com/Submerge-Gu/Images/raw/main/8.png)
+![image](https://github.com/Submerge-Gu/Images/raw/main/control.png)
 
 # ControlNet模型训练
 
diff --git a/ppdiffusers/examples/controlnet/annotator/shuffle/__init__.py b/ppdiffusers/examples/controlnet/annotator/shuffle/__init__.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+
+import cv2
+import numpy as np
+from annotator.util import img2mask, make_noise_disk
+
+
+class ContentShuffleDetector:
+    """Warp an image through a random, smoothly varying sampling map."""
+
+    def __call__(self, img, h=None, w=None, f=None):
+        """Return ``img`` resampled with ``cv2.remap`` along a noise-driven flow field.
+
+        Args:
+            img: Array whose ``shape`` unpacks into three values (H, W, C).
+            h: Height of the generated flow field; defaults to the image height.
+            w: Width of the generated flow field; defaults to the image width.
+            f: Value forwarded as the last argument of ``make_noise_disk``;
+                defaults to 256.
+        """
+        src_h, src_w, _ = img.shape
+        h = src_h if h is None else h
+        w = src_w if w is None else w
+        f = 256 if f is None else f
+
+        # Two independent noise fields, scaled to source pixel coordinates:
+        # the first drives the x lookup, the second the y lookup.
+        map_x = make_noise_disk(h, w, 1, f) * float(src_w - 1)
+        map_y = make_noise_disk(h, w, 1, f) * float(src_h - 1)
+
+        sample_map = np.concatenate([map_x, map_y], axis=2).astype(np.float32)
+        return cv2.remap(img, sample_map, None, cv2.INTER_LINEAR)
+
+
+class ColorShuffleDetector:
+    """Recolour an image by blending two random noise fields per pixel."""
+
+    def __call__(self, img):
+        """Return a uint8 array built from ``img`` and two random noise fields.
+
+        Args:
+            img: Array whose ``shape`` unpacks into three values (H, W, C);
+                its values are divided by 255 to form the blend weight.
+        """
+        height, width, _ = img.shape
+        freq = random.randint(64, 384)
+        noise_a = make_noise_disk(height, width, 3, freq)
+        noise_b = make_noise_disk(height, width, 3, freq)
+
+        # Push both fields away from their midpoint (x3) and clamp to [0, 1].
+        midpoint = (noise_a + noise_b) / 2.0
+        noise_a = (midpoint + (noise_a - midpoint) * 3.0).clip(0, 1)
+        noise_b = (midpoint + (noise_b - midpoint) * 3.0).clip(0, 1)
+
+        # The scaled image picks between the two fields per pixel and channel.
+        weight = img.astype(np.float32) / 255.0
+        blended = noise_a * weight + noise_b * (1 - weight)
+
+        # Stretch each channel so its minimum maps to 0 and its maximum to 255;
+        # the 1e-5 floor avoids dividing by zero on a flat channel.
+        blended -= blended.min(axis=(0, 1), keepdims=True)
+        blended /= np.maximum(blended.max(axis=(0, 1), keepdims=True), 1e-5)
+        blended *= 255.0
+        return blended.clip(0, 255).astype(np.uint8)
+
+
+class GrayDetector:
+    """Collapse the first three channels into one using random weights."""
+
+    def __call__(self, img):
+        """Return a three-channel uint8 image whose channels are all equal.
+
+        The first three channels of ``img`` are combined with three random
+        positive weights normalised to sum to one, and the result is
+        replicated across three channels.
+        """
+        eps = 1e-5
+        pixels = img.astype(np.float32)
+
+        # eps keeps every weight strictly positive so the sum is never zero.
+        weights = [random.random() + eps for _ in range(3)]
+        total = sum(weights)
+        w0, w1, w2 = (weight / total for weight in weights)
+
+        mixed = pixels[:, :, 0] * w0 + pixels[:, :, 1] * w1 + pixels[:, :, 2] * w2
+        stacked = np.stack([mixed, mixed, mixed], axis=2)
+        return stacked.clip(0, 255).astype(np.uint8)
+
+
+class DownSampleDetector:
+    """Degrade an image with a noisy down-then-up image-pyramid round trip."""
+
+    def __call__(self, img, level=3, k=16.0):
+        """Return a uint8 image after ``level`` pyrDown and ``level`` pyrUp steps.
+
+        Args:
+            img: Input image array; converted to float32 before processing.
+            level: Number of ``cv2.pyrDown`` steps, followed by the same number
+                of ``cv2.pyrUp`` steps.
+            k: Standard deviation of the Gaussian noise added at every step.
+        """
+
+        def add_noise(arr):
+            # In-place add keeps the float32 dtype of the working buffer.
+            arr += np.random.normal(loc=0.0, scale=k, size=arr.shape)
+            return arr
+
+        work = img.astype(np.float32)
+        # Noise is added before each downsample step...
+        for _ in range(level):
+            work = cv2.pyrDown(add_noise(work))
+        # ...and after each upsample step.
+        for _ in range(level):
+            work = add_noise(cv2.pyrUp(work))
+        return work.clip(0, 255).astype(np.uint8)
+
+
+class Image2MaskShuffleDetector:
+    """Turn an image into a random binary mask rendered as a 0/255 uint8 array."""
+
+    def __init__(self, resolution=(640, 512)):
+        """Store the target mask size.
+
+        Args:
+            resolution: ``(H, W)`` pair passed on to ``img2mask``.
+        """
+        self.H, self.W = resolution
+
+    def __call__(self, img):
+        """Return the mask produced by ``img2mask`` as uint8 values 0 or 255.
+
+        Args:
+            img: Image accepted by ``img2mask``.
+        """
+        mask = img2mask(img, self.H, self.W)
+        # img2mask returns the boolean result of a comparison. Scaling a bool
+        # array in place by a float raises UFuncTypeError (a float64 result
+        # cannot be cast back to bool), so convert to float before scaling.
+        scaled = mask.astype(np.float32) * 255.0
+        return scaled.clip(0, 255).astype(np.uint8)
diff --git a/ppdiffusers/examples/controlnet/annotator/util.py b/ppdiffusers/examples/controlnet/annotator/util.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import os
+import random
 
 import cv2
 import numpy as np
@@ -51,3 +52,31 @@ def resize_image(input_image, resolution):
     W = int(np.round(W / 64.0)) * 64
     img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA)
     return img
+
+
+def make_noise_disk(H, W, C, F):
+    """Return an (H, W, C) field of smooth random noise normalised to [0, 1].
+
+    A coarse grid of uniform random values is upsampled with bicubic
+    interpolation onto a canvas padded by ``F`` on every side, the central
+    H x W window is cut out, and the result is shifted and scaled so its
+    minimum is 0 and its maximum is 1.
+
+    Args:
+        H: Output height.
+        W: Output width.
+        C: Number of channels in the coarse grid.
+        F: Grid spacing; also the padding removed from each border.
+    """
+    coarse_shape = ((H // F) + 2, (W // F) + 2, C)
+    field = np.random.uniform(low=0, high=1, size=coarse_shape)
+
+    padded_size = (W + 2 * F, H + 2 * F)  # cv2.resize expects (width, height)
+    field = cv2.resize(field, padded_size, interpolation=cv2.INTER_CUBIC)
+    field = field[F : F + H, F : F + W]
+
+    field -= np.min(field)
+    field /= np.max(field)
+
+    # A trailing channel axis is appended explicitly in the single-channel case.
+    return field[:, :, None] if C == 1 else field
+
+
+def img2mask(img, H, W, low=10, high=90):
+    """Build a random boolean mask of size (H, W) from a uint8 image.
+
+    One channel is picked at random (for 3-D input), resized to (H, W),
+    inverted with probability one half, and compared against a randomly
+    chosen percentile of its own values.
+
+    Args:
+        img: 2-D or 3-D uint8 array.
+        H: Output height.
+        W: Output width.
+        low: Inclusive lower bound of the random percentile.
+        high: Exclusive upper bound of the random percentile.
+
+    Returns:
+        Boolean array that is True where the plane is below the threshold.
+    """
+    assert img.ndim in (2, 3)
+    assert img.dtype == np.uint8
+
+    if img.ndim == 3:
+        channel = random.randrange(0, img.shape[2])
+        plane = img[:, :, channel]
+    else:
+        plane = img
+
+    plane = cv2.resize(plane, (W, H), interpolation=cv2.INTER_CUBIC)
+
+    invert = random.uniform(0, 1) < 0.5
+    if invert:
+        plane = 255 - plane
+
+    threshold = np.percentile(plane, random.randrange(low, high))
+    return plane < threshold
diff --git a/ppdiffusers/examples/controlnet/gradio_shuffle2image.py b/ppdiffusers/examples/controlnet/gradio_shuffle2image.py
@@ -17,15 +17,15 @@
 
 import gradio as gr
 import paddle
-from annotator.mlsd import MLSDdetector
+from annotator.shuffle import ContentShuffleDetector
 from annotator.util import HWC3, resize_image
 
 from paddlenlp.trainer import set_seed as seed_everything
 from ppdiffusers import ControlNetModel, StableDiffusionControlNetPipeline
 
-apply_mlsd = MLSDdetector()
+apply_shuffle = ContentShuffleDetector()
 
-controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_mlsd")
+controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11e_sd15_shuffle")
 pipe = StableDiffusionControlNetPipeline.from_pretrained(
     "runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
 )
@@ -44,21 +44,16 @@ def process(
     scale,
     seed,
     eta,
-    value_threshold,
-    distance_threshold,
 ):
     with paddle.no_grad():
         img = resize_image(HWC3(input_image), image_resolution)
         H, W, C = img.shape
-        detected_map = apply_mlsd(img, value_threshold, distance_threshold)
-        detected_map = HWC3(detected_map)
+        detected_map = apply_shuffle(img, w=W, h=H, f=256)
 
         control = paddle.to_tensor(detected_map.copy(), dtype=paddle.float32) / 255.0
         control = control.unsqueeze(0).transpose([0, 3, 1, 2])
 
-        control_scales = (
-            [strength * (0.825 ** float(12 - i)) for i in range(13)] if guess_mode else ([strength] * 13)
-        )  # Magic number. IDK why. Perhaps because 0.825**12<0.01 but 0.826**12>0.01
+        control_scales = [strength] * 13
         if seed == -1:
             seed = random.randint(0, 65535)
         seed_everything(seed)
@@ -83,7 +78,7 @@ def process(
 block = gr.Blocks().queue()
 with block:
     with gr.Row():
-        gr.Markdown("## Control Stable Diffusion with MLSD Lines")
+        gr.Markdown("## Control Stable Diffusion with Content Shuffle")
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(source="upload", type="numpy")
@@ -94,12 +89,6 @@ def process(
                 image_resolution = gr.Slider(label="Image Resolution", minimum=256, maximum=768, value=512, step=64)
                 strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                 guess_mode = gr.Checkbox(label="Guess Mode", value=False)
-                value_threshold = gr.Slider(
-                    label="Hough value threshold (MLSD)", minimum=0.01, maximum=2.0, value=0.1, step=0.01
-                )
-                distance_threshold = gr.Slider(
-                    label="Hough ditance threshold (MLSD)", minimum=0.01, maximum=20.0, value=0.1, step=0.01
-                )
                 ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
                 scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                 seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
@@ -126,9 +115,8 @@ def process(
         scale,
         seed,
         eta,
-        value_threshold,
-        distance_threshold,
     ]
     run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
 
+
 block.launch(server_name="0.0.0.0", server_port=8513)