scale=0.5或者0.25时报错

```python
import os
import sys
import cv2
import math
import glob
import torch
import argparse
import warnings
import numpy as np
from time import time
from tqdm import tqdm
from padder import InputPadder # pads inputs to an integer multiple of 32
from model.pytorch_msssim import ssim_matlab
from model.RIFE import Model
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Module-level model setup: runs once at import time, before any evaluation.
model = Model()
# 'train_log' is presumably the checkpoint directory -- verify against model.RIFE.
model.load_model('train_log')
model.eval()
# NOTE(review): Model.device() is defined in model.RIFE (not shown here);
# presumably it moves the network onto the compute device -- confirm.
model.device()

def getXVFI(dir, multiple=8, t_step_size=32):
    """Build the list of [I0, I1, It, t] test entries for an X-TEST style tree.

    The directory layout scanned is ``dir/<type>/<scene>/*.png``; folders and
    frames are visited in sorted order. Every window of ``t_step_size`` frames
    contributes ``multiple - 1`` entries, one per intermediate time step.

    Args:
        dir: Dataset root folder. (The name shadows the builtin but is kept so
            existing keyword callers keep working.)
        multiple: Interpolation factor; ``multiple - 1`` intermediate frames
            are listed per window.
        t_step_size: Frame distance between the two input frames I0 and I1.

    Returns:
        A list of 4-item lists ``[path_I0, path_I1, path_It, t]`` where ``t``
        is the normalised time of ``It`` taken from
        ``np.linspace(1/multiple, 1 - 1/multiple, multiple - 1)``.
    """
    testPath = []
    t = np.linspace((1 / multiple), (1 - (1 / multiple)), (multiple - 1))
    # Frame offset between two consecutive intermediate ground-truth frames.
    gt_stride = t_step_size // multiple
    for type_folder in sorted(glob.glob(os.path.join(dir, '*', ''))):
        for scene_folder in sorted(glob.glob(os.path.join(type_folder, '*', ''))):
            frame_folder = sorted(glob.glob(scene_folder + '*.png'))
            # Only start a window when its end frame (idx + t_step_size) exists.
            # This skips a trailing partial window instead of raising
            # IndexError when the scene does not hold k * t_step_size + 1 frames.
            for idx in range(0, len(frame_folder) - t_step_size, t_step_size):
                for mul in range(multiple - 1):
                    testPath.append([
                        frame_folder[idx],
                        frame_folder[idx + t_step_size],
                        frame_folder[idx + gt_stride * (mul + 1)],
                        t[mul],
                    ])
    return testPath


def _recursive_generator(model, frame1, frame2, scale, num_recursions):
    if num_recursions == 0:
        yield frame1
    else:
        with torch.no_grad():
            mid_frame = model.inference(frame1, frame2, scale=scale, TTA=True)
            yield from _recursive_generator(model, frame1, mid_frame, scale, num_recursions - 1)
            yield from _recursive_generator(model, mid_frame, frame2, scale, num_recursions - 1)


def _read_image_float(image_path):
    """Read an image with OpenCV and return it as float32 scaled by 1/255."""
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError(f"cannot read image: {image_path}")
    return image.astype(np.float32) * (1.0 / 255.0)


def test_XTEST(mode, path, model, device, save_result, save_dir):
    """Run 8x interpolation on the X-TEST list and print mean PSNR / SSIM.

    For every (I0, I1) pair from ``getXVFI(path, multiple=8, t_step_size=32)``
    the seven intermediate frames are produced by three levels of recursive
    midpoint interpolation, optionally saved as PNG, and compared against the
    matching ground-truth frames.

    Args:
        mode: ``'2K'`` (inputs and ground truth are resized to 2048x1080 and
            the model runs with scale=0.5) or ``'4K'`` (images are used at
            their stored resolution and the model runs with scale=0.25).
        path: Dataset root forwarded to ``getXVFI``.
        model: Model object handed to ``_recursive_generator``.
        device: Torch device the image tensors are moved to.
        save_result: When truthy, each estimate is written to ``save_dir``.
        save_dir: Output directory; created when it does not exist.

    Raises:
        ValueError: If ``mode`` is neither ``'2K'`` nor ``'4K'``.
        FileNotFoundError: If an input or ground-truth image cannot be read.
    """
    # Resolve the mode up front so an unsupported value fails with a clear
    # message instead of an unbound ``scale`` deep inside the loop.
    if mode == '2K':
        scale, target_size = 0.5, (2048, 1080)
    elif mode == '4K':
        scale, target_size = 0.25, None
    else:
        raise ValueError(f"unsupported mode {mode!r}; expected '2K' or '4K'")

    if save_result:
        os.makedirs(save_dir, exist_ok=True)

    listFiles = getXVFI(path, multiple=8, t_step_size=32)
    # getXVFI emits 7 entries per input pair (multiple - 1), so every 7th
    # entry starts a new pair and each pair owns 7 ground-truth frames.
    input_frames = [item[:2] for item in listFiles][::7]
    gts = [item[2:] for item in listFiles]
    fltPsnr, fltSsim = [], []
    for pair_idx, intFrame in enumerate(tqdm(input_frames)):
        npyOne = _read_image_float(intFrame[0])
        npyTwo = _read_image_float(intFrame[1])
        gtFrames = gts[pair_idx * 7:(pair_idx + 1) * 7]

        if target_size is not None:
            npyOne = cv2.resize(src=npyOne, dsize=target_size, fx=0.0, fy=0.0, interpolation=cv2.INTER_AREA)
            npyTwo = cv2.resize(src=npyTwo, dsize=target_size, fx=0.0, fy=0.0, interpolation=cv2.INTER_AREA)

        # HWC image -> 1xCxHxW float tensor on the target device.
        tenOne = torch.FloatTensor(np.ascontiguousarray(npyOne.transpose(2, 0, 1)[None, :, :, :])).to(device)
        tenTwo = torch.FloatTensor(np.ascontiguousarray(npyTwo.transpose(2, 0, 1)[None, :, :, :])).to(device)

        padder = InputPadder(tenOne.shape, 32)
        tenOne, tenTwo = padder.pad(tenOne, tenTwo)

        # The generator yields I0 first; drop it to keep only the 7 estimates.
        frames = list(_recursive_generator(model, tenOne, tenTwo, scale, 3))[1:]

        fltPsnr_single_testcase, fltSsim_single_testcase = [], []
        for i, frame in enumerate(frames):
            tenEstimate = padder.unpad(frame[0])
            npyEstimate = (tenEstimate.detach().cpu().numpy().transpose(1, 2, 0) * 255.0).clip(0.0, 255.0).round().astype(np.uint8)

            if save_result:
                # Four-digit, zero-padded running index across all pairs.
                output_filename = os.path.join(save_dir, f"{pair_idx * 7 + i:04d}.png")
                cv2.imwrite(output_filename, npyEstimate)

            # Metrics are computed on the 8-bit quantised estimate.
            tenEstimate = torch.FloatTensor(npyEstimate.transpose(2, 0, 1)[None, :, :, :]).to(device) / 255.0
            npyTruth = _read_image_float(gtFrames[i][0])

            # Ground truth is resized only in 2K mode. Resizing it in 4K mode
            # would no longer match the full-resolution estimate.
            if target_size is not None:
                npyTruth = cv2.resize(src=npyTruth, dsize=target_size, fx=0.0, fy=0.0, interpolation=cv2.INTER_AREA)
            tenGT = torch.FloatTensor(np.ascontiguousarray(npyTruth.transpose(2, 0, 1)[None, :, :, :])).to(device)

            squared_error = (tenEstimate - tenGT) * (tenEstimate - tenGT)
            fltPsnr_single_testcase.append(-10 * math.log10(torch.mean(squared_error).item()))
            fltSsim_single_testcase.append(ssim_matlab(tenEstimate, tenGT).detach().cpu().numpy())
        fltPsnr.append(np.mean(fltPsnr_single_testcase))
        fltSsim.append(np.mean(fltSsim_single_testcase))
    print('PSNR: %.2f, SSIM: %.4f' % (np.mean(fltPsnr), np.mean(fltSsim)))


# Evaluation settings for this run, followed by the actual benchmark call.
path ='/data/data/xtest'  # dataset root passed to test_XTEST
save_result=True  # write every interpolated frame to save_dir
mode = '4K'  # test_XTEST distinguishes '2K' and '4K'
save_dir = './XTEST_4K'  # output folder for the saved frames
test_XTEST(mode, path, model, device, save_result, save_dir)
```

作者您好，我使用上述代码测试RIFE在X-TEST-L（2K以及4K）数据集上的性能。当设置scale为1时，可以正常运行；当设置scale为0.5或者0.25时，就会报错：
```text
(/data/env/myvfi) (RIFE) (base) lzj7@ins-7df6eojsifuykig9:/data/MyVFI/ECCV2022-RIFE/benchmark$ python XTEST_L.py 
  7%|████████████▏                                                                                                                                                                         | 1/15 [00:05<01:20,  5.77s/it]
Traceback (most recent call last):
  File "/data/MyVFI/ECCV2022-RIFE/benchmark/XTEST_L.py", line 99, in <module>
    test_XTEST(mode, path, model, device, save_result, save_dir)
  File "/data/MyVFI/ECCV2022-RIFE/benchmark/XTEST_L.py", line 80, in test_XTEST
    tenEstimate = model.inference(tenOne, tenTwo, scale=scale, TTA=True)[0]
  File "/data/MyVFI/ECCV2022-RIFE/model/RIFE.py", line 60, in inference
    flow, mask, merged, flow_teacher, merged_teacher, loss_distill = self.flownet(imgs, scale_list, timestep=timestep)
  File "/data/env/myvfi/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/data/MyVFI/ECCV2022-RIFE/model/IFNet.py", line 77, in forward
    flow_d, mask_d = stu[i](torch.cat((img0, img1, warped_img0, warped_img1, mask), 1), flow, scale=scale[i])
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 2176 but got size 2304 for tensor number 2 in the list.
```

奇怪的是，第一组数据成功跑出了结果，到第二组测试数据就开始报错了。使用上述代码对XTEST以及Xiph数据集测试时，只要scale设为0.25或者0.5，就会遇到上述问题。

经过分析，是IFNet.py中flow的shape发生了变化:
<img width="965" alt="image" src="https://github.com/user-attachments/assets/25fd5fd9-9042-47af-b6db-39a4bd8e3d9e" />



Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

scale=0.5或者0.25时报错 #385

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

scale=0.5或者0.25时报错 #385

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions