import os import torch from torch import nn from transformers import AutoModel, AutoTokenizer from huggingface_hub import hf_hub_download class ClipTextModel(nn.Module): def __init__(self, model_name_or_path, device=None): super(ClipTextModel, self).__init__() if os.path.exists(model_name_or_path): # load from file system output_linear_state_dict = torch.load(os.path.join(model_name_or_path, "outp