Use trust_remote_code=True per transformers docs

This commit is contained in:
Josh Hawkins 2024-10-09 13:55:34 -05:00
parent 420ca5a92f
commit 896dad5cc9

View File

@@ -75,6 +75,7 @@ class GenericONNXEmbedding:
logger.info(f"Downloading {self.model_name} tokenizer") logger.info(f"Downloading {self.model_name} tokenizer")
tokenizer = AutoTokenizer.from_pretrained( tokenizer = AutoTokenizer.from_pretrained(
self.model_name, self.model_name,
trust_remote_code=True,
cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer", cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer",
clean_up_tokenization_spaces=True, clean_up_tokenization_spaces=True,
) )
@@ -84,6 +85,7 @@ class GenericONNXEmbedding:
logger.info(f"Downloading {self.model_name} feature extractor") logger.info(f"Downloading {self.model_name} feature extractor")
feature_extractor = AutoFeatureExtractor.from_pretrained( feature_extractor = AutoFeatureExtractor.from_pretrained(
self.model_name, self.model_name,
trust_remote_code=True,
cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor", cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor",
) )
feature_extractor.save_pretrained(path) feature_extractor.save_pretrained(path)
@@ -119,7 +121,10 @@ class GenericONNXEmbedding:
def _load_tokenizer(self): def _load_tokenizer(self):
tokenizer_path = os.path.join(f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer") tokenizer_path = os.path.join(f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer")
return AutoTokenizer.from_pretrained( return AutoTokenizer.from_pretrained(
self.model_name, cache_dir=tokenizer_path, clean_up_tokenization_spaces=True self.model_name,
cache_dir=tokenizer_path,
trust_remote_code=True,
clean_up_tokenization_spaces=True,
) )
def _load_feature_extractor(self): def _load_feature_extractor(self):
@@ -127,7 +132,7 @@ class GenericONNXEmbedding:
f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor" f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor"
) )
return AutoFeatureExtractor.from_pretrained( return AutoFeatureExtractor.from_pretrained(
self.model_name, cache_dir=feature_extractor_path self.model_name, trust_remote_code=True, cache_dir=feature_extractor_path
) )
def _load_model(self, path: str, providers: List[str]): def _load_model(self, path: str, providers: List[str]):