Use `trust_remote_code=True` per transformers docs

This commit is contained in:
Josh Hawkins 2024-10-09 13:55:34 -05:00
parent 420ca5a92f
commit 896dad5cc9

View File

@@ -75,6 +75,7 @@ class GenericONNXEmbedding:
 logger.info(f"Downloading {self.model_name} tokenizer")
 tokenizer = AutoTokenizer.from_pretrained(
 self.model_name,
+trust_remote_code=True,
 cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer",
 clean_up_tokenization_spaces=True,
 )
@@ -84,6 +85,7 @@ class GenericONNXEmbedding:
 logger.info(f"Downloading {self.model_name} feature extractor")
 feature_extractor = AutoFeatureExtractor.from_pretrained(
 self.model_name,
+trust_remote_code=True,
 cache_dir=f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor",
 )
 feature_extractor.save_pretrained(path)
@@ -119,7 +121,10 @@ class GenericONNXEmbedding:
 def _load_tokenizer(self):
 tokenizer_path = os.path.join(f"{MODEL_CACHE_DIR}/{self.model_name}/tokenizer")
 return AutoTokenizer.from_pretrained(
-self.model_name, cache_dir=tokenizer_path, clean_up_tokenization_spaces=True
+self.model_name,
+cache_dir=tokenizer_path,
+trust_remote_code=True,
+clean_up_tokenization_spaces=True,
 )
 def _load_feature_extractor(self):
@@ -127,7 +132,7 @@ class GenericONNXEmbedding:
 f"{MODEL_CACHE_DIR}/{self.model_name}/feature_extractor"
 )
 return AutoFeatureExtractor.from_pretrained(
-self.model_name, cache_dir=feature_extractor_path
+self.model_name, trust_remote_code=True, cache_dir=feature_extractor_path
 )
 def _load_model(self, path: str, providers: List[str]):