这是一个对酒店评论进行情感分析(例如判断正面或负面)的代码示例。我使用了 pandas、transformers、datasets 和 turkish_lm_tuner 库。首先,我怀疑问题出在路径名称上(C:\Users\Ata Onur Özdemir,其中包含字母“Ö”),于是修改了路径,但问题没有解决,输出仍然是相同的错误。其次,我在输出文件夹中创建了 __init__.py,但仍然出现相同的错误。此外,我还在环境变量中定义了该路径,但同样的错误再次出现。我在 Google 等网站上搜索过,也尝试了很多方法,但都没有找到解决办法。请帮助我 :)
import os
import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset
from turkish_lm_tuner import TrainerForClassification, EvaluatorForClassification
# Load the hotel reviews. The 'Positive_Review_Tr' and 'Negative_Review_Tr'
# columns of this frame are the ones tokenized later in the script.
data = pd.read_csv('../Emotion_Detection/Hotel_readablee.csv')

# Directory handed to the trainer and evaluator as their 'output_dir'.
# NOTE(review): the FailedPreconditionError quoted after the script is raised by
# TensorFlow's file layer while creating a log directory under this path. The
# non-ASCII "Ö" in the path is a likely cause - confirm by pointing this at an
# ASCII-only location (for example a folder directly under a drive root).
output_dir = 'C:\\Users\\Ata Onur Özdemir\\PycharmProjects\\Emotion_Detection\\output'


def _unused_backup_path(path):
    """Return "<path>_old", or "<path>_old_<n>" when that name is already taken."""
    candidate = path + "_old"
    counter = 0
    while os.path.exists(candidate):
        counter += 1
        candidate = f"{path}_old_{counter}"
    return candidate


if os.path.exists(output_dir):
    # Only a directory can be listed; a plain file with this name is just moved away.
    if os.path.isdir(output_dir):
        print(f"Contents of {output_dir} directory:")
        print(os.listdir(output_dir))
    # Move the previous output aside. On Windows os.rename() raises
    # FileExistsError when the destination exists, so a fixed "_old" name
    # would break the script from its second run onwards.
    new_output_dir = _unused_backup_path(output_dir)
    os.rename(output_dir, new_output_dir)
    print(f"{output_dir} directory has been renamed to {new_output_dir}.")
    # Start every run from an empty output directory
    os.makedirs(output_dir)
    print(f"New {output_dir} directory created.")
else:
    # If the directory does not exist, create it
    os.makedirs(output_dir)
    print(f"{output_dir} directory created.")

# Initialize the tokenizer that belongs to the checkpoint being fine-tuned
model_name = "boun-tabi-LMG/TURNA"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Define the CustomDatasetProcessor class and other necessary steps
class CustomDatasetProcessor:
    """Turn a pandas frame of hotel reviews into a tokenized ``Dataset``.

    For every row, the 'Positive_Review_Tr' and 'Negative_Review_Tr' values are
    passed to the tokenizer together as a text pair.

    NOTE(review): nothing in this class adds a label/target column - confirm
    that the trainer receives its classification targets some other way.
    """

    def __init__(self, tokenizer, max_input_length):
        """Store the tokenizer and the fixed sequence length.

        Args:
            tokenizer: Callable invoked as ``tokenizer(texts, text_pairs, **kwargs)``.
            max_input_length: Length every example is truncated and padded to.
        """
        self.tokenizer = tokenizer
        self.max_input_length = max_input_length

    def _tokenize_batch(self, examples):
        """Tokenize one batch of rows.

        Args:
            examples: Mapping with the keys 'Positive_Review_Tr' and
                'Negative_Review_Tr', each holding one value per row.

        Returns:
            Whatever the tokenizer returns for the batch.
        """
        # Convert each review to str so non-string cells cannot break the tokenizer
        positive_reviews = [str(review) for review in examples['Positive_Review_Tr']]
        negative_reviews = [str(review) for review in examples['Negative_Review_Tr']]
        # No return_tensors here: Dataset.map() stores the returned columns
        # itself, so asking for PyTorch tensors only adds a conversion step.
        return self.tokenizer(
            positive_reviews,
            negative_reviews,
            truncation=True,
            padding='max_length',
            max_length=self.max_input_length,
        )

    def load_and_preprocess_data(self, data):
        """Build a ``Dataset`` from *data* and tokenize it in batches.

        Args:
            data: pandas DataFrame containing the two review columns.

        Returns:
            The dataset returned by ``Dataset.map`` with the tokenizer output added.
        """
        dataset = Dataset.from_pandas(data)
        return dataset.map(self._tokenize_batch, batched=True)
# Processor that tokenizes each review pair to a fixed length of 2048 tokens
dataset_processor = CustomDatasetProcessor(tokenizer, max_input_length=2048)

# Seeded split: 80% of the rows for training, then the remaining rows are
# divided in half into validation and test.
train_data = data.sample(frac=0.8, random_state=42)
remaining_data = data.drop(index=train_data.index)
validation_data = remaining_data.sample(frac=0.5, random_state=42)
test_data = remaining_data.drop(index=validation_data.index)

# Tokenize the three splits in the same order they are unpacked
train_dataset, eval_dataset, test_dataset = (
    dataset_processor.load_and_preprocess_data(split_frame)
    for split_frame in (train_data, validation_data, test_data)
)
# Training parameters
training_params = {
    'num_train_epochs': 10,
    'per_device_train_batch_size': 4,
    'per_device_eval_batch_size': 4,
    'output_dir': output_dir,
    'evaluation_strategy': 'epoch',
    'save_strategy': 'epoch',
    # The reported traceback fails inside the TensorBoard callback
    # (_init_summary_writer) while it creates its log directory, so the
    # logging integrations are switched off.
    # Assumes turkish_lm_tuner forwards this dict to
    # transformers.TrainingArguments - TODO confirm.
    'report_to': 'none',
}

# Optimizer parameters
optimizer_params = {
    'optimizer_type': 'adafactor',
    'scheduler': False,
}

# Test parameters
test_params = {
    'per_device_eval_batch_size': 4,
    'output_dir': output_dir,
}

# NOTE(review): the previous comment here said "binary classification", which
# would mean 2 labels, not 4 - confirm the number of classes in the data.
num_labels = 4

# Initialize TrainerForClassification
model_trainer = TrainerForClassification(
    model_name=model_name,
    num_labels=num_labels,
    task='classification',
    optimizer_params=optimizer_params,
    training_params=training_params,
    model_save_path="hotel_reviews_classification_model",
    test_params=test_params
)
# Train and evaluate the model
trainer, model = model_trainer.train_and_evaluate(train_dataset, eval_dataset, test_dataset)

# Save the trained model and tokenizer side by side so they can be reloaded together
model.save_pretrained("hotel_reviews_classification_model")
tokenizer.save_pretrained("hotel_reviews_classification_model")

# Evaluate the saved model using EvaluatorForClassification
evaluator = EvaluatorForClassification(
    model_save_path="hotel_reviews_classification_model",
    model_name=model_name,
    task='classification',
    test_params=test_params,
    num_labels=num_labels
)

# Evaluate the model on the test dataset
results = evaluator.evaluate_model(test_dataset)

# Convert the results to a DataFrame.
# pd.DataFrame() raises ValueError for a dict whose values are all scalars
# (e.g. {"accuracy": 0.9}), so that shape is written as a single row. Any other
# shape is passed through unchanged.
# NOTE(review): the actual return type of evaluate_model is not visible here.
if (
    isinstance(results, dict)
    and results
    and all(pd.api.types.is_scalar(value) for value in results.values())
):
    results_df = pd.DataFrame([results])
else:
    results_df = pd.DataFrame(results)

# Save the results to a new CSV file
results_df.to_csv('evaluation_results.csv', index=False)
print("Evaluation results saved to evaluation_results.csv.")

# Show the current working directory, where the relative paths above are resolved
print("Current Working Directory:", os.getcwd())
系统出现此错误:
Traceback (most recent call last):
File "C:\Users\Ata Onur Özdemir\PycharmProjects\Emotion_Detection\main.py", line 101, in <module>
trainer, model = model_trainer.train_and_evaluate(train_dataset, eval_dataset, test_dataset)
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\turkish_lm_tuner\trainer.py", line 195, in train_and_evaluate
trainer.train()
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\trainer.py", line 1885, in train
return inner_training_loop(
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\trainer.py", line 2147, in _inner_training_loop
self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\trainer_callback.py", line 454, in on_train_begin
return self.call_event("on_train_begin", args, state, control)
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\trainer_callback.py", line 498, in call_event
result = getattr(callback, event)(
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\integrations\integration_utils.py", line 629, in on_train_begin
self._init_summary_writer(args, log_dir)
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\integrations\integration_utils.py", line 615, in _init_summary_writer
self.tb_writer = self._SummaryWriter(log_dir=log_dir)
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\torch\utils\tensorboard\writer.py", line 249, in __init__
self._get_file_writer()
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\torch\utils\tensorboard\writer.py", line 281, in _get_file_writer
self.file_writer = FileWriter(
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\torch\utils\tensorboard\writer.py", line 75, in __init__
self.event_writer = EventFileWriter(
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\tensorboard\summary\writer\event_file_writer.py", line 72, in __init__
tf.io.gfile.makedirs(logdir)
File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\tensorflow\python\lib\io\file_io.py", line 513, in recursive_create_dir_v2
_pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path))
tensorflow.python.framework.errors_impl.FailedPreconditionError: C:\Users\Ata Onur Özdemir\PycharmProjects\Emotion_Detection\output is not a directory
D:\TEMP>tree PycharmProjects
文件夹路径列表
卷序列号为 A544-D3FB
D:\TEMP\PYCHARMPROJECTS
路径无效 - \TEMP\PYCHARMPROJECTS
不存在子文件夹