import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
# Load the dataset from 'student_behavior.csv'.
data = pd.read_csv('student_behavior.csv')

# Target column first, then the two feature columns used to predict it.
y = data['recommended_course']
X = data[['course_taken', 'activity_participated']]

# Split into training and test sets, holding out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
)

# Train the model: a random forest with default hyperparameters.
# fit() returns the estimator itself, so construction and fitting are chained.
model = RandomForestClassifier().fit(X_train, y_train)

# Predict labels for the held-out rows.
predictions = model.predict(X_test)
# ---------- end of snippet ----------
import spacy
# Load the spaCy pipeline "en_core_web_sm" once, at module import time.
nlp = spacy.load("en_core_web_sm")
def answer_question(question):
    """Return a canned answer chosen by keyword matching on *question*.

    Simple example: check whether the question contains specific keywords.
    The check is a plain substring test performed case-insensitively, so
    "Library", "LIBRARY" and "library" are treated alike. Keywords are
    tested in order, so a question mentioning both "course" and "library"
    receives the course answer.

    Args:
        question: The user's question as a string.

    Returns:
        The answer string for the first keyword found, or a fallback
        apology when no known keyword is present.
    """
    # Matching only needs the raw text, so no NLP pipeline run is paid for
    # on each call.
    normalized = question.casefold()

    if "course" in normalized:
        return "The course schedule can be found on the main page."
    if "library" in normalized:
        return "The library hours are from 8 AM to 10 PM."
    return "I'm sorry, I don't have an answer for that."
# Example usage: print the answer returned for a library-related question.
print(answer_question("What time does the library close?"))
# ---------- end of snippet ----------
import matplotlib.pyplot as plt
# Data analysis: for each distinct 'grade', count the 'course_taken' entries.
grades = data['grade']
course_preferences = data.groupby(by='grade')['course_taken'].count()

# Visualization: one bar per grade, with the bar height set to that count.
plt.bar(
    x=course_preferences.index,
    height=course_preferences.values,
)
plt.title('Course Preferences by Grade')
plt.ylabel('Number of Courses')
plt.xlabel('Grade')
plt.show()
# ---------- end of snippet ----------