  1. import pandas as pd
  2. from sklearn.model_selection import train_test_split
  3. from sklearn.svm import SVC # Support Vector Classifier
  4. from sklearn.preprocessing import StandardScaler
  5. from sklearn.metrics import accuracy_score, classification_report
  6. from sklearn.decomposition import TruncatedSVD
  7. from ydata_profiling import ProfileReport
  8. from sklearn.metrics import mean_squared_error
  9. import time
  10. import seaborn as sns
  11. from importlib import reload
  12. import matplotlib.pyplot as plt
  13. import matplotlib
  14. import warnings
  15. from IPython.display import display, HTML
  16. import plotly.graph_objects as go
  17. import plotly.express as px
  18. from plotly.subplots import make_subplots
  19. import plotly.io as pio
  20. # Configure Jupyter Notebook
  21. pd.set_option('display.max_columns', None)
  22. pd.set_option('display.max_rows', 500)
  23. pd.set_option('display.expand_frame_repr', False)
  24. display(HTML("<style>div.output_scroll { height: 35em; }</style>"))
# Read the battery dataset from the current working directory.
dataset = pd.read_csv('Battery_RUL.csv')

# Build the profiling report; the bare name on the next line is what makes a
# notebook render it when it is the last expression of a cell.
profile = ProfileReport(dataset)
profile

# 'RUL' is the regression target; every other column is used as a feature.
target_column = 'RUL'
x = dataset.drop(columns=[target_column])
y = dataset[target_column]

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.2,
)

Singular Value Decomposition

  1. # Step 5: Initialize and fit TruncatedSVD to your training data
  2. n_components = 6 # Adjust the number of components based on your desired dimensionality
  3. svd = TruncatedSVD(n_components=n_components, random_state=42)
  4. X_train_svd = svd.fit_transform(X_train)
  5. # Step 6: Transform the test data using the fitted SVD
  6. X_test_svd = svd.transform(X_test)


  1. from sklearn.neighbors import KNeighborsRegressor
  2. start = time.time()
  3. model = KNeighborsRegressor(n_neighbors=3).fit(X_train_svd,y_train)
  4. end_train = time.time()
  5. y_predictions = model.predict(X_test_svd) # These are the predictions from the test data.
  6. end_predict = time.time()
  7. kNN = [model.score(X_test_svd,y_test),
  8. mean_squared_error(y_test,y_predictions,squared=False),
  9. end_train-start,
  10. end_predict-end_train,
  11. end_predict-start]
  12. print('R-squared error: '+ "{:.2%}".format(model.score(X_test_svd,y_test)))
  13. print('Root Mean Squared Error: '+ "{:.2f}".format(mean_squared_error(y_test,y_predictions,squared=False)))
  1. R-squared error: 98.93%
  2. Root Mean Squared Error: 33.30
  1. plt.style.use('seaborn-white')
  2. plt.rcParams['figure.figsize']=5,5
  3. fig,ax = plt.subplots()
  4. plt.title('Actual vs Predicted')
  5. plt.xlabel('Actual')
  6. plt.ylabel('Predicted')
  7. g = sns.scatterplot(x=y_test,
  8. y=y_predictions,
  9. s=20,
  10. alpha=0.6,
  11. linewidth=1,
  12. edgecolor='black',
  13. ax=ax)
  14. f = sns.lineplot(x=[min(y_test),max(y_test)],
  15. y=[min(y_test),max(y_test)],
  16. linewidth=4,
  17. color='gray',
  18. ax=ax)
  19. plt.annotate(text=('R-squared error: '+ "{:.2%}".format(model.score(X_test_svd,y_test)) +'\n' +
  20. 'Root Mean Squared Error: '+ "{:.2f}".format(mean_squared_error(y_test,y_predictions,squared=False))),
  21. xy=(0,800),
  22. size='medium')
  23. xlabels = ['{:,.0f}'.format(x) for x in g.get_xticks()]
  24. g.set_xticklabels(xlabels)
  25. ylabels = ['{:,.0f}'.format(x) for x in g.get_yticks()]
  26. g.set_yticklabels(ylabels)
  27. sns.despine()

Random Forest

  1. %%time
  2. from sklearn.ensemble import RandomForestRegressor
  3. start = time.time()
  4. model = RandomForestRegressor(n_jobs=-1,
  5. n_estimators=100,
  6. min_samples_leaf=1,
  7. max_features='sqrt',
  8. # min_samples_split=2,
  9. bootstrap = True,
  10. criterion='mse',
  11. ).fit(X_train_svd,y_train)
  12. end_train = time.time()
  13. y_predictions = model.predict(X_test_svd) # These are the predictions from the test data.
  14. end_predict = time.time()
  15. Random_Forest = [model.score(X_test_svd,y_test),
  16. mean_squared_error(y_test,y_predictions,squared=False),
  17. end_train-start,
  18. end_predict-end_train,
  19. end_predict-start]
  20. print('R-squared error: '+ "{:.2%}".format(model.score(X_test_svd,y_test)))
  21. print('Root Mean Squared Error: '+ "{:.2f}".format(mean_squared_error(y_test,y_predictions,squared=False)))
  1. R-squared error: 99.75%
  2. Root Mean Squared Error: 15.97
  3. CPU times: total: 3.34 s
  4. Wall time: 389 ms
  1. plt.style.use('seaborn-white')
  2. plt.rcParams['figure.figsize']=5,5
  3. fig,ax = plt.subplots()
  4. plt.title('Actual vs Predicted')
  5. plt.xlabel('Actual')
  6. plt.ylabel('Predicted')
  7. g = sns.scatterplot(x=y_test,
  8. y=y_predictions,
  9. s=20,
  10. alpha=0.6,
  11. linewidth=1,
  12. edgecolor='black',
  13. ax=ax)
  14. f = sns.lineplot(x=[min(y_test),max(y_test)],
  15. y=[min(y_test),max(y_test)],
  16. linewidth=4,
  17. color='gray',
  18. ax=ax)
  19. plt.annotate(text=('R-squared error: '+ "{:.2%}".format(model.score(X_test_svd,y_test)) +'\n' +
  20. 'Root Mean Squared Error: '+ "{:.2f}".format(mean_squared_error(y_test,y_predictions,squared=False))),
  21. xy=(0,800),
  22. size='medium')
  23. xlabels = ['{:,.0f}'.format(x) for x in g.get_xticks()]
  24. g.set_xticklabels(xlabels)
  25. ylabels = ['{:,.0f}'.format(x) for x in g.get_yticks()]
  26. g.set_yticklabels(ylabels)
  27. sns.despine()

