import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def getXY(A):
    """Split a 2-D data matrix into a feature matrix and a target vector.

    Args:
        A: 2-D NumPy array whose last column holds the target and whose
            remaining columns hold the features.

    Returns:
        Tuple ``(x, y)``: ``x`` is every column except the last (2-D),
        ``y`` is the last column (1-D).
    """
    x = A[:, :-1]
    # Negative indexing picks the last column without reading A[0], so a
    # matrix with zero rows is handled instead of raising IndexError.
    y = A[:, -1]
    return x, y

def getAb_CauA(x, y):
    """Build the linear system (A, b) for the all-features model.

    Model: y = theta_1*x_1 + theta_2*x_2 + ... + theta_11*x_11

    Args:
        x: Feature matrix; used unchanged as the design matrix.
        y: Target values; reshaped into a column with ``len(y)`` rows.

    Returns:
        Tuple ``(A, b)``: ``A`` is the same object as ``x`` and ``b`` is
        ``y`` as a ``(len(y), 1)`` column.
    """
    n_samples = len(y)
    target_column = y.reshape((n_samples, 1))
    return x, target_column

def Cau_A():
    """Fit the all-features linear model and print its residual norm.

    Reads the module-level ``X`` (features) and ``Y`` (target), solves the
    least-squares problem ``min ||A @ theta - B||`` and prints the norm of
    the residual vector. Returns nothing.
    """
    # Build the system once rather than calling the helper twice.
    A, B = getAb_CauA(X, Y)
    # lstsq solves the least-squares problem directly; explicitly inverting
    # A.T @ A is less accurate and raises when that matrix is singular.
    x_hat = np.linalg.lstsq(A, B, rcond=None)[0]  # theta hat
    r = np.linalg.norm(A @ x_hat - B)
    print("a. Sử dụng toàn bộ 11 đặc trưng đề bài cung cấp:")
    print("Sử dụng hàm f(x) = theta1*x1 + theta2*x2 + ... + theta11*x11")
    print("Giá trị chuẩn vector phần dư là:", r)

def getAb_CauB(x, y, i):
    """Build the linear system (A, b) for a single-feature model.

    Model: y = theta_i * x_i

    Args:
        x: 2-D feature matrix.
        y: Target values.
        i: Index of the column of ``x`` to use as the only feature.

    Returns:
        Tuple ``(A, b)``: column ``i`` of ``x`` as a ``(len(x), 1)`` array
        and ``y`` as a ``(len(y), 1)`` array.
    """
    feature = x[:, i]
    design = feature.reshape((len(x), 1))
    target = y.reshape((len(y), 1))
    return design, target

def Cau_B():
    """Fit one single-feature model per column and report the best one.

    Reads the module-level ``X`` (features), ``Y`` (target) and ``df``
    (used only for column names). For every column of ``X`` it fits
    ``y = theta_i * x_i`` by least squares, prints the residual norm, and
    finally prints the smallest norm with the name of the feature that
    produced it. Returns nothing.
    """
    print("b. Sử dụng duy nhất 1 đặc trưng cho kết quả tốt nhất:")
    # The model here is single-feature; the previous message described the
    # all-features model from Cau_A.
    print("Sử dụng hàm f(x) = theta_i*x_i")
    min_r = float('inf')
    index = 0
    # Iterate over however many feature columns X has instead of assuming 11.
    for i in range(X.shape[1]):
        print(f"* Đặc trưng {df.columns[i]}:")
        # Build the system once per feature rather than calling the helper twice.
        A, B = getAb_CauB(X, Y, i)
        # lstsq avoids inverting the 1x1 matrix A.T @ A, which raises when
        # the feature column is all zeros.
        x_hat = np.linalg.lstsq(A, B, rcond=None)[0]  # theta hat
        r = np.linalg.norm(A @ x_hat - B)
        print("Giá trị chuẩn vector phần dư là:", r)
        if r < min_r:
            min_r = r
            index = i
    print("\nGiá trị chuẩn vector phần dư tốt nhất là:", min_r, "ứng với đặc trưng", df.columns[index])


if __name__ == '__main__':
    # Script entry point: load the dataset, then run both exercises.
    df = pd.read_csv('wine.csv', sep=';')  # semicolon-separated CSV file
    arr = df.to_numpy()  # convert dataframe to numpy array
    # X (all columns but the last) and Y (last column) are module-level
    # names read by Cau_A and Cau_B; split the array once via unpacking
    # instead of calling getXY twice.
    X, Y = getXY(arr)
    Cau_A()
    Cau_B()