##############################
#
# Latent Semantic Indexing (LSI) demo on a small term-document matrix.
#
# Steps:
#   1. Build a 5 x 6 term-document matrix A (rows = terms, columns = documents).
#   2. Compute its singular value decomposition A = U S t(V).
#   3. Score a query q against every document, directly and via the SVD frame.
#   4. Truncate the SVD to rank k and repeat the scoring in the reduced space.
#
##############################

# Define the matrix document by document. matrix() fills column-wise by
# default, so each inner c(...) below becomes one column (one document).
A <- matrix(
  c(
    c(1, 2, 3, 4, 5),  # First document
    c(3, 2, 1, 4, 3),
    c(4, 2, 3, 1, 2),
    c(2, 2, 4, 3, 5),
    c(4, 4, 3, 2, 1),
    c(3, 4, 3, 4, 4)   # Sixth document
  ),
  nrow = 5,  # terms
  ncol = 6   # documents
)

# Print matrix A
A

# Singular value decomposition
SVD <- svd(A)

# Singular values are non-negative and sorted in decreasing order.
# nrow is passed explicitly so that diag() always builds a diagonal matrix
# from the vector, even if it has length 1.
S <- diag(SVD$d, nrow = length(SVD$d))        # Sigma
# Inverse of Sigma; requires every singular value to be non-zero.
S1 <- diag(1 / SVD$d, nrow = length(SVD$d))   # its inverse
U <- SVD$u
V <- SVD$v

# Print the matrices
S
U
V

# Check the column orthonormality of U and V (both should be identity)
t(U) %*% U
t(V) %*% V

# Check that A is reproduced
U %*% S %*% t(V)

##########################################
#
# Handle a query
#
# Define a query as a document (one weight per term)
q <- c(3, 2, 3, 5, 4)

# Compute and print direct document similarity to q
# (inner product of q with every document column of A)
Sim <- t(A) %*% q
Sim

# Convert query q into the SVD frame
q1 <- S1 %*% t(U) %*% q

# Print new coordinates
q1

# Check that the similarity is the same as above.
# S^2 is an element-wise square; that equals S %*% S because S is diagonal.
V %*% S^2 %*% q1

#######################################
#
# LSI
#
# Reduce the rank: keep only the k largest singular values and vectors
k <- 3
stopifnot(k >= 1, k <= length(SVD$d))

# Cut matrices off. drop = FALSE and an explicit nrow keep every object a
# matrix, so the products below also work when k = 1.
keep <- seq_len(k)
Uk <- U[, keep, drop = FALSE]
Vk <- V[, keep, drop = FALSE]
Sk <- diag(SVD$d[keep], nrow = k)
Sk1 <- diag(1 / SVD$d[keep], nrow = k)

# Print new matrices
Uk
Vk
Sk
Sk1

# Compute the rank-k approximation of the term-document matrix
# (printed with the other results below to see how it changed)
Ak <- Uk %*% Sk %*% t(Vk)

# Project query onto restricted LSI space
qk <- Sk1 %*% t(Uk) %*% q

# Print it
qk

# Compute approximate similarity (element-wise Sk^2 is valid: Sk is diagonal)
Simk <- Vk %*% Sk^2 %*% qk

# Print all relevant results
A
Ak
Sim
Simk
q
qk
############################################