Code: Select all
import pandas as pd # Importing the pandas library, which is useful for data manipulation and analysis
import os # Importing the os library, which provides functions to interact with the operating system
# Defining the path to the directory containing the Excel files
path = r"C:\\Users\\Ram Workstation\\Downloads\\sample_excel_files\\"
# Creating a list of all files in the specified directory that have the '.xlsx' extension (Excel files)
files = [f for f in os.listdir(path) if f.endswith('.xlsx')]
# Creating an empty list to store DataFrames
dataframes = []
# Looping through each file in the list of Excel files
for file in files:
# Loading the Excel file
excel_file = pd.ExcelFile(os.path.join(path, file))
# Looping through each sheet in the Excel file
for sheet_name in excel_file.sheet_names:
# Reading the current sheet into a DataFrame
df = pd.read_excel(os.path.join(path, file), sheet_name=sheet_name)
# Displaying information about the DataFrame (like number of rows and columns, data types, etc.)
df.info()
# Adding the DataFrame to the list of DataFrames
dataframes.append(df)
# Displaying information about the DataFrames list (incorrect usage, will cause an error)
# dataframes.info() # This line should be removed or corrected, as lists do not have an 'info()' method
# Concatenating all DataFrames in the list into a single DataFrame, ignoring the original row indexes
merged_df = pd.concat(dataframes, ignore_index=True)
# Writing the concatenated DataFrame to a new Excel file named 'merged_data.xlsx' in the same directory
merged_df.to_excel(os.path.join(path, 'merged_data.xlsx'), index=False)
# Printing a message to indicate that the process is complete
print("Done with comments")