Wednesday 10 May 2023

Read Excel file using DocumentFormat.OpenXml.Spreadsheet

Code Snippet:


using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using System;
using System.Data;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;

namespace ConsoleExcelReader
{
    interface IRajkrsExcelReader
    {
        DataTable GetDataTableFromExcel(string fullPath, string sheetName = null);

    }

    public class RajkrsExcelReader : IRajkrsExcelReader
    {
        private string _traceLocation { get; set; }
        private string _requestId { get; set; }
        public RajkrsExcelReader(string traceLocation)
        {
            this._traceLocation = traceLocation;
            this._requestId = Guid.NewGuid().ToString();
        }
        public DataTable GetDataTableFromExcel(string fullPath, string sheetName = null)
        {
            Log(string.Format("Request initiated for - {0}", fullPath));
            DataTable dt = null;

            if (!File.Exists(fullPath))
            {
                Log(string.Format("file not exists - {0}", fullPath));
            }
            else
            {
                Log(string.Format("file exists - {0}", fullPath));



                dt = new DataTable();

                try
                {
                    using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(fullPath, false))
                    {
                        WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;

                        Sheet sheet = workbookPart.Workbook.Descendants().Where(s => s.Name == sheetName).FirstOrDefault();


                        if (sheet == null)
                        {
                            Log(string.Format("Sheet {0} not exists", sheetName));
                        }
                        else
                        {
                            Log(string.Format("Sheet {0} exists", sheetName));

                            WorksheetPart worksheetPart = workbookPart.GetPartById(sheet.Id.Value) as WorksheetPart;
                            SharedStringTablePart sharedStringTablePart = workbookPart.SharedStringTablePart;


                            var rowIndex = 1;
                            var rows = worksheetPart.Worksheet.Descendants();
                            Log(string.Format("Row count: {0}", rows.Count()));

                            foreach (Row row in rows)
                            {
                                DataRow dataRow = dt.NewRow();

                                foreach (Cell cell in row.Descendants())
                                {

                                    var currentRowIndex = int.Parse(Regex.Match(cell.CellReference.InnerText, @"\d+").Value);
                                    var currentColumn = Regex.Match(cell.CellReference.InnerText, "[A-Za-z]+").Value;

                                    string cellValue = null;
                                    if (cell.CellValue != null)
                                    {
                                        cellValue = cell.CellValue.InnerText;
                                    }


                                    // If the cell has a data type, convert the value accordingly
                                    if (cell.DataType != null)
                                    {
                                        switch (cell.DataType.Value)
                                        {
                                            case CellValues.SharedString:
                                                cellValue = sharedStringTablePart.SharedStringTable.ElementAt(int.Parse(cellValue)).InnerText;
                                                break;
                                            case CellValues.Boolean:
                                                cellValue = cellValue == "1" ? "TRUE" : "FALSE";
                                                break;
                                            case CellValues.Date:
                                                cellValue = DateTime.FromOADate(double.Parse(cellValue)).ToString();
                                                break;
                                            default:
                                                break;
                                        }
                                    }

                                    if (rowIndex == 1)
                                    {
                                        rowIndex = currentRowIndex;
                                        dt.Columns.Add(new DataColumn(currentColumn, typeof(string)));
                                        dataRow[currentColumn] = cellValue;

                                    }
                                    else
                                    {
                                        dataRow[currentColumn] = cellValue;
                                    }

                                }

                                dt.Rows.Add(dataRow);


                                rowIndex++;
                            }

                            Log(string.Format("Total DUMP available in DT: {0}", dt.Rows.Count));

                            var headerRow = dt.Rows[0].ItemArray;

                            Log("Renaming headers..");
                            for (int i = 0; i < headerRow.Length; i++)
                            {
                                var newHeader = headerRow[i].ToString();
                                Log(string.Format("{0} - {1}", dt.Columns[i].ColumnName, newHeader));
                                dt.Columns[i].ColumnName = newHeader;
                                dt.AcceptChanges();

                            }
                            dt.Rows[0].Delete();
                            dt.AcceptChanges();

                            Log(string.Format("Changes accepted, total {0} rows available.", dt.Rows.Count));
                        }


                    }


                }
                catch (Exception ex)
                {
                    Log(string.Format("Error Message - {0}", ex.Message));
                    if (ex.InnerException != null)
                    {
                        Log(string.Format("Error InnerException - {0}", ex.InnerException.ToString()));
                    }

                }




            }
            return dt;
        }





        private void Log(string msg)
        {
            if (!string.IsNullOrEmpty(_traceLocation))
            {

                try
                {
                    var logFile = _traceLocation + @"Logs\DctExcelReader-trace-" + DateTime.Now.ToString("yyyyMMdd") + ".txt";
                    var dir = Path.GetDirectoryName(logFile);
                    if (!File.Exists(logFile))
                    {
                        if (!Directory.Exists(dir))
                            Directory.CreateDirectory(dir);
                        File.WriteAllText(logFile, "----DctExcelReader Logs----");
                    }
                    File.AppendAllText(logFile, string.Format("\n{0} {1} - {2}", _requestId, DateTime.Now, msg));

                }
                catch (Exception ex)
                {
                    string message = "ExcelReader:" + ex;
                    System.Diagnostics.EventLog.WriteEntry("Application", message, System.Diagnostics.EventLogEntryType.Error, 55555);
                }

            }
        }
    }

}