using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
/// <summary>
/// Console entry point that runs the table extractor over a sample string
/// and prints every extracted row.
/// </summary>
class Program
{
    /// <summary>
    /// Feeds a hard-coded sample to <see cref="TableDataExtractor.ExtractSpecificTableData"/>
    /// and writes each resulting row to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        // NOTE(review): this sample holds plain text only (no table row/cell markup),
        // so no rows are expected to be printed; substitute real table HTML to see output.
        string sampleHtml = @"
Insis Revenue
Revenue Details (SSCGPAS001)
";

        // The element type must be spelled out: there is no non-generic List
        // in the namespaces this file imports.
        List<TableDataExtractor.RowData> data = TableDataExtractor.ExtractSpecificTableData(sampleHtml);

        foreach (TableDataExtractor.RowData row in data)
        {
            Console.WriteLine(row.ToString());
        }
    }
}
/// <summary>
/// Extracts line-item rows from an HTML fragment using regular expressions.
/// </summary>
public class TableDataExtractor
{
    // Minimum number of <td> cells a row must contain to be treated as a data row.
    // The first nine cells map, in order, onto the nine RowData properties.
    private const int RequiredCellCount = 9;

    // Matches a <tr> element whose opening tag contains an id attribute and
    // captures the row's inner HTML in group 1. Rows without an id do not match
    // (a looser alternative would be @"<tr[^>]*>(.*?)</tr>").
    private static readonly Regex RowRegex = new Regex(
        @"<tr[^>]*?\bid=[^>]*?>(.*?)</tr>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Matches a <td> element (attributes allowed) and captures its inner HTML in group 1.
    private static readonly Regex CellRegex = new Regex(
        @"<td[^>]*>(.*?)</td>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>
    /// Holds the cell values extracted from a single table row.
    /// </summary>
    public class RowData
    {
        public string Item { get; set; }
        public string Code { get; set; }
        public string Description { get; set; }
        public string Period { get; set; }
        public string UnitPrice { get; set; }
        public string Qty { get; set; }
        public string Amount { get; set; }
        public string BillingType { get; set; }
        public string Currency { get; set; }

        /// <summary>
        /// Formats a summary of the row. Only Item, Description, UnitPrice, Qty
        /// and Amount are included; the remaining properties are not printed.
        /// </summary>
        public override string ToString()
        {
            return $"Item: {Item}, Description: {Description}, Unit Price: {UnitPrice}, Qty: {Qty}, Amount: {Amount}";
        }
    }

    /// <summary>
    /// Scans <paramref name="htmlContent"/> for table rows that carry an id attribute
    /// and converts every row with at least nine cells into a <see cref="RowData"/>.
    /// </summary>
    /// <param name="htmlContent">HTML text to scan.</param>
    /// <returns>
    /// The extracted rows in document order; an empty list when nothing matches.
    /// Cell contents are returned as raw inner HTML with non-breaking spaces
    /// converted to ordinary spaces and surrounding whitespace trimmed.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="htmlContent"/> is <c>null</c>.
    /// </exception>
    public static List<RowData> ExtractSpecificTableData(string htmlContent)
    {
        if (htmlContent == null)
        {
            throw new ArgumentNullException(nameof(htmlContent));
        }

        var extractedData = new List<RowData>();

        foreach (Match rowMatch in RowRegex.Matches(htmlContent))
        {
            // Find all cells inside the current row's inner HTML.
            MatchCollection cells = CellRegex.Matches(rowMatch.Groups[1].Value);

            // Rows with fewer cells than there are fields (headers, spacers, totals)
            // are ignored; any cells beyond the ninth are not read.
            if (cells.Count < RequiredCellCount)
            {
                continue;
            }

            extractedData.Add(new RowData
            {
                Item = CleanCell(cells[0]),
                Code = CleanCell(cells[1]),
                Description = CleanCell(cells[2]),
                Period = CleanCell(cells[3]),
                UnitPrice = CleanCell(cells[4]),
                Qty = CleanCell(cells[5]),
                Amount = CleanCell(cells[6]),
                BillingType = CleanCell(cells[7]),
                Currency = CleanCell(cells[8])
            });
        }

        return extractedData;
    }

    // Returns the captured cell content with non-breaking spaces (both the
    // "&nbsp;" entity and the U+00A0 character) turned into plain spaces,
    // then trimmed of leading and trailing whitespace.
    private static string CleanCell(Match cell)
    {
        return cell.Groups[1].Value
            .Replace("&nbsp;", " ")
            .Replace('\u00A0', ' ')
            .Trim();
    }
}