194 lines
7.3 KiB
C#
Executable File
194 lines
7.3 KiB
C#
Executable File
using System;
|
|
using System.Collections.Generic;
|
|
using System.Text.RegularExpressions;
|
|
|
|
/// <summary>
/// Console entry point that runs <see cref="TableDataExtractor"/> against an embedded sample page.
/// </summary>
class Program
{
    /// <summary>
    /// Extracts the identified table rows from the embedded sample HTML and writes
    /// one summary line per extracted row to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // Verbatim string literal: each "" below is an escaped double quote in the HTML.
        // The markup is kept flush-left because leading whitespace would become part of the string.
        string sampleHtml = @"


<html>
<head>
<title>Insis Revenue</title>
<link rel=""stylesheet"" type=""text/css"" href=""../Images/OutputStyle.css"" />
<link rel=""stylesheet"" type=""text/css"" href=""../Images/TopHeadingStyle.css"" />
<script type=""text/javascript"" language=""javascript"" src=""../Includes/HighLightRow.js""></script>
</head>
<body onload = ""init()"";>

<div id=""container"">
<div id=""rogersheader""><center><font class=""header"">Revenue Details (<font class='name'>SSCGPAS001</font>)</font></center></div>
</div>

<center>
<form name=""myForm"" method =""post"" action="""">
<div align=""right"">
<font color=""#B22222""><b>Revision:</b> </font>
<select name=""revision"" class =""entry"" onchange =""revisionOnChange()"";>

<option value=""03"">03 NS 7/31/2025 APPR</option>

<option value=""02"">02 NS 7/31/2025 ARCH</option>

<option value=""01"">01 NS 7/31/2024 ARCH</option>

</select>
</div>
<br />
<table class=""normal"" border=""1"" cellspacing=""0"" cellpadding=""0"">
<tr>
<td>
<table class=""line"" cellspacing=""0"" cellpadding=""6"">
<tr class=""heading2"">
<td class=""heading2"">Item</td>
<td class=""heading2"">Code</td>
<td class=""heading2"">Description</td>
<td class=""heading2"">Period</td>
<td class=""heading2"" align=""right"">Unit Price</td>
<td class=""heading2"" align=""center"">Qty</td>
<td class=""heading2"" align=""right"">Amount</td>
<td class=""heading2"" align=""center"">Billing Type</td>
<td class=""heading2"" align=""center"">Currency</td>
</tr>

<tr id=""R0"" onclick=""Highlight_Row(R0)"">
<td nowrap=""nowrap"">01 </td>
<td nowrap=""nowrap"">ACH </td>
<td nowrap=""nowrap"">100M Ethernet Fiber Access-BRE </td>
<td nowrap=""nowrap"">Monthly </td>
<td nowrap=""nowrap"" align=""right"">$200.00 </td>
<td nowrap=""nowrap"" align=""center"">1 </td>
<td nowrap=""nowrap"" align=""right"">$200.00 </td>
<td nowrap=""nowrap"" align=""center""> </td>
<td nowrap=""nowrap"" align=""center"">CD </td>
</tr>

<tr id=""R1"" onclick=""Highlight_Row(R1)"">
<td nowrap=""nowrap"">04 </td>
<td nowrap=""nowrap"">VSS </td>
<td nowrap=""nowrap"">Voice Session Service-BRE </td>
<td nowrap=""nowrap"">Monthly </td>
<td nowrap=""nowrap"" align=""right"">$9.80 </td>
<td nowrap=""nowrap"" align=""center"">14 </td>
<td nowrap=""nowrap"" align=""right"">$137.20 </td>
<td nowrap=""nowrap"" align=""center""> </td>
<td nowrap=""nowrap"" align=""center"">CD </td>
</tr>

</table>
</td>
</tr>
</table>
</form>
</center>

<script type=""text/javascript"" language =""javascript"">

function init() {
myForm.revision.value = ""03""
}

function revisionOnChange(){
window.location.href=""revenue.asp?serv_id=SSCGPAS001&revision="" + myForm.revision.value
}
</script>


<p></p>
<center>

<input class=""button2"" type=""button"" onclick=""top.close()"" value=""Close"" title=""Close"" />

</center>

</body>
</html>";

        List<TableDataExtractor.RowData> data = TableDataExtractor.ExtractSpecificTableData(sampleHtml);

        // Each row prints via RowData.ToString().
        foreach (var row in data)
        {
            Console.WriteLine(row.ToString());
        }
    }
}
|
|
|
|
|
|
|
|
/// <summary>
/// Pulls table rows out of an HTML page using regular expressions.
/// </summary>
/// <remarks>
/// This is a pattern-based scan, not an HTML parser. Only rows whose opening
/// <c>&lt;tr&gt;</c> tag contains an <c>id=</c> attribute are considered, and a row is
/// kept only when it holds at least nine <c>&lt;td&gt;</c> cells.
/// </remarks>
public class TableDataExtractor
{
    // Minimum number of <td> cells a row must contain; the first nine map, in order,
    // onto the RowData properties. Any further cells are ignored.
    private const int RequiredCellCount = 9;

    // Matches a <tr ...> whose tag contains "id=" and captures everything up to the next </tr>.
    // Built once and reused instead of being re-created from the pattern string on every call.
    private static readonly Regex RowRegex = new Regex(
        @"<tr\s*[^>]*?\bid=[^>]*?>(.*?)</tr\s*>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Captures the inner content of each <td ...>...</td> cell within a row.
    private static readonly Regex CellRegex = new Regex(
        @"<td\s*[^>]*>(.*?)</td>",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    /// <summary>
    /// Holds the nine cell values extracted from a single table row, all as trimmed text.
    /// </summary>
    public class RowData
    {
        public string Item { get; set; }
        public string Code { get; set; }
        public string Description { get; set; }
        public string Period { get; set; }
        public string UnitPrice { get; set; }
        public string Qty { get; set; }
        public string Amount { get; set; }
        public string BillingType { get; set; }
        public string Currency { get; set; }

        /// <summary>
        /// Formats a one-line summary of the row. Only Item, Description, Unit Price,
        /// Qty and Amount are included; the remaining properties are not printed.
        /// </summary>
        public override string ToString()
        {
            return $"Item: {Item}, Description: {Description}, Unit Price: {UnitPrice}, Qty: {Qty}, Amount: {Amount}";
        }
    }

    /// <summary>
    /// Scans <paramref name="htmlContent"/> for table rows that carry an <c>id</c> attribute
    /// and converts each row with at least nine cells into a <see cref="RowData"/>.
    /// </summary>
    /// <param name="htmlContent">The HTML text to scan.</param>
    /// <returns>
    /// The extracted rows in document order; an empty list when no row qualifies.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="htmlContent"/> is <c>null</c>.
    /// </exception>
    public static List<RowData> ExtractSpecificTableData(string htmlContent)
    {
        // Regex matching would throw the same exception type for null input; raising it here
        // names the caller's own parameter instead of Regex's internal "input".
        if (htmlContent == null)
        {
            throw new ArgumentNullException(nameof(htmlContent));
        }

        var extractedData = new List<RowData>();

        foreach (Match rowMatch in RowRegex.Matches(htmlContent))
        {
            MatchCollection cells = CellRegex.Matches(rowMatch.Groups[1].Value);

            // Rows with too few cells cannot fill every column, so they are skipped.
            if (cells.Count < RequiredCellCount)
            {
                continue;
            }

            extractedData.Add(new RowData
            {
                Item = CleanCell(cells[0]),
                Code = CleanCell(cells[1]),
                Description = CleanCell(cells[2]),
                Period = CleanCell(cells[3]),
                UnitPrice = CleanCell(cells[4]),
                Qty = CleanCell(cells[5]),
                Amount = CleanCell(cells[6]),
                BillingType = CleanCell(cells[7]),
                Currency = CleanCell(cells[8])
            });
        }

        return extractedData;
    }

    // Returns the captured cell content with non-breaking spaces (both the "&nbsp;" entity
    // and the U+00A0 character) turned into ordinary spaces, then trimmed at both ends.
    private static string CleanCell(Match cell)
    {
        return cell.Groups[1].Value
            .Replace("&nbsp;", " ")
            .Replace('\u00A0', ' ')
            .Trim();
    }
}
|