For this example, I pasted your two lines of input data three times into the
data.txt test input file:
Python:
import re
# Captures the digits after "CustomerId:" (group 1) and the digits after the
# first following "Size:" on the same line (group 2).
regex = re.compile(r'CustomerId:(\d+).*?Size:(\d+)')


def total_size_by_customer(lines):
    """Sum the Size values per CustomerId over an iterable of text lines.

    Args:
        lines: Any iterable of strings (an open text file works).

    Returns:
        A dict mapping each customer id (kept as a string, exactly as it
        appears in the input) to the integer sum of its Size values.
        Lines that do not contain both fields are ignored.
    """
    totals = {}
    for line in lines:
        m = regex.search(line)
        # search() returns None when the line does not match; calling
        # m.group() on it would raise AttributeError, so skip such lines.
        if m is None:
            continue
        cust, size = m.groups()
        totals[cust] = totals.get(cust, 0) + int(size)
    return totals


if __name__ == "__main__":
    with open('data.txt') as fh:
        data = total_size_by_customer(fh)
    print(str(data))
Output:
{'1234': 16296, '1237': 16296}
Perl:
use warnings;
use strict;
use Data::Dumper;
# Read data.txt line by line and accumulate the total Size per CustomerId.
open(my $fh, '<', 'data.txt') or die $!;
my %data;
while (defined(my $record = <$fh>)) {
    # Lines that lack either field contribute nothing.
    next unless $record =~ /CustomerId:(\d+).*?Size:(\d+)/;
    my ($id, $bytes) = ($1, $2);
    $data{$id} += $bytes;
}
# Dump the resulting hash (customer id => summed size).
print Dumper(\%data);
Output:
$VAR1 = {
'1234' => 16296,
'1237' => 16296
};
Solved: regular expression to extract only CustomerId and Data (bytes) and save them in a list?