Flexible Data Warehouse ETL Code-Highlights

Table of Contents

Transforming SAP EKBE Table​

In the IRB model, we want to determine the invoice quantity and goods received quantity for each purchase order and the last GR posting for the invoice. SAP stores all of this information in table EKBE. However, the document for the goods receipt value is in a different line than the invoice receipt. They are linked together with a reference document. The logic below works as follows.

1. create two helper tables with indexes as the workload can be many million rows

2. find and sum up all invoice lines (Q) and good receipt lines (E)

3. join the newest good receipt for the follow on document of each invoice

4. delete duplicates based on PO for the join of IR/GR PO quantity

5. delete duplicates based on the invoice document to later join on IRB material document.


drop table if exists #ekbe_q
drop table if exists #ekbe_e
drop table if exists tp1_ekbe_po;
drop table if exists tp1_ekbe_ref;

create table #ekbe_q(
	[purchase_order] [nvarchar](max) null,
	[material_document] [nvarchar](10) null,
	[year_mat_doc] [decimal](4, 0) null,
	[reference_document] [nvarchar](10) null,
	[year_ref_doc] [decimal](4, 0) null,
	[posting_date] [date] null,
	[entry_date] [date] null,
	[created_by] [nvarchar](max) null,
	[po_history_category] [nvarchar](max) null,
	[plant] [nvarchar](max) null,
	[quantity] [decimal](13, 3) null,
	[file_path] [nvarchar](max) null,
	[aa_number] [decimal](2, 0) null,
	[movement_type] [nvarchar](max) null,
	[download_date] [datetime2](7) null
) on [primary] textimage_on [primary]   

create nonclustered index ekbe_q_ref
    on #ekbe_q (reference_document, year_ref_doc);   

create table #ekbe_e(
	[purchase_order] [nvarchar](max) null,
	[material_document] [nvarchar](10) null,
	[year_mat_doc] [decimal](4, 0) null,
	[reference_document] [nvarchar](10) null,
	[year_ref_doc] [decimal](4, 0) null,
	[posting_date] [date] null,
	[entry_date] [date] null,
	[created_by] [nvarchar](max) null,
	[po_history_category] [nvarchar](max) null,
	[plant] [nvarchar](max) null,
	[quantity] [decimal](13, 3) null,
	[file_path] [nvarchar](max) null,
	[aa_number] [decimal](2, 0) null,
	[movement_type] [nvarchar](max) null,
	[download_date] [datetime2](7) null
) on [primary] textimage_on [primary]
-- for reverse movement types reverse quantity

create nonclustered index ekbe_e_ref
    on #ekbe_e (reference_document, year_ref_doc,entry_date desc, material_document desc );   

update cln_ekbe set quantity = quantity * -1 where debit_credit = 'h' ;

with ekbe_sum_q as (
    select
        *,
		sum(quantity) over (
            partition by
                purchase_order
        ) quantity_sum,
        row_number() over (
            partition by
			purchase_order
            order by
                purchase_order
        ) row_num
     from
        cln_ekbe
	where (po_history_category = 'q')
	and reference_document <> ''
)

insert into #ekbe_q (
	 [purchase_order]
	,[material_document]
	,[year_mat_doc]
	,[reference_document]
	,[year_ref_doc]
	,[posting_date]  
	,[entry_date]
	,[created_by]
	,[po_history_category]
	,[plant]
	,[quantity]
	,[file_path]
	,[aa_number]
	,[movement_type]
	,[download_date]
)
select
	[purchase_order]
	,[material_document]
	,[year_mat_doc]
	,[reference_document]
	,[year_ref_doc]
	,[posting_date]  
	,[entry_date]
	,[created_by]
	,[po_history_category]
	,[plant]
	,[quantity_sum]
	,[file_path]
	,[aa_number]
	,[movement_type]
	,[download_date]
	from ekbe_sum_q;

with ekbe_sum_e as (
    select
        *,
		sum(quantity) over (
            partition by
                purchase_order
        ) quantity_sum,
        row_number() over (
            partition by
			purchase_order
            order by
                purchase_order
        ) row_num
     from
        cln_ekbe
	where po_history_category = 'e'
		and reference_document <> ''
)

insert into #ekbe_e (
	 [purchase_order]
	,[material_document]
	,[year_mat_doc]
	,[reference_document]
	,[year_ref_doc]
	,[posting_date]  
	,[entry_date]
	,[created_by]
	,[po_history_category]
	,[plant]
	,[quantity]
	,[file_path]
	,[aa_number]
	,[movement_type]
	,[download_date]
)
select
	[purchase_order]
	,[material_document]
	,[year_mat_doc]
	,[reference_document]
	,[year_ref_doc]
	,[posting_date]  
	,[entry_date]
	,[created_by]
	,[po_history_category]
	,[plant]
	,[quantity_sum]
	,[file_path]
	,[aa_number]
	,[movement_type]
	,[download_date]
	from ekbe_sum_e;

with ekbe_delete_suplicates as (
    select
        *,
        row_number() over (
            partition by
			reference_document,
			year_ref_doc
            order by
            reference_document,
			year_ref_doc,
			entry_date desc,
			material_document desc
        ) row_num
     from
        #ekbe_e
	where po_history_category = 'e'
)

delete from ekbe_delete_suplicates
where row_num > 1

select q.material_document,
	   q.year_mat_doc,
	   q.purchase_order,
	   e.material_document as reference_document,
	   e.year_ref_doc as year_ref_doc,
	   e.entry_date as ref_doc_entry_date,
	   e.posting_date as ref_doc_posting_date,
	   e.created_by as ref_doc_created_by,
	   e.quantity as gr_quantity,
	   q.quantity as ir_quantity
into dbo.tp1_ekbe_ref from #ekbe_q as q
left outer join #ekbe_e as e on
q.reference_document = e.reference_document and
q.year_ref_doc = e.year_ref_doc;

select * into dbo.tp1_ekbe_po from tp1_ekbe_ref;

with ekbe_ref_del_duplicates as (
    select
        *,
        row_number() over (
            partition by
                year_mat_doc,
                material_document
            order by
                year_mat_doc,
				material_document,
				ref_doc_entry_date desc
        ) row_num
     from
     tp1_ekbe_ref
)

delete from ekbe_ref_del_duplicates
where row_num > 1;

with ekbe_po_del_duplicates as (
    select
        *,
        row_number() over (
            partition by
                purchase_order
            order by
                purchase_order
        ) row_num
     from
     tp1_ekbe_po
)

delete from ekbe_po_del_duplicates
where row_num > 1

drop table if exists #ekbe_q
drop table if exists #ekbe_e

ETL Function to Dynamically Remove special Characters

declare @table table
(
tablename varchar(50),
columnname varchar(50),
id int identity(1,1)
)

insert into @taable(tablename,columnname)
select table_name, column_name from config
where function_name = 'CLEAN_COLUMNS'
and db_schema = @schema

declare @max int
declare @sql varchar(max)
declare @tablename varchar(50)
declare @columnname varchar(50)
declare @id int = 1

select @max = max(id) from @table

while (@id <= @max)
begin

select @tablename = tablename, @columnname =columnname from @table where id = @id

set @sql =  'update '+@schema+'.'+@tablename+' set '+@columnname+' = replace('+@columnname+','','','''');
			 update '+@schema+'.'+@tablename+' set '+@columnname+' = replace('+@columnname+',''"'','''');	'

exec(@sql)

set @id = @id +1
end

Recalculating Due Date based on SAP Logic

The due date and Arrears after net must also be recalculated. Additionally, there is the requirement to calculate the due date based on the VAT due date (Reference_key1 field). This way, we can compare the Due date based on the Baseline date and Real VAT date in a report. To enable this comparison, we need also to recalculate Days1 and Days2 based on the reference_key_1 and payment term table t052:

alter function [o2c].[fc_calculate_days1]
(@payment_terms nvarchar(max), @baseline_date nvarchar(max))
returns int
as
begin

--declare @payment_terms as nvarchar(max)
--declare @baseline_date as nvarchar(max)
declare @days1 as int
declare @days1_fixed as int
declare @due_date_special1 as int
declare @month_special1 as int
declare @date as date
declare @day_limit as int

--set @payment_terms = 'z304'
--set @baseline_date = '2021-01-09 00:00:00.0000000'

set @day_limit = (select min(day_limit)
					from o2c.cln_t052
					where payment_term = @payment_terms)

set @baseline_date = try_convert(date,@baseline_date)
set @days1 = 0

if @day_limit = 0
begin
	set @days1_fixed = (select max(days1_fixed)
						from o2c.cln_t052
						where payment_term = @payment_terms)

	set @due_date_special1 = (
						select max(due_date_special1)
						from o2c.cln_t052
						where payment_term = @payment_terms)

	set @month_special1 = (
						select max(month_special1)
						from o2c.cln_t052
						where payment_term = @payment_terms)
end
else
begin
	set @days1_fixed = (select top 1 days1_fixed
						from o2c.cln_t052
						where payment_term = @payment_terms and
						day_limit >= right(@baseline_date,2)
						order by day_limit)

	set @due_date_special1 = (
						select top 1 due_date_special1
						from o2c.cln_t052
						where payment_term = @payment_terms and
						day_limit >= right(@baseline_date,2)
						order by day_limit)

	set @month_special1 = (
						select top 1 month_special1
						from o2c.cln_t052
						where payment_term = @payment_terms and
						day_limit >= right(@baseline_date,2)
						order by day_limit)
end

set @days1 = @days1_fixed

if @due_date_special1 > 0 or @month_special1 > 0
begin
	set @date = @baseline_date

	if @days1_fixed > 0
	begin
	    set @date =  dateadd(day,@days1_fixed,convert(date,@baseline_date))
	end
--		date calculation
	set @date = dateadd(month,@month_special1,@date)

	if @due_date_special1 = 31
	begin
		set @due_date_special1 = right(eomonth(@date),2)
	end

	set @date = datefromparts(left(@date,4),right(left(@date,7),2),@due_date_special1)

	if  @date < @baseline_date
	begin
		set	@baseline_date = @date
	end

	set @days1 = datediff(day,@baseline_date,@date)

end
return @days1
end

Based on these newly calculated Days1 and Days2, the due date and arrears are calculated.

alter function [dbo].[fc_calculate_due_date_ap]
(@start date, @days1 int, @days2 int, @debit_credit varchar(max), @follow_on_doc varchar(max))
returns date
as
begin

declare @duedate date

if @debit_credit = 's' and @follow_on_doc = ''
	begin
	set @duedate = @start
	end
else
	begin
	if @days2 = 0
		begin
		set @duedate = dateadd(day,@days1,@start)
		end
	else
		begin
		set @duedate = dateadd(day, @days2, @start)
		end
end

return @duedate

end

Reassigning Open Items based on Key Date

The Open items model contains all open items at the end of each month for the last year. e.g., all open items on 31.03, 30.04, 31.05, etc.

By calculating back in time on the cleared items column in the TP3_ALL_ITEMS table, we can identify which records were open at the key date. If an item is currently open or has been cleared after a given key date and was posted before that keydate, it was open at that time and therefore, will be printed into the output model.

In this model open items can occur multiple times as an item can be open in Jan and Feb and March etc. Therefore the primary key is the transaction_key and the key_date column.

declare @keydate_tp3 date
declare @month int
set @month = 0

set @keydate_tp3 = (select max(key_date) from dbo.tp3_all_items)

drop table if exists sta_open_items_monthly

select
client, 
company_code,
	document_number,
	line_item,
	vendor_number,
	document_type,
	special_gl_indicator,
	payment_block,
	payment_terms,
	scb_indicator,
	gl_account,
	clearing_document,
	currency,
	posting_date,
	clearing_date,
	amount_local,
	amount_document,
	reference,
	item_text,
	[year],
	days1,
	days2,
	baseline_date,
	file_path,
	debit_credit,
	download_date,
	clearing_document_year,
	src_download_date,
	vendor_name,
	accounting_clerk_number,
	accounting_clerk_name,
	accounting_clerk_user,
	reconciliation_account,
	vendor_name_chinese,
	vendor_country,
	trading_partner,
	company_name,
	city,
	company_name_short,
	run_id,
	run_date,
	document_posted_by,
	reason,
	reason_details,
	china_public_holiday,
	domestic_3rd_payment,
	oversea_3rd_payment,
	oversea_ic_payment,
	key_date,
	due_date,
	arrears_after_net,
	transaction_key,
	wht,
	duplicate
into dbo.sta_open_items_monthly
from dbo.tp3_all_items
where posting_date <= @keydate_tp3 and
	( clearing_date is null or clearing_date > @keydate_tp3 );


while (@month > -11)
begin

set @keydate_tp3 = eomonth(dateadd(month,-1,@keydate_tp3))

insert into dbo.sta_open_items_monthly (
client,
--... exluded columns from above to make the code more easily readable
select
client,
--... exluded columns from above to make the code more easily readable
from dbo.tp3_all_items
where posting_date <= @keydate_tp3 and
	( clearing_date is null or clearing_date > @keydate_tp3 );

update dbo.sta_open_items_monthly set key_date = @keydate_tp3
where key_date is null

set @month = @month -1
end

update dbo.sta_open_items_monthly set arrears_after_net = dbo.fc_calculate_arrears(due_date,key_date)

exec o2c.p_execute_etl_function @imp_function = 'remove_zero', @imp_tablename = 'sta_open_items_monthly', @schema = 'dbo'

Rate Customer Payment Behavior

This unique routine takes data from both the STA_OPEN_CUST_ITEMS and STA_ALL_CUST_ITEMS model to create a model that can compare overdues and total sales for customers. To do this, the single open line items are clustered in overdue 30 / 60 / 90+ days and aggregated for each customer. The Dashboard solution contains a scoring model that then clusters the customers in good and bad payers.

alter procedure [o2c].[p_sta_payment_behavior] as
begin

drop table if exists o2c.sta_payment_behavior
drop table if exists #temp_od
drop table if exists #temp_sum
drop table if exists #temp_cust;

with sum_od as (
    select distinct credit_account,
		   key_date,
		   overdue_rank_vat,
		   sum(convert(float,overdue_value) ) over (
            partition by
				convert(varchar(10),credit_account),
				key_date,
				convert(varchar(10),overdue_rank_vat)
            order by
                convert(varchar(10),credit_account),
				key_date ,
				convert(varchar(10),overdue_rank)  
			) overdue_value_by_ca
     from
        o2c.sta_open_cust_items
	 where credit_account is not null
	 and relevant_for_payment_behavior = 'x')


select * into #temp_od from (
  select credit_account, key_date, overdue_rank_vat,overdue_value_by_ca
  from sum_od
) t
pivot (
  sum(overdue_value_by_ca)
  for overdue_rank_vat in (
   [1-30], [90+], [31-90], [not_due]
  )
)as p;


with sum_sales as (
    select distinct credit_account,
		   sum(convert(float,amount_local) ) over (
            partition by
				convert(varchar(10),credit_account)
            order by
                convert(varchar(10),credit_account)
			) sales_by_ca
     from
        o2c.sta_all_cust_items
	 where (document_type = 'dg' or
		   document_type = 'dr') and
		   posting_date between eomonth(dateadd(month,-12,key_date)) and key_date)

select * into #temp_sum
from sum_sales

select * into #temp_cust
from o2c.tp1_customer
where customer_number = credit_account;

with del_cust_duplicates as (
    select
        *,
        row_number() over (
            partition by
				credit_account
            order by
				credit_account,
				company_code
        ) row_num
     from
        #temp_cust
)

delete from del_cust_duplicates
where row_num > 1

select od.credit_account,
	   od.key_date,
	   od.[1-30],
	   od.[31-90],
	   od.[90+],
	   od.[not_due],
	   su.sales_by_ca,
	   cust.credit_limit,
	   cust.credit_reporting_group,
	   cust.customer_country,
	   cust.customer_name_chinese,
	   cust.last_internal_review,
	   cust.reconciliation_account,
	   cust.risk_category,
	   cust.trading_partner,
	   cust.credit_block
into o2c.sta_payment_behavior
from #temp_od as od
	left join
	#temp_sum as su on
	od.credit_account = su.credit_account
	left join
	#temp_cust as cust on
	right('0000000000' + convert(varchar(10),od.credit_account), 10) = cust.credit_account

drop table if exists #temp_od
drop table if exists #temp_sum
drop table if exists #temp_cust

end

Let's get in Touch

... How can I help you?