Contour / Code: I am using a Code Repository to perform a Python transform. This involves selecting columns from three separate datasets (caufv_df, afpo_df, prps_df) and then inner joining them into one. The first inner join joins on the column "OrderNumber". When I run this join, it returns zero rows. However, if I perform the same inner join in Contour, it returns 7 million order numbers that match between the two tables.
I have double-checked the schema to make sure the data types match, I have tried different syntaxes for the join, and I have tried removing the substring and cast functions. None of these has helped. The other, commented-out inner join works, so I know it isn't my environment or a syntax mistake. Can anyone help?
from pyspark.sql import functions as F
from transforms.api import transform_df, Input, Output
@transform_df(
    Output("redacted file path"),
    caufv_df=Input("redacted file path"),
    afpo_df=Input("redacted file path"),
    prps_df=Input("redacted file path"),
)
def compute(caufv_df, afpo_df, prps_df):
    """Inner-join the projected CAUFV and AFPO inputs on ``OrderNumber``.

    Each input is first reduced to the columns of interest and renamed.
    ``OrderNumber`` is derived identically on both sides: the 9 characters
    of ``order_|_aufnr`` starting at (1-based) position 4, cast to integer.

    The PRPS input is projected as well but is not part of the returned
    frame yet; only the CAUFV/AFPO join is performed.

    NOTE(review): with Spark's default (non-ANSI) settings a string that
    cannot be parsed as an integer casts to null, and null keys never match
    in an inner join -- confirm ``OrderNumber`` is populated on both sides
    if this join comes back empty.

    Returns:
        The joined DataFrame, with ``OrderNumber`` as the single shared key
        column followed by the remaining CAUFV and AFPO columns.
    """

    def renamed(pairs):
        # Turn (source column, alias) pairs into aliased column expressions.
        return [F.col(source).alias(alias) for source, alias in pairs]

    # Shared join-key expression so both sides are derived the same way.
    order_number = (
        F.substring(F.col('order_|_aufnr'), 4, 9)
        .cast('integer')
        .alias('OrderNumber')
    )

    # Order headers (CAUFV); column order is kept as in the original select.
    order_headers = caufv_df.select(
        *renamed([
            ('opertn_task_list_no_|_aufpl', 'aufpl'),
            ('object_number_|_objnr', 'objnr'),
        ]),
        order_number,
        *renamed([
            ('technical_completion_|_idat2', 'completed_date'),
            ('order_type_|_auart', 'Order_Type'),
        ]),
    )

    # Order items (AFPO).
    order_items = afpo_df.select(
        *renamed([('order_item_quantity_|_psmng', 'Operation_Qty_AFPO')]),
        order_number,
        *renamed([('wbs_element_|_projn', 'WBS_Element')]),
    )

    # WBS details (PRPS): projected here, but the join on 'WBS_Element' is
    # not applied to the result yet, so this frame is currently unused.
    wbs_details = prps_df.select(  # noqa: F841
        *renamed([
            ('wbs_element_|_posid', 'WBS'),
            ('applicant_no_|_astnr', 'Applicant_Description'),
            ('applicant_|_astna', 'ApplicantName'),
            ('wpc_|_zzwpc', 'WPC'),
            ('material_|_zzmatnr', 'material_zzmatnr'),
            ('description_|_post1', 'Descriptionprps'),
            ('customer_code_|_zzccd', 'CustCode'),
            ('wbs_element_|_pspnr', 'WBS_Element'),
        ])
    )

    return order_headers.join(order_items, on='OrderNumber', how='inner')