Help on Code logic to remove duplicate mails from webapp mail box

eman banerjee emanbanerjee at gmail.com
Wed Oct 30 03:34:14 EDT 2019


On Wednesday, 30 October 2019 12:40:06 UTC+5:30, eman banerjee  wrote:
> Hi
> 
> I am working on a project where we connect to a webapp mailbox, extract the subject, sender, body, etc. from the mails, save them in a dataframe, and insert them into a SQL DB.
> 
> My next challenge is to remove any duplicate mails from mailbox.
> Could you kindly help me.
> For example, each new mail entering the mailbox could first be checked, and if it is a duplicate email, it would not be inserted into the mailbox.
> 
> 
> Thanks





code is below




class EmailAnalysis:

    '''Method help to communicate with outlook''' 
    
    def emailExchangeCredentials(self):
        try:
            cipher = Fernet(b'***************')
            account_details = b'*****************'
            user_details = b'****************'
            sec_details = b'*****************'
            account_details_decrypt = cipher.decrypt(account_details).decode('utf-8')
            user_details_decrypt = cipher.decrypt(user_details).decode('utf-8')
            sec_details_decrypt = cipher.decrypt(sec_details).decode('utf-8')
            credentials = Credentials(user_details_decrypt, sec_details_decrypt)
            account = Account(account_details_decrypt, credentials=credentials, autodiscover=True)
            return account
        except:
            account = 'Failure'
            return account
        
    
    
    
    def createDBConn(self):
        cipher = Fernet(b'*********************')
        ip = b'*******************'
        
    
        port = ****
        service_name = b'*************'
        attuid = b'**********************'
        password = b'********************'
        ip_decrypt = cipher.decrypt(ip).decode('utf-8')
        service_name_decrypt = cipher.decrypt(service_name).decode('utf-8')
        attuid_decrypt = cipher.decrypt(attuid).decode('utf-8')
        password_decrypt = cipher.decrypt(password).decode('utf-8')
        dsn_tns = ora.makedsn(ip_decrypt, port,service_name_decrypt)
        conn = ora.connect(attuid_decrypt,password_decrypt,dsn_tns)
        return conn
        
    def extractEmail(self, account, Email_Data, conn):
        
        self.Email_Data = Email_Data
        SUBJECT = []
        SENDER = []
        JOB_NAME = []
        EMAIL_DATE = []
        EMAIL_BODY = []
        REMEDIATION_ID = []
        PRIORITY = []
        i = 0
        
        to_folder = account.inbox / 'Read-Mail'
        cursor = conn.cursor()
        #print("Type.......................:",type(account.inbox.all()))
        for item in account.inbox.all().order_by('-datetime_received')[:20]:
            if len(item.sender.email_address.split('*****.com'))>1:
                if len(item.subject.split('GREEN'))<2:
                    
                    
                    SUBJECT.append(item.subject)
                    
                    
                    
                    SENDER.append(item.sender.email_address) 

                    find_Job = item.sender.email_address.split('@')
                    job_name = find_Job[0]
                    JOB_NAME.append(str(job_name))
                   
                   
                    #print("Date time received: ",item.datetime_received.date())
                    EMAIL_DATE.append(item.datetime_received.date())
                    date = item.datetime_received.date()
                    date_ = date.strftime('%Y-%m-%d')
                    print("Date:",date_)
                  
                    
                    
                    soup = BeautifulSoup(item.body)
                    email_body_unfor = soup.get_text()
                    #print("Emain Body Ori:- ",email_body_unfor)
                    email_body_unfor_arr = email_body_unfor.split('-->')
                    if len(email_body_unfor_arr) > 1:                        
                        email_body_unfor_foot = email_body_unfor_arr[1]
                        #print("Emain Body Ori2:- ",email_body_unfor_foot)
                        email_body_unfor_sp_arr = email_body_unfor_foot.split("This message is for the designated recipient only")
                        if len(email_body_unfor_sp_arr) > 1:   
                            email_body_unfor_sp = email_body_unfor_sp_arr[0]
                            #print("Emain Body Ori3:- ",email_body_unfor_sp)
                            email_body_1 = email_body_unfor_sp.strip()
                            email_body_2 = email_body_1.rstrip()
                            email_body = email_body_2.lstrip()
                            #print("Email Body: ",email_body)
                            EMAIL_BODY.append(email_body)
                    else:
                        email_body_1 = email_body_unfor.strip()
                        email_body_2 = email_body_1.rstrip()
                        email_body = email_body_2.lstrip()
                        EMAIL_BODY.append(email_body)
                        
                      
                      
                      
                    i = i+1
                    i = 1000+i 
                    Issue_ID = i 
                    PROCESS_NAME = str(find_Job[0])
                    MESSAGE_1 = ''
                    MESSAGE_2 = ''
                    MESSAGE = email_body
                    
                   
                    
                    sql = "INSERT INTO ***DBname** (SENDER,SUBJECT,EMAIL_BODY,EMAIL_DATE) VALUES ("+"'"+str(item.sender.email_address)+"',"+"'"+str(item.subject)+"',"+"'"+str(email_body)+"',"+"'"+str(date_)+"')"
                    print('SQL :- ',sql)
                    result = cursor.execute(sql.encode('utf-8'))
                    
                    conn.commit()
                    
                    #print('to folder:-',to_folder)
                    item.move(to_folder)
                else:
                    item.move(to_folder)
                    
            else:
                item.move(to_folder)
                
                
                     
        cursor.close()
        conn.close()
        Email_Data = pd.DataFrame(list(zip(SUBJECT, SENDER,EMAIL_DATE, EMAIL_BODY)),columns =['SUBJECT', 'SENDER',  'EMAIL_DATE', 'EMAIL_BODY'])
        
        return Email_Data
        

    def emailAnalysisController(self,Task_Details):
        try:
            #print('inside controller')
            self.Task_Details = Task_Details
            acc = self.emailExchangeCredentials(self)
            if acc == 'Failure':
                acc = self.emailExchangeCredentials(self)
                
            Email_Data = pd.DataFrame(columns=['EMAIL_DATE','SENDER','EMAIL_BODY','SUBJECT'])
            Email_Data['EMAIL_DATE'] = pd.to_datetime(Email_Data['EMAIL_DATE'])
            conn = self.createDBConn(self)
            Email_Data = self.extractEmail(self, acc,Email_Data, conn)
           
            exection_status = 'Success'
            return exection_status
        except:
            exection_status = 'Failure'
            return exection_status    



More information about the Python-list mailing list