
Category: Web, HTML, Tech

I thought I would show some of my parser code.

Some of these routines are implemented and some are not, since I may not need them now that my improvements to the AIML files and the AIML interpreter have progressed. The emotion routine is based on a completed routine but isn't fully implemented yet. Anyway, here is some of my Python parser code:
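
For context, these snippets lean on NLTK, spaCy, difflib, and the python-aiml package. A rough sketch of the imports they assume (pieced together from the names used below, so treat it as approximate):

    # Rough import sketch (assumed, based on the names used in the snippets)
    import re
    import random
    import difflib
    import nltk
    import spacy
    import aiml
    from nltk import word_tokenize, pos_tag
    from nltk.corpus import wordnet, stopwords
    from nltk.stem import WordNetLemmatizer
    from nltk.sentiment.vader import SentimentIntensityAnalyzer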

    # Part of response

    def identify_parts_of_speech(self, sentence):
        tokens = word_tokenize(sentence)
        tagged_tokens = pos_tag(tokens)

        nouns = [word for word, pos in tagged_tokens if pos.startswith('NN')]
        adverbs = [word for word, pos in tagged_tokens if pos.startswith('RB')]
        verbs = [word for word, pos in tagged_tokens if pos.startswith('VB')]
        names = [word for word, pos in tagged_tokens if pos.startswith('NNP')]
        adjectives = [word for word, pos in tagged_tokens if pos.startswith('JJ')]

        question_starters = ["who", "why", "when", "where", "how", "what"]

        # Compare the raw tokens, since tagged_tokens holds (word, tag) pairs
        question_starters_found = [word for word in tokens if word.lower() in question_starters]

        random.shuffle(nouns)
        random.shuffle(adverbs)
        random.shuffle(verbs)
        random.shuffle(names)
        random.shuffle(adjectives)

        return nouns, adverbs, verbs, names, adjectives, question_starters_found
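
To give a rough idea of what that returns, here is a usage sketch; the instance name `parser` is an assumption, the lists come back shuffled, and the exact contents depend on NLTK's tagger:

    # Example (assumes an instance called parser and NLTK's tokenizer/tagger data installed)
    nouns, adverbs, verbs, names, adjectives, qs = parser.identify_parts_of_speech(
        "Where did Alice quickly hide the shiny key?")
    # nouns might come back as ['Alice', 'key'] (in shuffled order), adjectives ['shiny'],
    # adverbs ['quickly'], and qs ['Where'] since the sentence starts with a question word
    print(nouns, adjectives, qs)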
    
    #Part of response
    def identify_names(self, parsed_input):
        names = []
        for token in parsed_input:
            if re.match(r'^[A-Z][a-z]+$', token):
                names.append(token)
        return names
    
    # Part of response
    def normalize_input(self, input_text):
        # Tokenize the input text into words
        words = word_tokenize(input_text)

        # Lemmatize each word in the input text
        lemmatized_words = [self.lemmatizer.lemmatize(word) for word in words]

        # Join the lemmatized words back into a normalized sentence
        normalized_sentence = " ".join(lemmatized_words)

        return normalized_sentence
    
    #Part of response
    def identify_sentence_type(self, sentence):
        if sentence.endswith((".", "!", "?")):
            return "Ends with punctuation"
        elif sentence.endswith((";", ",")):
            return "Ends with separator"
        else:
            return "Normal sentence"


    def split_non_question_sentences(self, sentences):
        split_sentences = []

        for sentence in sentences:
            # Split based on sentence enders
            sentence_parts = re.split(r'(?<=[.!?])\s+', sentence)
    
            # Split each part based on sentence splitters
            for i in range(len(sentence_parts)):
                sentence_parts[i] = re.split(r'\b(but|however|in addition)\b', sentence_parts[i])

            # Flatten the nested list of sentence parts
            split_sentences.extend([part for parts in sentence_parts for part in parts])

        return split_sentences
    

    def parse_pronouns(user_input):
        pronouns = []
        words = user_input.split()
        
        for word in words:
            if word.lower() in ['i', 'me', 'my', 'mine']:
                pronouns.append('first person')
            elif word.lower() in ['you', 'your', 'yours']:
                pronouns.append('second person')
            elif word.lower() in ['he', 'him', 'his', 'she', 'her', 'hers', 'it', 'its']:
                pronouns.append('third person')
        
        return pronouns


    def search_close_matches(self, sentence, word):
        # Importing the necessary module
        from difflib import get_close_matches
        
        # Searching for close enough matches
        close_matches = get_close_matches(word, sentence.split())
        
        return close_matches

    def identify_yelling(self, sentence):
        if sentence.isupper():
            return "Yelling detected! Whoa there, calm down, pal!"
        else:
            words = sentence.split()
            for word in words:
                if word.isupper():
                    return "Pay attention to this word: {}".format(word)
        return "No yelling detected. Carry on!"

    def detect_sentiment_intent(self, user_input):
        sid = SentimentIntensityAnalyzer()
        sentiment_scores = sid.polarity_scores(user_input)
        
        if sentiment_scores['compound'] >= 0.05:
            return "Positive"
        elif sentiment_scores['compound'] <= -0.05:
            return "Negative"
        else:
            return "Neutral"

    def check_grammar(sentence):
        # Placeholder grammar check: currently just reverses the word order
        tokens = word_tokenize(sentence)
        reversed_tokens = tokens[::-1]
        reversed_sentence = ' '.join(reversed_tokens)
        return reversed_sentence

    # Smiley-based sentiment adjustment. The name and arguments are assumptions:
    # 'matches' = emoticons found in the input, 'smileys' = emoticon -> sentiment map,
    # both supplied by the caller (the original fragment did not define them).
    def adjust_sentiment_for_smileys(matches, smileys):
        # Adjust sentiment intent based on smiley recognition
        sentiment = "neutral"
        for match in matches:
            if match in smileys:
                if smileys[match] == "positive":
                    sentiment = "positive"
                elif smileys[match] == "negative":
                    sentiment = "negative"
                elif smileys[match] == "surprised":
                    sentiment = "surprised"
                elif smileys[match] == "confused":
                    sentiment = "confused"

        # Return the adjusted sentiment intent
        return sentiment
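
Since `matches` and `smileys` aren't defined in that fragment, here is one way they might be produced; the emoticon list and regex are illustrative assumptions, not the exact ones I use:

    # Illustrative only: a possible emoticon map and a regex to find emoticons in the input
    smileys = {":)": "positive", ":D": "positive", ":(": "negative",
               ":o": "surprised", ":S": "confused"}
    smiley_pattern = re.compile(r'(?::\)|:D|:\(|:o|:S)')
    matches = smiley_pattern.findall("that was great :D ...i think :S")
    # matches -> [':D', ':S'], which the routine above would report as "confused"
    # (the last matching smiley wins, the way the loop is written)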

    # Remember: this has to do with the XML (AIML) side
    def get_property(self, key):
        if key in self.default_properties:
            return self.default_properties[key]
        else:
            return None


# Remember: the above code has to do with the XML (AIML) side

    def rephrase_sentences(self, text):
        # Tokenize the text into sentences
        sentences = nltk.sent_tokenize(text)
        rephrased = []
        
        # Iterate over each sentence
        for sentence in sentences:
            # Tokenize the sentence into words
            words = nltk.word_tokenize(sentence)
            
            # Iterate over each word
            for word in words:
                # Get the synonyms of the word
                synonyms = []
                for syn in wordnet.synsets(word):
                    for lemma in syn.lemmas():
                        synonyms.append(lemma.name())
                
                # Get the antonyms of the word
                antonyms = []
                for syn in wordnet.synsets(word):
                    for lemma in syn.lemmas():
                        if lemma.antonyms():
                            antonyms.append(lemma.antonyms()[0].name())
                
                # Get the homonyms of the word
                homonyms = []
                for syn in wordnet.synsets(word):
                    for lemma in syn.lemmas():
                        if lemma.name() != word:
                            homonyms.append(lemma.name())
                
                # Get the homographs of the word
                homographs = []
                for syn in wordnet.synsets(word):
                    for lemma in syn.lemmas():
                        if lemma.name() != word and lemma.name() not in homonyms:
                            homographs.append(lemma.name())
                
                # Rephrase the sentence using the linguistic elements
                rephrased_sentence = sentence.replace(word, f"({word})")
                rephrased_sentence += f"\nSynonyms of {word}: {', '.join(synonyms)}"
                rephrased_sentence += f"\nAntonyms of {word}: {', '.join(antonyms)}"
                rephrased_sentence += f"\nHomonyms of {word}: {', '.join(homonyms)}"
                rephrased_sentence += f"\nHomographs of {word}: {', '.join(homographs)}"
                rephrased.append(rephrased_sentence)

        return rephrased

    def remember_variables(self):
        properties = get_properties()
        predicates = get_predicates()
        aiml_file = get_aiml_file()

        if properties is None:
            properties = {}

        if predicates is None:
            predicates = {}

        if aiml_file is None:
            aiml_file = {}

        return properties, predicates, aiml_file

    def check_context_and_limit_phrases(aiml_file):
        phrases = aiml_file.get_phrases()
        phrase_count = {}
        
        for phrase in phrases:
            if phrase in phrase_count:
                phrase_count[phrase] += 1
            else:
                phrase_count[phrase] = 1
        
        for phrase in phrases:
            if phrase_count[phrase] > 3:
                aiml_file.remove_phrase(phrase)
        
        aiml_file.save()

    def search_subtopics(aiml_files):
        subtopics = []
        
        for aiml_file in aiml_files:
            kernel = aiml.Kernel()
            kernel.learn(aiml_file)
            
            # Get the subtopics from the AIML file
            aiml_subtopics = kernel.get_subtopics()
            
            # Add the subtopics to the list
            subtopics.extend(aiml_subtopics)
    
        return subtopics



    # Function to parse large texts and generate AIML replies
    def parse_large_text(text):
        try:
            # Break down the text into paragraphs
            paragraphs = text.split("\n\n")
            
            # Initialize AIML reply
            aiml_reply = ""
            
            # Process each paragraph
            for paragraph in paragraphs:
                # Tokenize the paragraph into sentences
                sentences = nltk.sent_tokenize(paragraph)
                
                # Initialize paragraph context
                paragraph_context = ""
                
                # Process each sentence
                for sentence in sentences:
                    # Remove stopwords and lemmatize the words
                    words = nltk.word_tokenize(sentence)
                    words = [word for word in words if word.lower() not in stopwords.words("english")]
                    words = [WordNetLemmatizer().lemmatize(word) for word in words]
                    
                    # Generate AIML reply based on sentence context
                    aiml_reply += aiml.generate_reply(paragraph_context, sentence)
                    
                    # Update paragraph context
                    paragraph_context += " " + sentence
                    
            return aiml_reply
        
        except Exception as e:
            # Handle exceptions and report errors using 1920s slang
            error_message = "Oops! We've got ourselves a pickle. The bees knees went haywire. Please try again later, old sport."
            print(error_message)
            print("Error Details: ", str(e))    
    
    def detect_sarcasm(statement):
        # Tokenize the statement into words
        words = nltk.word_tokenize(statement)
        
        # Check for sarcastic patterns
        if 'not' in words and 'really' in words:
            return True
        else:
            return False
        
    # Function to determine the mood and tone of user input
    def determine_emotion(user_input):
        global emotion
        
        # Perform sentiment analysis using NLTK's VADER analyzer
        sentiment = SentimentIntensityAnalyzer().polarity_scores(user_input)
        
        # Extract the compound score
        compound_score = sentiment['compound']
        
        # Categorize the input based on the compound score
        if compound_score >= 0.5:
            emotion = 'excitement'
        elif compound_score >= 0.1:
            emotion = 'joy'
        elif compound_score >= -0.1:
            emotion = 'neutral'
        elif compound_score >= -0.5:
            emotion = 'sadness'
        elif compound_score >= -0.9:
            emotion = 'depression'
        else:
            emotion = 'anger'


    def calculate_popularity_score(user_chats):
        positive_count = 0
        neutral_count = 0
        
        sid = SentimentIntensityAnalyzer()
        
        for chat in user_chats:
            sentiment_scores = sid.polarity_scores(chat)
            compound_score = sentiment_scores['compound']
            
            if compound_score >= 0.05:
                positive_count += 1
            elif -0.05 < compound_score < 0.05:
                neutral_count += 1
        
        total_chats = len(user_chats)
        popularity_score = (positive_count + (0.5 * neutral_count)) / total_chats
        
        return popularity_score
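
As a quick sanity check on the formula: with three chats where two score positive and one neutral, the popularity score works out to (2 + 0.5 × 1) / 3 ≈ 0.83. Something like:

    # Hypothetical chats; actual VADER scores may classify them slightly differently
    chats = ["I love talking to you!", "You are really helpful :)", "The meeting is at 3pm."]
    score = calculate_popularity_score(chats)
    print(round(score, 2))  # roughly 0.83 if two read as positive and one as neutral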


    def parse_objects(sentence):
        objects = []
        verbs = []
        properties = []
        actions = []

        # Tokenize the sentence
        tokens = nltk.word_tokenize(sentence)

        # Part-of-speech tagging
        tagged_tokens = nltk.pos_tag(tokens)

        # Extract objects, verbs, properties, and actions
        for token, tag in tagged_tokens:
            if tag.startswith('NN'):
                objects.append(token)
            elif tag.startswith('VB'):
                verbs.append(token)
            elif tag.startswith('JJ'):
                properties.append(token)
            elif tag.startswith('RB'):
                actions.append(token)

        return objects, verbs, properties, actions

    def dependency_parsing(query):
        try:
            nlp = spacy.load("en_core_web_sm")
            doc = nlp(query)
            
            # Extracting object relationships
            relationships = []
            for token in doc:
                if token.dep_ in ["nsubj", "dobj"]:
                    relationships.append((token.text, token.head.text, token.dep_))
            
            return relationships
    
        except Exception as e:
            print("An error occurred during dependency parsing:", str(e))
            return None
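
For example, running the dependency parser on a simple sentence pulls out the subject and direct object relations (assuming the en_core_web_sm model is installed):

    # Example run (requires: python -m spacy download en_core_web_sm)
    rels = dependency_parsing("The cat chased the mouse")
    # Typically something like [('cat', 'chased', 'nsubj'), ('mouse', 'chased', 'dobj')]
    print(rels)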

    def parse_user_location(sentence):
        # Tokenize the sentence
        tokens = word_tokenize(sentence)
        
        # Lemmatize the tokens
        lemmatizer = WordNetLemmatizer()
        lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
        
        # Identify locations using WordNet
        locations = []
        for token in lemmatized_tokens:
            synsets = wordnet.synsets(token)
            for synset in synsets:
                if synset.pos() == 'n':  # Consider only nouns
                    locations.append(synset.lemmas()[0].name())

        return locations

    # Subroutine to identify and react to plurals
    def handle_plural(input_text):
        # Tokenize input text
        tokens = input_text.split()
        
        # Lemmatize each token to its singular (noun) form
        lemmatizer = WordNetLemmatizer()
        lemmatized_tokens = [lemmatizer.lemmatize(token, pos='n') for token in tokens]
        
        # A token is treated as plural if lemmatizing to the noun base form changes it
        plural_tokens = [token for token, lemma in zip(tokens, lemmatized_tokens) if token != lemma]
        
        # React to plurals
        if plural_tokens:
            response = "It seems like you're talking about multiple things. Can you please provide more context?"
        else:
            response = "I'm sorry, I couldn't identify any plurals in your input."
        
        return response
        


    def correct_verb_tenses(input_sentence):
        # Tokenize the input sentence
        tokens = word_tokenize(input_sentence)
        
        # Perform part-of-speech tagging
        tagged_tokens = pos_tag(tokens)
        
        # Identify verbs and their tenses
        verbs = [(token, tag) for token, tag in tagged_tokens if tag.startswith('VB')]
        
        # Correct verb tenses
        corrected_sentence = input_sentence
        irregular_verbs = {
            'be': {'VBD': 'was/were', 'VBG': 'being', 'VBN': 'been'},
            'have': {'VBD': 'had', 'VBG': 'having', 'VBN': 'had'},
            'do': {'VBD': 'did', 'VBG': 'doing', 'VBN': 'done'},
            'say': {'VBD': 'said', 'VBG': 'saying', 'VBN': 'said'},
            'go': {'VBD': 'went', 'VBG': 'going', 'VBN': 'gone'},
            'get': {'VBD': 'got', 'VBG': 'getting', 'VBN': 'got/gotten'},
            'make': {'VBD': 'made', 'VBG': 'making', 'VBN': 'made'},
            'know': {'VBD': 'knew', 'VBG': 'knowing', 'VBN': 'known'},
            'think': {'VBD': 'thought', 'VBG': 'thinking', 'VBN': 'thought'},
            'take': {'VBD': 'took', 'VBG': 'taking', 'VBN': 'taken'},
            'see': {'VBD': 'saw', 'VBG': 'seeing', 'VBN': 'seen'},
            'come': {'VBD': 'came', 'VBG': 'coming', 'VBN': 'come'},
            'want': {'VBD': 'wanted', 'VBG': 'wanting', 'VBN': 'wanted'},
            'use': {'VBD': 'used', 'VBG': 'using', 'VBN': 'used'},
            'find': {'VBD': 'found', 'VBG': 'finding', 'VBN': 'found'},
            'give': {'VBD': 'gave', 'VBG': 'giving', 'VBN': 'given'},
            'tell': {'VBD': 'told', 'VBG': 'telling', 'VBN': 'told'},
            'work': {'VBD': 'worked', 'VBG': 'working', 'VBN': 'worked'},
            'call': {'VBD': 'called', 'VBG': 'calling', 'VBN': 'called'},
            'try': {'VBD': 'tried', 'VBG': 'trying', 'VBN': 'tried'}
        }

        
        for verb, tag in verbs:
            if verb in irregular_verbs:
                corrected_verb = irregular_verbs[verb].get(tag, verb)
            else:
                corrected_verb = nltk.WordNetLemmatizer().lemmatize(verb, 'v')
            corrected_sentence = corrected_sentence.replace(verb, corrected_verb)
        
        return corrected_sentence

    def correct_misspelling(response):
        # Known acronym library
        known_acronyms = {
            "LOL": "laugh out loud",
            "BRB": "be right back",
            "OMG": "oh my god",
            # Add more known acronyms here
        }
        
        # Check if response contains any known acronyms
        for acronym in known_acronyms:
            if acronym in response:
                response = response.replace(acronym, known_acronyms[acronym])
        
        # Split response into words
        words = response.split()
        
        # Iterate through each word
        for i in range(len(words)):
            word = words[i]
            
            # Check if word is misspelled
            if word not in known_acronyms.values() and word.lower() not in known_acronyms.values():
                # Find the closest matching word in known acronyms
                closest_match = difflib.get_close_matches(word, known_acronyms.values(), n=1)
                
                # If a close match is found, replace the misspelled word
                if closest_match:
                    words[i] = closest_match[0]
                else:
                    # If no close match is found, ask the user to explain the word
                    explanation = input(f"Could you please explain the meaning of '{word}'? ")
                    words[i] = explanation
        
        # Join the corrected words back into a response
        corrected_response = " ".join(words)
        
        return corrected_response
    
    def track_and_correct(topic, subtopic, subject):
        global current_topic, current_subtopic, current_subject
        
        if topic != current_topic:
            # Topic has changed, update current_topic and reset subtopic and subject
            current_topic = topic
            current_subtopic = None
            current_subject = None
        
        if subtopic != current_subtopic:
            # Subtopic has changed, update current_subtopic and reset subject
            current_subtopic = subtopic
            current_subject = None
        
        if subject != current_subject:
            # Subject has changed, update current_subject
            current_subject = subject

    #Routine to learn from playing text adventure games
    def generate_text_game_response(self, user_input):
        try:
            # Check if the user input relates to a text adventure game
            if is_game_input(user_input):
                # Invoke the game-specific modules to process the game input
                game_response = process_game_input(user_input)
                return game_response
            else:
                # Use AIML to generate a response for non-game-related input
                aiml_response = self.kernel.respond(user_input)
                return aiml_response
        except Exception as e:
            # Handle any exceptions that occur during the execution of the code
            print(f"An error occurred: {str(e)}")
            # Return a default error message or perform any necessary error handling
            return "Sorry, an error occurred. Please try again later."

        
    def check_best_use_case(user_input):
        # Get all the AIML categories
        categories = kernel._brain._categories

        # Initialize variables to store the best use case
        best_use_case = None
        best_match_score = 0

        # Iterate through all the categories
        for category in categories:
            # Get the pattern and template of the category
            pattern = category[0]
            template = category[1]

            # Check if the pattern matches the user input
            match_score = kernel._brain._compare_strings(pattern, user_input)

            # Update the best use case if the match score is higher
            if match_score > best_match_score:
                best_use_case = template
                best_match_score = match_score

        # Return the best use case
        return best_use_case
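
That last one reaches into the kernel's private internals, which may not exist in every python-aiml version. A more conservative sketch that sticks to the documented learn/respond calls would look something like this (the file name is just a placeholder):

    # Sticks to the public python-aiml API; 'std-startup.xml' is only a placeholder name
    kernel = aiml.Kernel()
    kernel.learn("std-startup.xml")
    reply = kernel.respond("HELLO")
    print(reply if reply else "No matching category found")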


Comments

Winky


I also have code elsewhere in my whole program where she can keep track of a topic, then she looks for or starts a conversation about a subtopic, and then can talk about the subject as a whole.

