PDF Read In

LDA Analysis

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 24 16:04:44 2019

@author: rj
"""
#%% IMPORTS
#import xlrd
import spacy
import pandas as pd
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer
import random
import gensim
from gensim import corpora
import pickle
import pyLDAvis.gensim

# Build the blank English pipeline that tokenize() calls as `parser`.
# The earlier `spacy.load('en')` call was removed: its return value was
# discarded, so it only cost start-up time, and the 'en' shortcut name is
# not accepted by spaCy v3+.
from spacy.lang.en import English
parser = English()

# Make sure the NLTK resources used below are available locally, then build
# the English stop-word set consulted by prepare_text_for_lda().
nltk.download('stopwords')
nltk.download('wordnet')
en_stop = set(nltk.corpus.stopwords.words('english'))

#%% INPUT DATA

# Filepath to the IGEMS data spreadsheet; edit to match the local machine.
loc = '/Users/rj/Documents/Hacking_4_Defense/python_code/Example IGEMS data.xlsx'

# Earlier xlrd-based reader, kept for reference only:
#   wb = xlrd.open_workbook(loc)
#   sheet = wb.sheet_by_index(0)

data = pd.read_excel(loc)
# Restrict to the five columns of interest and drop every row containing NaN.
df = pd.DataFrame(
    data,
    columns=['Index', 'Name', 'Organization', 'Type', 'Comment'],
).dropna()
print(df)


#%% FUNCTIONS
def tokenize(text):
    """Tokenize *text* with the module-level spaCy ``parser``.

    Whitespace-only tokens are skipped. URL-like tokens are replaced by the
    placeholder ``'URL'`` and tokens beginning with ``@`` by
    ``'SCREEN_NAME'``; every other token is returned in lowercase.

    Returns a list of strings.
    """
    def normalize(token):
        # Map a single spaCy token to the string stored for LDA.
        if token.like_url:
            return 'URL'
        if token.orth_.startswith('@'):
            return 'SCREEN_NAME'
        return token.lower_

    return [
        normalize(token)
        for token in parser(text)
        if not token.orth_.isspace()
    ]

def get_lemma(word):
    """Return the base form WordNet's ``morphy`` finds for *word*.

    When ``morphy`` returns ``None`` the word is handed back unchanged.
    """
    base_form = wn.morphy(word)
    return word if base_form is None else base_form

def get_lemma2(word):
    """Lemmatize *word* with a newly constructed NLTK ``WordNetLemmatizer``."""
    lemmatizer = WordNetLemmatizer()
    return lemmatizer.lemmatize(word)

def prepare_text_for_lda(text):
    """Turn raw *text* into the list of tokens fed to the LDA model.

    The text is tokenized with ``tokenize``; tokens of four characters or
    fewer and tokens found in ``en_stop`` are discarded, and each remaining
    token is passed through ``get_lemma``.
    """
    return [
        get_lemma(candidate)
        for candidate in tokenize(text)
        if len(candidate) > 4 and candidate not in en_stop
    ]

#%% PROCESS DATA
#Completely ripped from:
#https://towardsdatascience.com/topic-modelling-in-python-with-nltk-and-gensim-4ef03213cd21    
# Tokenized documents are accumulated here further down.
text_data = []

# Split the frame into one sub-frame per distinct value of the 'Type' column.
UniqueTypes = df.Type.unique()
DataFrameDict = {key: df[:][df.Type == key] for key in UniqueTypes}

# Pull out the four categories by name; a category absent from the data
# raises KeyError here.
deficiency_df, recommendations_df, grade_summary_df, strength_df = (
    pd.DataFrame.from_dict(DataFrameDict[label])
    for label in (
        'Deficiency',
        'Recommended Improvement Area',
        'Grade/Summary',
        'Strength',
    )
)


"""
# sort the dataframe
df.sort_values(by='Type', axis=1, inplace=True)

# set the index to be this and don't drop
df.set_index(keys=['Type'], drop=False,inplace=True)
# get a list of names
types=df['Type'].unique().tolist()
# now we can perform a lookup on a 'view' of the dataframe
deficieny = df.loc[df.type=='deficiency']
# now you can query all 'joes'
"""




# Convert every comment in recommendations_df into a token list (one document
# per row). The tutorial's `random.random() > .99` sampling guard stays
# disabled, so each document is both printed and kept.
for comment in recommendations_df['Comment']:
    tokens = prepare_text_for_lda(comment)
    print(tokens)
    text_data.append(tokens)

# Map tokens to integer ids and encode each document as a bag of words.
dictionary = corpora.Dictionary(text_data)
corpus = [dictionary.doc2bow(text) for text in text_data]

# Persist both artefacts. The context manager guarantees the pickle file is
# flushed and closed before anything reads it back.
with open('corpus.pkl', 'wb') as corpus_file:
    pickle.dump(corpus, corpus_file)
dictionary.save('dictionary.gensim')




"""
NUM_TOPICS = 8
ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics = NUM_TOPICS, id2word=dictionary, passes=15)
ldamodel.save('model8.gensim')
topics = ldamodel.print_topics(num_words=4)
for topic in topics:
    print(topic)
"""
  

# Train a 5-topic LDA model on the bag-of-words corpus, save it, and print
# the four highest-weighted words of every topic.
NUM_TOPICS = 5
ldamodel = gensim.models.ldamodel.LdaModel(corpus, num_topics=NUM_TOPICS, id2word=dictionary, passes=15)
ldamodel.save('model5.gensim')
topics = ldamodel.print_topics(num_words=4)
for topic in topics:
    print(topic)


# Reload the persisted dictionary, corpus and model from disk and hand them
# to pyLDAvis for display.
dictionary = gensim.corpora.Dictionary.load('dictionary.gensim')
# corpus.pkl is written by this same script; the context manager closes the
# handle instead of leaking it. Do not point this at untrusted pickle files.
with open('corpus.pkl', 'rb') as corpus_file:
    corpus = pickle.load(corpus_file)
lda = gensim.models.ldamodel.LdaModel.load('model5.gensim')
lda_display = pyLDAvis.gensim.prepare(lda, corpus, dictionary, sort_topics=False)
pyLDAvis.display(lda_display)
[nltk_data] Downloading package stopwords to /Users/rj/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/rj/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
         Index                                   Name Organization  \
1      1.4.2.2                          Right Quality      anon_S3   
2      1.2.3.3                                   Unit      anon_S8   
3      1.2.3.3                                   Unit      anon_S8   
4      1.3.2.1                     Key Work Processes      anon_S8   
5        1.4.3  Mission-Assurance Command and Control     anon_S14   
6      1.4.1.3                         Right Quantity     anon_S20   
7    1.3.3.1.1                Self-Assessment Program     anon_S23   
8      1.1.2.1                               Manpower     anon_S26   
10   1.3.3.1.1                Self-Assessment Program     anon_S36   
11       1.1.1                               Adequacy     anon_S40   
12     1.2.3.1                             Individual     anon_S45   
13   1.3.3.2.3                              Relevance     anon_S48   
14     1.3.1.2                     Strategic Planning     anon_S51   
15     1.4.3.1     Warfighter or USAF CC Satisfaction     anon_S55   
16       1.4.1                    Primary Mission (s)     anon_S61   
17     1.4.1.2                          Right Quality     anon_S66   
18     1.2.3.3                                   Unit     anon_S71   
19     1.2.3.3                                   Unit     anon_S71   
20     1.2.1.3                                 Intent     anon_S76   
21     1.3.2.2                        Risk Management     anon_S76   
22     1.1.1.2                                  Funds     anon_S81   
23     1.3.2.3   Commitment to Continuous Improvement     anon_S81   
24     1.4.1.1     Warfighter or USAF CC Satisfaction     anon_S85   
25     1.2.1.1                                 System     anon_S88   
29     1.1.2.3                              Equipment    anon_S104   
30     1.3.2.3   Commitment to Continuous Improvement    anon_S104   
31     1.2.2.1                             Compliance    anon_S109   
32     1.1.2.6                          Airmen's Time    anon_S111   
34     1.1.2.5                               Guidance    anon_S119   
35     1.2.3.3                                   Unit    anon_S124   
..         ...                                    ...          ...   
67     1.1.1.5                               Guidance    anon_S229   
70     1.1.2.5                               Guidance    anon_S247   
71     1.3.2.2                        Risk Management    anon_S252   
72     1.1.2.3                              Equipment    anon_S255   
73     1.2.2.1                             Compliance    anon_S255   
75     1.2.1.3                                 Intent    anon_S261   
76     1.3.2.1                     Key Work Processes    anon_S264   
77     1.3.4.1                        Data Collection    anon_S264   
78     1.3.1.2                     Strategic Planning    anon_S274   
80     1.4.1.2                          Right Quality    anon_S283   
81       1.3.3         CC's Inspection Program (CCIP)    anon_S287   
83     1.1.2.4             Facilities and Environment    anon_S297   
84       1.4.1                    Primary Mission (s)    anon_S303   
85     1.1.2.1                               Manpower    anon_S303   
86     1.1.2.4             Facilities and Environment    anon_S303   
87     1.1.2.3                              Equipment    anon_S312   
88     1.4.2.2                          Right Quality    anon_S316   
89     1.1.2.3                              Equipment    anon_S321   
90     1.2.2.5                    Attention to Detail    anon_S321   
91       1.2.3                               Training    anon_S321   
92     1.3.1.2                     Strategic Planning    anon_S321   
93     1.1.1.2                                  Funds    anon_S325   
94       1.3.3         CC's Inspection Program (CCIP)    anon_S325   
95     1.1.2.3                              Equipment    anon_S331   
96       1.3.2                     Process Operations    anon_S331   
97       1.3.2                     Process Operations    anon_S331   
98       1.3.3         CC's Inspection Program (CCIP)    anon_S331   
99     1.1.2.3                              Equipment    anon_S339   
101    1.1.2.4             Facilities and Environment    anon_S349   
102      1.2.2                             Discipline    anon_S349   

                             Type  \
1    Recommended Improvement Area   
2                      Deficiency   
3                      Deficiency   
4                      Deficiency   
5                        Strength   
6                      Deficiency   
7    Recommended Improvement Area   
8    Recommended Improvement Area   
10                     Deficiency   
11                  Grade/Summary   
12                     Deficiency   
13   Recommended Improvement Area   
14   Recommended Improvement Area   
15                  Grade/Summary   
16                     Deficiency   
17                     Deficiency   
18                     Deficiency   
19                     Deficiency   
20   Recommended Improvement Area   
21                     Deficiency   
22   Recommended Improvement Area   
23   Recommended Improvement Area   
24                  Grade/Summary   
25                       Strength   
29                     Deficiency   
30   Recommended Improvement Area   
31                     Deficiency   
32                  Grade/Summary   
34                     Deficiency   
35                     Deficiency   
..                            ...   
67                     Deficiency   
70                     Deficiency   
71                     Deficiency   
72                       Strength   
73                     Deficiency   
75                  Grade/Summary   
76                     Deficiency   
77                     Deficiency   
78                       Strength   
80                     Deficiency   
81   Recommended Improvement Area   
83                     Deficiency   
84                     Deficiency   
85   Recommended Improvement Area   
86                     Deficiency   
87                     Deficiency   
88                     Deficiency   
89                     Deficiency   
90                       Strength   
91   Recommended Improvement Area   
92                     Deficiency   
93                  Grade/Summary   
94                  Grade/Summary   
95                     Deficiency   
96   Recommended Improvement Area   
97                     Deficiency   
98                     Deficiency   
99   Recommended Improvement Area   
101                    Deficiency   
102                    Deficiency   

                                               Comment  
1    Unit Readiness Program -  Recommend the unit a...  
2    The Base Training Manager, Unit Training Manag...  
3    The Airfield Management Training NCOIC did not...  
4    Chief did not document numerous minor discrepa...  
5    Contracted Readiness and Emergency Management ...  
6    Security Forces Operations - The Security Forc...  
7    Self-Assessment Program - Recommend review and...  
8    Separation of Duties - Recommend the Communica...  
10   Self-Assessment Program - The Mission Support ...  
11   HHQ provided adequate resources enabling the w...  
12   The did not ensure a qualified person was assi...  
13   Business Rules to Support SAP - Recommend the ...  
14   IGQ and IGI Interface - Recommend IGI and IGQ ...  
15   Commanders at all levels expressed satisfactio...  
16   Intelligence Flight did not provide current in...  
17   Arming and Use of Force Program - The Security...  
18   Electrostatic Discharge Program - The electros...  
19   Plans Scheduling and Documentation - The Plans...  
20   Family OPSEC Awareness Outreach Program - Reco...  
21   Munitions Account Management - The Munitions A...  
22   Weapons Load Training Program - Recommend revi...  
23   Quality Assurance Program - Recommend the qual...  
24   Formal feedback processes effectively gauged f...  
25   Leadership Communication - The level of commun...  
29   Corrosion Control and Prevention Program - The...  
30   OPSEC Program Management and Oversight - Recom...  
31   Unit Training Manager did not conduct a compre...  
32   The operations tempo within the was extremely ...  
34   The Base Records Manager did not ensure comman...  
35   did not ensure unit members received records m...  
..                                                 ...  
67   Mission Directive - The Mission Directive, ACC...  
70   Materiel Control bench stock program required ...  
71   Respiratory Protection Program - The low obser...  
72   Equipment Management - Customer Service and Eq...  
73   Intelligence Support to Force Protection - The...  
75   The Wg/CC posted a Command Philosophy on their...  
76   Maintenance Operations Section Chief did not e...  
77   did not ensure Job Control Numbers (JCN) great...  
78   The cultivated a stellar patient safety progra...  
80   CRITIC Program Management - Critical Informati...  
81   should ensure exercise related materials are m...  
83   The EMS Coordinator did not properly document ...  
84   Family Advocacy Program did not always meet re...  
85   Recommend multiple Installation Personnel Read...  
86   Force Support Squadron's paintball area did no...  
87   Flight Service did not ensure all research doc...  
88   The Chief of Aerospace Medicine (SGP) did not ...  
89   The Vehicle Management Materiel Control NCOIC ...  
90   Financial Operations personnel developed and e...  
91   Recommend personnel improve Unit Deployment Ma...  
92   The SERE Training Superintendent did not ensur...  
93   The Continuing Resolution Appropriation and bu...  
94   The IGI staff was composed of two ART position...  
95   Shop Supervisor did not ensure fire alarm smok...  
96   Commander should ensure Test Measurement and D...  
97   During the Deployment of Forces exercise in No...  
98   During WIT-led exercise (Disease Containment P...  
99   Equipment Account Management - Recommend the A...  
101  LG Fuels Management Workplace Supervisor did n...  
102  SVFL Lodging Manager did not execute base lodg...  

[85 rows x 5 columns]
['training', 'manager', 'training', 'manager', 'additional', 'training', 'manager', 'supervisor', 'implement', 'account', 'management', 'procedure', 'monitoring', 'role', 'assign', 'individual', 'training', 'business', 'user', 'specifically', 'member', 'assign', 'miss', 'require', 'authorization', 'documentation', 'additionally', 'member', 'miss', 'authorization', 'documentation', 'basic', 'trainee', 'trainer', 'certifier', 'flight', 'chief', 'role', 'relevant']
['airfield', 'management', 'training', 'ncoic', 'conduct', 'document', 'quarterly', 'inspection', 'training', 'record', 'specifically', 'training', 'record', 'sample', 'quarterly', 'inspection', 'complete', 'relevant']
['chief', 'document', 'numerous', 'minor', 'discrepancy', 'member', 'flight', 'evaluation', 'folder', 'minor', 'discrepancy', 'consequently', 'annual', 'review', 'adequately', 'accomplish']
['security', 'force', 'operations', 'security', 'force', 'flight', 'operations', 'require', 'attention', 'security', 'force', 'response', 'force', 'post', 'accordance', 'integrate', 'defense', 'internal', 'security', 'response', 'dedicate', 'interior', 'assign', 'restrict', 'conduct', 'integrate', 'defense', 'management', 'process', 'idrmp', 'coordination', 'integrate', 'defense', 'council', 'integrate', 'defense', 'working', 'group', 'determine', 'appropriate', 'mitigation', 'measure', 'reduce', 'overall', 'assign', 'asset', 'annual', 'idrmps', 'identify', 'vertical', 'inspection', 'failure', 'provide', 'proper', 'security', 'accordance', 'force', 'instructions', 'could', 'inadequate', 'protection', 'assign', 'asset']
['assessment', 'program', 'mission', 'support', 'group', 'assign', 'squadron', 'fail', 'establish', 'assessment', 'program', 'direct', 'prescribe', 'guidance', 'robust', 'commander', 'inspection', 'program', 'find', 'deficiency', 'improve', 'mission', 'readiness', 'furthermore', 'establish', 'identify', 'cause', 'deficiency', 'enable', 'sharing', 'practice', 'organization', 'without', 'commander', 'ability', 'effectively', 'inspect', 'unit', 'subordinate', 'ensure', 'maximum', 'effectiveness', 'efficiency', 'economy', 'discipline', 'force', 'maintain']
['ensure', 'qualify', 'person', 'assign', 'position', 'airfield', 'manager']
['intelligence', 'flight', 'provide', 'current', 'intelligence', 'briefs', 'threat', 'briefs', 'scenario', 'input', 'mission', 'plan', 'mission', 'briefs', 'mission', 'debrief', 'kc-135', 'mission']
['arming', 'force', 'program', 'security', 'force', 'arming', 'force', 'program', 'require', 'immediate', 'attention', 'commander', 'fail', 'develop', 'authority', 'firearm', 'roster', 'personnel', 'carry', 'firearm', 'commander', 'fail', 'personally', 'interview', 'newly', 'assign', 'personnel', 'prior', 'assigning', 'duty', 'require', 'firearm', 'commander', 'conduct', 'weekly', 'review', 'meeting', 'monthly', 'review', 'meeting', 'training', 'assembly', 'category', 'reserve', 'component', 'member', 'fail', 'maintain', 'arming', 'force', 'monitor', 'appointment', 'letter', 'servicing', 'armory', 'fail', 'conduct', 'personnel', 'reliability', 'assurance', 'program', 'training', 'testing', 'assign', 'personnel', 'coordinate', 'installation', 'hospital', 'ensure', 'immediate', 'notification', 'patient', 'commander', 'representative', 'necessary', 'treat', 'provider', 'identify', 'condition', 'capable', 'impair', 'security', 'force', 'member', 'reliability', 'perform', 'arm', 'duty', 'exercise', 'annually', 'exercise', 'include', 'participation', 'military', 'agency', 'expect', 'assist', 'commander', 'determine', 'member', 'suitability', 'firearm', 'execution', 'official', 'duty', 'fail', 'ensure', 'member', 'arm', 'work', 'previous', 'midnight', 'shift', 'current', 'sign', 'qualification', 'posse', 'firearm', 'ammunition', 'weapon', 'qualification', 'lethal', 'weapon', 'qualification', 'member', 'authority', 'expire', 'since', 'member', 'weapon', 'lethal', 'weapon', 'qualification', 'expire', 'fail', 'small', 'receipt', 'verification', 'authority', 'firearm', 'procedure', 'process', 'place', 'armorer', 'verify', 'person', 'authorization', 'information', 'locate', 'armory', 'roster', 'roster', 'qualification', 'mechanism', 'lethal', 'weapon', 'available', 'validation', 'within', 'armory', 'issue', 'pepper', 'spray', 'special', 'certification', 'recur', 'training', 'record', 'sample', 'reflect', 'occurrence', 'arming', 'force', 'training', 'issue', 'indicate', 
'serious', 'program', 'oversight', 'could', 'potentially', 'commander', 'unintentionally', 'accept', 'unqualified', 'personnel', 'arming', 'weapon', 'lethal', 'weapon', 'additionally', 'training', 'documentation', 'could', 'result', 'potential', 'legal', 'issue', 'force', 'employ', 'personnel']
['electrostatic', 'discharge', 'program', 'electrostatic', 'discharge', 'program', 'avionics', 'section', 'technical', 'order', 'requirement', 'annual', 'awareness', 'prevention', 'training', 'familiarize', 'airman', 'applicable', 'technical', 'section', 'establish', 'administer', 'annual', 'comprehension', 'challenge', 'effective', 'program', 'essential', 'protect', 'equipment', 'aircraft', 'parts', 'inadvertently', 'damage', 'directly', 'affect', 'aircraft', 'readiness', 'availability']
['plan', 'scheduling', 'documentation', 'plan', 'scheduling', 'documentation', 'section', 'document', 'write', 'guidance', 'training', 'master', 'listing', 'management', 'equipment', 'standard', 'decentralize', 'section', 'without', 'properly', 'train', 'review', 'entire', 'maintenance', 'complex', 'could', 'operate', 'incorrect', 'change', 'cycle', 'result', 'mismanagement', 'critical', 'inspection', 'equipment', 'commodity', 'negatively', 'effect', 'safety', 'flight', 'could', 'potentially', 'aircraft']
['munition', 'account', 'management', 'munition', 'accountable', 'system', 'officer', 'maintain', 'combat', 'ammunition', 'system', 'segregation', 'duty', 'multiple', 'occasions', 'fiscal', 'process', 'munition', 'transactions', 'validate', 'transactions', 'relate', 'specific', 'transactions', 'include', 'munition', 'movement', 'custody', 'account', 'expenditure', 'inventory', 'process', 'validate', 'daily', 'transaction', 'history', 'report', 'maintain', 'segregation', 'ensure', 'personnel', 'responsible', 'processing', 'munition', 'transactions', 'responsible', 'approving']
['corrosion', 'control', 'prevention', 'program', 'aerospace', 'ground', 'equipment', 'section', 'properly', 'manage', 'equipment', 'corrosion', 'control', 'program', 'locally', 'develop', 'product', 'track', 'equipment', 'paint', 'status', 'however', 'sample', 'take', 'capstone', 'assets', 'overdue', 'annual', 'corrosion', 'inspection', 'active', 'corrosion', 'control', 'program', 'require', 'ensure', 'durability', 'support', 'equipment', 'assist', 'scheduling', 'prioritize', 'assets', 'worst', 'first', 'basis', 'appropriate', 'corrosion', 'facility']
['training', 'manager', 'conduct', 'comprehensive', 'trainee', 'orientation', 'trainee', 'initially', 'entering', 'within', 'assignment', 'document', 'completion', 'approve', 'automate', 'system', 'specifically', 'training', 'record', 'review', 'requirement']
['record', 'manager', 'ensure', 'commander', 'receive', 'training', 'regard', 'record', 'management', 'program']
['ensure', 'member', 'receive', 'record', 'management', 'training']
['personally', 'identifiable', 'information', 'disclosure', 'safeguard', 'personally', 'identifiable', 'information', 'share', 'network', 'drive', 'folder', 'contain', 'award', 'package', 'accessible', 'personnel', 'without', 'manage', 'record', 'share', 'network', 'drive', 'involve', 'participation', 'network', 'control', 'center', 'staff', 'record', 'professional', 'commander', 'user', 'share', 'responsibility', 'protect', 'record', 'content', 'comply', 'privacy', 'requirement', 'user', 'store', 'manage', 'protect', 'record', 'consistent', 'organizational', 'requirement', 'procedure', 'accord', 'afman', 'force', 'policy', 'require', 'evaluation', 'information', 'system', 'owner', 'personal', 'collect', 'maintain', 'store', 'electronic', 'system', 'determine', 'impact', 'unauthorized', 'disclosure', 'active', 'application', 'guidance', 'help', 'prevent', 'inadvertent', 'access', 'protect', 'information']
['equipment', 'custodian', 'always', 'conduct', 'floor', 'floor', 'inventory']
['office', 'visit', 'suspense', 'expectation', 'often', 'provide', 'level', 'commensurate', 'developmental', 'level', 'recruiter', 'result', 'drive', 'expectation', 'issue', 'inexperienced', 'recruiter', 'without', 'specific', 'action', 'base', 'expectation', 'achieve', 'result', 'office', 'visit', 'often', 'ass', 'effectiveness', 'seven', 'mission', 'critical', 'task', 'focus', 'compliance', 'effectiveness', 'area', 'amount', 'expectation', 'adjust', 'respond', 'inadequate', 'production']
['civil', 'engineer', 'operations', 'flight', 'follow', 'prioritization', 'system', 'guidance']
['cybersecurity', 'workforce', 'improvement', 'program', 'require', 'attention', 'privilege', 'access', 'user', 'maintain', 'cybersecurity', 'baseline', 'certification', 'privilege', 'access', 'user', 'without', 'cybersecurity', 'baseline', 'certification', 'properly', 'waive']
['flight', 'leadership', 'ensure', 'flight', 'member', 'train', 'certify', 'operate', 'response', 'vehicle', 'flight']
['medical', 'readiness', 'officer', 'ensure', 'comprehensive', 'medical', 'readiness', 'program', 'training', 'analysis', 'conduct', 'annually', 'document', 'afscs']
['combat', 'ncoic', 'review', 'update', 'annually']
['weapon', 'safety', 'manager', 'ensure', 'assessment', 'accomplish', 'exercise', 'training', 'involve', 'explosive']
['small', 'range', 'manager', 'prevent', 'improper', 'disposal', 'hazardous', 'waste', 'small', 'range']
['provide', 'employee', 'initial', 'ethics', 'orientation', 'development', 'training', 'flight', 'member', 'within', 'arrival']
['commander', 'ensure', 'accessible', 'individual', 'whose', 'official', 'duty', 'provide', 'valid']
['annual', 'review', 'special', 'security', 'officer', 'ensure', 'commander', 'conduct', 'document', 'annual', 'review', 'sensitive', 'compartmented', 'information', 'facility', 'standard', 'operate', 'procedure', 'conducting', 'annual', 'review', 'ensure', 'current', 'security', 'procedure', 'place']
['mission', 'directive', 'mission', 'directive', 'accmd', 'volume', 'update', 'mission', 'change', 'least', 'every', 'years', 'recent', 'publish', 'mission', 'directive', 'date', 'almost', 'years', 'minimum', 'require', 'review', 'significant', 'additional', 'mission', 'requirement', 'levy', 'since', 'require', 'mission', 'directive', 'update', 'ensure', 'mission', 'directive', 'current', 'ensure', 'manpower', 'resource', 'available', 'fully', 'execute', 'assign', 'mission', 'accurately', 'reflect', 'scale', 'mission']
['materiel', 'control', 'bench', 'stock', 'program', 'require', 'attention', 'bench', 'stock', 'item', 'approve', 'vehicle', 'fleet', 'manager', 'vehicle', 'management', 'superintendent', 'working', 'stock', 'item', 'approve', 'working', 'stock', 'contain', 'authorize', 'quantity', 'current', 'listing', 'match', 'inventory']
['respiratory', 'protection', 'program', 'observables', 'section', 'respiratory', 'protection', 'program', 'review', 'approve', 'annually', 'bioenvironmental', 'engineering', 'require', 'review', 'approve', 'complete', 'april', 'contaminate', 'cartridge', 'store', 'respirator', 'change', 'schedule', 'always', 'follow', 'require', 'approve', 'cartridge', 'times', 'track', 'document', '30-day', 'inspection', 'respirator', 'complete', 'document', 'inspection', 'maintenance', 'record', 'maintain', 'effective', 'ensure', 'member', 'aware', 'storage', 'requirement', 'respirator', 'protect', 'member', 'exposure', 'hazardous', 'material']
['intelligence', 'support', 'force', 'protection', 'intelligence', 'support', 'force', 'protection', 'appointment', 'letter', 'current', 'appointment', 'letter', 'draft', 'sign', 'current', 'commander', 'appoint', 'member', 'complete', 'require', 'training', 'terrorism', 'level', 'training', 'available', 'force', 'protection', 'intelligence', 'formal', 'training', 'properly', 'train', 'personnel', 'directly', 'impact', 'force', 'protection', 'relate', 'intelligence', 'function', 'deployment', 'preparation', 'conducting', 'focus', 'predictive', 'analysis', 'support', 'threat', 'working', 'group', 'force', 'projection', 'working', 'group', 'sessions']
['maintenance', 'operations', 'section', 'chief', 'ensure', 'proper', 'oversight', 'scheduling', 'process', 'specifically', 'plan', 'scheduling', 'documentation', 'section', 'perform', 'master', 'listing', 'reconcile', 'applicable', 'commodity', 'technical', 'order', 'ensure', 'accuracy', 'currency', 'change', 'maintenance', 'information', 'system', 'receipt', 'force', 'instruction', 'change', 'frequency', 'establish', 'standard', 'munition', 'maintenance', 'scheduling', 'effectiveness', 'program', 'monthly', 'review', 'meeting', 'own', 'agency', 'within', 'scheduling', 'communicator', 'assess', 'within', 'publication', 'within', 'management', 'internal', 'control', 'toolset']
['ensure', 'control', 'numbers', 'greater', 'calendar', 'reschedule', 'defer', 'event', 'beyond', 'schedule', 'start', 'specifically', 'assign', 'deferment', '3,158', 'event']
['critic', 'program', 'management', 'critical', 'information', 'critic', 'program', 'manager', 'ensure', 'subordinate', 'unit', 'develop', 'maintain', 'critic', 'program', 'accordance', 'governing', 'document', 'sigint', 'elements', 'production', 'function', 'rapidly', 'recognize', 'report', 'conditions', 'meeting', 'critic', 'criterion', 'participate', 'critic', 'evaluation', 'program', 'exception', 'waiver', 'approve', 'publish', 'within', 'unit', 'publish', 'unite', 'state', 'signal', 'intelligence', 'directive', 'ussid', 'effective', 'ensure', 'preparation', 'world', 'critic', 'situation', 'failure', 'establish', 'critic', 'program', 'could', 'result', 'latent', 'reporting', 'critical', 'intelligence', 'information']
['coordinator', 'properly', 'document', 'result', 'internal', 'environmental', 'compliance', 'stage', 'inspection', 'edash', 'require', 'specifically', 'event', 'finding', 'tracker', 'findings', 'program', 'assess', 'november', 'however', 'discrepancy', 'report', 'compliant']
['family', 'advocacy', 'program', 'always', 'require', 'timeline', 'provide', 'treatment', 'services', 'refer', 'patient']
['force', 'support', 'squadron', 'paintball', 'utilize', 'protective', 'vegetative', 'cover', 'control', 'stabilize', 'site', 'avoid', 'silt', 'stream']
['flight', 'service', 'ensure', 'research', 'documentation', 'found', 'transactions', 'specifically', 'supply', 'surveillance', 'report', 'd20)was', 'file', 'documentation', 'transaction', 'process']
['chief', 'aerospace', 'medicine', 'manage', 'deployment', 'relate', 'health', 'assessment', 'program', 'specifically', 'ensure', 'complete', 'within', 'require', 'timeframes', 'senior', 'leadership', 'training', 'conduct', 'annually', 'review', 'feedback', 'provide', 'primary', 'team']
['vehicle', 'management', 'materiel', 'control', 'ncoic', 'ensure', 'tool', 'issue', 'consolidate', 'replace', 'personal', 'correspond', 'individual', 'af1297']
['training', 'superintendent', 'ensure', 'assign', 'specialist', 'conduct', 'mandatory', 'review', 'task', 'oplans', 'personnel', 'recovery', 'annexe', 'support', 'theater', 'campaign', 'plan']
['supervisor', 'ensure', 'alarm', 'smoke', 'detection', 'testing', 'equipment', 'calibration', 'facility', 'annual', 'calibration', 'digital', 'manometer', 'remove', 'service', 'calibration', 'expire', 'digital', 'manometer', 'receive', 'annual', 'calibration', 'expiration', 'assessment', 'communicator', 'associate', 'deficiency']
['deployment', 'force', 'exercise', 'november', 'installation', 'deployment', 'officer', 'ensure', 'processing', 'provide', 'deploy', 'personnel', 'opportunity', 'conduct', 'preventive', 'maintenance', 'service', 'check', 'pmscs', 'mask', 'base', 'population', 'deploy', 'personnel', 'issue', 'chalk-1', 'conduct', 'pmscs', 'mask', 'prior', 'palletizing', 'shipment', 'assessment', 'communicator', 'associate', 'deficiency']
['exercise', 'disease', 'containment', 'october', 'november', 'member', 'detect', 'report', 'commander', 'develop', 'implement', 'write', 'guidance', 'small', 'light', 'weapon', 'safety', 'tailor', 'specifically', 'exercise', 'assessment', 'communicator', 'associate', 'deficiency']
['fuel', 'management', 'workplace', 'supervisor', 'inform', 'safety', 'office', 'bioenvironmental', 'engineering', 'public', 'health', 'and/or', 'preventive', 'medicine', 'personnel', 'change', 'workplace', 'equipment', 'practice', 'procedure', 'impact', 'exposure', 'occupational', 'environmental', 'health', 'hazard', 'service', 'station', 'facility', 'operational', 'february', 'receive', 'health', 'assessment', 'prior', 'place', 'service', 'management', 'personnel', 'work', 'around', 'facility', 'personnel', 'service', 'station', 'without', 'health', 'assessment', 'complete', 'accomplish', 'applicable', 'assessment', 'communicator', 'identify', 'compliance', 'deficiency']
['lodging', 'manager', 'execute', 'lodging', 'program', 'establish', 'instructions', 'indicate', 'document', 'training', 'lodging', 'employee', 'lodging', 'program', 'approve', 'base', 'training', 'program', 'ensure', 'employee', 'receive', 'initial', 'training', 'ensure', 'specific', 'preventive', 'maintenance', 'conduct', 'document', 'least', 'guest', 'rooms', 'quarter', 'accomplish', 'applicable', 'assessment', 'communicator', 'detect', 'compliance', 'however', 'identify', 'compliance', 'deficiency']
(0, '0.020*"munition" + 0.016*"stock" + 0.016*"expectation" + 0.016*"transactions"')
(1, '0.028*"force" + 0.019*"training" + 0.016*"mission" + 0.013*"commander"')
(2, '0.023*"ensure" + 0.015*"record" + 0.014*"conduct" + 0.013*"information"')
(3, '0.034*"program" + 0.020*"critic" + 0.014*"require" + 0.014*"document"')
(4, '0.027*"training" + 0.018*"program" + 0.018*"ensure" + 0.016*"manager"')
/Users/rj/anaconda3/lib/python3.7/site-packages/pyLDAvis/_prepare.py:257: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  return pd.concat([default_term_info] + list(topic_dfs))

Word Cloud

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 24 16:04:44 2019

@author: rj
"""
#%% IMPORTS
#import xlrd
import spacy
import pandas as pd
import nltk
from nltk.corpus import wordnet as wn
from nltk.stem.wordnet import WordNetLemmatizer
import random
import gensim
from gensim import corpora
import pickle
import pyLDAvis.gensim

# Tokenizer setup. `English()` constructs a blank English pipeline directly
# from the language class, so no trained model package has to be loaded with
# `spacy.load(...)`: the loaded model object would be unused here, and the
# 'en' shortcut name does not resolve on spaCy v3+.
from spacy.lang.en import English
parser = English()

# Make sure the NLTK stop-word and WordNet corpora are available locally.
# When they are already installed, nltk.download only reports
# "Package ... is already up-to-date!".
nltk.download('stopwords')
nltk.download('wordnet')

# English stop words as a set for O(1) membership tests.
en_stop = set(nltk.corpus.stopwords.words('english'))

#%% INPUT DATA

# Location of the IGEMS spreadsheet. Edit this path for your machine;
# a relative alternative is kept here for convenience:
#loc = ('./Example IGEMS data.xlsx')
loc = '/Users/rj/Documents/Hacking_4_Defense/python_code/Example IGEMS data.xlsx'

# The bare string below is a no-op expression holding the older xlrd-based
# loading code for reference; it is never executed.
"""
wb = xlrd.open_workbook(loc)
'sheet = wb.sheet_by_index(0)
"""

# Read the workbook, keep only the five columns of interest, and discard
# every row that has a missing value in any of them.
data = pd.read_excel(loc)
df = pd.DataFrame(
    data,
    columns=['Index', 'Name', 'Organization', 'Type', 'Comment'],
).dropna()
print(df)
         Index                                   Name Organization  \
1      1.4.2.2                          Right Quality      anon_S3   
2      1.2.3.3                                   Unit      anon_S8   
3      1.2.3.3                                   Unit      anon_S8   
4      1.3.2.1                     Key Work Processes      anon_S8   
5        1.4.3  Mission-Assurance Command and Control     anon_S14   
6      1.4.1.3                         Right Quantity     anon_S20   
7    1.3.3.1.1                Self-Assessment Program     anon_S23   
8      1.1.2.1                               Manpower     anon_S26   
10   1.3.3.1.1                Self-Assessment Program     anon_S36   
11       1.1.1                               Adequacy     anon_S40   
12     1.2.3.1                             Individual     anon_S45   
13   1.3.3.2.3                              Relevance     anon_S48   
14     1.3.1.2                     Strategic Planning     anon_S51   
15     1.4.3.1     Warfighter or USAF CC Satisfaction     anon_S55   
16       1.4.1                    Primary Mission (s)     anon_S61   
17     1.4.1.2                          Right Quality     anon_S66   
18     1.2.3.3                                   Unit     anon_S71   
19     1.2.3.3                                   Unit     anon_S71   
20     1.2.1.3                                 Intent     anon_S76   
21     1.3.2.2                        Risk Management     anon_S76   
22     1.1.1.2                                  Funds     anon_S81   
23     1.3.2.3   Commitment to Continuous Improvement     anon_S81   
24     1.4.1.1     Warfighter or USAF CC Satisfaction     anon_S85   
25     1.2.1.1                                 System     anon_S88   
29     1.1.2.3                              Equipment    anon_S104   
30     1.3.2.3   Commitment to Continuous Improvement    anon_S104   
31     1.2.2.1                             Compliance    anon_S109   
32     1.1.2.6                          Airmen's Time    anon_S111   
34     1.1.2.5                               Guidance    anon_S119   
35     1.2.3.3                                   Unit    anon_S124   
..         ...                                    ...          ...   
67     1.1.1.5                               Guidance    anon_S229   
70     1.1.2.5                               Guidance    anon_S247   
71     1.3.2.2                        Risk Management    anon_S252   
72     1.1.2.3                              Equipment    anon_S255   
73     1.2.2.1                             Compliance    anon_S255   
75     1.2.1.3                                 Intent    anon_S261   
76     1.3.2.1                     Key Work Processes    anon_S264   
77     1.3.4.1                        Data Collection    anon_S264   
78     1.3.1.2                     Strategic Planning    anon_S274   
80     1.4.1.2                          Right Quality    anon_S283   
81       1.3.3         CC's Inspection Program (CCIP)    anon_S287   
83     1.1.2.4             Facilities and Environment    anon_S297   
84       1.4.1                    Primary Mission (s)    anon_S303   
85     1.1.2.1                               Manpower    anon_S303   
86     1.1.2.4             Facilities and Environment    anon_S303   
87     1.1.2.3                              Equipment    anon_S312   
88     1.4.2.2                          Right Quality    anon_S316   
89     1.1.2.3                              Equipment    anon_S321   
90     1.2.2.5                    Attention to Detail    anon_S321   
91       1.2.3                               Training    anon_S321   
92     1.3.1.2                     Strategic Planning    anon_S321   
93     1.1.1.2                                  Funds    anon_S325   
94       1.3.3         CC's Inspection Program (CCIP)    anon_S325   
95     1.1.2.3                              Equipment    anon_S331   
96       1.3.2                     Process Operations    anon_S331   
97       1.3.2                     Process Operations    anon_S331   
98       1.3.3         CC's Inspection Program (CCIP)    anon_S331   
99     1.1.2.3                              Equipment    anon_S339   
101    1.1.2.4             Facilities and Environment    anon_S349   
102      1.2.2                             Discipline    anon_S349   

                             Type  \
1    Recommended Improvement Area   
2                      Deficiency   
3                      Deficiency   
4                      Deficiency   
5                        Strength   
6                      Deficiency   
7    Recommended Improvement Area   
8    Recommended Improvement Area   
10                     Deficiency   
11                  Grade/Summary   
12                     Deficiency   
13   Recommended Improvement Area   
14   Recommended Improvement Area   
15                  Grade/Summary   
16                     Deficiency   
17                     Deficiency   
18                     Deficiency   
19                     Deficiency   
20   Recommended Improvement Area   
21                     Deficiency   
22   Recommended Improvement Area   
23   Recommended Improvement Area   
24                  Grade/Summary   
25                       Strength   
29                     Deficiency   
30   Recommended Improvement Area   
31                     Deficiency   
32                  Grade/Summary   
34                     Deficiency   
35                     Deficiency   
..                            ...   
67                     Deficiency   
70                     Deficiency   
71                     Deficiency   
72                       Strength   
73                     Deficiency   
75                  Grade/Summary   
76                     Deficiency   
77                     Deficiency   
78                       Strength   
80                     Deficiency   
81   Recommended Improvement Area   
83                     Deficiency   
84                     Deficiency   
85   Recommended Improvement Area   
86                     Deficiency   
87                     Deficiency   
88                     Deficiency   
89                     Deficiency   
90                       Strength   
91   Recommended Improvement Area   
92                     Deficiency   
93                  Grade/Summary   
94                  Grade/Summary   
95                     Deficiency   
96   Recommended Improvement Area   
97                     Deficiency   
98                     Deficiency   
99   Recommended Improvement Area   
101                    Deficiency   
102                    Deficiency   

                                               Comment  
1    Unit Readiness Program -  Recommend the unit a...  
2    The Base Training Manager, Unit Training Manag...  
3    The Airfield Management Training NCOIC did not...  
4    Chief did not document numerous minor discrepa...  
5    Contracted Readiness and Emergency Management ...  
6    Security Forces Operations - The Security Forc...  
7    Self-Assessment Program - Recommend review and...  
8    Separation of Duties - Recommend the Communica...  
10   Self-Assessment Program - The Mission Support ...  
11   HHQ provided adequate resources enabling the w...  
12   The did not ensure a qualified person was assi...  
13   Business Rules to Support SAP - Recommend the ...  
14   IGQ and IGI Interface - Recommend IGI and IGQ ...  
15   Commanders at all levels expressed satisfactio...  
16   Intelligence Flight did not provide current in...  
17   Arming and Use of Force Program - The Security...  
18   Electrostatic Discharge Program - The electros...  
19   Plans Scheduling and Documentation - The Plans...  
20   Family OPSEC Awareness Outreach Program - Reco...  
21   Munitions Account Management - The Munitions A...  
22   Weapons Load Training Program - Recommend revi...  
23   Quality Assurance Program - Recommend the qual...  
24   Formal feedback processes effectively gauged f...  
25   Leadership Communication - The level of commun...  
29   Corrosion Control and Prevention Program - The...  
30   OPSEC Program Management and Oversight - Recom...  
31   Unit Training Manager did not conduct a compre...  
32   The operations tempo within the was extremely ...  
34   The Base Records Manager did not ensure comman...  
35   did not ensure unit members received records m...  
..                                                 ...  
67   Mission Directive - The Mission Directive, ACC...  
70   Materiel Control bench stock program required ...  
71   Respiratory Protection Program - The low obser...  
72   Equipment Management - Customer Service and Eq...  
73   Intelligence Support to Force Protection - The...  
75   The Wg/CC posted a Command Philosophy on their...  
76   Maintenance Operations Section Chief did not e...  
77   did not ensure Job Control Numbers (JCN) great...  
78   The cultivated a stellar patient safety progra...  
80   CRITIC Program Management - Critical Informati...  
81   should ensure exercise related materials are m...  
83   The EMS Coordinator did not properly document ...  
84   Family Advocacy Program did not always meet re...  
85   Recommend multiple Installation Personnel Read...  
86   Force Support Squadron's paintball area did no...  
87   Flight Service did not ensure all research doc...  
88   The Chief of Aerospace Medicine (SGP) did not ...  
89   The Vehicle Management Materiel Control NCOIC ...  
90   Financial Operations personnel developed and e...  
91   Recommend personnel improve Unit Deployment Ma...  
92   The SERE Training Superintendent did not ensur...  
93   The Continuing Resolution Appropriation and bu...  
94   The IGI staff was composed of two ART position...  
95   Shop Supervisor did not ensure fire alarm smok...  
96   Commander should ensure Test Measurement and D...  
97   During the Deployment of Forces exercise in No...  
98   During WIT-led exercise (Disease Containment P...  
99   Equipment Account Management - Recommend the A...  
101  LG Fuels Management Workplace Supervisor did n...  
102  SVFL Lodging Manager did not execute base lodg...  

[85 rows x 5 columns]
[nltk_data] Downloading package stopwords to /Users/rj/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/rj/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!