View Javadoc
1 /* 2 $Header: /cvsroot/ebxmlrr/ebxmlrr/src/share/com/sun/ebxml/registry/util/NAICSImporter.java,v 1.3 2001/12/16 15:33:16 skchonghk Exp $ 3 */ 4 5 package com.sun.ebxml.registry.util; 6 7 import java.io.*; 8 import java.util.*; 9 import org.oasis.ebxml.registry.bindings.rim.*; 10 import org.oasis.ebxml.registry.bindings.rim.types.*; 11 import org.oasis.ebxml.registry.bindings.query.*; 12 import org.oasis.ebxml.registry.bindings.rs.*; 13 14 /*** 15 It is the importer for NAICS taxonomy. It expects the input file is of 16 version 2002. It will automatically skip the first four lines in the input file. 17 18 @author Adrian Chong 19 */ 20 public class NAICSImporter 21 { 22 private StreamTokenizer tokenizer; 23 private int maxNumOfEntries; 24 private UUIDFactory uUIDFactory; 25 private static final int SECTOR = 2; 26 private static final int SUBSECTOR = 3; 27 private static final int INDUSTRY_GROUP = 4; 28 private static final int INDUSTRY = 5; 29 private static final int NATIONAL = 6; 30 31 /*** 32 Constructor. 33 @param fileName The file path and name of the NAICS taxonomy file. 34 @param uUIDFactory The UUIDFactory 35 */ 36 public NAICSImporter(String fileName, UUIDFactory uUIDFactory) throws 37 IOException 38 { 39 setFile(fileName); 40 tokenizer.eolIsSignificant(false); 41 tokenizer.slashSlashComments(true); 42 tokenizer.slashStarComments(true); 43 tokenizer.wordChars(' ', ' '); 44 tokenizer.wordChars(',', ','); 45 this.uUIDFactory = uUIDFactory; 46 } 47 48 /*** 49 Set the file path of the ISO3166 taxonomy file. 50 @param fileName The file path of the NAICS taxonomy file 51 */ 52 public void setFile(String fileName) throws IOException 53 { 54 tokenizer = new StreamTokenizer(new FileReader(fileName)); 55 tokenizer.eolIsSignificant(true); 56 57 int lineCount = 0; 58 while (tokenizer.nextToken()!=StreamTokenizer.TT_EOF && lineCount < 3) 59 { 60 if (tokenizer.ttype==StreamTokenizer.TT_EOL) 61 { 62 lineCount++; 63 } 64 } 65 66 tokenizer.eolIsSignificant(false); 67 } 68 69 /*** 70 Set the maximum number of entries should be handled. Setting it to 0 means 71 that the importer will handle unlimited number of entries. 72 @param maxNum The maximum number of entries the importer will handle 73 */ 74 public void setMaxNumOfEntries(int maxNum) 75 { 76 maxNumOfEntries = maxNum; 77 } 78 79 /*** 80 Get the ClassificationScheme for NAICS taxonomy 81 @return the ClassificationScheme 82 */ 83 public ClassificationScheme getClassificationScheme() throws IOException 84 { 85 // Create the classification scheme 86 87 ClassificationScheme classScheme = new ClassificationScheme(); 88 classScheme.setId("urn:uuid:" + uUIDFactory.newUUID().toString()); 89 classScheme.setIsInternal(true); 90 classScheme.setNodeType(NodeTypeType.UNIQUECODE); 91 classScheme.setDescription(getInternationalDesc 92 ("This is the classification scheme for NAICS version 2002")); 93 classScheme.setName(getInternationalName("ntis-gov:naics")); 94 95 ClassificationNode parentSector = null; 96 ClassificationNode parentSubSector = null; 97 ClassificationNode parentIndustryGroup = null; 98 ClassificationNode parentIndustry = null; 99 ClassificationNode national = null; 100 101 int count = 0; 102 while(true) 103 { 104 if (maxNumOfEntries != 0 && count > maxNumOfEntries 105 || tokenizer.nextToken()==StreamTokenizer.TT_EOF) 106 { 107 break; 108 } 109 110 // code 111 String code = ""; 112 if (tokenizer.ttype==StreamTokenizer.TT_NUMBER) 113 { 114 code = ((int)tokenizer.nval) + ""; 115 } 116 // It is to check whether the code is a range 117 tokenizer.nextToken(); 118 if (tokenizer.ttype==StreamTokenizer.TT_NUMBER) 119 { 120 code += (int) tokenizer.nval; 121 tokenizer.nextToken(); 122 } 123 124 //System.out.println(code); 125 //System.out.println(getType(code)); 126 127 String name = tokenizer.sval; 128 //System.out.println(name); 129 130 String id = "urn:uuid:" + uUIDFactory.newUUID().toString(); 131 if (getType(code)==SECTOR) 132 { 133 parentSector = new ClassificationNode(); 134 parentSector.setId(id); 135 parentSector.setName(getInternationalName(name)); 136 parentSector.setCode(code); 137 classScheme.addClassificationNode(parentSector); 138 } 139 else if (getType(code)==SUBSECTOR) 140 { 141 parentSubSector = new ClassificationNode(); 142 parentSubSector.setId(id); 143 parentSubSector.setName(getInternationalName(name)); 144 parentSubSector.setCode(code); 145 parentSector.addClassificationNode(parentSubSector); 146 } 147 else if (getType(code)==INDUSTRY_GROUP) 148 { 149 parentIndustryGroup = new ClassificationNode(); 150 parentIndustryGroup.setId(id); 151 parentIndustryGroup.setName(getInternationalName(name)); 152 parentIndustryGroup.setCode(code); 153 parentSubSector.addClassificationNode(parentIndustryGroup); 154 } 155 else if (getType(code)==INDUSTRY) 156 { 157 parentIndustry = new ClassificationNode(); 158 parentIndustry.setId(id); 159 parentIndustry.setName(getInternationalName(name)); 160 parentIndustry.setCode(code); 161 parentIndustryGroup.addClassificationNode(parentIndustry); 162 } 163 else if (getType(code)==NATIONAL) 164 { 165 national = new ClassificationNode(); 166 national.setId(id); 167 national.setName(getInternationalName(name)); 168 national.setCode(code); 169 parentIndustry.addClassificationNode(national); 170 } 171 count++; 172 } 173 return classScheme; 174 } 175 176 /*** 177 Get the type (i.e. Sector, SubSector, etc.) by inspecting the code length. 178 A code range 's type is specified by the number of digits. 179 */ 180 private int getType(String code) 181 { 182 if (code.indexOf('-')==-1) 183 { 184 return code.length(); 185 } 186 else 187 { 188 StringTokenizer sTokenizer = new StringTokenizer(code); 189 return sTokenizer.nextToken("-").length(); 190 } 191 } 192 193 private Name getInternationalName(String name) 194 { 195 196 Name internaionalName = new Name(); 197 internaionalName.addInternationalStringTypeItem( 198 getInternationalStringTypeItem(name)); 199 return internaionalName; 200 } 201 202 private Description getInternationalDesc(String desc) 203 { 204 205 Description internaionalDesc = new Description(); 206 internaionalDesc.addInternationalStringTypeItem( 207 getInternationalStringTypeItem(desc)); 208 return internaionalDesc; 209 } 210 211 private InternationalStringTypeItem getInternationalStringTypeItem(String 212 str) 213 { 214 LocalizedString localizedStr = new LocalizedString(); 215 localizedStr.setLang(null); 216 localizedStr.setValue(str); 217 InternationalStringTypeItem inStrTypeItem = new 218 InternationalStringTypeItem(); 219 inStrTypeItem.setLocalizedString(localizedStr); 220 return inStrTypeItem; 221 } 222 }

This page was automatically generated by Maven