Skip to content

Commit e7b8280

Browse files
Add files via upload
1 parent 6e12acd commit e7b8280

File tree

3 files changed

+131
-0
lines changed

3 files changed

+131
-0
lines changed

‎IndexCSVFilters.java‎

Lines changed: 131 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,131 @@
1+
packagecom.solr.index;
2+
3+
importorg.apache.solr.client.solrj.SolrClient;
4+
importorg.apache.solr.client.solrj.SolrServerException;
5+
importorg.apache.solr.client.solrj.impl.HttpSolrClient;
6+
importorg.apache.solr.common.SolrInputDocument;
7+
importorg.slf4j.Logger;
8+
importorg.slf4j.LoggerFactory;
9+
10+
importjava.io.*;
11+
importjava.nio.charset.StandardCharsets;
12+
importjava.text.ParseException;
13+
importjava.text.SimpleDateFormat;
14+
importjava.util.ArrayList;
15+
importjava.util.Collection;
16+
importjava.util.TimeZone;
17+
18+
publicclassIndexCSVFilters{
19+
20+
privateSolrClientsolrClient;
21+
privatestaticfinalTimeZoneUTC = TimeZone.getTimeZone("UTC");
22+
privateSimpleDateFormatsolrCompatibleSdf,csvCompatibleSdf;
23+
privatestaticfinalLoggerlogger = LoggerFactory.getLogger(IndexCSVFilters.class);
24+
25+
privatestaticfinalStringTIME_STAMP = "T00:00:00Z";
26+
privatestaticfinalStringDATE_STAMP = "1970-01-01T";
27+
privatestaticfinalStringDATE_TIME_FIELD = "DateTime";
28+
privatestaticfinalStringTIME_FIELD = "Time";
29+
privatestaticfinalStringID_FIELD = "id";
30+
privatestaticfinalStringDATE_PART_Z = "Z";
31+
// private static final String DATA_SET_NAME_FIELD = "DataSetName"
32+
publicIndexCSVFilters(StringsolrUrl, StringcoreName){
33+
StringurlString = solrUrl+"/"+coreName;
34+
solrClient = newHttpSolrClient.Builder(urlString)
35+
.withSocketTimeout(0)
36+
.withConnectionTimeout(0)
37+
.build();
38+
solrCompatibleSdf = newSimpleDateFormat("YYYY-MM-dd'T'HH:mm:ss'Z'");
39+
solrCompatibleSdf.setTimeZone(UTC);
40+
csvCompatibleSdf = newSimpleDateFormat("MM/dd/yyyy hh:mm:ss aa");
41+
csvCompatibleSdf.setTimeZone(UTC);
42+
}
43+
publicbooleanindexCSVFile(Filefile){
44+
45+
logger.info("Indexing Started for file : "+file.getName());
46+
try(BufferedReaderbufferedReader = newBufferedReader(newInputStreamReader(newFileInputStream(file), StandardCharsets.UTF_8))){
47+
48+
Stringline = null;
49+
line = bufferedReader.readLine();
50+
Stringsep = "";
51+
if(line.contains(",")){
52+
sep = ",";
53+
}
54+
StringfieldNames[] = line.split(sep);
55+
Collection<SolrInputDocument> docList = newArrayList<>();
56+
// DataSetName,TimeStamp,First,Max,Min,Last,ValB,ValA,FilterS,FilterE,FilterT,ValS,ValC
57+
// E8374H231J#Type0,2017-01-09 09:31:00,66.2,71,66.2,71,66.2,71,35,2017-01-13,C,103.42,0
58+
// E8374H231J#Type0,2017-01-09 09:32:00,66.2,71,66.2,71,66.2,71,35,2017-01-13,C,103.57,0
59+
intcnt =0;
60+
intlineCnt =0;
61+
while ((line = bufferedReader.readLine())!=null){
62+
lineCnt++;
63+
if(line.trim().length()==0){
64+
continue;
65+
}
66+
Stringdata [] = line.trim().split(sep);
67+
if(data.length!=fieldNames.length){
68+
System.out.println(lineCnt+"\t"+line);
69+
continue;
70+
}
71+
SolrInputDocumentsolrInputDocument = newSolrInputDocument();
72+
for(intindex = 0;index<fieldNames.length;index++){
73+
74+
if(index==1 || index==9){
75+
continue;
76+
}
77+
solrInputDocument.addField(fieldNames[index],data[index]);
78+
}
79+
Stringtime = data[1].substring(data[1].indexOf(" ")+1);
80+
81+
solrInputDocument.addField(TIME_FIELD,DATE_STAMP+time+ DATE_PART_Z);
82+
solrInputDocument.addField(DATE_TIME_FIELD,data[1].replace(" ","T")+ DATE_PART_Z);
83+
solrInputDocument.addField(fieldNames[9],data[9]+TIME_STAMP);
84+
solrInputDocument.addField(ID_FIELD,data[1].replace(" ","_")+"_"+data[0]+"_"+data[8]+"_"+data[9]+"_"+data[10]);
85+
docList.add(solrInputDocument);
86+
87+
if(cnt==100000){
88+
cnt = 0;
89+
solrClient.add(docList);
90+
solrClient.commit();
91+
docList.clear();
92+
}else{
93+
cnt++;
94+
}
95+
}
96+
if(cnt!=0){
97+
solrClient.add(docList);
98+
solrClient.commit();
99+
docList.clear();
100+
}
101+
} catch (FileNotFoundExceptione){
102+
e.printStackTrace();
103+
logger.error("Error in Indexing file : "+file.getName(),e);
104+
} catch (IOExceptione){
105+
e.printStackTrace();
106+
logger.error("Error in Indexing file : "+file.getName(),e);
107+
} catch (SolrServerExceptione){
108+
e.printStackTrace();
109+
logger.error("Error in Indexing file : "+file.getName(),e);
110+
}
111+
logger.info("Indexing Completed for file : "+file.getName());
112+
returntrue;
113+
}
114+
115+
publicvoidoptimizeIndex(){
116+
try{
117+
logger.info("Index Optimization Process Start");
118+
solrClient.commit();
119+
solrClient.optimize();
120+
logger.info("Index Optimization Process Completed");
121+
} catch (SolrServerExceptione){
122+
e.printStackTrace();
123+
logger.error("Error in Index optimization process",e);
124+
} catch (IOExceptione){
125+
e.printStackTrace();
126+
logger.error("Error in Index optimization process",e);
127+
}
128+
129+
}
130+
131+
}

‎conf.zip‎

138 KB
Binary file not shown.

‎csv_samples.zip‎

13.7 MB
Binary file not shown.

0 commit comments

Comments
(0)