tag:blogger.com,1999:blog-33877007775307573772024-03-13T00:45:02.836-07:00IT Pulse++The posts on this blog cover fundamental problems that I came across, worked through and implemented in my day-to-day work.Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.comBlogger45125tag:blogger.com,1999:blog-3387700777530757377.post-47139775696009300612021-03-05T04:48:00.001-08:002021-03-05T04:48:27.848-08:00OOZIE JOB TRIGGERING WITH OozieClient API -- WITH Kerberos Login ,using KEYTAB FILE AND PRINCIPAL AND USERNAME<p> </p><p><span style="color: #008800; font-weight: bold;">Assuming you have a proper job.properties file and your Kerberos keytab file in place, the code below uses them to log in and submit the Oozie job.</span></p><p><br /></p><pre style="color: #333333; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.FileInputStream</span>;
<pre style="line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.File</span>;</pre></pre><pre style="color: #333333; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.IOException</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.security.PrivilegedAction</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.ArrayList</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.HashMap</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Locale</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Map</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Properties</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">javax.security.auth.Subject</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">javax.security.auth.login.AppConfigurationEntry</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">javax.security.auth.login.Configuration</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">javax.security.auth.login.LoginContext</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">javax.security.auth.login.LoginException</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.oozie.client.OozieClient</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.oozie.client.OozieClientException</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.oozie.client.WorkflowJob</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.cronutils.descriptor.CronDescriptor</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.cronutils.model.CronType</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.cronutils.model.definition.CronDefinition</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.cronutils.model.definition.CronDefinitionBuilder</span>;
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.cronutils.parser.CronParser</span>;
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">class</span> <span style="color: #bb0066; font-weight: bold;">OozieSubmitJob</span> {
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> String OOZIE_SERVER_URL = <span style="background-color: #fff0f0;">"http://oozieserverurl:11000/oozie"</span>;
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> String nameNode = <span style="background-color: #fff0f0;">"hdfs://namenodeserverip.com:8020"</span>;
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> String principal = <span style="background-color: #fff0f0;">"username@axaxax.COM"</span>;
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> String keytabFilename = <span style="background-color: #fff0f0;">"C:\\Users\\username\\Documents\\2021\\2021Q1\\learning\\username.keytab"</span>;
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> String krb5_conf = <span style="background-color: #fff0f0;">"C:\\ici_fw\\krb5.ini"</span>;
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> CronDefinition cronDefinition = CronDefinitionBuilder.<span style="color: #0000cc;">instanceDefinitionFor</span>(CronType.<span style="color: #0000cc;">UNIX</span>);
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> CronParser parser = <span style="color: #008800; font-weight: bold;">new</span> CronParser(cronDefinition);
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> CronDescriptor descriptor = CronDescriptor.<span style="color: #0000cc;">instance</span>(Locale.<span style="color: #0000cc;">US</span>);
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> ArrayList<JobDetails> listOfJobs = <span style="color: #008800; font-weight: bold;">new</span> ArrayList<JobDetails>();
<span style="color: #888888;">/**</span>
<span style="color: #888888;"> * Loads the Oozie job properties, performs a keytab-based Kerberos login for the configured</span>
<span style="color: #888888;"> * principal, and submits the workflow as the authenticated subject.</span>
<span style="color: #888888;"> *</span>
<span style="color: #888888;"> * @param args unused</span>
<span style="color: #888888;"> * @throws IOException if the job.properties file cannot be opened or read</span>
<span style="color: #888888;"> * @throws LoginException if the Kerberos login fails</span>
<span style="color: #888888;"> */</span>
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #333399; font-weight: bold;">void</span> <span style="color: #0066bb; font-weight: bold;">main</span>(String[] args) <span style="color: #008800; font-weight: bold;">throws</span> IOException, LoginException, IllegalArgumentException {
    String propertiesFile = <span style="background-color: #fff0f0;">"C:\\Users\\npedamal\\Documents\\2021\\2021Q1\\learning\\job.properties"</span>;
    Properties jobProperties = <span style="color: #008800; font-weight: bold;">new</span> Properties();
    <span style="color: #888888;">// try-with-resources closes the stream even when load() throws; it was previously left open.</span>
    <span style="color: #008800; font-weight: bold;">try</span> (FileInputStream propertiesStream = <span style="color: #008800; font-weight: bold;">new</span> FileInputStream(propertiesFile)) {
        jobProperties.<span style="color: #0000cc;">load</span>(propertiesStream);
    }
    File keytab = <span style="color: #008800; font-weight: bold;">new</span> File(keytabFilename);
    Subject subject = <span style="color: #008800; font-weight: bold;">new</span> Subject();
    <span style="color: #888888;">// The login module settings come from the in-code KerberosConfiguration passed as the last argument.</span>
    LoginContext loginContext = <span style="color: #008800; font-weight: bold;">new</span> LoginContext(<span style="background-color: #fff0f0;">""</span>, subject, <span style="color: #008800; font-weight: bold;">null</span>, KerberosConfiguration.<span style="color: #0000cc;">createClientConfig</span>(principal, keytab));
    loginContext.<span style="color: #0000cc;">login</span>();
    submitJobOozieClient(subject, jobProperties);
}
<span style="color: #888888;">/**</span>
<span style="color: #888888;"> * Submits and starts the workflow described by jobProps while running as the given subject,</span>
<span style="color: #888888;"> * then polls every 10 seconds for as long as the job reports RUNNING and prints the final job info.</span>
<span style="color: #888888;"> *</span>
<span style="color: #888888;"> * @param subject the logged-in subject the Oozie calls are executed as</span>
<span style="color: #888888;"> * @param jobProps the job properties handed to OozieClient.run</span>
<span style="color: #888888;"> */</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #333399; font-weight: bold;">void</span> <span style="color: #0066bb; font-weight: bold;">submitJobOozieClient</span>( Subject subject, Properties jobProps) {
    Subject.<span style="color: #0000cc;">doAs</span>(subject, <span style="color: #008800; font-weight: bold;">new</span> PrivilegedAction<Void>() {
        @Override
        <span style="color: #008800; font-weight: bold;">public</span> Void <span style="color: #0066bb; font-weight: bold;">run</span>() {
            OozieClient wc = <span style="color: #008800; font-weight: bold;">new</span> OozieClient(OOZIE_SERVER_URL);
            <span style="color: #008800; font-weight: bold;">try</span> {
                String jobId = wc.<span style="color: #0000cc;">run</span>(jobProps);
                System.<span style="color: #0000cc;">out</span>.<span style="color: #0000cc;">println</span>(<span style="background-color: #fff0f0;">"Workflow job submitted : "</span>+ jobId);
                <span style="color: #008800; font-weight: bold;">while</span> (wc.<span style="color: #0000cc;">getJobInfo</span>(jobId).<span style="color: #0000cc;">getStatus</span>() == WorkflowJob.<span style="color: #0000cc;">Status</span>.<span style="color: #0000cc;">RUNNING</span>) {
                    System.<span style="color: #0000cc;">out</span>.<span style="color: #0000cc;">println</span>(<span style="background-color: #fff0f0;">"Workflow job running ..."</span>);
                    Thread.<span style="color: #0000cc;">sleep</span>(<span style="color: #0000dd; font-weight: bold;">10</span> * <span style="color: #0000dd; font-weight: bold;">1000</span>);
                }
                System.<span style="color: #0000cc;">out</span>.<span style="color: #0000cc;">println</span>(<span style="background-color: #fff0f0;">"Workflow job completed ..."</span>);
                System.<span style="color: #0000cc;">out</span>.<span style="color: #0000cc;">println</span>(wc.<span style="color: #0000cc;">getJobInfo</span>(jobId));
            } <span style="color: #008800; font-weight: bold;">catch</span> (OozieClientException e) {
                e.<span style="color: #0000cc;">printStackTrace</span>();
            } <span style="color: #008800; font-weight: bold;">catch</span> (InterruptedException e) {
                <span style="color: #888888;">// Restore the interrupt flag so callers can still observe that the wait was interrupted.</span>
                Thread.<span style="color: #0000cc;">currentThread</span>().<span style="color: #0000cc;">interrupt</span>();
                e.<span style="color: #0000cc;">printStackTrace</span>();
            }
            <span style="color: #008800; font-weight: bold;">return</span> <span style="color: #008800; font-weight: bold;">null</span>;
        }
    });
}
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #008800; font-weight: bold;">class</span> <span style="color: #bb0066; font-weight: bold;">KerberosConfiguration</span> <span style="color: #008800; font-weight: bold;">extends</span> Configuration {
<span style="color: #008800; font-weight: bold;">private</span> String principal;
<span style="color: #008800; font-weight: bold;">private</span> String keytab;
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #0066bb; font-weight: bold;">KerberosConfiguration</span>(String principal, File keytab) {
<span style="color: #008800; font-weight: bold;">this</span>.<span style="color: #0000cc;">principal</span> = principal;
<span style="color: #008800; font-weight: bold;">this</span>.<span style="color: #0000cc;">keytab</span> = keytab.<span style="color: #0000cc;">getAbsolutePath</span>();
}
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> KerberosConfiguration <span style="color: #0066bb; font-weight: bold;">createClientConfig</span>(String principal, File keytab) {
<span style="color: #008800; font-weight: bold;">return</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #0066bb; font-weight: bold;">KerberosConfiguration</span>(principal, keytab);
}
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> String <span style="color: #0066bb; font-weight: bold;">getKrb5LoginModuleName</span>() {
<span style="color: #008800; font-weight: bold;">return</span> System.<span style="color: #0000cc;">getProperty</span>(<span style="background-color: #fff0f0;">"java.vendor"</span>).<span style="color: #0000cc;">contains</span>(<span style="background-color: #fff0f0;">"IBM"</span>) ? <span style="background-color: #fff0f0;">"com.ibm.security.auth.module.Krb5LoginModule"</span>
: <span style="background-color: #fff0f0;">"com.sun.security.auth.module.Krb5LoginModule"</span>;
}
<span style="color: #008800; font-weight: bold;">public</span> AppConfigurationEntry[] <span style="color: #0066bb; font-weight: bold;">getAppConfigurationEntry</span>(String name) {
Map<String, String> options = <span style="color: #008800; font-weight: bold;">new</span> HashMap<String, String>();
options.<span style="color: #0000cc;">put</span>(<span style="background-color: #fff0f0;">"keyTab"</span>, keytab);
options.<span style="color: #0000cc;">put</span>(<span style="background-color: #fff0f0;">"principal"</span>, principal);
options.<span style="color: #0000cc;">put</span>(<span style="background-color: #fff0f0;">"useKeyTab"</span>, <span style="background-color: #fff0f0;">"true"</span>);
options.<span style="color: #0000cc;">put</span>(<span style="background-color: #fff0f0;">"storeKey"</span>, <span style="background-color: #fff0f0;">"true"</span>);
options.<span style="color: #0000cc;">put</span>(<span style="background-color: #fff0f0;">"doNotPrompt"</span>, <span style="background-color: #fff0f0;">"true"</span>);
options.<span style="color: #0000cc;">put</span>(<span style="background-color: #fff0f0;">"refreshKrb5Config"</span>, <span style="background-color: #fff0f0;">"true"</span>);
options.<span style="color: #0000cc;">put</span>(<span style="background-color: #fff0f0;">"isInitiator"</span>, <span style="background-color: #fff0f0;">"true"</span>);
<span style="color: #008800; font-weight: bold;">return</span> <span style="color: #008800; font-weight: bold;">new</span> AppConfigurationEntry[] { <span style="color: #008800; font-weight: bold;">new</span> AppConfigurationEntry(getKrb5LoginModuleName(),
AppConfigurationEntry.<span style="color: #0000cc;">LoginModuleControlFlag</span>.<span style="color: #0000cc;">REQUIRED</span>, options) };
}
}
}</pre>terminatorhttp://www.blogger.com/profile/07460664943272440814noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-46693750796233457062020-11-09T06:54:00.003-08:002020-11-09T06:54:22.091-08:00Creating Thread pool running parallel list of queries parallelly - PySpark <p> </p><p><br /></p><p><br /></p><pre style="background-color: #f0f0f0; color: #333333; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><span style="color: #007020; font-weight: bold;">from</span> <span style="color: #0e84b5; font-weight: bold;">pyspark_llap.sql.session</span> <span style="color: #007020; font-weight: bold;">import</span> HiveWarehouseSession
<span style="color: #007020; font-weight: bold;">from</span> <span style="color: #0e84b5; font-weight: bold;">datetime</span> <span style="color: #007020; font-weight: bold;">import</span> datetime
<span style="color: #007020; font-weight: bold;">from</span> <span style="color: #0e84b5; font-weight: bold;">multiprocessing.dummy</span> <span style="color: #007020; font-weight: bold;">import</span> Pool <span style="color: #007020; font-weight: bold;">as</span> ThreadPool
<span style="color: #007020; font-weight: bold;">from</span> <span style="color: #0e84b5; font-weight: bold;">pyspark.sql.types</span> <span style="color: #007020; font-weight: bold;">import</span> StructField, StructType, StringType
queriesList<span style="color: #666666;">=</span>[<span style="color: #4070a0;">"select * from table_A"</span>,<span style="color: #4070a0;">"select * from table_B"</span>,<span style="color: #4070a0;">"select * from table_C"</span>,<span style="color: #4070a0;">"select * from table_D"</span>,
<span style="color: #4070a0;">"select * from table_E"</span>,<span style="color: #4070a0;">"select * from table_F"</span>,<span style="color: #4070a0;">"select * from table_G"</span>
<span style="color: #4070a0;">"select * from table_H"</span>,<span style="color: #4070a0;">"select * from table_I"</span>
<span style="color: #4070a0;">"select * from table_A"</span>,<span style="color: #4070a0;">"select * from table_J"</span>,<span style="color: #4070a0;">"select * from table_K"</span>,
<span style="color: #4070a0;">"select * from table_L"</span>,<span style="color: #4070a0;">"select * from table_M"</span>]
<span style="color: #007020; font-weight: bold;">def</span> <span style="color: #06287e;">extractAggValues</span>(query):
<span style="color: #007020; font-weight: bold;">global</span> spark,hive
output_df<span style="color: #666666;">=</span>hive<span style="color: #666666;">.</span>executeQuery(query)
output_df<span style="color: #666666;">=</span>output_df<span style="color: #666666;">.</span>repartition(<span style="color: #40a070;">20</span>)
<span style="color: #007020; font-weight: bold;">return</span> output_df<span style="color: #666666;">.</span>rdd<span style="color: #666666;">.</span>collect()
<span style="color: #007020; font-weight: bold;">def</span> <span style="color: #06287e;">runStatsParallel</span>(queriesList, threads<span style="color: #666666;">=</span><span style="color: #40a070;">2</span>):
pool <span style="color: #666666;">=</span> ThreadPool(threads)
rowsList <span style="color: #666666;">=</span> pool<span style="color: #666666;">.</span>map(extractAggValues, queriesList)
pool<span style="color: #666666;">.</span>close()
pool<span style="color: #666666;">.</span>join()
<span style="color: #007020; font-weight: bold;">return</span> rowsList
allmyRowsList <span style="color: #666666;">=</span> runStatsParallel(queriesList, <span style="color: #40a070;">4</span>)
flatlist <span style="color: #666666;">=</span> spark<span style="color: #666666;">.</span>sparkContext<span style="color: #666666;">.</span>parallelize(allmyRowsList)<span style="color: #666666;">.</span>flatMap(<span style="color: #007020; font-weight: bold;">lambda</span> x: x)<span style="color: #666666;">.</span>collect()
outDf <span style="color: #666666;">=</span> spark<span style="color: #666666;">.</span>createDataFrame(spark<span style="color: #666666;">.</span>sparkContext<span style="color: #666666;">.</span>parallelize(flatlist),dfSchema)
outDf<span style="color: #666666;">.</span>show()</pre><pre style="background-color: #f0f0f0; color: #333333; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><br /></pre><pre style="background-color: #f0f0f0; color: #333333; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><br /></pre><pre style="background-color: #f0f0f0; color: #333333; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><br /></pre>Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com2tag:blogger.com,1999:blog-3387700777530757377.post-40106737976730582282020-11-09T06:43:00.002-08:002020-11-09T06:44:06.965-08:00##CREATING DDL USING PYSPARK AND PYTHON <p> <span style="background-color: white; color: #008800; font-style: italic;">##CREATING DDL USING PYSPARK AND PYTHON </span></p><pre style="background-color: white; color: #333333; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;">dbName=str(<span style="color: blue;">'some-db-name'</span>)
inTableName=str(<span style="color: blue;">'some-table-name'</span>)
inTableColsQry=str(<span style="color: blue;">"select * from "</span>+ str(dbName + <span style="color: blue;">"."</span> + str(inTableName)) + <span style="color: blue;">" where 1=2 "</span>)
inTableCols_DF = hive.executeQuery(inTableColsQry)
in_tble_columns_list=inTableCols_DF.columns
in_tble_column_type_list=inTableCols_DF.dtypes
extTbldbName=str(<span style="color: blue;">'some-destination-db-name'</span>)
extTableName=str(inTableName+<span style="color: blue;">'_ext '</span>)
createString=str(<span style="color: blue;">'CREATE EXTERNAL TABLE IF NOT EXISTS '</span>)
storageFormat=<span style="color: blue;">' STORED AS PARQUET '</span>
defaultLocation=<span style="color: blue;">'hdfs://hdfs/filesystem/location/hdfsfilesystemlocation'</span>
allColumsForTable= <span style="color: blue;">', '</span>.join(map(<span style="color: navy; font-weight: bold;">lambda</span> colmn: <span style="color: blue;">'`'</span>+str(colmn[<span style="color: blue;">0</span>])+<span style="color: blue;">'`'</span> +<span style="color: blue;">' '</span>+ colmn[<span style="color: blue;">1</span>] , in_tble_column_type_list))
extTblCreateScript = createString + extTbldbName+<span style="color: blue;">'.'</span>+ extTableName + <span style="color: blue;">'('</span> + allColumsForTable + <span style="color: blue;">')'</span> + storageFormat + <span style="color: blue;">' LOCATION '</span> + str(<span style="color: blue;">"\'"</span>+defaultLocation+<span style="color: blue;">"\'"</span>) + <span style="color: blue;">' TBLPROPERTIES (\'external.table.purge\'=\'true\')'</span>
hive.setDatabase(extTbldbName)
hive.executeUpdate(extTblCreateScript)</pre><pre style="background-color: white; color: #333333; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><br /></pre><pre style="background-color: white; line-height: 16.25px; margin-bottom: 0px; margin-top: 0px;"><span style="color: #333333;">Adapt the code to the DBMS you are targeting; this sample generates the Hive version of the DDL.</span></pre>Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-11183105222176001772019-12-17T08:16:00.001-08:002019-12-19T07:16:36.347-08:00DDL and Insert Script Generator using CSV file using Spark-Scala<div dir="ltr" style="text-align: left;" trbidi="on">
<pre style="color: #333333; line-height: 16.25px;"><pre style="line-height: 16.25px;"><pre style="line-height: 16.25px;"><div style="background: rgb(255, 255, 255); float: left; line-height: 1;">
<span class="sc0">import java.util.Properties
import java.io.FileInputStream
import org.apache.spark.sql.{ DataFrame, SaveMode, SparkSession }
import org.apache.log4j.{ Level, Logger }
import org.apache.spark.sql.types.StructType
import java.util.Arrays
import java.util.stream.Collectors
import com.github.vertical_blank.sqlformatter.SqlFormatter
import scala.collection.Seq
object ReadingCsv {
/**
 * Reads a CSV file into a DataFrame with sanitized column names, prints a CREATE TABLE
 * script for it, and prints/runs a query that renders every row as an INSERT statement.
 *
 * Settings (hadoop.home.dir, spark.sql.warehouse.dir) come from the properties file below.
 */
def main(args: Array[String]): Unit = {
  val filepath = "/Users/npedamal/Documents/Work/mywork/valid.properties";
  val configs = new Properties()
  // Close the properties stream once loaded; it was previously left open.
  val configStream = new FileInputStream(filepath)
  try configs.load(configStream) finally configStream.close()
  System.setProperty("hadoop.home.dir", configs.getProperty("hadoop.home.dir"));
  System.setProperty("spark.sql.warehouse.dir", configs.getProperty("spark.sql.warehouse.dir"));
  val spark = SparkSession
    .builder()
    .appName("Reading_CSV_application")
    .master("local")
    .config("spark.sql.warehouse.dir", configs.getProperty("spark.sql.warehouse.dir"))
    .getOrCreate()
  // finally releases the session even if reading the file or running the query fails.
  try {
    val rootLogger = Logger.getRootLogger
    rootLogger.setLevel(Level.ERROR)
    val tableName = "sampleTableName";
    val databaseName = "sample_database_name";
    val fileLocation = "/Users/npedamal/Documents/2019Q4/100000 CC Records.csv"
    val sourceFileType = "csv"
    val dfLkpData = spark.read.format(sourceFileType).option("header", "true").option("inferSchema", "true").load(fileLocation)
    // Lower-case the header names and replace every run of non-alphanumerics with "_".
    val columnsList = dfLkpData.schema.map(x => x.name.toLowerCase().replaceAll("[^a-zA-Z0-9]+", "_"))
    val countryLkpData = dfLkpData.toDF(columnsList: _*)
    countryLkpData.show(10, false)
    println(SqlFormatter.format(displayCreateScript(countryLkpData, tableName)))
    // createSelectStatement selects from this view name, so the two must stay in sync.
    countryLkpData.createOrReplaceTempView("myjaffatable")
    val selQuery = createSelectStatement(spark, countryLkpData, databaseName, tableName)
    println(SqlFormatter.format(selQuery))
    spark.sql(selQuery).show(50, false)
  } finally {
    spark.close()
  }
}
// Builds the text of a Spark SQL query over the temp view "myjaffatable" whose single
// output column (bisketcolumn) is a concat(...) of an "insert into table db.table values ( "
// prefix, every column of the row wrapped in double-quote literals, and a closing ");".
// spark is only used for the implicits import; dataFrame supplies the column names.
// Returns the query string; nothing is executed here.
def createSelectStatement(spark: SparkSession, dataFrame: DataFrame, databaseName: String, tableName: String): String = {
import spark.implicits._
import org.apache.spark.sql.functions.{ concat, lit }
val insertLiteral = "insert into table " + databaseName + "." + tableName + " values ( "
// Per column this yields the SQL fragment  "\"",col_name,"\","  i.e. a quote literal, the
// sanitized column reference, then a literal holding a closing quote plus a comma separator.
val columnsString = dataFrame.schema.map(x => "\"\\".concat("\"\"").concat(",").concat(x.name.toLowerCase().replaceAll("[^a-zA-Z0-9]+", "_")).concat(",").concat("\"\\").concat("\",\"")).mkString(", ")
val stringStmt = columnsString.toString()
// Removes the last comma in the whole string, which is the separator inside the final
// column's closing literal, so the generated value list does not end with a comma.
val newStringStmt = stringStmt.toString.patch(stringStmt.toString.lastIndexOf(','), "", 1)
// NOTE(review): the prefix literal and the trailing ");" literal sit directly next to the
// neighbouring quote literals with no comma; presumably this relies on Spark SQL joining
// adjacent string literals - confirm before changing the quoting.
val selQuery = "select " + "concat(" + "\"" + insertLiteral + "\"" + newStringStmt.toString() + "\"" + ");" + "\"" + " ) as bisketcolumn" + " from myjaffatable"
selQuery
}
/**
 * Builds a "CREATE TABLE name (col TYPE, ...);" script from the DataFrame schema.
 *
 * Column names are lower-cased and every run of non-alphanumerics is replaced with "_".
 * String/Integer/Long columns map to STRING/INT/BIGINT as before; any other Spark type
 * now falls back to its SQL name (DataType.sql) instead of failing with a MatchError,
 * which matters because inferSchema readily produces types such as DoubleType.
 *
 * @param dataFrame source of the column names and types
 * @param tableName table name placed in the script
 * @return the CREATE TABLE statement text
 */
def displayCreateScript(dataFrame: DataFrame, tableName: String): String = {
  val schema = dataFrame.schema.map { field =>
    val columnName = field.name.toLowerCase().replaceAll("[^a-zA-Z0-9]+", "_")
    val sqlType = field.dataType.toString() match {
      case "StringType" => "STRING"
      case "IntegerType" => "INT"
      case "LongType" => "BIGINT"
      case _ => field.dataType.sql
    }
    columnName.concat(" ").concat(sqlType)
  }.mkString(", ")
  val hive_sql = "CREATE TABLE " + tableName + " (" + schema + " );"
  hive_sql
}
/**
 * Builds an "insert into table db.table values ( ... );" template in which every column
 * appears as a $"column_name" reference, followed by a FROM_UNIXTIME(UNIX_TIMESTAMP()) value.
 *
 * @param spark        session, used only for its implicits import
 * @param dataFrame    source of the column names
 * @param databaseName database part of the target table
 * @param tableName    table part of the target table
 * @return the insert template text
 */
def createInsertAutomatically(spark: SparkSession, dataFrame: DataFrame, databaseName: String, tableName: String): String = {
  import spark.implicits._
  import org.apache.spark.sql.functions.{ concat, lit }
  // Sanitized column names, each rendered as $"name".
  val columnRefs = dataFrame.schema.fieldNames
    .map(_.toLowerCase().replaceAll("[^a-zA-Z0-9]+", "_"))
    .map(columnName => "$" + "\"" + columnName + "\"")
  val statement = new StringBuilder("insert into table " + databaseName + "." + tableName + " values ( ")
  statement.append(columnRefs.mkString(", "))
  statement.append(", FROM_UNIXTIME( UNIX_TIMESTAMP())")
  statement.append(" );")
  statement.toString()
}
}</span></div>
</pre>
</pre>
</pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-66460509620424503112019-11-06T18:40:00.001-08:002019-11-06T18:40:30.606-08:00Reading Fixed-width File using Spark <div dir="ltr" style="text-align: left;" trbidi="on">
<pre style="color: #333333; line-height: 16.25px;"><span style="color: #008800; font-weight: bold;">You will need a metadata file in CSV format that lists the field names and the length of each column; these are applied to the fixed-width data file. </span></pre>
<pre style="color: #333333; line-height: 16.25px;"><span style="color: #008800; font-weight: bold;">
</span></pre>
<pre style="color: #333333; line-height: 16.25px;"><span style="color: #008800; font-weight: bold;">
</span></pre>
<pre style="color: #333333; line-height: 16.25px;"><span style="color: #008800; font-weight: bold;">package</span> <span style="color: #0e84b5; font-weight: bold;">reading_fixedwidth</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Properties</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.FileInputStream</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.</span>{ <span style="color: #bb0066; font-weight: bold;">Level</span>, <span style="color: #bb0066; font-weight: bold;">Logger</span> }
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.SparkSession</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.functions._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.types._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.types.</span>{ <span style="color: #bb0066; font-weight: bold;">StructType</span>, <span style="color: #bb0066; font-weight: bold;">StructField</span>, <span style="color: #bb0066; font-weight: bold;">StringType</span> }
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.types.StructType</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.types.StructField</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.types.StringType</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.Row</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.functions._</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">readingFixedWidthFiles</span> {
<span style="color: #888888;">/**</span>
<span style="color: #888888;"> * Cuts one fixed-width line into trimmed fields and returns them as a Row.</span>
<span style="color: #888888;"> *</span>
<span style="color: #888888;"> * pos holds the field widths in order. Each width consumes that many characters from the</span>
<span style="color: #888888;"> * front of what is left of str; when the remainder is no longer than the width, all of it</span>
<span style="color: #888888;"> * becomes the field and later fields are empty strings. The previous third branch could</span>
<span style="color: #888888;"> * never run (the first two conditions are exhaustive) and has been removed.</span>
<span style="color: #888888;"> */</span>
<span style="color: #008800; font-weight: bold;">def</span> splittinglines(pos<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">List</span>[<span style="color: #333399; font-weight: bold;">Int</span>], str<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">String</span>)<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Row</span> = {
  <span style="color: #888888;">// Accumulator: (text still to be consumed, fields collected so far in reverse order).</span>
  <span style="color: #008800; font-weight: bold;">val</span> (_, result) <span style="color: #008800; font-weight: bold;">=</span> pos.foldLeft((str, <span style="color: #bb0066; font-weight: bold;">List</span>[<span style="color: #333399; font-weight: bold;">String</span>]())) {
    <span style="color: #008800; font-weight: bold;">case</span> ((s, res), curr) <span style="color: #008800; font-weight: bold;">=></span>
      <span style="color: #008800; font-weight: bold;">if</span> (s.length() <= curr) {
        (<span style="background-color: #fff0f0;">""</span>, s.trim() :: res)
      } <span style="color: #008800; font-weight: bold;">else</span> {
        (s.substring(curr), s.substring(<span style="color: #0000dd; font-weight: bold;">0</span>, curr).trim() :: res)
      }
  }
  <span style="color: #bb0066; font-weight: bold;">Row</span>.fromSeq(result.reverse)
}
/**
 * Loads a fixed-width text file into a DataFrame, then prints its distinct rows and their count.
 *
 * Column names and widths are read from a header-bearing CSV metadata file (columns
 * "col_name" and "size"); every source line is turned into a Row by
 * `splittinglines(sizeOfColumn, line)`. All columns are typed as nullable strings.
 *
 * @param args optional; when present, args(0) is the properties file to load instead of the
 *             built-in default location
 */
def main(args: Array[String]): Unit = {
  println("hello")
  // Backward compatible: with no arguments the original hard-coded location is used.
  val filepath = args.headOption.getOrElse("C:\\Users\\nfolder\\Documents\\na_folder\\work\\valid.properties")
  val configs = new Properties()
  // Close the stream as soon as the properties are loaded so the file handle is not leaked.
  val configStream = new FileInputStream(filepath)
  try configs.load(configStream) finally configStream.close()

  System.setProperty("hadoop.home.dir", configs.getProperty("hadoop.home.dir"))
  System.setProperty("spark.sql.warehouse.dir", configs.getProperty("spark.sql.warehouse.dir"))
  println(configs.getProperty("spark.jdbc.partition.enabled"))

  val spark = SparkSession
    .builder()
    .config("spark.some.config.option", "some-value")
    .appName("TestJSon")
    .master("local[*]")
    .getOrCreate()
  try {
    Logger.getRootLogger.setLevel(Level.ERROR)

    val rdd = spark.sparkContext.textFile(configs.getProperty("app.source.file.location"))
    val metadata = spark.read.option("header", "true").csv(configs.getProperty("app.source.metafile.location"))

    // The metadata file is collected to the driver: one entry per output column.
    val header = metadata.select("col_name").rdd.map(x => x.getString(0).trim()).collect()
    val sizeOfColumn = metadata.select("size").rdd.map(x => x.getString(0).trim()).collect().map(_.toInt).toList

    val fields = header.map(fieldName => StructField(fieldName, StringType, nullable = true))
    val schema = StructType(fields)
    val df = spark.createDataFrame(rdd.map { x => splittinglines(sizeOfColumn, x) }, schema)

    df.createOrReplaceTempView("sourcedata")
    val sourcedatadf = spark.sql("select distinct * from sourcedata")
    sourcedatadf.show(false)
    println(sourcedatadf.count())
  } finally {
    // Release the local Spark context even when reading or parsing fails.
    spark.stop()
  }
}
} </pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-44871560808508769912019-10-04T20:20:00.002-07:002019-10-04T20:20:06.822-07:00Reading Excel Files into Spark Dataframe for Comparision<div dir="ltr" style="text-align: left;" trbidi="on">
<pre style="color: #333333; line-height: 16.25px;"><span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Properties</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.FileInputStream</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.SparkSession</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.Logger</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.Level</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Calendar</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.text.SimpleDateFormat</span>
/**
 * Compares two Excel sheets (the "bigplay" extract and the "mssql" extract) with Spark SQL.
 *
 * Both sheets are joined on the concatenation of the configured key columns. For every column
 * the two sheets have in common, a per-row verdict is written to an Excel report, and the
 * summed per-column differences are printed to the console.
 */
object ExcelJoinsComparision {

  /**
   * Runs the comparison using the settings in the hard-coded properties file.
   *
   * Properties read: hadoop.home.dir, spark.sql.warehouse.dir, org.axa.bigplaydataloc,
   * org.axa.mssqldatadoc, org.axa.keycols (comma separated), org.axa.bigplaysheet,
   * org.axa.mssqlsheet and org.axa.outputfile. The JVM is terminated with exit code 0 once
   * the run completes.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val filepath = "/Users/navee/eclipse-workspace/valid.properties"
    val configs = new Properties()
    // Close the stream once the properties are loaded so the file handle is not leaked.
    val configStream = new FileInputStream(filepath)
    try configs.load(configStream) finally configStream.close()

    System.setProperty("hadoop.home.dir", configs.getProperty("hadoop.home.dir"))
    System.setProperty("spark.sql.warehouse.dir", configs.getProperty("spark.sql.warehouse.dir"))

    val spark = SparkSession
      .builder()
      .appName("Excel sheets validation")
      .master("local")
      .getOrCreate()
    try {
      Logger.getRootLogger.setLevel(Level.ERROR)

      val bigplaydataloc = configs.getProperty("org.axa.bigplaydataloc")
      val mssqldataloc = configs.getProperty("org.axa.mssqldatadoc")
      val keyColsStr = configs.getProperty("org.axa.keycols")
      val bigplaySheet = configs.getProperty("org.axa.bigplaysheet")
      val mssqlSheet = configs.getProperty("org.axa.mssqlsheet")
      println(bigplaydataloc + " ," + mssqldataloc + " ," + keyColsStr + " ," + bigplaySheet + " ," + mssqlSheet)

      // One reader for both workbooks, so each side is read with its own sheet and location.
      def readSheet(sheetName: String, location: String) =
        spark.read.format("com.crealytics.spark.excel")
          .option("sheetName", sheetName)
          .option("useHeader", "true")
          .option("treatEmptyValuesAsNulls", "false")
          .option("inferSchema", "false")
          .option("location", location)
          .option("addColorColumns", "false")
          .load(location)

      val bigplayData = readSheet(bigplaySheet, bigplaydataloc)
      // Fix: the mssql workbook was previously read with the bigplay sheet name and location.
      val mssqlData = readSheet(mssqlSheet, mssqldataloc)

      val keyColumnsList = keyColsStr.split(",").map(x => x.toLowerCase()).toList
      bigplayData.createOrReplaceTempView("bigplaydata")
      mssqlData.createOrReplaceTempView("mssqldata")

      // Lower-cased column names present in both sheets, de-duplicated and sorted. These are
      // small driver-side arrays, so no Spark job is needed to intersect them.
      val bigplayColumns = bigplayData.columns.map(x => x.toLowerCase()).toSet
      val commonColums = mssqlData.columns.map(x => x.toLowerCase()).filter(bigplayColumns).distinct.sorted.toList

      val filters = filterStatement(keyColumnsList)
      val bpKeys = concatExpression("bp", keyColumnsList)

      val selcasewhenStatement = "select distinct " + bpKeys + " as bpkeys" + ", \n" + prepareCaseWhen(commonColums) + " from bigplaydata AS bp INNER JOIN mssqldata as ms ON (" + filters + " ) order by bpkeys"
      println(selcasewhenStatement)
      val outputData = spark.sql(selcasewhenStatement)
      outputData.write.format("com.crealytics.spark.excel")
        .option("sheetName", "bp_to_ms_validtn_results")
        .option("useHeader", "true")
        .option("dateFormat", "yy-mmm-dd")
        .option("timestampFormat", "mm-dd-yyyy hh:mm:ss")
        .mode("overwrite")
        .save(configs.getProperty("org.axa.outputfile"))

      import org.apache.spark.sql.functions._
      val calcAggQuery = "select " + bpKeys + " AS bpkeys, \n" + prepareSum(commonColums) + " From bigplaydata as bp INNER JOIN mssqldata AS ms ON (" + filters + ") ORDER BY bpkeys"
      // Fix: "HH" (0-23). The previous "hh" is a 12-hour clock and, without an AM/PM marker,
      // produced ambiguous timestamps.
      val formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
      val createdDate = formatter.format(Calendar.getInstance.getTime)
      val aggData = spark.sql(calcAggQuery)
      aggData.groupBy().sum().withColumn("created_date", lit(createdDate)).show(false)
    } finally {
      // Always release Spark, including when a read, query or write fails.
      spark.close()
    }
    System.exit(0)
  }

  /** Builds `concat(alias.c1,alias.c2,...)` from the trimmed string form of each key column. */
  private def concatExpression(alias: String, keyColumns: List[Any]): String =
    keyColumns.map(col => alias + "." + col.toString.trim).mkString("concat(", ",", ")")

  /**
   * Builds the join condition that equates the concatenated key columns of both sides.
   *
   * @param keyColumnsList key column names; each is converted with toString and trimmed
   * @return `concat(bp.k1,...)=concat(ms.k1,...)`
   */
  def filterStatement(keyColumnsList: List[Any]): String =
    concatExpression("bp", keyColumnsList) + "=" + concatExpression("ms", keyColumnsList)

  /**
   * Builds one CASE expression per column for the select list of the comparison query.
   *
   * Each expression yields "equal" when both sides match, "*equal*" when the difference is
   * between -1 and 1, and otherwise the two values followed by their difference in brackets.
   *
   * @param commonColList names of the columns to compare
   * @return the expressions joined by ", \n"; an empty string for an empty list
   */
  def prepareCaseWhen(commonColList: List[Any]): String =
    commonColList.map { col =>
      val c = col.toString
      " (case when bp." + c + " =  ms." + c + " then \"equal\" \nwhen (bp." + c + "-ms." + c +
        ") between -1 and 1 then \"*equal*\" \n else CONCAT(bp." + c + ", \"-\", ms." + c +
        ", \"(\", bp." + c + "-ms." + c + ", \")\") END ) AS " + c
    }.mkString(", \n")

  /**
   * Builds one `(ms.col-bp.col) AS col` difference expression per column.
   *
   * @param commonColList names of the columns to subtract
   * @return the expressions joined by ", \n"; an empty string for an empty list
   */
  def prepareSum(commonColList: List[Any]): String =
    commonColList.map { col =>
      val c = col.toString
      " (ms." + c + "-bp." + c + ") AS " + c
    }.mkString(", \n")
}</pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-37422398878431041322019-07-31T08:53:00.001-07:002019-07-31T08:53:34.309-07:00DataMasking on Spark Dataframe - using UDF<div dir="ltr" style="text-align: left;" trbidi="on">
<br />
I have applied data masking to a Spark DataFrame using UDFs.<br />
<br />
<br />
<pre style="color: #333333; line-height: 16.25px;"><span style="color: #007700;">import</span> <span style="color: #007700;">java</span><span style="color: #bb0066; font-weight: bold;">.util.Properties</span>
<span style="color: #007700;">import</span> <span style="color: #007700;">java</span><span style="color: #bb0066; font-weight: bold;">.io.FileInputStream</span>
<span style="color: #007700;">import</span> <span style="color: #007700;">org</span><span style="color: #bb0066; font-weight: bold;">.apache.spark.sql</span>.{ DataFrame, SaveMode, SparkSession }
<span style="color: #007700;">import</span> <span style="color: #007700;">org</span><span style="color: #bb0066; font-weight: bold;">.apache.log4j</span>.{ Level, Logger }
<span style="color: #007700;">import</span> <span style="color: #007700;">java</span><span style="color: #bb0066; font-weight: bold;">.security.MessageDigest</span>
<span style="color: #007700;">import</span> <span style="color: #007700;">java</span><span style="color: #bb0066; font-weight: bold;">.util.Base64</span>
/**
 * Demonstrates masking DataFrame columns through Spark SQL UDFs: the `id` column is replaced
 * by its MD5 hex digest and the `cc_number` column by its Base64 encoding.
 *
 * NOTE(review): Base64 is a reversible encoding and an unsalted MD5 of a short identifier can
 * be brute-forced; treat both as illustration rather than production-grade masking.
 */
object DataMaskingDataFrame {

  /** Returns the MD5 digest of the UTF-8 bytes of `string` as upper-case hex characters. */
  def getMD5(string: String): String =
    MessageDigest.getInstance("MD5").digest(string.getBytes("UTF-8")).map("%02X".format(_)).mkString

  /** Returns the Base64 encoding of `bytes`. */
  def encodeToBase64String(bytes: Array[Byte]): String = Base64.getEncoder.encodeToString(bytes)

  /**
   * Reads the sample CSV, registers the two masking UDFs and shows the first five rows
   * before and after masking.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val filepath = "/Users/pedam/eclipse-workspace/valid.properties"
    val configs = new Properties()
    // Close the stream once the properties are loaded so the file handle is not leaked.
    val configStream = new FileInputStream(filepath)
    try configs.load(configStream) finally configStream.close()

    System.setProperty("hadoop.home.dir", configs.getProperty("hadoop.home.dir"))
    System.setProperty("spark.sql.warehouse.dir", configs.getProperty("spark.sql.warehouse.dir"))

    val spark = SparkSession
      .builder()
      .appName("Udf_application")
      .master("local")
      .getOrCreate()
    try {
      Logger.getRootLogger.setLevel(Level.ERROR)

      val file = "D:\\Work\\source_data\\sample_ssn_data.csv"
      val source_df = spark.read.option("header", "true").
        option("inferSchema", "true").csv(file)
      source_df.show(5, false)
      source_df.createOrReplaceTempView("sample_ssn_data")

      // Empty CSV cells reach a String UDF as null; pass them through instead of throwing.
      val dm_with_clsr_one = (inputString: String) =>
        if (inputString == null) null else getMD5(inputString)
      val dm_with_clsr_two = (inputString: String) =>
        if (inputString == null) null else encodeToBase64String(inputString.getBytes("UTF-8"))

      spark.udf.register("DATA_MASK_ONE", dm_with_clsr_one)
      // Fix: DATA_MASK_TWO was registered with the MD5 closure, so the Base64 masking was never applied.
      spark.udf.register("DATA_MASK_TWO", dm_with_clsr_two)

      spark.sql("select id,DATA_MASK_ONE(id), gender, birthdate, maiden_name, lname, fname, address, city, state, zip, cc_number, DATA_MASK_TWO(cc_number), cc_cvc, cc_expiredate from sample_ssn_data").show(5, false)
    } finally {
      // Always release Spark, including when reading or querying fails.
      spark.close()
    }
  }
}</pre>
<pre style="color: #333333; line-height: 16.25px;"><span style="background-color: #ffaaaa; color: red;">
</span></pre>
<pre style="color: #333333; line-height: 16.25px;">Sample Output:</pre>
<pre style="line-height: 16.25px;"><pre style="color: #333333; line-height: 16.25px;">
</pre>
<pre style="color: #333333; line-height: 16.25px;">input csv data:</pre>
<pre style="line-height: 16.25px;"><span style="color: #333333;">+-----------+------+----------+-----------+------+--------+--------------------+-----------+-----+-----+------------+------------------+-------+-------------------+------+-------------+
|id |gender|birthdate |maiden_name|lname |fname |address |city |state|zip |phone |email |cc_type|cc_number |cc_cvc|cc_expiredate|
+-----------+------+----------+-----------+------+--------+--------------------+-----------+-----+-----+------------+------------------+-------+-------------------+------+-------------+
|172-32-1176|m |1958/04/21|Smith |White |Johnson |10932 Bigge Rd |Menlo Park |CA |94025|408 496-7223|jwhite@domain.com |m |5270 4267 6450 5516|123 |2010/06/25 |
|514-14-8905|f |1944/12/22|Amaker |Borden|Ashley |4469 Sherman Street |Goff |KS |66428|785-939-6046|aborden@domain.com|m |5370 4638 8881 3020|713 |2011/02/01 |
|213-46-8915|f |1958/04/21|Pinson |Green |Marjorie|309 63rd St. #411 |Oakland |CA |94618|415 986-7020|mgreen@domain.com |v |4916 9766 5240 6147|258 |2009/02/25 |
|524-02-7657|m |1962/03/25|Hall |Munsch|Jerome |2183 Roy Alley |Centennial |CO |80112|303-901-6123|jmunsch@domain.com|m |5180 3807 3679 8221|612 |2010/03/01 |
|489-36-8350|m |1964/09/06|Porter |Aragon|Robert |3181 White Oak Drive|Kansas City|MO |66215|816-645-6936|raragon@domain.com|v |4929 3813 3266 4295|911 |2011/12/01 |
+-----------+------+----------+-----------+------+--------+--------------------+-----------+-----+-----+------------+------------------+-------+-------------------+------+-------------+
only showing top 5 rows
</span></pre>
<pre style="line-height: 16.25px;"><span style="color: #333333;">After applying Data masking on two columns with two different criteria </span></pre>
<pre style="line-height: 16.25px;"><span style="color: #333333;">
+-----------+--------------------------------+------+----------+-----------+------+--------+--------------------+-----------+-----+-----+-------------------+--------------------------------+------+-------------+
|id |UDF:DATA_MASK_ONE(id) |gender|birthdate |maiden_name|lname |fname |address |city |state|zip |cc_number |UDF:DATA_MASK_TWO(cc_number) |cc_cvc|cc_expiredate|
+-----------+--------------------------------+------+----------+-----------+------+--------+--------------------+-----------+-----+-----+-------------------+--------------------------------+------+-------------+
|172-32-1176|4DDA8A5D35947B12B948EFF6EF14579A|m |1958/04/21|Smith |White |Johnson |10932 Bigge Rd |Menlo Park |CA |94025|5270 4267 6450 5516|4F88DDF6489891710B9C5A5D8412129E|123 |2010/06/25 |
|514-14-8905|1404970BFBB5B496EDE9C0BD7AD18CCF|f |1944/12/22|Amaker |Borden|Ashley |4469 Sherman Street |Goff |KS |66428|5370 4638 8881 3020|771ACE4DE4BE795449D7357F84996890|713 |2011/02/01 |
|213-46-8915|1510403E23A69CF50BF1C28E47DCD1E0|f |1958/04/21|Pinson |Green |Marjorie|309 63rd St. #411 |Oakland |CA |94618|4916 9766 5240 6147|A2FAAE57E13DD293049201AD50EA8E64|258 |2009/02/25 |
|524-02-7657|379A83190FE989FE68FCBFA1379836D7|m |1962/03/25|Hall |Munsch|Jerome |2183 Roy Alley |Centennial |CO |80112|5180 3807 3679 8221|A1AC21427F36A1B5DF23AB67071DC674|612 |2010/03/01 |
|489-36-8350|014727AB8058ABCFC4D7B7810D593329|m |1964/09/06|Porter |Aragon|Robert |3181 White Oak Drive|Kansas City|MO |66215|4929 3813 3266 4295|67389F909BC746133BB0E880D65EF640|911 |2011/12/01 |
+-----------+--------------------------------+------+----------+-----------+------+--------+--------------------+-----------+-----+-----+-------------------+--------------------------------+------+-------------+
only showing top 5 rows</span></pre>
<pre style="color: #333333; line-height: 16.25px;">
</pre>
</pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-33179088725047571832019-07-31T06:02:00.000-07:002019-07-31T17:48:36.497-07:00Encryption and Decryption of the messages in Scala<div dir="ltr" style="text-align: left;" trbidi="on">
<br />
<br />
Here are the encryption and decryption techniques we commonly come across when masking data during transformation; which one to use depends on the requirement.<br />
<br />
Base64 - Encryption/Decryption, used with closures.<br />
AES - Encryption/Decryption with a secret key text, implemented as Scala functions.<br />
<br />
<pre style="color: #333333; line-height: 16.25px;">import java.security.spec.<span style="color: #996633;">KeySpec</span>
import javax.crypto.spec.<span style="color: #996633;">DESedeKeySpec</span>
import javax.crypto.spec.<span style="color: #996633;">SecretKeySpec</span>
import java.security.<span style="color: #996633;">MessageDigest</span>
import java.util.<span style="color: #996633;">Base64</span>
import javax.crypto.<span style="color: #996633;">Mac</span>
import javax.crypto.spec.<span style="color: #996633;">SecretKeySpec</span>
import java.nio.charset.<span style="color: #996633;">StandardCharsets</span>
import javax.crypto.<span style="color: #996633;">Cipher</span>
import javax.crypto.spec.<span style="color: #996633;">IvParameterSpec</span>
import javax.crypto.<span style="color: #996633;">KeyGenerator</span>
import javax.crypto.<span style="color: #996633;">SecretKey</span>
import java.util.<span style="color: #996633;">Arrays</span>
object <span style="color: #996633;">DataMaskingUDF</span> {
val ivspec = new <span style="color: #996633;">IvParameterSpec</span>(<span style="color: #996633;">Array</span>[<span style="color: #996633;">Byte</span>](<span style="color: #0000dd; font-weight: bold;">0</span>, <span style="color: #0000dd; font-weight: bold;">1</span>, <span style="color: #0000dd; font-weight: bold;">0</span>, <span style="color: #0000dd; font-weight: bold;">2</span>, <span style="color: #0000dd; font-weight: bold;">0</span>, <span style="color: #0000dd; font-weight: bold;">3</span>, <span style="color: #0000dd; font-weight: bold;">0</span>, <span style="color: #0000dd; font-weight: bold;">4</span>, <span style="color: #0000dd; font-weight: bold;">0</span>, <span style="color: #0000dd; font-weight: bold;">5</span>, <span style="color: #0000dd; font-weight: bold;">0</span>, <span style="color: #0000dd; font-weight: bold;">6</span>, <span style="color: #0000dd; font-weight: bold;">0</span>, <span style="color: #0000dd; font-weight: bold;">7</span>, <span style="color: #0000dd; font-weight: bold;">0</span>, <span style="color: #0000dd; font-weight: bold;">8</span>))
def main(<span style="color: #0e84b5; font-weight: bold;">args</span>: <span style="color: #996633;">Array</span>[<span style="color: #996633;">String</span>]): <span style="color: #996633;">Unit</span> = {
val message = <span style="background-color: #fff0f0;">"this is first class message"</span>
val algorithmPad = <span style="background-color: #fff0f0;">"AES/CBC/PKCS5Padding"</span>
val algorithm = <span style="background-color: #fff0f0;">"AES"</span>
val secretWord = <span style="background-color: #fff0f0;">"Bar12345Bar12345"</span>
println(<span style="background-color: #fff0f0;">"Actual String is: "</span> + message)
println(<span style="background-color: #fff0f0;">"MD5 of the String is: "</span> + getMD5(message))
println(<span style="background-color: #fff0f0;">"Encoding of input to Base64 is: "</span> + encodeToBase64String(message.getBytes(<span style="background-color: #fff0f0;">"UTF-8"</span>)))
println(<span style="background-color: #fff0f0;">"Decoding of input to Base64 String is: "</span> + decodeBase64ToString(encodeToBase64String(message.getBytes(<span style="background-color: #fff0f0;">"UTF-8"</span>))))
println(<span style="background-color: #fff0f0;">"Encrypting using SHA-256 algorithm: "</span>+ encryptToHmacSHA256(secretWord, message).map(<span style="background-color: #fff0f0;">"%02X"</span>.format(_)).mkString)
println(algorithm + <span style="background-color: #fff0f0;">" encryption with "</span> + secretWord + <span style="background-color: #fff0f0;">" as secret key : "</span> + arrayBytesToString(encrypt(message, secretWord, algorithm)))
println(algorithm + <span style="background-color: #fff0f0;">" decryption with "</span> + secretWord + <span style="background-color: #fff0f0;">" as secret key : "</span> + decrypt(encrypt(message, secretWord, algorithm), secretWord, algorithm))
}
def getMD5(<span style="color: #0e84b5; font-weight: bold;">string</span>: <span style="color: #996633;">String</span>): <span style="color: #996633;">String</span> = <span style="color: #996633;">MessageDigest</span>.getInstance(<span style="background-color: #fff0f0;">"MD5"</span>).digest(string.getBytes(<span style="background-color: #fff0f0;">"UTF-8"</span>)).map(<span style="background-color: #fff0f0;">"%02X"</span>.format(_)).mkString
def encodeToBase64String(<span style="color: #0e84b5; font-weight: bold;">bytes</span>: <span style="color: #996633;">Array</span>[<span style="color: #996633;">Byte</span>]): <span style="color: #996633;">String</span> = <span style="color: #996633;">Base64</span>.getEncoder.encodeToString(bytes)
def decodeBase64ToString(<span style="color: #0e84b5; font-weight: bold;">inputStr</span>: <span style="color: #996633;">String</span>): <span style="color: #996633;">String</span> = new <span style="color: #996633;">String</span>(<span style="color: #996633;">Base64</span>.getDecoder.decode(inputStr.getBytes(<span style="background-color: #fff0f0;">"UTF-8"</span>)), <span style="color: #996633;">StandardCharsets</span>.<span style="color: #996633;">UTF_8</span>)
def encrypt(<span style="color: #0e84b5; font-weight: bold;">inputText</span>: <span style="color: #996633;">String</span>, <span style="color: #0e84b5; font-weight: bold;">secretStr</span>: <span style="color: #996633;">String</span>, <span style="color: #0e84b5; font-weight: bold;">algorithm</span>: <span style="color: #996633;">String</span>): <span style="color: #996633;">Array</span>[<span style="color: #996633;">Byte</span>] = {
val secKeySpec = getSecretKey(secretStr, algorithm)
val cipher = <span style="color: #996633;">Cipher</span>.getInstance(algorithm)
cipher.init(<span style="color: #996633;">Cipher</span>.<span style="color: #996633;">ENCRYPT_MODE</span>, secKeySpec)
cipher.doFinal(inputText.getBytes(<span style="background-color: #fff0f0;">"UTF-8"</span>))
}
def decrypt(<span style="color: #0e84b5; font-weight: bold;">inputByteArray</span>: <span style="color: #996633;">Array</span>[<span style="color: #996633;">Byte</span>], <span style="color: #0e84b5; font-weight: bold;">secretStr</span>: <span style="color: #996633;">String</span>, <span style="color: #0e84b5; font-weight: bold;">algorithm</span>: <span style="color: #996633;">String</span>): <span style="color: #996633;">String</span> = {
val secKeyspec = getSecretKey(secretStr, algorithm)
val cipher = <span style="color: #996633;">Cipher</span>.getInstance(algorithm)
cipher.init(<span style="color: #996633;">Cipher</span>.<span style="color: #996633;">DECRYPT_MODE</span>, secKeyspec)
val newStr = new <span style="color: #996633;">String</span>(cipher.doFinal(inputByteArray))
newStr
}
def arrayBytesToString(<span style="color: #0e84b5; font-weight: bold;">input</span>: <span style="color: #996633;">Array</span>[<span style="color: #996633;">Byte</span>]): <span style="color: #996633;">String</span> = {
input.map(<span style="background-color: #fff0f0;">"%02X"</span>.format(_)).mkString
}
def getSecretKey(<span style="color: #0e84b5; font-weight: bold;">inputStr</span>: <span style="color: #996633;">String</span>, <span style="color: #0e84b5; font-weight: bold;">algorithm</span>: <span style="color: #996633;">String</span>): <span style="color: #996633;">SecretKeySpec</span> = {
val temp = <span style="color: #996633;">Arrays</span>.copyOf(inputStr.getBytes, <span style="color: #0000dd; font-weight: bold;">16</span>)
val secretKey = new <span style="color: #996633;">SecretKeySpec</span>(temp, algorithm)
secretKey
}
//<span style="color: #996633;">SHA</span> (<span style="color: #996633;">Secure</span> <span style="color: #996633;">Hash</span> <span style="color: #996633;">Algorithm</span>) is <span style="color: black; font-weight: bold;">not</span> a cipher; it is a <span style="color: #996633;">Digest</span>, so its output cannot be reversed.
def encryptToHmacSHA256(<span style="color: #0e84b5; font-weight: bold;">secret</span>: <span style="color: #996633;">String</span>, <span style="color: #0e84b5; font-weight: bold;">content</span>: <span style="color: #996633;">String</span>): <span style="color: #996633;">Array</span>[<span style="color: #996633;">Byte</span>] = {
val secretKey = new <span style="color: #996633;">SecretKeySpec</span>(secret.getBytes(<span style="background-color: #fff0f0;">"UTF-8"</span>), <span style="background-color: #fff0f0;">"HmacSHA256"</span>)
val mac = <span style="color: #996633;">Mac</span>.getInstance(<span style="background-color: #fff0f0;">"HmacSHA256"</span>)
mac.init(secretKey)
val finalBytes = mac.doFinal(content.getBytes(<span style="background-color: #fff0f0;">"UTF-8"</span>))
finalBytes
}
}</pre>
<pre style="color: #333333; line-height: 16.25px;"></pre>
<pre style="color: #333333; line-height: 16.25px;">Sample Output:</pre>
<pre style="color: #333333; line-height: 16.25px;"></pre>
<pre style="color: #333333; line-height: 16.25px;"></pre>
<pre style="color: #333333; line-height: 16.25px;"><pre style="line-height: 16.25px;"><span style="color: #007700;">Actual</span> <span style="color: #007700;">String</span> <span style="color: #007700;">is</span>: <span style="color: #007700;">this</span> <span style="color: #007700;">is</span> <span style="color: #007700;">first</span> <span style="color: #007700;">class</span> <span style="color: #007700;">message</span>
<span style="color: #007700;">MD5</span> <span style="color: #007700;">of</span> <span style="color: #007700;">the</span> <span style="color: #007700;">String</span> <span style="color: #007700;">is</span>: <span style="color: #007700;">1C9F1E2EB0D42E2DE1F04F6DA4F7814E</span>
<span style="color: #007700;">Encoding</span> <span style="color: #007700;">of</span> <span style="color: #007700;">input</span> <span style="color: #007700;">to</span> <span style="color: #007700;">Base64</span> <span style="color: #007700;">is</span>: <span style="color: #007700;">dGhpcyBpcyBmaXJzdCBjbGFzcyBtZXNzYWdl</span>
<span style="color: #007700;">Decoding</span> <span style="color: #007700;">of</span> <span style="color: #007700;">input</span> <span style="color: #007700;">to</span> <span style="color: #007700;">Base64</span> <span style="color: #007700;">String</span> <span style="color: #007700;">is</span>: <span style="color: #007700;">this</span> <span style="color: #007700;">is</span> <span style="color: #007700;">first</span> <span style="color: #007700;">class</span> <span style="color: #007700;">message</span>
<span style="color: #007700;">Encrypting</span> <span style="color: #007700;">using</span> <span style="color: #007700;">SHA-256</span> <span style="color: #007700;">algorithm</span>: <span style="color: #007700;">ED332B5BEEC5880220BDA6A0DCC4D75864D471F531057EEAB90B6ACF7A4DC335</span>
<span style="color: #007700;">AES</span> <span style="color: #007700;">encryption</span> <span style="color: #007700;">with</span> <span style="color: #007700;">Bar12345Bar12345</span> <span style="color: #007700;">as</span> <span style="color: #007700;">secret</span> <span style="color: #007700;">key</span> : <span style="color: #007700;">72FC93F78CFBBA81FAAACB01AD00C944C5A39C76BAF42E2423A99EDC224D65C1</span>
<span style="color: #007700;">AES</span> <span style="color: #007700;">decryption</span> <span style="color: #007700;">with</span> <span style="color: #007700;">Bar12345Bar12345</span> <span style="color: #007700;">as</span> <span style="color: #007700;">secret</span> <span style="color: #007700;">key</span> : <span style="color: #007700;">this</span> <span style="color: #007700;">is</span> <span style="color: #007700;">first</span> <span style="color: #007700;">class</span> <span style="color: #007700;">message</span></pre>
</pre>
<pre style="color: #333333; line-height: 16.25px;"></pre>
<pre style="color: #333333; line-height: 16.25px;"></pre>
<pre style="color: #333333; line-height: 16.25px;"></pre>
<pre style="color: #333333; line-height: 16.25px;"></pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-50560338004763687792019-03-09T20:49:00.000-08:002019-07-26T16:29:39.537-07:00Spark Memory Calculator from available cluster<div dir="ltr" style="text-align: left;" trbidi="on">
Suppose I have a Cluster configuration as below:<br />
<br />
clusterMember|RAM|noofCores<br />
c01|64|16<br />
c02|64|16<br />
c03|64|16<br />
c04|64|16<br />
c05|64|16<br />
c06|64|16<br />
c07|64|16<br />
<br />
The above is a pipe-separated file. Suppose you want to work out the Spark memory settings for this cluster,<br />
such as the executor memory left after excluding the cores set aside for the application master, YARN, etc. The code below suggests the maximum executor memory to the user.<br />
<br />
<pre style="color: #333333; line-height: 16.25px;"><pre style="line-height: 16.25px;">package reusable_utils;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
<span style="color: #003388; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">class</span> <span style="color: #bb0066; font-weight: bold;">SparkMemoryCalculator</span> {
<span style="color: #003388; font-weight: bold;">public</span> static final <span style="color: #007020;">Integer</span> <span style="color: #003366; font-weight: bold;">OS_RAM_SIZE</span> = <span style="color: #0000dd; font-weight: bold;">1</span>;
<span style="color: #003388; font-weight: bold;">public</span> static final <span style="color: #007020;">Integer</span> <span style="color: #003366; font-weight: bold;">OS_CORES</span> = <span style="color: #0000dd; font-weight: bold;">1</span>;
<span style="color: #003388; font-weight: bold;">public</span> static final <span style="color: #007020;">Integer</span> <span style="color: #003366; font-weight: bold;">OPTIMAL_NO_OF_TASKS</span> = <span style="color: #0000dd; font-weight: bold;">5</span>; <span style="background-color: #fff0ff; color: black;">//</span> <span style="color: #0000dd; font-weight: bold;">1</span> task per <span style="color: #0000dd; font-weight: bold;">1</span> core, <span style="color: #008800; font-weight: bold;">for</span> best hdfs throughput <span style="color: #0000dd; font-weight: bold;">5</span> tasks
<span style="color: #003388; font-weight: bold;">public</span> static final <span style="color: #007020;">Integer</span> <span style="color: #003366; font-weight: bold;">EXECUTOR_FOR_APPLICATION_MASTER</span> = <span style="color: #0000dd; font-weight: bold;">1</span>; <span style="background-color: #fff0ff; color: black;">//</span> <span style="color: #0000dd; font-weight: bold;">1</span> core reserved <span style="color: #008800; font-weight: bold;">for</span> <span style="color: #003366; font-weight: bold;">YARN</span> application master.
<span style="color: #003388; font-weight: bold;">public</span> static void main(<span style="color: #007020;">String</span>[] args) {
<span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #007020;">Integer</span>>> clusterMap = setupCluster();
<span style="color: #003366; font-weight: bold;">ArrayList</span><<span style="color: #007020;">Integer</span>> availableRamList = <span style="color: #003388; font-weight: bold;">new</span> <span style="color: #003366; font-weight: bold;">ArrayList</span><<span style="color: #007020;">Integer</span>>();
<span style="color: #003366; font-weight: bold;">ArrayList</span><<span style="color: #007020;">Integer</span>> availableCoreList = <span style="color: #003388; font-weight: bold;">new</span> <span style="color: #003366; font-weight: bold;">ArrayList</span><<span style="color: #007020;">Integer</span>>();
<span style="color: #003366; font-weight: bold;">System</span>.out.println(<span style="background-color: #fff0f0;">"Cluster Size : "</span> + clusterMap.size() + <span style="background-color: #fff0f0;">" nodes."</span>);
<span style="color: #007020;">Integer</span> clustsiz = clusterMap.size();
<span style="color: #008800; font-weight: bold;">for</span> (int i = <span style="color: #0000dd; font-weight: bold;">1</span>; i < clustsiz + <span style="color: #0000dd; font-weight: bold;">1</span>; i++) {
<span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #007020;">Integer</span>> testMap = clusterMap.get(<span style="background-color: #fff0f0;">"node"</span> + i);
<span style="color: #007020;">Integer</span> avblRAMperNode = <span style="color: #007020;">Integer</span>.valueOf(testMap.get(<span style="background-color: #fff0f0;">"ramSize"</span> + i)) - <span style="color: #003366; font-weight: bold;">OS_RAM_SIZE</span>;
<span style="color: #007020;">Integer</span> numberOfCoresperNode = testMap.get(<span style="background-color: #fff0f0;">"noOfCores"</span> + i) - <span style="color: #003366; font-weight: bold;">OS_CORES</span>;
availableRamList.add(avblRAMperNode);
availableCoreList.add(numberOfCoresperNode);
}
<span style="color: #007020;">Integer</span> ramTotal = availableRamList.stream().mapToInt(a -> a).sum();
<span style="color: #007020;">Integer</span> totalCores = availableCoreList.stream().mapToInt(a -> a).sum();
<span style="color: #003366; font-weight: bold;">System</span>.out.println(<span style="background-color: #fff0f0;">"Available Total RAM : "</span> + ramTotal + <span style="background-color: #fff0f0;">" gB."</span> + <span style="background-color: #fff0f0;">"</span><span style="background-color: #fff0f0; color: #666666; font-weight: bold;">\n</span><span style="background-color: #fff0f0;">Available Total Cores : "</span> + totalCores);
<span style="color: #007020;">Integer</span> avbleExecs = availableExecutorsIncluster(totalCores);
<span style="color: #003366; font-weight: bold;">System</span>.out.println(<span style="background-color: #fff0f0;">"Total Number of available Executors for user usage : "</span> + avbleExecs);
<span style="color: #003366; font-weight: bold;">BigDecimal</span> v1 = <span style="color: #003388; font-weight: bold;">new</span> <span style="color: #003366; font-weight: bold;">BigDecimal</span>(avbleExecs);
<span style="color: #003366; font-weight: bold;">BigDecimal</span> v2 = <span style="color: #003388; font-weight: bold;">new</span> <span style="color: #003366; font-weight: bold;">BigDecimal</span>(clusterMap.size());
<span style="background-color: #fff0ff; color: black;">//</span> <span style="color: #003366; font-weight: bold;">System</span>.out.println(v1.divide(v2, <span style="color: #0000dd; font-weight: bold;">2</span>, <span style="color: #003366; font-weight: bold;">RoundingMode</span>.HALF_UP).toPlainString());
<span style="color: #003366; font-weight: bold;">BigDecimal</span> noOfexecsPerNode = v1.divide(v2, <span style="color: #0000dd; font-weight: bold;">2</span>, <span style="color: #003366; font-weight: bold;">RoundingMode</span>.HALF_UP);
<span style="color: #003366; font-weight: bold;">System</span>.out.println(<span style="background-color: #fff0f0;">"Total Number of Executors per node are : "</span> + <span style="color: #003366; font-weight: bold;">Math</span>.round(noOfexecsPerNode.floatValue()));
<span style="color: #007020;">Integer</span> availableMemoryPerExecutor = ramTotal / avbleExecs;
<span style="color: #003366; font-weight: bold;">System</span>.out.println(<span style="background-color: #fff0f0;">"Availbale Memory per Executor : "</span> + availableMemoryPerExecutor + <span style="background-color: #fff0f0;">" gB."</span>);
<span style="color: #003366; font-weight: bold;">BigDecimal</span> memOverHead = memoryOverhead(availableMemoryPerExecutor);
<span style="background-color: #fff0ff; color: black;">//</span> <span style="color: #003366; font-weight: bold;">Long</span> L = <span style="color: #003366; font-weight: bold;">Math</span>.round(memOverHead);
<span style="background-color: #fff0ff; color: black;">//</span> int mo = <span style="color: #007020;">Integer</span>.valueOf(L.intValue());
<span style="color: #003366; font-weight: bold;">System</span>.out.println(<span style="background-color: #fff0f0;">"Memory OverHead : "</span> + memOverHead.floatValue());
<span style="color: #003366; font-weight: bold;">System</span>.out.println(<span style="background-color: #fff0f0;">"round : "</span> + <span style="color: #003366; font-weight: bold;">Math</span>.ceil(memOverHead.floatValue()));
<span style="color: #003366; font-weight: bold;">BigDecimal</span> finalMemoryPerExecutor = <span style="color: #003366; font-weight: bold;">BigDecimal</span>.valueOf(availableMemoryPerExecutor)
.subtract(<span style="color: #003366; font-weight: bold;">BigDecimal</span>.valueOf(<span style="color: #003366; font-weight: bold;">Math</span>.ceil(memOverHead.floatValue())));
<span style="color: #003366; font-weight: bold;">System</span>.out.println(<span style="background-color: #fff0f0;">"SparkExecutor Memory after memoryOverhead's will between be : </span><span style="background-color: #fff0f0; color: #666666; font-weight: bold;">\n</span><span style="background-color: #fff0f0;">"</span>
+ finalMemoryPerExecutor.subtract(<span style="color: #003366; font-weight: bold;">BigDecimal</span>.valueOf(<span style="color: #0000dd; font-weight: bold;">1</span>)) + <span style="background-color: #fff0f0;">" ~ "</span> + finalMemoryPerExecutor + <span style="background-color: #fff0f0;">" gB."</span>);
}
<span style="color: #003388; font-weight: bold;">private</span> static <span style="color: #003366; font-weight: bold;">BigDecimal</span> memoryOverhead(<span style="color: #007020;">Integer</span> availableMemoryPerExecutor) {
<span style="color: #003366; font-weight: bold;">Double</span> memOverHead = <span style="color: #0000dd; font-weight: bold;">0</span>.<span style="color: #4400ee; font-weight: bold;">07</span> * availableMemoryPerExecutor;
<span style="color: #008800; font-weight: bold;">if</span> (memOverHead > <span style="color: #0000dd; font-weight: bold;">0</span>.<span style="color: #0000dd; font-weight: bold;">384</span>)
<span style="color: #008800; font-weight: bold;">return</span> <span style="color: #003366; font-weight: bold;">BigDecimal</span>.valueOf(memOverHead);
<span style="color: #008800; font-weight: bold;">else</span>
<span style="color: #008800; font-weight: bold;">return</span> <span style="color: #003366; font-weight: bold;">BigDecimal</span>.valueOf(<span style="color: #0000dd; font-weight: bold;">0</span>.<span style="color: #0000dd; font-weight: bold;">384</span>);
}
<span style="color: #003388; font-weight: bold;">private</span> static <span style="color: #007020;">Integer</span> availableExecutorsIncluster(<span style="color: #007020;">Integer</span> totalCores) {
int noOfExecutors = totalCores / <span style="color: #003366; font-weight: bold;">OPTIMAL_NO_OF_TASKS</span>;
<span style="color: #008800; font-weight: bold;">return</span> noOfExecutors - <span style="color: #003366; font-weight: bold;">EXECUTOR_FOR_APPLICATION_MASTER</span>;
}
<span style="color: #003388; font-weight: bold;">private</span> static <span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #007020;">Integer</span>>> setupCluster() {
<span style="color: #007020;">String</span> csvFile = <span style="background-color: #fff0f0;">"D:</span><span style="background-color: #fff0f0; color: #666666; font-weight: bold;">\\</span><span style="background-color: #fff0f0;">Work</span><span style="background-color: #fff0f0; color: #666666; font-weight: bold;">\\</span><span style="background-color: #fff0f0;">Documents</span><span style="background-color: #fff0f0; color: #666666; font-weight: bold;">\\</span><span style="background-color: #fff0f0;">Work</span><span style="background-color: #fff0f0; color: #666666; font-weight: bold;">\\</span><span style="background-color: #fff0f0;">10node_cluster.csv"</span>;
<span style="color: #003366; font-weight: bold;">BufferedReader</span> br = null;
<span style="color: #007020;">String</span> line = <span style="background-color: #fff0f0;">""</span>;
<span style="color: #007020;">String</span> cvsSplitBy = <span style="background-color: #fff0f0;">"</span><span style="background-color: #fff0f0; color: #666666; font-weight: bold;">\\</span><span style="background-color: #fff0f0;">|"</span>;
<span style="background-color: #fff0ff; color: black;">//</span> int counter = <span style="color: #0000dd; font-weight: bold;">0</span>;
<span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #007020;">Integer</span>>> nodeMap = <span style="color: #003388; font-weight: bold;">new</span> <span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #007020;">Integer</span>>>();
try {
br = <span style="color: #003388; font-weight: bold;">new</span> <span style="color: #003366; font-weight: bold;">BufferedReader</span>(<span style="color: #003388; font-weight: bold;">new</span> <span style="color: #003366; font-weight: bold;">FileReader</span>(csvFile));
<span style="color: #008800; font-weight: bold;">for</span> (int i = <span style="color: #0000dd; font-weight: bold;">0</span>; (line = br.readLine()) != null; i++) {
<span style="color: #008800; font-weight: bold;">if</span> (i != <span style="color: #0000dd; font-weight: bold;">0</span>) { <span style="background-color: #fff0ff; color: black;">//</span> skipping first row of the input file
<span style="color: #007020;">String</span>[] dataArray = line.split(cvsSplitBy);
<span style="color: #007020;">String</span> nodeName = <span style="background-color: #fff0f0;">"node"</span> + i;
<span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #007020;">Integer</span>> configMap = <span style="color: #003388; font-weight: bold;">new</span> <span style="color: #003366; font-weight: bold;">HashMap</span><<span style="color: #007020;">String</span>, <span style="color: #007020;">Integer</span>>();
<span style="color: #007020;">String</span> ramSize = dataArray[<span style="color: #0000dd; font-weight: bold;">1</span>];
<span style="color: #003366; font-weight: bold;">Double</span> intRam = <span style="color: #003366; font-weight: bold;">Double</span>.valueOf(ramSize);
<span style="color: #003366; font-weight: bold;">Long</span> L = <span style="color: #003366; font-weight: bold;">Math</span>.round(intRam);
int mo = <span style="color: #007020;">Integer</span>.valueOf(L.intValue());
configMap.put(<span style="background-color: #fff0f0;">"ramSize"</span> + i, mo);
<span style="color: #007020;">String</span> noOfCores = dataArray[<span style="color: #0000dd; font-weight: bold;">2</span>];
<span style="color: #007020;">Integer</span> intnoOfCores = <span style="color: #007020;">Integer</span>.valueOf(noOfCores);
configMap.put(<span style="background-color: #fff0f0;">"noOfCores"</span> + i, intnoOfCores);
nodeMap.put(nodeName, configMap);
}
}
} <span style="color: #003388; font-weight: bold;">catch</span> (<span style="color: #003366; font-weight: bold;">FileNotFoundException</span> e) {
e.printStackTrace();
} <span style="color: #003388; font-weight: bold;">catch</span> (<span style="color: #003366; font-weight: bold;">IOException</span> e) {
e.printStackTrace();
} finally {
<span style="color: #008800; font-weight: bold;">if</span> (br != null) {
try {
br.close();
} <span style="color: #003388; font-weight: bold;">catch</span> (<span style="color: #003366; font-weight: bold;">IOException</span> e2) {
e2.printStackTrace();
}
}
}
<span style="color: #008800; font-weight: bold;">return</span> nodeMap;
}
}</pre>
</pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-11091318644659774272019-03-08T13:38:00.001-08:002019-08-11T06:37:18.125-07:00Spark to MySql Connection - Portforwarding <div dir="ltr" style="text-align: left;" trbidi="on">
<pre style="color: #333333; line-height: 16.25px;">Add this to your POM.xml</pre>
<pre style="color: #333333; line-height: 16.25px;"> <span style="color: #888888;"><!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>mysql<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>mysql-connector-java<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>5.1.47<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #888888;"><!-- https://mvnrepository.com/artifact/com.jcraft/jsch --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>com.jcraft<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>jsch<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>0.1.51<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span></pre>
<br />
<br />
<pre style="color: #333333; line-height: 16.25px;"><span style="color: #008800; font-weight: bold;">package</span> <span style="color: #0e84b5; font-weight: bold;">reading_dbms.mysql</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Properties</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.FileInputStream</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.HashMap</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.</span>{ <span style="color: #bb0066; font-weight: bold;">DataFrame</span>, <span style="color: #bb0066; font-weight: bold;">SaveMode</span>, <span style="color: #bb0066; font-weight: bold;">SparkSession</span> }
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.</span>{ <span style="color: #bb0066; font-weight: bold;">Level</span>, <span style="color: #bb0066; font-weight: bold;">Logger</span> }
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.JSch</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.Session</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.sql.Connection</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.sql.DriverManager</span>
<span style="color: #888888;">// Demo: reads a MySQL table into Spark through an SSH local port forward opened with JSch.</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">SSHSparkScala</span> {
  <span style="color: #888888;">// Loads the properties file, opens the tunnel, shows movielens.ratings and releases everything.</span>
  <span style="color: #888888;">// args: command-line arguments, not used.</span>
  <span style="color: #008800; font-weight: bold;">def</span> main(args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span>[<span style="color: #333399; font-weight: bold;">String</span>])<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Unit</span> = {
    <span style="color: #008800; font-weight: bold;">val</span> filepath <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"/Users/pedam/eclipse-workspace/valid.properties"</span>
    <span style="color: #008800; font-weight: bold;">val</span> configs <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">Properties</span>()
    <span style="color: #888888;">// Close the stream as soon as the properties are loaded.</span>
    <span style="color: #008800; font-weight: bold;">val</span> propsIn <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">FileInputStream</span>(filepath)
    <span style="color: #008800; font-weight: bold;">try</span> configs.load(propsIn) <span style="color: #008800; font-weight: bold;">finally</span> propsIn.close()
    <span style="color: #bb0066; font-weight: bold;">System</span>.setProperty(<span style="background-color: #fff0f0;">"hadoop.home.dir"</span>, configs.getProperty(<span style="background-color: #fff0f0;">"hadoop.home.dir"</span>))
    <span style="color: #bb0066; font-weight: bold;">System</span>.setProperty(<span style="background-color: #fff0f0;">"spark.sql.warehouse.dir"</span>, configs.getProperty(<span style="background-color: #fff0f0;">"spark.sql.warehouse.dir"</span>))
    <span style="color: #008800; font-weight: bold;">val</span> spark <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">SparkSession</span>
      .builder()
      .config(<span style="background-color: #fff0f0;">"spark.some.config.option"</span>, <span style="background-color: #fff0f0;">"some-value"</span>)
      .appName(<span style="background-color: #fff0f0;">"SSHSparkScala"</span>)
      .master(<span style="background-color: #fff0f0;">"local[*]"</span>)
      .getOrCreate()
    <span style="color: #bb0066; font-weight: bold;">Logger</span>.getRootLogger.setLevel(<span style="color: #bb0066; font-weight: bold;">Level</span>.<span style="color: #bb0066; font-weight: bold;">ERROR</span>)
    <span style="color: #888888;">// Stays null until the session is created so the finally block knows whether to disconnect.</span>
    <span style="color: #008800; font-weight: bold;">var</span> session<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Session</span> = <span style="color: #008800; font-weight: bold;">null</span>
    <span style="color: #888888;">// Tunnel: localhost:lport on this machine is forwarded to rhost:rport via the SSH host.</span>
    <span style="color: #008800; font-weight: bold;">val</span> lport <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #0000dd; font-weight: bold;">5656</span>
    <span style="color: #008800; font-weight: bold;">val</span> rhost <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"10.0.2.15"</span>
    <span style="color: #008800; font-weight: bold;">val</span> host <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"192.168.56.101"</span>
    <span style="color: #008800; font-weight: bold;">val</span> rport <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #0000dd; font-weight: bold;">3306</span>
    <span style="color: #888888;">// NOTE(review): credentials are hard-coded for the demo; load them from configuration in real use.</span>
    <span style="color: #008800; font-weight: bold;">val</span> user <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"cloudera"</span>
    <span style="color: #008800; font-weight: bold;">val</span> password <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"cloudera"</span>
    <span style="color: #008800; font-weight: bold;">val</span> dbuserName <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"root"</span>
    <span style="color: #008800; font-weight: bold;">val</span> dbpassword <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"cloudera"</span>
    <span style="color: #008800; font-weight: bold;">val</span> url <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"jdbc:mysql://localhost:"</span> + lport + <span style="background-color: #fff0f0;">"/movielens"</span>
    <span style="color: #008800; font-weight: bold;">val</span> driverName <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"com.mysql.jdbc.Driver"</span>
    <span style="color: #008800; font-weight: bold;">val</span> jdbcConfigs <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">Properties</span>()
    jdbcConfigs.put(<span style="background-color: #fff0f0;">"user"</span>, dbuserName)
    jdbcConfigs.put(<span style="background-color: #fff0f0;">"password"</span>, dbpassword)
    <span style="color: #888888;">// driverName was declared but never used; pass it to Spark as the JDBC "driver" option.</span>
    jdbcConfigs.put(<span style="background-color: #fff0f0;">"driver"</span>, driverName)
    <span style="color: #888888;">// The loaded file properties double as the JSch config, so any JSch keys in the file also apply.</span>
    <span style="color: #888888;">// NOTE(review): disabling StrictHostKeyChecking skips host-key verification; acceptable for a local demo only.</span>
    configs.put(<span style="background-color: #fff0f0;">"StrictHostKeyChecking"</span>, <span style="background-color: #fff0f0;">"no"</span>)
    <span style="color: #008800; font-weight: bold;">try</span> {
      session <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">JSch</span>().getSession(user, host, <span style="color: #0000dd; font-weight: bold;">22</span>)
      session.setPassword(password)
      session.setConfig(configs)
      session.connect()
      session.setPortForwardingL(lport, rhost, rport)
      println(session.getHost +<span style="background-color: #fff0f0;">"|"</span> + session.getHostKey+<span style="background-color: #fff0f0;">"|"</span> + session.getPort+<span style="background-color: #fff0f0;">"|"</span> + session.getServerVersion)
      <span style="color: #008800; font-weight: bold;">val</span> genre_table <span style="color: #008800; font-weight: bold;">=</span> spark.read.jdbc(url, <span style="background-color: #fff0f0;">"movielens.ratings"</span>, jdbcConfigs)
      genre_table.show(<span style="color: #008800; font-weight: bold;">false</span>)
    } <span style="color: #008800; font-weight: bold;">finally</span> {
      <span style="color: #888888;">// Release Spark and the tunnel even when connecting or reading fails.</span>
      spark.close()
      <span style="color: #008800; font-weight: bold;">if</span> (session != <span style="color: #008800; font-weight: bold;">null</span>) session.disconnect()
    }
    <span style="color: #bb0066; font-weight: bold;">System</span>.exit(<span style="color: #0000dd; font-weight: bold;">0</span>)
  }
}</pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-22822910918027289732019-03-01T18:16:00.001-08:002019-07-29T10:41:18.025-07:00Processing REST Service Data Spark-Scala<div dir="ltr" style="text-align: left;" trbidi="on">
<br />
I added the following dependency to pom.xml:<br />
<pre style="line-height: 16.25px;"><span style="color: #888888;"><!-- https://mvnrepository.com/artifact/javax.ws.rs/javax.ws.rs-api --></span><span style="color: #333333;">
</span><span style="color: #007700;"><dependency></span><span style="color: #333333;">
</span><span style="color: #007700;"><groupId></span><span style="color: #333333;">javax.ws.rs</span><span style="color: #007700;"></groupId></span><span style="color: #333333;">
</span><span style="color: #007700;"><artifactId></span><span style="color: #333333;">javax.ws.rs-api</span><span style="color: #007700;"></artifactId></span><span style="color: #333333;">
</span><span style="color: #007700;"><version></span><span style="color: #333333;">2.1</span><span style="color: #007700;"></version></span><span style="color: #333333;">
</span><span style="color: #007700;"></dependency></span></pre>
<pre style="line-height: 16.25px;"><span style="color: #007700;">Spark-Scala</span></pre>
<pre style="line-height: 16.25px;"><span style="color: #007700;">------------</span></pre>
<pre style="line-height: 16.25px;"><pre style="color: #333333; line-height: 16.25px;"><span style="color: #008800; font-weight: bold;">package</span> <span style="color: #0e84b5; font-weight: bold;">reading_rest.driver</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Properties</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.FileInputStream</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">javax.ws.rs.core.MediaType</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">javax.ws.rs.client.ClientBuilder</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.</span>{ <span style="color: #bb0066; font-weight: bold;">DataFrame</span>, <span style="color: #bb0066; font-weight: bold;">SaveMode</span>, <span style="color: #bb0066; font-weight: bold;">SparkSession</span> }
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.</span>{ <span style="color: #bb0066; font-weight: bold;">Level</span>, <span style="color: #bb0066; font-weight: bold;">Logger</span> }
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">scala.collection.immutable.Seq</span>
<span style="color: #888888;">// Demo: fetches a JSON document from a REST endpoint and shows it as a Spark DataFrame.</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">ReadingFromRest</span> {
  <span style="color: #888888;">// Loads the properties file, calls the REST service, prints the raw JSON and the parsed DataFrame.</span>
  <span style="color: #888888;">// args: command-line arguments, not used.</span>
  <span style="color: #008800; font-weight: bold;">def</span> main(args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span>[<span style="color: #333399; font-weight: bold;">String</span>])<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Unit</span> = {
    <span style="color: #008800; font-weight: bold;">val</span> filepath <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"/Users/pedam/eclipse-workspace/valid.properties"</span>
    <span style="color: #008800; font-weight: bold;">val</span> configs <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">Properties</span>()
    <span style="color: #888888;">// Close the stream as soon as the properties are loaded.</span>
    <span style="color: #008800; font-weight: bold;">val</span> propsIn <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">FileInputStream</span>(filepath)
    <span style="color: #008800; font-weight: bold;">try</span> configs.load(propsIn) <span style="color: #008800; font-weight: bold;">finally</span> propsIn.close()
    <span style="color: #bb0066; font-weight: bold;">System</span>.setProperty(<span style="background-color: #fff0f0;">"hadoop.home.dir"</span>, configs.getProperty(<span style="background-color: #fff0f0;">"hadoop.home.dir"</span>))
    <span style="color: #bb0066; font-weight: bold;">System</span>.setProperty(<span style="background-color: #fff0f0;">"spark.sql.warehouse.dir"</span>, configs.getProperty(<span style="background-color: #fff0f0;">"spark.sql.warehouse.dir"</span>))
    <span style="color: #008800; font-weight: bold;">val</span> spark <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">SparkSession</span>
      .builder()
      .appName(<span style="background-color: #fff0f0;">"Rest_Reading_application"</span>)
      .master(<span style="background-color: #fff0f0;">"local"</span>)
      .getOrCreate()
    <span style="color: #bb0066; font-weight: bold;">Logger</span>.getRootLogger.setLevel(<span style="color: #bb0066; font-weight: bold;">Level</span>.<span style="color: #bb0066; font-weight: bold;">ERROR</span>)
    <span style="color: #008800; font-weight: bold;">try</span> {
      <span style="color: #008800; font-weight: bold;">val</span> client <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">ClientBuilder</span>.newClient()
      <span style="color: #888888;">// The whole response body is read as one String; the client is closed right after the call.</span>
      <span style="color: #008800; font-weight: bold;">val</span> jsonStr<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">String</span> = <span style="color: #008800; font-weight: bold;">try</span> {
        client.target(<span style="background-color: #fff0f0;">"http://dummy.restapiexample.com/api/v1/employees"</span>).request()
          .accept(<span style="color: #bb0066; font-weight: bold;">MediaType</span>.<span style="color: #bb0066; font-weight: bold;">APPLICATION_JSON</span>).get(classOf[<span style="color: #333399; font-weight: bold;">String</span>])
      } <span style="color: #008800; font-weight: bold;">finally</span> client.close()
      println(jsonStr)
      <span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">spark.implicits._</span>
      <span style="color: #888888;">// Wrap the single JSON string in a one-element Dataset so spark.read.json can parse it.</span>
      <span style="color: #008800; font-weight: bold;">val</span> employeesDF <span style="color: #008800; font-weight: bold;">=</span> spark.read.json(<span style="color: #bb0066; font-weight: bold;">Seq</span>(jsonStr).toDS)
      employeesDF.show()
    } <span style="color: #008800; font-weight: bold;">finally</span> {
      spark.close()
    }
    <span style="color: #888888;">// Exit status 0: the run succeeded (the original exited with 1, which signals failure).</span>
    <span style="color: #bb0066; font-weight: bold;">System</span>.exit(<span style="color: #0000dd; font-weight: bold;">0</span>)
  }
} </pre>
<pre style="color: #333333; line-height: 16.25px;">Output:</pre>
<pre style="line-height: 16.25px;"><span style="color: #333333;">[{"id":"2027","employee_name":"testmadhan1","employee_salary":"345000","employee_age":"35","profile_image":""},....</span></pre>
<pre style="line-height: 16.25px;"></pre>
+------------+-----------------+---------------+----+-------------+
|employee_age| employee_name|employee_salary| id|profile_image|
+------------+-----------------+---------------+----+-------------+
| 35| testmadhan1| 345000|2027| |
| 23| test_gslab| 123|2028| |
| 23111111|test_gslab1111111| 123111111|2030| |
| 23| test_gslab| 123|2032| |
| 23| test_gslab| 123|2034| |
| 23| test_gslab| 123|2035| |
| 23| test_gslab| 123|2036| |
| 23| test_gslab| 123|2037| |
| 23| test_gslab| 123|2038| |
| 23| test_gslab| 123|2043| |
| 23| test_gslab| 123|2044| |
</pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-20376693004710344322019-02-10T11:50:00.000-08:002019-03-09T20:39:54.385-08:00Creating DDL from Parquet file <div dir="ltr" style="text-align: left;" trbidi="on">
<pre style="background-color: white; color: #333333; line-height: 16.25px;"><span style="color: navy; font-weight: bold;">import</span> java.io.File;
<span style="color: navy; font-weight: bold;">import</span> org.apache.commons.configuration.ConfigurationException;
<span style="color: navy; font-weight: bold;">import</span> org.apache.commons.configuration.PropertiesConfiguration;
<span style="color: navy; font-weight: bold;">import</span> org.apache.spark.sql.Dataset;
<span style="color: navy; font-weight: bold;">import</span> org.apache.spark.sql.Row;
<span style="color: navy; font-weight: bold;">import</span> org.apache.spark.sql.SparkSession;
<span style="color: navy; font-weight: bold;">import</span> org.apache.spark.sql.types.StructField;
<span style="color: grey; font-style: italic;">/** Prints a Hive CREATE EXTERNAL TABLE statement derived from the schema of a Parquet file. */</span>
<span style="color: navy; font-weight: bold;">public</span> <span style="color: navy; font-weight: bold;">class</span> ParquetToDDL {
    <span style="color: grey; font-style: italic;">/**
     * Loads Spark settings from the properties file, reads the Parquet file and prints the DDL.
     *
     * @param args command-line arguments (unused)
     */</span>
    <span style="color: navy; font-weight: bold;">public</span> <span style="color: navy; font-weight: bold;">static</span> <span style="color: navy; font-weight: bold;">void</span> main(String[] args) {
        System.<span style="color: red;">setProperty</span>(<span style="color: blue;">"hadoop.home.dir"</span>, <span style="color: blue;">"C:\\winutils\\"</span>);
        File propertiesFile = <span style="color: navy; font-weight: bold;">new</span> File(<span style="color: blue;">"C:\\Users\\navee\\eclipse-workspace\\valid.properties"</span>);
        PropertiesConfiguration configs = <span style="color: navy; font-weight: bold;">new</span> PropertiesConfiguration();
        <span style="color: navy; font-weight: bold;">try</span> {
            configs.<span style="color: red;">load</span>(propertiesFile);
        } <span style="color: navy; font-weight: bold;">catch</span> (ConfigurationException e) {
            <span style="color: grey; font-style: italic;">// Without the settings there is nothing to run; report and stop.</span>
            e.<span style="color: red;">printStackTrace</span>();
            <span style="color: navy; font-weight: bold;">return</span>;
        }
        SparkSession session = SparkSession.<span style="color: red;">builder</span>().<span style="color: red;">appName</span>(<span style="color: blue;">"fm_spark_data_extractor"</span>).<span style="color: red;">master</span>(<span style="color: blue;">"local[*]"</span>)
            .<span style="color: red;">config</span>(<span style="color: blue;">"spark.sql.warehouse.dir"</span>, configs.<span style="color: red;">getString</span>(<span style="color: blue;">"spark.sql.warehouse.dir"</span>))
            .<span style="color: red;">config</span>(<span style="color: blue;">"spark.local.dir"</span>, configs.<span style="color: red;">getString</span>(<span style="color: blue;">"spark.local.dir"</span>))
            .<span style="color: red;">config</span>(<span style="color: blue;">"spark.driver.memory"</span>, configs.<span style="color: red;">getString</span>(<span style="color: blue;">"spark.driver.memory"</span>))
            .<span style="color: red;">config</span>(<span style="color: blue;">"spark.executor.memory"</span>, configs.<span style="color: red;">getString</span>(<span style="color: blue;">"spark.executor.memory"</span>))
            .<span style="color: red;">config</span>(<span style="color: blue;">"spark.executor.cores"</span>, configs.<span style="color: red;">getInt</span>(<span style="color: blue;">"spark.executor.cores"</span>))
            .<span style="color: red;">config</span>(<span style="color: blue;">"spark.dynamicAllocation.enabled"</span>, configs.<span style="color: red;">getBoolean</span>(<span style="color: blue;">"spark.dynamicAllocation.enabled"</span>))
            .<span style="color: red;">config</span>(<span style="color: blue;">"spark.serializer"</span>, configs.<span style="color: red;">getString</span>(<span style="color: blue;">"spark.serializer"</span>)).<span style="color: red;">getOrCreate</span>();
        <span style="color: navy; font-weight: bold;">try</span> {
            Dataset < Row > verifydata = session.<span style="color: red;">read</span>().<span style="color: red;">parquet</span>(<span style="color: blue;">"D:\\userdata1.parquet"</span>);
            StructField[] fields = verifydata.<span style="color: red;">schema</span>().<span style="color: red;">fields</span>();
            StringBuilder ddlBuilder = <span style="color: navy; font-weight: bold;">new</span> StringBuilder();
            ddlBuilder.<span style="color: red;">append</span>(<span style="color: blue;">"CREATE EXTERNAL TABLE if NOT EXISTS `"</span> + <span style="color: blue;">"MBR"</span>.<span style="color: red;">toLowerCase</span>() + <span style="color: blue;">"` ( "</span>);
            <span style="color: grey; font-style: italic;">// Put the separator in front of every column but the first. The original appended a comma
            // after every column and then cut the last one, which threw on a schema with no columns.</span>
            String separator = <span style="color: blue;">""</span>;
            <span style="color: navy; font-weight: bold;">for</span> (StructField field: fields) {
                String column = <span style="color: blue;">" `"</span> + field.<span style="color: red;">name</span>() + <span style="color: blue;">"` "</span> + field.<span style="color: red;">dataType</span>().<span style="color: red;">simpleString</span>().<span style="color: red;">toUpperCase</span>();
                System.<span style="color: red;">out</span>.<span style="color: red;">println</span>(column + <span style="color: blue;">","</span>);
                ddlBuilder.<span style="color: red;">append</span>(separator).<span style="color: red;">append</span>(column);
                separator = <span style="color: blue;">", \n"</span>;
            }
            <span style="color: navy; font-weight: bold;">if</span> (fields.length > 0) {
                <span style="color: grey; font-style: italic;">// Keeps the text after the last column identical to the original output.</span>
                ddlBuilder.<span style="color: red;">append</span>(<span style="color: blue;">" \n"</span>);
            }
            ddlBuilder.<span style="color: red;">append</span>(<span style="color: blue;">" ) "</span>);
            <span style="color: grey; font-style: italic;">// NOTE(review): the location is a literal placeholder; substitute the real HDFS path and table name.</span>
            ddlBuilder.<span style="color: red;">append</span>(<span style="color: blue;">"STORED AS PARQUET LOCATION '"</span> + <span style="color: blue;">"hdfslocationPath+tableName"</span> + <span style="color: blue;">"/'"</span>);
            System.<span style="color: red;">out</span>.<span style="color: red;">println</span>(ddlBuilder.<span style="color: red;">toString</span>());
        } <span style="color: navy; font-weight: bold;">finally</span> {
            <span style="color: grey; font-style: italic;">// Stop Spark even when reading the Parquet file fails.</span>
            session.<span style="color: red;">stop</span>();
        }
    }
}</pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-47918868066539065702019-02-10T11:43:00.001-08:002019-03-20T12:33:29.987-07:00Processing Unix Array Building queries<div dir="ltr" style="text-align: left;" trbidi="on">
<pre style="background-color: #f8f8f8; color: #333333; line-height: 16.25px;"><span style="color: #008800; font-style: italic;">## Builds one "select ... union all select ..." query over every table named in tableNames.</span>
<span style="color: #008800; font-style: italic;">## tableNames is a comma-separated string like employees,departments,stuff</span>
<span style="color: #008800; font-style: italic;">## Reads TARGET_DATABASE and BUSINESS_DATE; appends the result to pseudoQuery.</span>
<span style="color: darkgoldenrod;">IFS</span><span style="color: #666666;">=</span><span style="color: #bb4444;">','</span> <span style="color: #aa22ff;">read</span> -r -a tableArray <span style="color: #666666;"><<<</span> <span style="color: #bb4444;">"${tableNames}"</span>
<span style="color: #aa22ff;">echo</span> <span style="color: #bb4444;">"${#tableArray[@]}"</span>
<span style="color: #008800; font-style: italic;">## Index of the last table: every query before it is followed by " union all ".</span>
<span style="color: darkgoldenrod;">count</span><span style="color: #666666;">=</span>$(( ${#tableArray[@]} - 1 ))
<span style="color: #aa22ff; font-weight: bold;">for </span>indx in <span style="color: #bb4444;">"${!tableArray[@]}"</span>
<span style="color: #aa22ff; font-weight: bold;">do</span>
  <span style="color: #008800; font-style: italic;">## awk rebuilds the record from its fields, which trims surrounding whitespace.</span>
  <span style="color: darkgoldenrod;">tableName</span><span style="color: #666666;">=</span>$(<span style="color: #aa22ff;">echo</span> <span style="color: #bb4444;">"${tableArray[indx]}"</span> | awk <span style="color: #bb4444;">'{$1=$1};1'</span>)
  <span style="color: darkgoldenrod;">buildAquery</span><span style="color: #666666;">=</span><span style="color: #bb4444;">" select distinct INPUT__FILE__NAME FROM ${TARGET_DATABASE}.${tableName} where to_date(ss_date) = '${BUSINESS_DATE}'"</span>
  <span style="color: #aa22ff; font-weight: bold;">if</span> <span style="color: #666666;">[[</span> <span style="color: #aa22ff; font-weight: bold;">${</span><span style="color: darkgoldenrod;">indx</span><span style="color: #aa22ff; font-weight: bold;">}</span> -lt <span style="color: #aa22ff; font-weight: bold;">${</span><span style="color: darkgoldenrod;">count</span><span style="color: #aa22ff; font-weight: bold;">}</span> <span style="color: #666666;">]]</span>
  <span style="color: #aa22ff; font-weight: bold;">then</span>
    <span style="color: #008800; font-style: italic;">## No spaces around +=, otherwise Bash runs "buildAquery" as a command.</span>
    <span style="color: darkgoldenrod;">buildAquery</span><span style="color: #666666;">+=</span><span style="color: #bb4444;">" union all "</span>
  <span style="color: #aa22ff; font-weight: bold;">fi</span>
  <span style="color: darkgoldenrod;">pseudoQuery</span><span style="color: #666666;">+=</span><span style="color: #bb4444;">"${buildAquery}"</span>
<span style="color: #aa22ff; font-weight: bold;">done</span></pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-14432693200306197202017-11-15T11:07:00.002-08:002017-11-15T11:07:47.262-08:00Filtering out Source Error Records from the dataframe and Saving to specific location<div dir="ltr" style="text-align: left;" trbidi="on">
<br />
The code below reads a JSON source that contains malformed records. The resulting dataframe has a <span style="background-color: white; color: green; font-family: "DejaVu Sans Mono"; font-size: 7.5pt; font-weight: bold;">_corrupt_record</span> column holding those records, and they can be saved to a configured error path if you want. Finally, <span style="background-color: white; font-family: "DejaVu Sans Mono"; font-size: 7.5pt;">errorFreeDF</span> is returned for the next processing steps.<br />
<br />
<pre style="background-color: white; font-family: "DejaVu Sans Mono"; font-size: 7.5pt;"><span style="color: navy; font-weight: bold;">import </span>com.typesafe.config.ConfigFactory
<span style="color: navy; font-weight: bold;">import </span>org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
<span style="color: navy; font-weight: bold;">class </span>ReadWriteData {
  <span style="color: grey; font-style: italic;">/* Class created for reading sources and writing files */</span>

  <span style="color: grey; font-style: italic;">/* Reads JSON from sourcePath, writes the rows captured in _corrupt_record to the path configured
     under "data.filesystem.errorpath", and returns the DataFrame without the _corrupt_record column.
     If any step throws, the source is re-read with default options and returned as is. */</span>
  <span style="color: navy; font-weight: bold;">def </span>readJsonFile(sparkSession: SparkSession, sourcePath:String ): DataFrame = <span style="color: navy; font-weight: bold;">try </span>{
    <span style="color: navy; font-weight: bold;">import </span>sparkSession.implicits._
    <span style="color: navy; font-weight: bold;">val </span>sourceDataDF = sparkSession.read.option(<span style="color: green; font-weight: bold;">"columnNameOfCorruptRecord"</span>, <span style="color: green; font-weight: bold;">"_corrupt_record"</span>).json(sourcePath)
    <span style="color: grey; font-style: italic;">// This definition had been merged onto a comment line, which left errorDataDF undefined.</span>
    <span style="color: grey; font-style: italic;">// NOTE(review): newer Spark versions may reject a query that selects only the corrupt-record column of an uncached file source - confirm for your version.</span>
    <span style="color: navy; font-weight: bold;">val </span>errorDataDF = sourceDataDF.select(<span style="color: green; font-weight: bold;">"_corrupt_record"</span>).filter(<span style="color: green; font-weight: bold;">$"_corrupt_record"</span>.isNotNull)
    errorDataDF.write.mode(SaveMode.Overwrite).json(ConfigFactory.load().getString(<span style="color: green; font-weight: bold;">"data.filesystem.errorpath"</span>))
    <span style="color: navy; font-weight: bold;">val </span>errorFreeDF = sourceDataDF.drop(sourceDataDF.col(<span style="color: green; font-weight: bold;">"_corrupt_record"</span>))
    errorFreeDF
  } <span style="color: navy; font-weight: bold;">catch </span>{
    <span style="color: grey; font-style: italic;">// Presumably covers input without malformed rows, where the _corrupt_record column is absent - confirm.</span>
    <span style="color: navy; font-weight: bold;">case </span>ex: Exception => {sparkSession.read.json(sourcePath)}
  }
}</pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-22643125536059544902017-06-07T09:18:00.003-07:002019-03-20T12:37:10.652-07:00TwitterLive Data Processing with Spark Scala<div dir="ltr" style="text-align: left;" trbidi="on">
This is an interesting sample:<br />
I added dependencies like the ones below to my pom.xml:<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #888888;"><!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming-twitter_2.10 --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.apache.spark<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>spark-streaming-twitter_2.10<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>1.6.1<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #888888;"><!-- https://mvnrepository.com/artifact/org.twitter4j/twitter4j-stream --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.twitter4j<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>twitter4j-stream<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>4.0.2<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency> </span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">.</span><span style="color: #0000cc;">At</span> Run Configurations I have given arguments as below to your spark<span style="color: #333333;">-</span>scala code<span style="color: #333333;">,</span>
<span style="color: #0000dd; font-weight: bold;">2</span><span style="color: #333333;">.</span><span style="color: #0000cc;">Please</span> make sure you have twitter account available<span style="color: #333333;">,</span> on web you find bunch of documents to create twitter<span style="color: #333333;">-</span>app pull your consumerkeys and accesstokens
<span style="color: #0000dd; font-weight: bold;">3</span><span style="color: #333333;">.</span><span style="color: #0000cc;">Having</span> said that<span style="color: #333333;">,</span> you can replace <span style="color: #333333;"><</span>consumer key<span style="color: #333333;">></span> <span style="color: #333333;"><</span>consumer secret<span style="color: #333333;">></span> <span style="color: #333333;"><</span>access token<span style="color: #333333;">></span> <span style="color: #333333;"><</span>access token secret<span style="color: #333333;">></span> <span style="color: #333333;">[<</span>filters<span style="color: #333333;">>]</span> as shown below
<span style="color: #0000dd; font-weight: bold;">4</span><span style="color: #333333;">.</span><span style="color: #0000cc;">tulbXXXXXXXXXXXOOOOOOOOOOOW</span> nrZXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXbkxVxpS9 <span style="color: #0000dd; font-weight: bold;">8</span>XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXi5o vXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXMO00aN3 trump
I am filtering live tweets on the name <span style="background-color: #fff0f0;">"trump"</span> and counting them<span style="color: #333333;">;</span> see the results in the screenshots</pre>
Code:<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.streaming.</span><span style="color: #333333;">{</span><span style="color: #bb0066; font-weight: bold;">Seconds</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">StreamingContext</span><span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkContext._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.streaming.twitter._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkConf</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.</span><span style="color: #333333;">{</span> <span style="color: #bb0066; font-weight: bold;">Level</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Logger</span> <span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">TwitterTagCount</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">def</span> main<span style="color: #333333;">(</span>args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span><span style="color: #333333;">[</span><span style="color: #333399; font-weight: bold;">String</span><span style="color: #333333;">])</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">if</span> <span style="color: #333333;">(</span>args<span style="color: #333333;">.</span>length <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">4</span><span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
<span style="color: #bb0066; font-weight: bold;">System</span><span style="color: #333333;">.</span>err<span style="color: #333333;">.</span>println<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"Usage: TwitterTagCount <consumer key> <consumer secret> "</span> <span style="color: #333333;">+</span>
<span style="background-color: #fff0f0;">"<access token> <access token secret> [<filters>]"</span><span style="color: #333333;">)</span>
<span style="color: #bb0066; font-weight: bold;">System</span><span style="color: #333333;">.</span>exit<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">)</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">val</span> <span style="color: #bb0066; font-weight: bold;">Array</span><span style="color: #333333;">(</span>consumerKey<span style="color: #333333;">,</span> consumerSecret<span style="color: #333333;">,</span> accessToken<span style="color: #333333;">,</span> accessTokenSecret<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">=</span> args<span style="color: #333333;">.</span>take<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">4</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> filters <span style="color: #008800; font-weight: bold;">=</span> args<span style="color: #333333;">.</span>takeRight<span style="color: #333333;">(</span>args<span style="color: #333333;">.</span>length <span style="color: #333333;">-</span> <span style="color: #0000dd; font-weight: bold;">4</span><span style="color: #333333;">)</span>
<span style="color: #888888;">// Set the system properties so that Twitter4j library used by twitter stream</span>
<span style="color: #888888;">// can use them to generate OAuth credentials</span>
<span style="color: #bb0066; font-weight: bold;">System</span><span style="color: #333333;">.</span>setProperty<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"twitter4j.oauth.consumerKey"</span><span style="color: #333333;">,</span> consumerKey<span style="color: #333333;">)</span>
<span style="color: #bb0066; font-weight: bold;">System</span><span style="color: #333333;">.</span>setProperty<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"twitter4j.oauth.consumerSecret"</span><span style="color: #333333;">,</span> consumerSecret<span style="color: #333333;">)</span>
<span style="color: #bb0066; font-weight: bold;">System</span><span style="color: #333333;">.</span>setProperty<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"twitter4j.oauth.accessToken"</span><span style="color: #333333;">,</span> accessToken<span style="color: #333333;">)</span>
<span style="color: #bb0066; font-weight: bold;">System</span><span style="color: #333333;">.</span>setProperty<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"twitter4j.oauth.accessTokenSecret"</span><span style="color: #333333;">,</span> accessTokenSecret<span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> sparkConf <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkConf</span><span style="color: #333333;">().</span>setAppName<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"TwitterPopularTags"</span><span style="color: #333333;">).</span>setMaster<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"local[2]"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> ssc <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">StreamingContext</span><span style="color: #333333;">(</span>sparkConf<span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Seconds</span><span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">2</span><span style="color: #333333;">))</span>
<span style="color: #008800; font-weight: bold;">val</span> stream <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">TwitterUtils</span><span style="color: #333333;">.</span>createStream<span style="color: #333333;">(</span>ssc<span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">None</span><span style="color: #333333;">,</span> filters<span style="color: #333333;">)</span><span style="color: #888888;">//Dstream</span>
<span style="color: #008800; font-weight: bold;"> val</span> rootLogger <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">Logger</span><span style="color: #333333;">.</span>getRootLogger<span style="color: #333333;">()</span>
rootLogger<span style="color: #333333;">.</span>setLevel<span style="color: #333333;">(</span><span style="color: #bb0066; font-weight: bold;">Level</span><span style="color: #333333;">.</span><span style="color: #bb0066; font-weight: bold;">ERROR</span><span style="color: #333333;">)</span>
ssc<span style="color: #333333;">.</span>checkpoint<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"checkpoint"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> hashTags <span style="color: #008800; font-weight: bold;">=</span> stream<span style="color: #333333;">.</span>flatMap<span style="color: #333333;">(</span>status <span style="color: #008800; font-weight: bold;">=></span> status<span style="color: #333333;">.</span>getText<span style="color: #333333;">.</span>split<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">" "</span><span style="color: #333333;">).</span>filter<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">.</span>startsWith<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"#"</span><span style="color: #333333;">)))</span>
<span style="color: #008800; font-weight: bold;">val</span> statuses <span style="color: #008800; font-weight: bold;">=</span> stream<span style="color: #333333;">.</span>map <span style="color: #333333;">(</span> x <span style="color: #008800; font-weight: bold;">=></span> x<span style="color: #333333;">.</span>getText <span style="color: #333333;">)</span>
statuses<span style="color: #333333;">.</span>print<span style="color: #333333;">()</span>
<span style="color: #008800; font-weight: bold;">val</span> topCounts60 <span style="color: #008800; font-weight: bold;">=</span> hashTags<span style="color: #333333;">.</span>map<span style="color: #333333;">((</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">,</span> <span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">)).</span>reduceByKeyAndWindow<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span> <span style="color: #333333;">+</span> <span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Seconds</span><span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">60</span><span style="color: #333333;">))</span>
<span style="color: #333333;">.</span>map<span style="color: #333333;">{</span><span style="color: #008800; font-weight: bold;">case</span> <span style="color: #333333;">(</span>topic<span style="color: #333333;">,</span> count<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #333333;">(</span>count<span style="color: #333333;">,</span> topic<span style="color: #333333;">)}</span>
<span style="color: #333333;">.</span>transform<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">.</span>sortByKey<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">false</span><span style="color: #333333;">))</span>
<span style="color: #008800; font-weight: bold;">val</span> topCounts10 <span style="color: #008800; font-weight: bold;">=</span> hashTags<span style="color: #333333;">.</span>map<span style="color: #333333;">((</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">,</span> <span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">)).</span>reduceByKeyAndWindow<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span> <span style="color: #333333;">+</span> <span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Seconds</span><span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">10</span><span style="color: #333333;">))</span>
<span style="color: #333333;">.</span>map<span style="color: #333333;">{</span><span style="color: #008800; font-weight: bold;">case</span> <span style="color: #333333;">(</span>topic<span style="color: #333333;">,</span> count<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #333333;">(</span>count<span style="color: #333333;">,</span> topic<span style="color: #333333;">)}</span>
<span style="color: #333333;">.</span>transform<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">.</span>sortByKey<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">false</span><span style="color: #333333;">))</span>
<span style="color: #888888;">// Print popular hashtags</span>
topCounts60<span style="color: #333333;">.</span>foreachRDD<span style="color: #333333;">(</span>rdd <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">val</span> topList <span style="color: #008800; font-weight: bold;">=</span> rdd<span style="color: #333333;">.</span>take<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">10</span><span style="color: #333333;">)</span>
println<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"\nPopular topics in last 60 seconds (%s total):"</span><span style="color: #333333;">.</span>format<span style="color: #333333;">(</span>rdd<span style="color: #333333;">.</span>count<span style="color: #333333;">()))</span>
topList<span style="color: #333333;">.</span>foreach<span style="color: #333333;">{</span><span style="color: #008800; font-weight: bold;">case</span> <span style="color: #333333;">(</span>count<span style="color: #333333;">,</span> tag<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">=></span> println<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"%s (%s tweets)"</span><span style="color: #333333;">.</span>format<span style="color: #333333;">(</span>tag<span style="color: #333333;">,</span> count<span style="color: #333333;">))}</span>
<span style="color: #333333;">})</span>
topCounts10<span style="color: #333333;">.</span>foreachRDD<span style="color: #333333;">(</span>rdd <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">val</span> topList <span style="color: #008800; font-weight: bold;">=</span> rdd<span style="color: #333333;">.</span>take<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">10</span><span style="color: #333333;">)</span>
println<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"\nPopular topics in last 10 seconds (%s total):"</span><span style="color: #333333;">.</span>format<span style="color: #333333;">(</span>rdd<span style="color: #333333;">.</span>count<span style="color: #333333;">()))</span>
topList<span style="color: #333333;">.</span>foreach<span style="color: #333333;">{</span><span style="color: #008800; font-weight: bold;">case</span> <span style="color: #333333;">(</span>count<span style="color: #333333;">,</span> tag<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">=></span> println<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"%s (%s tweets)"</span><span style="color: #333333;">.</span>format<span style="color: #333333;">(</span>tag<span style="color: #333333;">,</span> count<span style="color: #333333;">))}</span>
<span style="color: #333333;">})</span>
ssc<span style="color: #333333;">.</span>start<span style="color: #333333;">()</span>
ssc<span style="color: #333333;">.</span>awaitTermination<span style="color: #333333;">()</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span></pre>
<br />
<br />
<div class="separator" style="clear: both; text-align: center;">
<a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEg50nRr0M6aZEQhwlwi8bxiNkodGdaFYIjAQgagVKNsZBe1_dXVvQby3bpM23JmYqXdADVBvRdU21mVG2Ekmvjr709pDRUFHIm4pbPx2lzqmRGeZzQuYCsnnNB8nnzZu0rJglOVTdEEXHM/s1600/Screenshot+from+2017-06-07+09%253A11%253A22.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" data-original-height="768" data-original-width="1366" height="179" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEg50nRr0M6aZEQhwlwi8bxiNkodGdaFYIjAQgagVKNsZBe1_dXVvQby3bpM23JmYqXdADVBvRdU21mVG2Ekmvjr709pDRUFHIm4pbPx2lzqmRGeZzQuYCsnnNB8nnzZu0rJglOVTdEEXHM/s320/Screenshot+from+2017-06-07+09%253A11%253A22.png" width="320" /></a></div>
<br />
<div class="separator" style="clear: both; text-align: center;">
<a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEifxhKNYm30nmyUQDXJy0sVBqas5opvDuFWKrvIgWGWmp2VWWMQS1FQsayXSXaN5Vcq4PmlZprPEI5dwUxKBpJCAE-ZXLi0-7AAWba9C1DANOehoZ6YwntRPIplSgucyEOc08iXXGT-sgg/s1600/Screenshot+from+2017-06-07+09%253A10%253A59.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" data-original-height="768" data-original-width="1366" height="179" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEifxhKNYm30nmyUQDXJy0sVBqas5opvDuFWKrvIgWGWmp2VWWMQS1FQsayXSXaN5Vcq4PmlZprPEI5dwUxKBpJCAE-ZXLi0-7AAWba9C1DANOehoZ6YwntRPIplSgucyEOc08iXXGT-sgg/s320/Screenshot+from+2017-06-07+09%253A10%253A59.png" width="320" /></a></div>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #007700;">
</span></pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-14470655075899526152017-05-30T12:36:00.001-07:002017-05-30T12:36:22.053-07:00KafkaWordCount example Spark-Scala<div dir="ltr" style="text-align: left;" trbidi="on">
<br />
Install Kafka on your machine, as described on the official Apache Kafka website (kafka.apache.org).<br />
<br />
I followed the installation process for Ubuntu.<br />
<br />
Now execute the commands below one by one.<br />
<span style="color: #38761d;"><span style="font-size: xx-small;">i)/home/naveen/Documents/work/infa/kafka_2.11/bin/zookeeper-server-start.sh /home/naveen/Documents/work/infa/kafka_2.11/config/zookeeper.properties & </span></span><br />
<span style="color: #38761d;"><span style="font-size: xx-small;">ii)cd /home/naveen/Documents/work/infa/kafka_2.11/<br />iii)bin/kafka-server-start.sh config/server.properties &</span></span><br />
<span style="color: #38761d;"><span style="font-size: xx-small;">iv)/home/naveen/Documents/work/infa/kafka_2.11/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic testone &</span></span><br />
<span style="color: #38761d;"><span style="font-size: xx-small;">v)/home/naveen/Documents/work/infa/kafka_2.11/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic testone <br />vi)/home/naveen/Documents/work/infa/kafka_2.11/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic testone --from-beginning</span></span><br />
From Eclipse, I ran the code below:<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #008800; font-weight: bold;">package</span> <span style="color: #0e84b5; font-weight: bold;">bigdata.sparkapplications</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.HashMap</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.streaming._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.streaming.kafka._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkConf</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.</span><span style="color: #333333;">{</span> <span style="color: #bb0066; font-weight: bold;">Level</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Logger</span> <span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">KafkaWordCount</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">def</span> main<span style="color: #333333;">(</span>args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span><span style="color: #333333;">[</span><span style="color: #333399; font-weight: bold;">String</span><span style="color: #333333;">])</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">val</span> zkQuorum <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"localhost:2181"</span>
<span style="color: #008800; font-weight: bold;">val</span> group <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"zookeeper"</span>
<span style="color: #008800; font-weight: bold;">val</span> topics <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"testone"</span>
<span style="color: #008800; font-weight: bold;">val</span> numThreads <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"1"</span>
<span style="color: #008800; font-weight: bold;">val</span> sparkConf <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkConf</span><span style="color: #333333;">().</span>setAppName<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"KafkaWordCount"</span><span style="color: #333333;">).</span>setMaster<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"local[2]"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> ssc <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">StreamingContext</span><span style="color: #333333;">(</span>sparkConf<span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Seconds</span><span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">2</span><span style="color: #333333;">))</span>
<span style="color: #008800; font-weight: bold;">val</span> rootLogger <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">Logger</span><span style="color: #333333;">.</span>getRootLogger<span style="color: #333333;">()</span>
rootLogger<span style="color: #333333;">.</span>setLevel<span style="color: #333333;">(</span><span style="color: #bb0066; font-weight: bold;">Level</span><span style="color: #333333;">.</span><span style="color: #bb0066; font-weight: bold;">ERROR</span><span style="color: #333333;">)</span>
ssc<span style="color: #333333;">.</span>checkpoint<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"checkpoint"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> topicMap <span style="color: #008800; font-weight: bold;">=</span> topics<span style="color: #333333;">.</span>split<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">","</span><span style="color: #333333;">).</span>map<span style="color: #333333;">((</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">,</span> numThreads<span style="color: #333333;">.</span>toInt<span style="color: #333333;">)).</span>toMap
<span style="color: #008800; font-weight: bold;">val</span> lines <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">KafkaUtils</span><span style="color: #333333;">.</span>createStream<span style="color: #333333;">(</span>ssc<span style="color: #333333;">,</span> zkQuorum<span style="color: #333333;">,</span> group<span style="color: #333333;">,</span> topicMap<span style="color: #333333;">).</span>map<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">.</span>_2<span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> words <span style="color: #008800; font-weight: bold;">=</span> lines<span style="color: #333333;">.</span>flatMap<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">.</span>split<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">" "</span><span style="color: #333333;">))</span>
<span style="color: #008800; font-weight: bold;">val</span> wordCounts <span style="color: #008800; font-weight: bold;">=</span> words<span style="color: #333333;">.</span>map<span style="color: #333333;">(</span>x <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #333333;">(</span>x<span style="color: #333333;">,</span> <span style="color: #0000dd; font-weight: bold;">1L</span><span style="color: #333333;">)).</span>reduceByKeyAndWindow<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span> <span style="color: #333333;">+</span> <span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">,</span> <span style="color: #008800; font-weight: bold;">_</span> <span style="color: #333333;">-</span> <span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Minutes</span><span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">10</span><span style="color: #333333;">),</span> <span style="color: #bb0066; font-weight: bold;">Seconds</span><span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">2</span><span style="color: #333333;">),</span> <span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">)</span>
wordCounts<span style="color: #333333;">.</span>print<span style="color: #333333;">()</span>
ssc<span style="color: #333333;">.</span>start<span style="color: #333333;">()</span>
ssc<span style="color: #333333;">.</span>awaitTermination<span style="color: #333333;">()</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #333333;"> </span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #333333;">The result is shown below, as the word counts are processed. </span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #333333;"> </span></pre>
<div class="separator" style="clear: both; text-align: center;">
<a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjbMtWxxQsXKWdSOiWbT0-n6gDLYHPDZ9j6DqegQ6bj-VNMGJ9IN3HSChmojjnAmHAot58cfam36Xe5K3PsIStpBAi77ukh1lKKNYi_p9Ee994hnKh0KgLkqBhO2s0OGvgjABfpYeONhYg/s1600/Screenshot+from+2017-05-30+11%253A16%253A32.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" data-original-height="768" data-original-width="1366" height="179" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjbMtWxxQsXKWdSOiWbT0-n6gDLYHPDZ9j6DqegQ6bj-VNMGJ9IN3HSChmojjnAmHAot58cfam36Xe5K3PsIStpBAi77ukh1lKKNYi_p9Ee994hnKh0KgLkqBhO2s0OGvgjABfpYeONhYg/s320/Screenshot+from+2017-05-30+11%253A16%253A32.png" width="320" /></a></div>
<br />
<div class="separator" style="clear: both; text-align: center;">
<a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEilYcSDL8vJY6x7L-4fjJKPLBPUbUmZWDGCXm-jxh4rabJrTG71cagI0wfG6XuBjC7ieNho4SLYqMkq2YFSHHd1CNI0zcs5bVDfFCdb0FzPlyRSwCkMNcVQxvUgpBEz_9nzC71VPJDPwjc/s1600/Screenshot+from+2017-05-30+11%253A19%253A38.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" data-original-height="768" data-original-width="1366" height="179" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEilYcSDL8vJY6x7L-4fjJKPLBPUbUmZWDGCXm-jxh4rabJrTG71cagI0wfG6XuBjC7ieNho4SLYqMkq2YFSHHd1CNI0zcs5bVDfFCdb0FzPlyRSwCkMNcVQxvUgpBEz_9nzC71VPJDPwjc/s320/Screenshot+from+2017-05-30+11%253A19%253A38.png" width="320" /></a></div>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #333333;"> </span></pre>
<br /></div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-90058840428289874102017-03-20T23:31:00.000-07:002017-03-21T12:25:27.320-07:00Spark Scala CSV Parsing, Filtering data with String input.<div dir="ltr" style="text-align: left;" trbidi="on">
I downloaded the source data from:<br />
<br />
<pre style="-webkit-text-stroke-width: 0px; background-color: #f7f7f7; border-radius: 4px; border: none; color: #333333; display: block; font-family: monospace; font-size: 14px; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: inherit; margin: 0px; orphans: 2; padding: 0px; text-align: start; text-indent: 0px; text-transform: none; white-space: pre-wrap; widows: 2; word-break: break-all; word-spacing: 0px; word-wrap: break-word;"><span class="s1" style="color: #ba2121; margin: 0px; padding: 0px;">https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data</span></pre>
<br />
Put it in your desired location; I kept it in my Hadoop directory.<br />
<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #008800; font-weight: bold;">package</span> <span style="color: #0e84b5; font-weight: bold;">bigdata.spark.sparkproject</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkConf</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkContext</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.</span><span style="color: #333333;">{</span> <span style="color: #bb0066; font-weight: bold;">Level</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Logger</span> <span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.spark.util.Utills</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util._</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">CSVFileReading</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">def</span> main<span style="color: #333333;">(</span>args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span><span style="color: #333333;">[</span><span style="color: #333399; font-weight: bold;">String</span><span style="color: #333333;">])</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">val</span> conf <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkConf</span><span style="color: #333333;">().</span>setAppName<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"SparkCSVFileProcessing"</span><span style="color: #333333;">).</span>setMaster<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"local[2]"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> sc <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkContext</span><span style="color: #333333;">(</span>conf<span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> rootLogger <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">Logger</span><span style="color: #333333;">.</span>getRootLogger<span style="color: #333333;">()</span>
rootLogger<span style="color: #333333;">.</span>setLevel<span style="color: #333333;">(</span><span style="color: #bb0066; font-weight: bold;">Level</span><span style="color: #333333;">.</span><span style="color: #bb0066; font-weight: bold;">ERROR</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> sqlContext <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> org<span style="color: #333333;">.</span>apache<span style="color: #333333;">.</span>spark<span style="color: #333333;">.</span>sql<span style="color: #333333;">.</span><span style="color: #bb0066; font-weight: bold;">SQLContext</span><span style="color: #333333;">(</span>sc<span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> headers <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-county,income"</span><span style="color: #333333;">.</span>split<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">","</span><span style="color: #333333;">).</span>toList
<span style="color: #008800; font-weight: bold;">val</span> inputDataFileDF <span style="color: #008800; font-weight: bold;">=</span> sqlContext<span style="color: #333333;">.</span>read<span style="color: #333333;">.</span>format<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"com.databricks.spark.csv"</span><span style="color: #333333;">).</span>option<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"header"</span><span style="color: #333333;">,</span> <span style="background-color: #fff0f0;">"false"</span><span style="color: #333333;">).</span>option<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"inferSchema"</span><span style="color: #333333;">,</span> <span style="background-color: #fff0f0;">"true"</span><span style="color: #333333;">)</span>
<span style="color: #333333;">.</span>load<span style="color: #333333;">(</span><span style="color: #bb0066; font-weight: bold;">Utills</span><span style="color: #333333;">.</span><span style="color: #bb0066; font-weight: bold;">DATA_PATH</span> <span style="color: #333333;">+</span><span style="background-color: #fff0f0;">"adult.data"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> dFRenamed <span style="color: #008800; font-weight: bold;">=</span> inputDataFileDF<span style="color: #333333;">.</span>toDF<span style="color: #333333;">(</span>headers<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #008800; font-weight: bold;">_</span><span style="color: #333399; font-weight: bold;">*</span><span style="color: #333333;">)</span>
<span style="color: #888888;">// inputDataFileDF.show(5)</span>
dFRenamed<span style="color: #333333;">.</span>printSchema<span style="color: #333333;">()</span> <span style="color: #888888;">//printing schema</span>
dFRenamed<span style="color: #333333;">.</span>show<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">5</span><span style="color: #333333;">)</span> <span style="color: #888888;">//showing the top 5 rows</span>
<span style="color: #888888;">// dfRenamed.filter($"workclass" === "Private").show</span>
<span style="color: #888888;">// dFRenamed.select("education-num","workclass").distinct.show</span>
<span style="color: #008800; font-weight: bold;">val</span> prvtData <span style="color: #008800; font-weight: bold;">=</span> dFRenamed<span style="color: #333333;">.</span>select<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"*"</span><span style="color: #333333;">).</span>where<span style="color: #333333;">(</span>dFRenamed<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"workclass"</span><span style="color: #333333;">).</span>contains<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"Private"</span><span style="color: #333333;">))</span>
<span style="color: #888888;">// dfPrivate.show(5)</span>
prvtData<span style="color: #333333;">.</span>show<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">5</span><span style="color: #333333;">)</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span></pre>
<br /></div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-16004764035977918842017-03-17T16:06:00.004-07:002017-03-17T21:32:42.574-07:00 spark k means clustering example<div dir="ltr" style="text-align: left;" trbidi="on">
I used the Spark MLlib library (Scala 2.11 build), and for plotting I used JMathPlot.<br />
<br />
Add the dependency below to your pom.xml:<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #888888;"><!-- https://mvnrepository.com/artifact/com.github.yannrichet/JMathPlot --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>com.github.yannrichet<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>JMathPlot<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>1.0.1<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #007700;"> </span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #007700;">Using the Spark Scala K-Means algorithm and plotting:</span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #007700;"><span style="color: black;">After adding the dependency, use the code below to plot the data points and centroids. Replace the input file path with your own.</span></span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #007700;"><span style="color: black;">Note: The commented-out parts of the code can help you adapt it to files that have more than two columns.</span></span></pre>
<div class="separator" style="clear: both; text-align: center;">
<br /></div>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #008800; font-weight: bold;">package</span> <span style="color: #0e84b5; font-weight: bold;">bigdata.spark.sparkproject</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.</span><span style="color: #333333;">{</span> <span style="color: #bb0066; font-weight: bold;">SparkConf</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">SparkContext</span> <span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.mllib.linalg.DenseVector</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.sql.DataFrame</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.rdd.RDD</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.mllib.clustering.</span><span style="color: #333333;">{</span> <span style="color: #bb0066; font-weight: bold;">KMeans</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">KMeansModel</span> <span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.mllib.linalg.Vectors</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.mllib.regression.LabeledPoint</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">javax.swing.JFrame</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.math.plot.Plot2DPanel</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.log4j.</span><span style="color: #333333;">{</span> <span style="color: #bb0066; font-weight: bold;">Level</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Logger</span> <span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.awt.Color</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">KMeanGraphModel</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">def</span> main<span style="color: #333333;">(</span>args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span><span style="color: #333333;">[</span><span style="color: #333399; font-weight: bold;">String</span><span style="color: #333333;">])</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">val</span> conf <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkConf</span><span style="color: #333333;">().</span>setAppName<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"KMeansExample"</span><span style="color: #333333;">).</span>setAppName<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"Spark-DataFrame"</span><span style="color: #333333;">).</span>setMaster<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"local"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> sc <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkContext</span><span style="color: #333333;">(</span>conf<span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> rootLogger <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">Logger</span><span style="color: #333333;">.</span>getRootLogger<span style="color: #333333;">()</span>
rootLogger<span style="color: #333333;">.</span>setLevel<span style="color: #333333;">(</span><span style="color: #bb0066; font-weight: bold;">Level</span><span style="color: #333333;">.</span><span style="color: #bb0066; font-weight: bold;">ERROR</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> data <span style="color: #008800; font-weight: bold;">=</span> sc<span style="color: #333333;">.</span>textFile<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"/home/naveen/Documents/work/learning/exercises/mldata/in/kmeans_online.txt"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> parsedDenseVector <span style="color: #008800; font-weight: bold;">=</span> data<span style="color: #333333;">.</span>map<span style="color: #333333;">(</span>s <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #bb0066; font-weight: bold;">Vectors</span><span style="color: #333333;">.</span>dense<span style="color: #333333;">(</span>s<span style="color: #333333;">.</span>split<span style="color: #333333;">(</span><span style="color: #0044dd;">','</span><span style="color: #333333;">).</span>map<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">.</span>toDouble<span style="color: #333333;">))).</span>cache<span style="color: #333333;">()</span>
<span style="color: #008800; font-weight: bold;">val</span> indexedDenseVector <span style="color: #008800; font-weight: bold;">=</span> parsedDenseVector<span style="color: #333333;">.</span>zipWithIndex<span style="color: #333333;">()</span>
<span style="color: #008800; font-weight: bold;">val</span> indexedData <span style="color: #008800; font-weight: bold;">=</span> indexedDenseVector<span style="color: #333333;">.</span>map <span style="color: #333333;">{</span> <span style="color: #008800; font-weight: bold;">case</span> <span style="color: #333333;">(</span>value<span style="color: #333333;">,</span> index<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #333333;">(</span>index<span style="color: #333333;">,</span> value<span style="color: #333333;">)</span> <span style="color: #333333;">}</span>
<span style="color: #888888;">//val rdd1 = indexedData.map { case (i, DenseVector(arr)) => (i, Vectors.dense(arr.take(2))) }</span>
<span style="color: #888888;">//val rdd2 = indexedData.map { case (i, DenseVector(arr)) => (i, Vectors.dense(arr.drop(2))) }</span>
<span style="color: #888888;">// parsedDenseVector.foreach(println)</span>
<span style="color: #888888;">// indexedData.foreach(println) </span>
<span style="color: #008800; font-weight: bold;">val</span> rddXextract <span style="color: #008800; font-weight: bold;">=</span> indexedData<span style="color: #333333;">.</span>map <span style="color: #333333;">{</span> <span style="color: #008800; font-weight: bold;">case</span> <span style="color: #333333;">(</span>x<span style="color: #333333;">,</span> y<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #333333;">((</span>y<span style="color: #333333;">.</span>toArray<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">)))</span> <span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">val</span> rddYextract <span style="color: #008800; font-weight: bold;">=</span> indexedData<span style="color: #333333;">.</span>map <span style="color: #333333;">{</span> <span style="color: #008800; font-weight: bold;">case</span> <span style="color: #333333;">(</span>x<span style="color: #333333;">,</span> y<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">=></span> <span style="color: #333333;">((</span>y<span style="color: #333333;">.</span>toArray<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">)))</span> <span style="color: #333333;">}</span>
<span style="color: #888888;">// rdd1.foreach(println) </span>
<span style="color: #888888;">// rdd2.foreach(println)</span>
<span style="color: #888888;">//rddXextract.foreach { println }</span>
<span style="color: #888888;">//rddYextract.foreach { println }</span>
<span style="color: #888888;">// Cluster the data into numClusters (3) classes using KMeans</span>
<span style="color: #008800; font-weight: bold;">val</span> numClusters <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #0000dd; font-weight: bold;">3</span>
<span style="color: #008800; font-weight: bold;">val</span> numIterations <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #0000dd; font-weight: bold;">10</span>
<span style="color: #008800; font-weight: bold;">val</span> kmmodel <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #bb0066; font-weight: bold;">KMeans</span><span style="color: #333333;">.</span>train<span style="color: #333333;">(</span>parsedDenseVector<span style="color: #333333;">,</span> numClusters<span style="color: #333333;">,</span> numIterations<span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> centroids <span style="color: #008800; font-weight: bold;">=</span> kmmodel<span style="color: #333333;">.</span>clusterCenters
<span style="color: #008800; font-weight: bold;">val</span> centroidPoints <span style="color: #008800; font-weight: bold;">=</span> centroids<span style="color: #333333;">.</span>map<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">.</span>toArray<span style="color: #333333;">)</span>
<span style="color: #888888;">//val rddXextract = indexedData.map{case (x,y) => (( y.toArray(0)))}</span>
<span style="color: #888888;">//val rddYextract = indexedData.map{case (x,y) => (( y.toArray(1)))}</span>
<span style="color: #888888;">// kmmodel.predict(parsedDenseVector).foreach(println)</span>
<span style="color: #008800; font-weight: bold;">var</span> xplot <span style="color: #008800; font-weight: bold;">=</span> rddXextract<span style="color: #333333;">.</span>collect<span style="color: #333333;">()</span>
<span style="color: #008800; font-weight: bold;">var</span> yplot <span style="color: #008800; font-weight: bold;">=</span> rddYextract<span style="color: #333333;">.</span>collect<span style="color: #333333;">()</span>
println<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"["</span> <span style="color: #333333;">+</span> xplot <span style="color: #333333;">+</span> <span style="background-color: #fff0f0;">","</span><span style="color: #333333;">)</span>
centroids<span style="color: #333333;">.</span>foreach <span style="color: #333333;">{</span> println <span style="color: #333333;">}</span>
<span style="color: #888888;">//var cent = centroids.collect()</span>
<span style="color: #008800; font-weight: bold;">val</span> plt <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">Plot2DPanel</span><span style="color: #333333;">()</span>
plt<span style="color: #333333;">.</span>addLegend<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"SOUTH"</span><span style="color: #333333;">)</span>
plt<span style="color: #333333;">.</span>addScatterPlot<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"Data Points"</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Color</span><span style="color: #333333;">.</span><span style="color: #bb0066; font-weight: bold;">RED</span><span style="color: #333333;">,</span> xplot<span style="color: #333333;">,</span> yplot<span style="color: #333333;">)</span>
plt<span style="color: #333333;">.</span>addScatterPlot<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"Centroids"</span><span style="color: #333333;">,</span> <span style="color: #bb0066; font-weight: bold;">Color</span><span style="color: #333333;">.</span><span style="color: #bb0066; font-weight: bold;">BLUE</span><span style="color: #333333;">,</span> centroidPoints<span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> frame <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">JFrame</span><span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"KMean plot panel"</span><span style="color: #333333;">)</span>
frame<span style="color: #333333;">.</span>setSize<span style="color: #333333;">(</span><span style="color: #0000dd; font-weight: bold;">600</span><span style="color: #333333;">,</span> <span style="color: #0000dd; font-weight: bold;">600</span><span style="color: #333333;">)</span>
frame<span style="color: #333333;">.</span>setContentPane<span style="color: #333333;">(</span>plt<span style="color: #333333;">)</span>
frame<span style="color: #333333;">.</span>setVisible<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">true</span><span style="color: #333333;">)</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #333333;"> </span></pre>
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #333333;"><a href="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjuDNEckAPTf7FVzQXHLJFqFlKp1u8Fbi1FY4_fuETAwgPO6mO1qeQRyXM604rAGCxPMH9M3YyCB-AXtctfhmPpfnVMNUanw5rOnHWcNfTFfwLuq-tGI7WjaT7rEWMgmCDeUq5gh8jvRVA/s1600/Screenshot+from+2017-03-17+17%253A07%253A17.png" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="179" src="https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEjuDNEckAPTf7FVzQXHLJFqFlKp1u8Fbi1FY4_fuETAwgPO6mO1qeQRyXM604rAGCxPMH9M3YyCB-AXtctfhmPpfnVMNUanw5rOnHWcNfTFfwLuq-tGI7WjaT7rEWMgmCDeUq5gh8jvRVA/s320/Screenshot+from+2017-03-17+17%253A07%253A17.png" width="320" /></a> </span></pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-1199428906731209282017-02-02T13:02:00.000-08:002017-06-09T09:15:43.542-07:00Spark Scala, MySQL JDBC-Connect Select program<div dir="ltr" style="text-align: left;" trbidi="on">
<br />
The program below shows how a Spark Scala object connects to a MySQL database and saves the contents of a table to a local directory.<br />
<br />
Prerequisite: Place a mysql-jdbc connector jar in the build path.<br />
<br />
<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"> <span style="color: #008800; font-weight: bold;">package</span> <span style="color: #0e84b5; font-weight: bold;">bigdata.sparkapplications</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkConf</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkContext</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkContext._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.rdd.JdbcRDD</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.sql.</span><span style="color: #333333;">{</span><span style="color: #bb0066; font-weight: bold;">DriverManager</span><span style="color: #333333;">,</span><span style="color: #bb0066; font-weight: bold;">Connection</span><span style="color: #333333;">,</span><span style="color: #bb0066; font-weight: bold;">ResultSet</span><span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">ScalaJdbcConnectSelect</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">def</span> main<span style="color: #333333;">(</span>args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span><span style="color: #333333;">[</span><span style="color: #333399; font-weight: bold;">String</span><span style="color: #333333;">])</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">val</span> conf <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkConf</span><span style="color: #333333;">().</span>setAppName<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"JDBC RDD"</span><span style="color: #333333;">).</span>setMaster<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"local[2]"</span><span style="color: #333333;">).</span>set<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"spark.executor.memory"</span><span style="color: #333333;">,</span> <span style="background-color: #fff0f0;">"1g"</span><span style="color: #333333;">);</span>
<span style="color: #008800; font-weight: bold;">val</span> sc <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkContext</span><span style="color: #333333;">(</span>conf<span style="color: #333333;">);</span>
<span style="color: #008800; font-weight: bold;">val</span> driver <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"com.mysql.jdbc.Driver"</span>
<span style="color: #008800; font-weight: bold;">val</span> url <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"jdbc:mysql://localhost:3306/sakila"</span>
<span style="color: #008800; font-weight: bold;">val</span> username <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"root"</span>
<span style="color: #008800; font-weight: bold;">val</span> password <span style="color: #008800; font-weight: bold;">=</span> <span style="background-color: #fff0f0;">"root"</span>
<span style="color: #bb0066; font-weight: bold;">Class</span><span style="color: #333333;">.</span>forName<span style="color: #333333;">(</span>driver<span style="color: #333333;">).</span>newInstance<span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">val</span> myRDD <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">JdbcRDD</span><span style="color: #333333;">(</span>sc<span style="color: #333333;">,</span> <span style="color: #333333;">()</span> <span style="color: #333333;">=></span> <span style="color: #bb0066; font-weight: bold;">DriverManager</span><span style="color: #333333;">.</span>getConnection<span style="color: #333333;">(</span>url<span style="color: #333333;">,</span> username<span style="color: #333333;">,</span> password<span style="color: #333333;">),</span>
<span style="background-color: #fff0f0;">"select first_name, last_name, email from sakila.customer limit ?,?"</span><span style="color: #333333;">,</span> <span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">,</span> <span style="color: #0000dd; font-weight: bold;">50</span><span style="color: #333333;">,</span> <span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">,</span> r <span style="color: #333333;">=></span>
r<span style="color: #333333;">.</span>getString<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"first_name"</span><span style="color: #333333;">)</span> <span style="color: #333333;">+</span> <span style="background-color: #fff0f0;">", "</span> <span style="color: #333333;">+</span> r<span style="color: #333333;">.</span>getString<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"last_name"</span><span style="color: #333333;">)</span> <span style="color: #333333;">+</span> <span style="background-color: #fff0f0;">", "</span><span style="color: #333333;">+</span>r<span style="color: #333333;">.</span>getString<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"email"</span><span style="color: #333333;">));</span>
myRDD<span style="color: #333333;">.</span>foreach<span style="color: #333333;">(</span>println<span style="color: #333333;">);</span>
myRDD<span style="color: #333333;">.</span>saveAsTextFile<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"/home/naveen/Documents/work/data.txt"</span><span style="color: #333333;">)</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span></pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-26283448032236088082017-02-02T12:54:00.001-08:002017-03-08T10:10:34.369-08:00WordCount with Spark, Cloudera Hadoop<div dir="ltr" style="text-align: left;" trbidi="on">
This is the Wordcount example, executed on Cloudera with Spark!<br />
<br />
<br /><pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #008800; font-weight: bold;">package</span> <span style="color: #0e84b5; font-weight: bold;">bigdata.sparkapplications</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkContext</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkConf</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkContext</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkContext._</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">org.apache.spark.SparkConf</span>
<span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">hadooptest</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">def</span> main<span style="color: #333333;">(</span>args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span><span style="color: #333333;">[</span><span style="color: #333399; font-weight: bold;">String</span><span style="color: #333333;">])</span><span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Unit</span> <span style="color: #333333;">=</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">val</span> conf <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkConf</span><span style="color: #333333;">().</span>setMaster<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"local"</span><span style="color: #333333;">).</span>setAppName<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"WordCount"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> sc <span style="color: #008800; font-weight: bold;">=</span> <span style="color: #008800; font-weight: bold;">new</span> <span style="color: #bb0066; font-weight: bold;">SparkContext</span><span style="color: #333333;">(</span>conf<span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> data <span style="color: #008800; font-weight: bold;">=</span> sc<span style="color: #333333;">.</span>textFile<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"hdfs://10.0.0.209:8020/user/cloudera/hduser/in/documentation.txt"</span><span style="color: #333333;">)</span>
<span style="color: #008800; font-weight: bold;">val</span> result <span style="color: #008800; font-weight: bold;">=</span> data<span style="color: #333333;">.</span>flatMap<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">.</span>split<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">" "</span><span style="color: #333333;">)).</span>map<span style="color: #333333;">(</span>words <span style="color: #333333;">=&gt;</span> <span style="color: #333333;">(</span>words<span style="color: #333333;">,</span><span style="color: #0000dd; font-weight: bold;">1</span><span style="color: #333333;">)).</span>reduceByKey<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">+</span><span style="color: #008800; font-weight: bold;">_</span><span style="color: #333333;">)</span>
result<span style="color: #333333;">.</span>collect<span style="color: #333333;">.</span>foreach<span style="color: #333333;">(</span>println<span style="color: #333333;">)</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span></pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-10085202219324524242017-02-02T11:28:00.002-08:002019-08-03T18:54:04.318-07:00Apache Spark and Eclipse Setup, Step by Step Installation.<div dir="ltr" style="text-align: left;" trbidi="on">
<div dir="ltr" style="text-align: left;" trbidi="on">
<div dir="ltr" style="text-align: left;" trbidi="on">
<div dir="ltr" style="text-align: left;" trbidi="on">
<div>
<i><b>Note: At the time of writing, all the software versions mentioned here were the latest. There will be many future upgrades to Spark, Java, Eclipse, Scala, etc., so please use the latest versions available when you follow these steps.</b></i><br />
<br />
I made sure Java 7 is installed on my machine,</div>
<div>
<br /></div>
<div>
spark eclipse setup.</div>
<div>
-----------------------</div>
<div>
<br /></div>
<div>
1. download eclipse</div>
<div>
<br /></div>
<div>
2. Go to Help, then Eclipse Marketplace.</div>
<div>
<br /></div>
<div>
3. use find, scala</div>
<div>
<br /></div>
<div>
<br /></div>
<div>
search</div>
<div>
<br /></div>
<div>
scala ide 4.2 </div>
<div>
<br /></div>
<div>
Restart Eclipse for the change to take effect.</div>
<div>
<br /></div>
<div>
create maven project</div>
<div>
<br /></div>
<div>
build tool </div>
<div>
<br /></div>
<div>
Group Id: bidataproject, Artifact Id: sparkapplication, Version: 0.0.1-SNAPSHOT, Package: bigdata.spark_applications</div>
<div>
<br /></div>
<div>
go-to src/main/java ---- right click--- refactor ---- rename--- change java to scala ----- </div>
<div>
<br /></div>
<div>
right click create new scala object that is helloworld.scala</div>
<div>
<br /></div>
<div>
below:</div>
<div>
<br />
<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"> <span style="color: #008800; font-weight: bold;">object</span> <span style="color: #bb0066; font-weight: bold;">helloworld</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">def</span> main<span style="color: #333333;">(</span>args<span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Array</span><span style="color: #333333;">[</span><span style="color: #333399; font-weight: bold;">String</span><span style="color: #333333;">])</span><span style="color: #008800; font-weight: bold;">:</span> <span style="color: #333399; font-weight: bold;">Unit</span> <span style="color: #333333;">=</span> <span style="color: #333333;">{</span>
println<span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"Hello, world!"</span><span style="color: #333333;">)</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span></pre>
</div>
<div>
</div>
<div>
</div>
<div>
</div>
<div>
<br /></div>
</div>
</div>
<div>
You will get "Not a scala project"</div>
<div>
<br /></div>
<div>
You have to create scala nature</div>
<div>
<br /></div>
<div>
Right click it ... configure .... Add Scala nature...</div>
<div>
<br /></div>
<div>
Change the Scala library container: if we are implementing Spark, it is recommended to use Scala 2.10.</div>
<div>
<br /></div>
<div>
Right click goto properties. choose latest bundle 2.10 dynamic.</div>
<div>
<br /></div>
<div>
After creating the first object you will get to 2.10</div>
<div>
<br /></div>
<div>
Now pom dependency settings</div>
<div>
<br /></div>
<div>
pom.xml</div>
<div>
<br /></div>
<div>
We need to add several important dependencies</div>
<div>
<br /></div>
<div>
as shown below.</div>
<div>
<br /></div>
<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.scala-lang<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>scala-library<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>2.10.6<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #888888;"><!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.apache.hadoop<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>hadoop-common<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>2.6.4<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #888888;"><!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.10 --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.apache.spark<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>spark-core_2.10<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>1.6.1<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #888888;"><!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql_2.10 --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.apache.spark<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>spark-sql_2.10<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>1.6.1<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #888888;"><!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming_2.10 --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.apache.spark<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>spark-streaming_2.10<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>1.6.1<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #888888;"><!-- https://mvnrepository.com/artifact/org.apache.spark/spark-mllib_2.10 --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.apache.spark<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>spark-mllib_2.10<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>1.6.1<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #888888;"><!-- https://mvnrepository.com/artifact/org.apache.spark/spark-hive_2.10 --></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>org.apache.spark<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>spark-hive_2.10<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>1.6.1<span style="color: #007700;"></version></span>
<span style="color: #007700;"></dependency></span>
<span style="color: #007700;"><dependency></span>
<span style="color: #007700;"><groupId></span>junit<span style="color: #007700;"></groupId></span>
<span style="color: #007700;"><artifactId></span>junit<span style="color: #007700;"></artifactId></span>
<span style="color: #007700;"><version></span>3.8.1<span style="color: #007700;"></version></span>
<span style="color: #007700;"><scope></span>test<span style="color: #007700;"></scope></span>
<span style="color: #007700;"></dependency></span>
</pre>
</div>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-58681614413662382082017-01-18T15:12:00.004-08:002017-03-17T17:15:20.562-07:00<div dir="ltr" style="text-align: left;" trbidi="on">
<div class="challenge_problem_statement" style="background-color: white; border: 0px; color: #39424e; font-family: "Whitney SSm A", "Whitney SSm B", Avenir, "Segoe UI", Ubuntu, "Helvetica Neue", Helvetica, Arial, sans-serif; font-stretch: inherit; font-variant-numeric: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="msB challenge_problem_statement_body" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px 0px 10px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="hackdown-content" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; margin-top: 12px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="font-size: xx-small;">An Interesting problem on HackerRank!</span></b></div>
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; margin-top: 12px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<span style="font-size: xx-small;"><b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">Objective</b> <br style="word-break: break-word; word-wrap: break-word;" />Building on our knowledge of <i style="border: 0px; font-family: inherit; font-stretch: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">Arrays</i> by adding another dimension. </span></div>
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; margin-top: 12px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<span style="font-size: xx-small;"><b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">Context</b> <br style="word-break: break-word; word-wrap: break-word;" />Given a <span class="MathJax_SVG" id="MathJax-Element-1-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 2223.9 936.9" width="5.165ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M42 313Q42 476 123 571T303 666Q372 666 402 630T432 550Q432 525 418 510T379 495Q356 495 341 509T326 548Q326 592 373 601Q351 623 311 626Q240 626 194 566Q147 500 147 364L148 360Q153 366 156 373Q197 433 263 433H267Q313 433 348 414Q372 400 396 374T435 317Q456 268 456 210V192Q456 169 451 149Q440 90 387 34T253 -22Q225 -22 199 -14T143 16T92 75T56 172T42 313ZM257 397Q227 397 205 380T171 335T154 278T148 216Q148 133 160 97T198 39Q222 21 251 21Q302 21 329 59Q342 77 347 104T352 209Q352 289 347 316T329 361Q302 397 257 397Z" stroke-width="1"></path><g transform="translate(722,0)"><path d="M630 29Q630 9 609 9Q604 9 587 25T493 118L389 222L284 117Q178 13 175 11Q171 9 168 9Q160 9 154 15T147 29Q147 36 161 51T255 146L359 250L255 354Q174 435 161 449T147 471Q147 480 153 485T168 490Q173 490 175 489Q178 487 284 383L389 278L493 382Q570 459 587 475T609 491Q630 491 630 471Q630 464 620 453T522 355L418 250L522 145Q606 61 618 48T630 29Z" 
stroke-width="1"></path></g><g transform="translate(1723,0)"><path d="M42 313Q42 476 123 571T303 666Q372 666 402 630T432 550Q432 525 418 510T379 495Q356 495 341 509T326 548Q326 592 373 601Q351 623 311 626Q240 626 194 566Q147 500 147 364L148 360Q153 366 156 373Q197 433 263 433H267Q313 433 348 414Q372 400 396 374T435 317Q456 268 456 210V192Q456 169 451 149Q440 90 387 34T253 -22Q225 -22 199 -14T143 16T92 75T56 172T42 313ZM257 397Q227 397 205 380T171 335T154 278T148 216Q148 133 160 97T198 39Q222 21 251 21Q302 21 329 59Q342 77 347 104T352 209Q352 289 347 316T329 361Q302 397 257 397Z" stroke-width="1"></path></g></g></svg></span> <i style="border: 0px; font-family: inherit; font-stretch: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">2D Array</i>, <span class="MathJax_SVG" id="MathJax-Element-2-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 750.5 936.9" width="1.743ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M208 74Q208 50 254 46Q272 46 272 35Q272 34 270 22Q267 8 264 4T251 0Q249 0 239 0T205 1T141 2Q70 2 50 0H42Q35 7 35 11Q37 38 48 46H62Q132 49 164 96Q170 102 345 401T523 704Q530 716 547 716H555H572Q578 707 578 706L606 383Q634 60 636 57Q641 46 701 46Q726 46 726 36Q726 34 723 22Q720 7 718 4T704 0Q701 0 690 0T651 1T578 2Q484 2 455 0H443Q437 6 437 9T439 27Q443 40 445 43L449 46H469Q523 49 533 63L521 213H283L249 
155Q208 86 208 74ZM516 260Q516 271 504 416T490 562L463 519Q447 492 400 412L310 260L413 259Q516 259 516 260Z" stroke-width="1"></path></g></svg></span>:</span></div>
<pre style="background: rgb(244, 250, 255); border-radius: 2px; border: 0px; color: #454c59; font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: 18px; margin-bottom: 9px; margin-top: 12px; outline: 0px; padding: 20px; vertical-align: baseline; white-space: pre-wrap; word-break: break-word; word-wrap: break-word;"><code style="background: transparent; border-radius: 3px; border: 0px; color: #454c5f; font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="font-size: xx-small;">1 1 1 0 0 0
0 1 0 0 0 0
1 1 1 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0
</span></code></pre>
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; margin-top: 12px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<span style="font-size: xx-small;">We define an hourglass in <span class="MathJax_SVG" id="MathJax-Element-3-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 750.5 936.9" width="1.743ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M208 74Q208 50 254 46Q272 46 272 35Q272 34 270 22Q267 8 264 4T251 0Q249 0 239 0T205 1T141 2Q70 2 50 0H42Q35 7 35 11Q37 38 48 46H62Q132 49 164 96Q170 102 345 401T523 704Q530 716 547 716H555H572Q578 707 578 706L606 383Q634 60 636 57Q641 46 701 46Q726 46 726 36Q726 34 723 22Q720 7 718 4T704 0Q701 0 690 0T651 1T578 2Q484 2 455 0H443Q437 6 437 9T439 27Q443 40 445 43L449 46H469Q523 49 533 63L521 213H283L249 155Q208 86 208 74ZM516 260Q516 271 504 416T490 562L463 519Q447 492 400 412L310 260L413 259Q516 259 516 260Z" stroke-width="1"></path></g></svg></span> to be a subset of values with indices falling in this pattern in <span class="MathJax_SVG" id="MathJax-Element-4-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 750.5 936.9" width="1.743ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" 
stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M208 74Q208 50 254 46Q272 46 272 35Q272 34 270 22Q267 8 264 4T251 0Q249 0 239 0T205 1T141 2Q70 2 50 0H42Q35 7 35 11Q37 38 48 46H62Q132 49 164 96Q170 102 345 401T523 704Q530 716 547 716H555H572Q578 707 578 706L606 383Q634 60 636 57Q641 46 701 46Q726 46 726 36Q726 34 723 22Q720 7 718 4T704 0Q701 0 690 0T651 1T578 2Q484 2 455 0H443Q437 6 437 9T439 27Q443 40 445 43L449 46H469Q523 49 533 63L521 213H283L249 155Q208 86 208 74ZM516 260Q516 271 504 416T490 562L463 519Q447 492 400 412L310 260L413 259Q516 259 516 260Z" stroke-width="1"></path></g></svg></span>'s graphical representation:</span></div>
<pre style="background: rgb(244, 250, 255); border-radius: 2px; border: 0px; color: #454c59; font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: 18px; margin-bottom: 9px; margin-top: 12px; outline: 0px; padding: 20px; vertical-align: baseline; white-space: pre-wrap; word-break: break-word; word-wrap: break-word;"><code style="background: transparent; border-radius: 3px; border: 0px; color: #454c5f; font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="font-size: xx-small;">a b c
d
e f g
</span></code></pre>
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; margin-top: 12px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<span style="font-size: xx-small;">There are <span class="MathJax_SVG" id="MathJax-Element-5-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 1001 936.9" width="2.325ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M213 578L200 573Q186 568 160 563T102 556H83V602H102Q149 604 189 617T245 641T273 663Q275 666 285 666Q294 666 302 660V361L303 61Q310 54 315 52T339 48T401 46H427V0H416Q395 3 257 3Q121 3 100 0H88V46H114Q136 46 152 46T177 47T193 50T201 52T207 57T213 61V578Z" stroke-width="1"></path><path d="M42 313Q42 476 123 571T303 666Q372 666 402 630T432 550Q432 525 418 510T379 495Q356 495 341 509T326 548Q326 592 373 601Q351 623 311 626Q240 626 194 566Q147 500 147 364L148 360Q153 366 156 373Q197 433 263 433H267Q313 433 348 414Q372 400 396 374T435 317Q456 268 456 210V192Q456 169 451 149Q440 90 387 34T253 -22Q225 -22 199 -14T143 16T92 75T56 172T42 313ZM257 397Q227 397 205 380T171 335T154 278T148 216Q148 133 160 97T198 39Q222 21 251 21Q302 21 329 59Q342 77 347 104T352 209Q352 289 347 316T329 361Q302 397 257 397Z" stroke-width="1" transform="translate(500,0)"></path></g></svg></span> hourglasses in <span class="MathJax_SVG" id="MathJax-Element-6-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; 
word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 750.5 936.9" width="1.743ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M208 74Q208 50 254 46Q272 46 272 35Q272 34 270 22Q267 8 264 4T251 0Q249 0 239 0T205 1T141 2Q70 2 50 0H42Q35 7 35 11Q37 38 48 46H62Q132 49 164 96Q170 102 345 401T523 704Q530 716 547 716H555H572Q578 707 578 706L606 383Q634 60 636 57Q641 46 701 46Q726 46 726 36Q726 34 723 22Q720 7 718 4T704 0Q701 0 690 0T651 1T578 2Q484 2 455 0H443Q437 6 437 9T439 27Q443 40 445 43L449 46H469Q523 49 533 63L521 213H283L249 155Q208 86 208 74ZM516 260Q516 271 504 416T490 562L463 519Q447 492 400 412L310 260L413 259Q516 259 516 260Z" stroke-width="1"></path></g></svg></span>, and an <i style="border: 0px; font-family: inherit; font-stretch: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">hourglass sum</i> is the sum of an hourglass' values.</span></div>
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; margin-top: 12px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<span style="font-size: xx-small;"><b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">Task</b> <br style="word-break: break-word; word-wrap: break-word;" />Calculate the hourglass sum for every hourglass in <span class="MathJax_SVG" id="MathJax-Element-7-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 750.5 936.9" width="1.743ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M208 74Q208 50 254 46Q272 46 272 35Q272 34 270 22Q267 8 264 4T251 0Q249 0 239 0T205 1T141 2Q70 2 50 0H42Q35 7 35 11Q37 38 48 46H62Q132 49 164 96Q170 102 345 401T523 704Q530 716 547 716H555H572Q578 707 578 706L606 383Q634 60 636 57Q641 46 701 46Q726 46 726 36Q726 34 723 22Q720 7 718 4T704 0Q701 0 690 0T651 1T578 2Q484 2 455 0H443Q437 6 437 9T439 27Q443 40 445 43L449 46H469Q523 49 533 63L521 213H283L249 155Q208 86 208 74ZM516 260Q516 271 504 416T490 562L463 519Q447 492 400 412L310 260L413 259Q516 259 516 260Z" stroke-width="1"></path></g></svg></span>, then print the <i style="border: 0px; font-family: inherit; font-stretch: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">maximum</i> hourglass sum.</span></div>
</div>
</div>
</div>
<div class="challenge_input_format" style="background-color: white; border: 0px; color: #39424e; font-family: "Whitney SSm A", "Whitney SSm B", Avenir, "Segoe UI", Ubuntu, "Helvetica Neue", Helvetica, Arial, sans-serif; font-stretch: inherit; font-variant-numeric: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="msB challenge_input_format_title" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px 0px 10px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="font-size: xx-small;">Input Format</span></b></div>
</div>
<div class="msB challenge_input_format_body" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px 0px 10px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="hackdown-content" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; margin-top: 12px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<span style="font-size: xx-small;">There are <span class="MathJax_SVG" id="MathJax-Element-1-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 500.5 936.9" width="1.162ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M42 313Q42 476 123 571T303 666Q372 666 402 630T432 550Q432 525 418 510T379 495Q356 495 341 509T326 548Q326 592 373 601Q351 623 311 626Q240 626 194 566Q147 500 147 364L148 360Q153 366 156 373Q197 433 263 433H267Q313 433 348 414Q372 400 396 374T435 317Q456 268 456 210V192Q456 169 451 149Q440 90 387 34T253 -22Q225 -22 199 -14T143 16T92 75T56 172T42 313ZM257 397Q227 397 205 380T171 335T154 278T148 216Q148 133 160 97T198 39Q222 21 251 21Q302 21 329 59Q342 77 347 104T352 209Q352 289 347 316T329 361Q302 397 257 397Z" stroke-width="1"></path></g></svg></span> lines of input, where each line contains <span class="MathJax_SVG" id="MathJax-Element-2-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 500.5 936.9" width="1.162ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" 
transform="matrix(1 0 0 -1 0 0)"><path d="M42 313Q42 476 123 571T303 666Q372 666 402 630T432 550Q432 525 418 510T379 495Q356 495 341 509T326 548Q326 592 373 601Q351 623 311 626Q240 626 194 566Q147 500 147 364L148 360Q153 366 156 373Q197 433 263 433H267Q313 433 348 414Q372 400 396 374T435 317Q456 268 456 210V192Q456 169 451 149Q440 90 387 34T253 -22Q225 -22 199 -14T143 16T92 75T56 172T42 313ZM257 397Q227 397 205 380T171 335T154 278T148 216Q148 133 160 97T198 39Q222 21 251 21Q302 21 329 59Q342 77 347 104T352 209Q352 289 347 316T329 361Q302 397 257 397Z" stroke-width="1"></path></g></svg></span> space-separated integers describing <i style="border: 0px; font-family: inherit; font-stretch: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">2D Array</i> <span class="MathJax_SVG" id="MathJax-Element-3-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 750.5 936.9" width="1.743ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M208 74Q208 50 254 46Q272 46 272 35Q272 34 270 22Q267 8 264 4T251 0Q249 0 239 0T205 1T141 2Q70 2 50 0H42Q35 7 35 11Q37 38 48 46H62Q132 49 164 96Q170 102 345 401T523 704Q530 716 547 716H555H572Q578 707 578 706L606 383Q634 60 636 57Q641 46 701 46Q726 46 726 36Q726 34 723 22Q720 7 718 4T704 0Q701 0 690 0T651 1T578 2Q484 2 455 0H443Q437 6 437 9T439 27Q443 40 445 43L449 46H469Q523 49 533 63L521 213H283L249 
155Q208 86 208 74ZM516 260Q516 271 504 416T490 562L463 519Q447 492 400 412L310 260L413 259Q516 259 516 260Z" stroke-width="1"></path></g></svg></span>; every value in <span class="MathJax_SVG" id="MathJax-Element-4-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 750.5 936.9" width="1.743ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M208 74Q208 50 254 46Q272 46 272 35Q272 34 270 22Q267 8 264 4T251 0Q249 0 239 0T205 1T141 2Q70 2 50 0H42Q35 7 35 11Q37 38 48 46H62Q132 49 164 96Q170 102 345 401T523 704Q530 716 547 716H555H572Q578 707 578 706L606 383Q634 60 636 57Q641 46 701 46Q726 46 726 36Q726 34 723 22Q720 7 718 4T704 0Q701 0 690 0T651 1T578 2Q484 2 455 0H443Q437 6 437 9T439 27Q443 40 445 43L449 46H469Q523 49 533 63L521 213H283L249 155Q208 86 208 74ZM516 260Q516 271 504 416T490 562L463 519Q447 492 400 412L310 260L413 259Q516 259 516 260Z" stroke-width="1"></path></g></svg></span> will be in the inclusive range of <span class="MathJax_SVG" id="MathJax-Element-5-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 1279 936.9" width="2.971ex" 
xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M84 237T84 250T98 270H679Q694 262 694 250T679 230H98Q84 237 84 250Z" stroke-width="1"></path><g transform="translate(778,0)"><path d="M352 287Q304 211 232 211Q154 211 104 270T44 396Q42 412 42 436V444Q42 537 111 606Q171 666 243 666Q245 666 249 666T257 665H261Q273 665 286 663T323 651T370 619T413 560Q456 472 456 334Q456 194 396 97Q361 41 312 10T208 -22Q147 -22 108 7T68 93T121 149Q143 149 158 135T173 96Q173 78 164 65T148 49T135 44L131 43Q131 41 138 37T164 27T206 22H212Q272 22 313 86Q352 142 352 280V287ZM244 248Q292 248 321 297T351 430Q351 508 343 542Q341 552 337 562T323 588T293 615T246 625Q208 625 181 598Q160 576 154 546T147 441Q147 358 152 329T172 282Q197 248 244 248Z" stroke-width="1"></path></g></g></svg></span> to <span class="MathJax_SVG" id="MathJax-Element-6-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 500.5 936.9" width="1.162ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M352 287Q304 211 232 211Q154 211 104 270T44 396Q42 412 42 436V444Q42 537 111 606Q171 666 243 666Q245 666 249 666T257 665H261Q273 665 286 663T323 651T370 619T413 560Q456 472 456 334Q456 194 396 97Q361 41 312 10T208 -22Q147 -22 108 7T68 93T121 149Q143 149 158 135T173 96Q173 78 164 65T148 49T135 44L131 43Q131 41 138 37T164 27T206 22H212Q272 22 313 86Q352 142 352 280V287ZM244 248Q292 248 321 297T351 430Q351 508 343 542Q341 552 337 
562T323 588T293 615T246 625Q208 625 181 598Q160 576 154 546T147 441Q147 358 152 329T172 282Q197 248 244 248Z" stroke-width="1"></path></g></svg></span>.</span></div>
</div>
</div>
</div>
<div class="challenge_constraints" style="background-color: white; border: 0px; color: #39424e; font-family: "Whitney SSm A", "Whitney SSm B", Avenir, "Segoe UI", Ubuntu, "Helvetica Neue", Helvetica, Arial, sans-serif; font-stretch: inherit; font-variant-numeric: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="msB challenge_constraints_title" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px 0px 10px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="font-size: xx-small;">Output Format</span></b></div>
</div>
</div>
<div class="challenge_output_format" style="background-color: white; border: 0px; color: #39424e; font-family: "Whitney SSm A", "Whitney SSm B", Avenir, "Segoe UI", Ubuntu, "Helvetica Neue", Helvetica, Arial, sans-serif; font-stretch: inherit; font-variant-numeric: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="msB challenge_output_format_body" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px 0px 10px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="hackdown-content" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; margin-top: 12px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<span style="font-size: xx-small;">Print the largest (maximum) hourglass sum found in <span class="MathJax_SVG" id="MathJax-Element-1-Frame" style="border: 0px; direction: ltr; display: inline-block; float: none; font-family: inherit; font-stretch: inherit; font-variant: inherit; line-height: normal; margin: 0px; max-height: none; max-width: none; min-height: 0px; min-width: 0px; outline: 0px; padding: 0px; vertical-align: baseline; white-space: nowrap; word-spacing: normal; word-wrap: normal;"><svg focusable="false" height="2.176ex" role="img" style="vertical-align: -0.338ex;" viewbox="0 -791.3 750.5 936.9" width="1.743ex" xmlns:xlink="http://www.w3.org/1999/xlink"><g fill="currentColor" stroke-width="0" stroke="currentColor" transform="matrix(1 0 0 -1 0 0)"><path d="M208 74Q208 50 254 46Q272 46 272 35Q272 34 270 22Q267 8 264 4T251 0Q249 0 239 0T205 1T141 2Q70 2 50 0H42Q35 7 35 11Q37 38 48 46H62Q132 49 164 96Q170 102 345 401T523 704Q530 716 547 716H555H572Q578 707 578 706L606 383Q634 60 636 57Q641 46 701 46Q726 46 726 36Q726 34 723 22Q720 7 718 4T704 0Q701 0 690 0T651 1T578 2Q484 2 455 0H443Q437 6 437 9T439 27Q443 40 445 43L449 46H469Q523 49 533 63L521 213H283L249 155Q208 86 208 74ZM516 260Q516 271 504 416T490 562L463 519Q447 492 400 412L310 260L413 259Q516 259 516 260Z" stroke-width="1"></path></g></svg></span>.</span></div>
</div>
</div>
</div>
<div class="challenge_sample_input" style="background-color: white; border: 0px; color: #39424e; font-family: "Whitney SSm A", "Whitney SSm B", Avenir, "Segoe UI", Ubuntu, "Helvetica Neue", Helvetica, Arial, sans-serif; font-stretch: inherit; font-variant-numeric: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="msB challenge_sample_input_title" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px 0px 10px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div style="border: 0px; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: 1.5em; margin-bottom: 1em; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="font-size: xx-small;">Sample Input</span></b></div>
</div>
<div class="msB challenge_sample_input_body" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px 0px 10px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="hackdown-content" style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<pre style="background: rgb(244, 250, 255); border-radius: 2px; border: 0px; color: #454c59; font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: 18px; margin-bottom: 9px; margin-top: 12px; outline: 0px; padding: 20px; vertical-align: baseline; white-space: pre-wrap; word-break: break-word; word-wrap: break-word;"><code style="background: transparent; border-radius: 3px; border: 0px; color: #454c5f; font-family: Menlo, Monaco, Consolas, "Courier New", monospace; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="font-size: xx-small;">1 1 1 0 0 0
0 1 0 0 0 0
1 1 1 0 0 0
0 0 2 4 4 0
0 0 0 2 0 0
0 0 1 2 4 0
</span></code></pre>
</div>
</div>
</div>
<div class="challenge_sample_output" style="border: 0px; font-stretch: inherit; font-variant-numeric: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div class="msB challenge_sample_output_title" style="border: 0px; font-stretch: inherit; font-variant-numeric: inherit; line-height: inherit; margin: 0px 0px 10px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<div style="background-color: white; border: 0px; color: #39424e; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; line-height: 1.5em; margin-bottom: 1em; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;">
<b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="font-size: xx-small;">Sample Output</span></b></div>
<div class="msB challenge_sample_output_title" style="border: 0px; font-stretch: inherit; font-variant-numeric: inherit; line-height: inherit; margin: 0px 0px 10px; orphans: 2; outline: 0px; padding: 0px; text-align: left; text-indent: 0px; vertical-align: baseline; widows: 2; word-break: break-word; word-wrap: break-word;">
<div style="-webkit-text-stroke-width: 0px; background-color: white; border: 0px; color: #39424e; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; font-weight: inherit; letter-spacing: normal; line-height: 1.5em; margin: 0px 0px 1em; outline: 0px; padding: 0px; text-transform: none; vertical-align: baseline; white-space: normal; word-break: break-word; word-spacing: 0px; word-wrap: break-word;">
<b style="border: 0px; font-family: inherit; font-stretch: inherit; font-style: inherit; font-variant: inherit; line-height: inherit; margin: 0px; outline: 0px; padding: 0px; vertical-align: baseline; word-break: break-word; word-wrap: break-word;"><span style="background-color: #f4faff; color: #454c5f; font-family: "menlo" , "monaco" , "consolas" , "courier new" , monospace; font-weight: normal; white-space: pre-wrap;"><span style="font-size: xx-small;">19</span></span></b></div>
<div style="-webkit-text-stroke-width: 0px; background-color: white; border: 0px; color: #39424e; font-family: inherit; font-size: 16px; font-stretch: inherit; font-style: inherit; font-variant: inherit; letter-spacing: normal; line-height: 1.5em; margin: 0px 0px 1em; outline: 0px; padding: 0px; text-transform: none; vertical-align: baseline; white-space: normal; word-break: break-word; word-spacing: 0px; word-wrap: break-word;">
<b><u>Solution</u></b><br />
<br />
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"> <span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.ArrayList</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Scanner</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">class</span> <span style="color: #bb0066; font-weight: bold;">TwoDimArrays</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #333399; font-weight: bold;">void</span> <span style="color: #0066bb; font-weight: bold;">main</span><span style="color: #333333;">(</span>String<span style="color: #333333;">[]</span> args<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
Integer<span style="color: #333333;">[][]</span> arr <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> Integer<span style="color: #333333;">[</span><span style="color: #0000dd; font-weight: bold;">6</span><span style="color: #333333;">][</span><span style="color: #0000dd; font-weight: bold;">6</span><span style="color: #333333;">];</span>
Scanner in <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> Scanner<span style="color: #333333;">(</span>System<span style="color: #333333;">.</span><span style="color: #0000cc;">in</span><span style="color: #333333;">);</span>
<span style="color: #888888;">// reading elements into the array </span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> i <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> i <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">6</span><span style="color: #333333;">;</span> i<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> j <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> j <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">6</span><span style="color: #333333;">;</span> j<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">if</span> <span style="color: #333333;">(</span>in<span style="color: #333333;">.</span><span style="color: #0000cc;">hasNext</span><span style="color: #333333;">())</span> <span style="color: #333333;">{</span>
arr<span style="color: #333333;">[</span>i<span style="color: #333333;">][</span>j<span style="color: #333333;">]</span> <span style="color: #333333;">=</span> in<span style="color: #333333;">.</span><span style="color: #0000cc;">nextInt</span><span style="color: #333333;">();</span>
<span style="color: #333333;">}</span> <span style="color: #008800; font-weight: bold;">else</span> <span style="color: #333333;">{</span>
arr<span style="color: #333333;">[</span>i<span style="color: #333333;">][</span>j<span style="color: #333333;">]</span> <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">null</span><span style="color: #333333;">;</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span>
System<span style="color: #333333;">.</span><span style="color: #0000cc;">out</span><span style="color: #333333;">.</span><span style="color: #0000cc;">print</span><span style="color: #333333;">(</span>getMax<span style="color: #333333;">(</span>findingHourGlass<span style="color: #333333;">(</span>arr<span style="color: #333333;">)));</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> ArrayList <span style="color: #0066bb; font-weight: bold;">findingHourGlass</span><span style="color: #333333;">(</span>Integer<span style="color: #333333;">[][]</span> arr<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
ArrayList al<span style="color: #333333;">;</span>
ArrayList countList <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> ArrayList<span style="color: #333333;"><>();</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> i <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> i <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">4</span><span style="color: #333333;">;</span> i<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> j <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> j <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">4</span><span style="color: #333333;">;</span> j<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
al <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> ArrayList<span style="color: #333333;"><>();</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> p <span style="color: #333333;">=</span> i<span style="color: #333333;">;</span> p <span style="color: #333333;"><</span> i <span style="color: #333333;">+</span> <span style="color: #0000dd; font-weight: bold;">3</span> <span style="color: #333333;">&&</span> <span style="color: #333333;">(</span>i <span style="color: #333333;">+</span> <span style="color: #0000dd; font-weight: bold;">3</span><span style="color: #333333;">)</span> <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">7</span><span style="color: #333333;">;</span> p<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> q <span style="color: #333333;">=</span> j<span style="color: #333333;">;</span> q <span style="color: #333333;"><</span> j <span style="color: #333333;">+</span> <span style="color: #0000dd; font-weight: bold;">3</span> <span style="color: #333333;">&&</span> <span style="color: #333333;">(</span>j <span style="color: #333333;">+</span> <span style="color: #0000dd; font-weight: bold;">3</span><span style="color: #333333;">)</span> <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">7</span><span style="color: #333333;">;</span> q<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">if</span> <span style="color: #333333;">(</span>arr<span style="color: #333333;">[</span>p<span style="color: #333333;">][</span>q<span style="color: #333333;">]</span> <span style="color: #333333;">!=</span> <span style="color: #008800; font-weight: bold;">null</span><span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
<span style="color: #888888;">// System.out.printf("%3d", arr[p][q]); </span>
al<span style="color: #333333;">.</span><span style="color: #0000cc;">add</span><span style="color: #333333;">(</span>arr<span style="color: #333333;">[</span>p<span style="color: #333333;">][</span>q<span style="color: #333333;">]);</span>
<span style="color: #888888;">// System.out.print(" "); </span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span> <span style="color: #888888;">// q-loop </span>
<span style="color: #333333;">}</span> <span style="color: #888888;">// p-loop </span>
Integer count <span style="color: #333333;">=</span> countHourGlass<span style="color: #333333;">(</span>al<span style="color: #333333;">);</span> countList<span style="color: #333333;">.</span><span style="color: #0000cc;">add</span><span style="color: #333333;">(</span>count<span style="color: #333333;">);</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">return</span> countList<span style="color: #333333;">;</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> Integer <span style="color: #0066bb; font-weight: bold;">countHourGlass</span><span style="color: #333333;">(</span>ArrayList<span style="color: #333333;">&lt;</span>Integer<span style="color: #333333;">&gt;</span> al<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
Integer sum <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> i <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> i <span style="color: #333333;"><</span> al<span style="color: #333333;">.</span><span style="color: #0000cc;">size</span><span style="color: #333333;">();</span> i<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">if</span> <span style="color: #333333;">(</span>i <span style="color: #333333;">!=</span> <span style="color: #0000dd; font-weight: bold;">3</span> <span style="color: #333333;">&&</span> i <span style="color: #333333;">!=</span> <span style="color: #0000dd; font-weight: bold;">5</span><span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
sum <span style="color: #333333;">=</span> sum <span style="color: #333333;">+</span> al<span style="color: #333333;">.</span><span style="color: #0000cc;">get</span><span style="color: #333333;">(</span>i<span style="color: #333333;">);</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">return</span> sum<span style="color: #333333;">;</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #333399; font-weight: bold;">void</span> <span style="color: #0066bb; font-weight: bold;">printGrid</span><span style="color: #333333;">(</span>Integer<span style="color: #333333;">[][]</span> arr<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> i <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> i <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">6</span><span style="color: #333333;">;</span> i<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> j <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> j <span style="color: #333333;"><</span> <span style="color: #0000dd; font-weight: bold;">6</span><span style="color: #333333;">;</span> j<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
System<span style="color: #333333;">.</span><span style="color: #0000cc;">out</span><span style="color: #333333;">.</span><span style="color: #0000cc;">printf</span><span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"%3d"</span><span style="color: #333333;">,</span> arr<span style="color: #333333;">[</span>i<span style="color: #333333;">][</span>j<span style="color: #333333;">]);</span><span style="color: #888888;">// + </span>
System<span style="color: #333333;">.</span><span style="color: #0000cc;">out</span><span style="color: #333333;">.</span><span style="color: #0000cc;">print</span><span style="color: #333333;">(</span><span style="background-color: #fff0f0;">" "</span><span style="color: #333333;">);</span>
<span style="color: #333333;">}</span>
System<span style="color: #333333;">.</span><span style="color: #0000cc;">out</span><span style="color: #333333;">.</span><span style="color: #0000cc;">println</span><span style="color: #333333;">();</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #333399; font-weight: bold;">int</span> <span style="color: #0066bb; font-weight: bold;">getMax</span><span style="color: #333333;">(</span>ArrayList<span style="color: #333333;">&lt;</span>Integer<span style="color: #333333;">&gt;</span> list<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
<span style="color: #333399; font-weight: bold;">int</span> max <span style="color: #333333;">=</span> Integer<span style="color: #333333;">.</span><span style="color: #0000cc;">MIN_VALUE</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> i <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> i <span style="color: #333333;"><</span> list<span style="color: #333333;">.</span><span style="color: #0000cc;">size</span><span style="color: #333333;">();</span> i<span style="color: #333333;">++)</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">if</span> <span style="color: #333333;">(</span>list<span style="color: #333333;">.</span><span style="color: #0000cc;">get</span><span style="color: #333333;">(</span>i<span style="color: #333333;">)</span> <span style="color: #333333;">></span> max<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
max <span style="color: #333333;">=</span> <span style="color: #333333;">(</span>Integer<span style="color: #333333;">)</span> list<span style="color: #333333;">.</span><span style="color: #0000cc;">get</span><span style="color: #333333;">(</span>i<span style="color: #333333;">);</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">return</span> max<span style="color: #333333;">;</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span></pre>
<br /></div>
</div>
</div>
</div>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-90381343700360824562017-01-16T01:37:00.000-08:002017-03-08T09:56:12.329-08:00Getting Max of the ArrayList<div dir="ltr" style="text-align: left;" trbidi="on">
<pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"> <span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #333399; font-weight: bold;">int</span> <span style="color: #0066bb; font-weight: bold;">getMax</span><span style="color: #333333;">(</span>ArrayList<span style="color: #333333;"><</span>Integer<span style="color: #333333;">></span> list<span style="color: #333333;">){</span>
<span style="color: #333399; font-weight: bold;">int</span> max <span style="color: #333333;">=</span> Integer<span style="color: #333333;">.</span><span style="color: #0000cc;">MIN_VALUE</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">for</span><span style="color: #333333;">(</span><span style="color: #333399; font-weight: bold;">int</span> i<span style="color: #333333;">=</span><span style="color: #0000dd; font-weight: bold;">0</span><span style="color: #333333;">;</span> i <span style="color: #333333;"><</span> list<span style="color: #333333;">.</span><span style="color: #0000cc;">size</span><span style="color: #333333;">();</span> i<span style="color: #333333;">++){</span>
<span style="color: #008800; font-weight: bold;">if</span><span style="color: #333333;">(</span>list<span style="color: #333333;">.</span><span style="color: #0000cc;">get</span><span style="color: #333333;">(</span>i<span style="color: #333333;">)</span> <span style="color: #333333;">></span> max<span style="color: #333333;">){</span>
max <span style="color: #333333;">=</span> <span style="color: #333333;">(</span>Integer<span style="color: #333333;">)</span> list<span style="color: #333333;">.</span><span style="color: #0000cc;">get</span><span style="color: #333333;">(</span>i<span style="color: #333333;">);</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">return</span> max<span style="color: #333333;">;</span>
<span style="color: #333333;">}</span></pre>
<br /></div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-52129674618391866782015-05-11T05:02:00.001-07:002015-05-11T05:02:34.911-07:00Unlike uname, lsb_release -a<p dir="ltr">This command prints the distribution name and release version of a Linux flavour.</p>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0tag:blogger.com,1999:blog-3387700777530757377.post-4197418738957082002015-03-19T20:01:00.000-07:002017-03-08T09:53:32.424-08:00Copying List of Files using Java, by SSH Channel and executing linux command(s).<div dir="ltr" style="text-align: left;" trbidi="on">
<br />
Hello,<br />
<br />
The code below transfers your local files to a remote Linux machine over an SSH channel, without using PuTTY or WinSCP.<br />
<br />
Note 1: I have tested this code on flat files only.<br />
Note 2: Place <a href="http://www.java2s.com/Code/Jar/j/Downloadjsch0142jar.htm" rel="nofollow" target="_blank">jsch-0.1.42.jar</a> in your build path.<br />
<br />
<br /><pre style="-webkit-text-stroke-width: 0px; color: #333333; font-style: normal; font-variant-caps: normal; font-variant-ligatures: normal; font-weight: normal; letter-spacing: normal; line-height: 16.25px; margin: 0px; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; widows: 2; word-spacing: 0px;"><span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.BufferedReader</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.File</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.IOException</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.io.InputStreamReader</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.ArrayList</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">java.util.Properties</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.Channel</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.ChannelExec</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.ChannelSftp</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.JSch</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.JSchException</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.Session</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">import</span> <span style="color: #0e84b5; font-weight: bold;">com.jcraft.jsch.SftpException</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">class</span> <span style="color: #bb0066; font-weight: bold;">TransferFilesViaSSH</span> <span style="color: #333333;">{</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #008800; font-weight: bold;">final</span> String userNameRmtMachine <span style="color: #333333;">=</span> <span style="background-color: #fff0f0;">"xxxxxxxx"</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #008800; font-weight: bold;">final</span> String passwordRmtMachine <span style="color: #333333;">=</span> <span style="background-color: #fff0f0;">"xxxxxxxx"</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #008800; font-weight: bold;">final</span> String hostNumRmtMachine <span style="color: #333333;">=</span> <span style="background-color: #fff0f0;">"192.168.1.5"</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #008800; font-weight: bold;">final</span> Integer sshPortNumRmtMachine <span style="color: #333333;">=</span> <span style="color: #0000dd; font-weight: bold;">22</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #008800; font-weight: bold;">final</span> String srcFilesForTransfr <span style="color: #333333;">=</span> <span style="background-color: #fff0f0;">"D:\\flat_files\\"</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #008800; font-weight: bold;">final</span> String tgtPathRmtMachine <span style="color: #333333;">=</span> <span style="background-color: #fff0f0;">"/home/xxxxxx/xxxxxx/"</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">public</span> <span style="color: #008800; font-weight: bold;">static</span> <span style="color: #333399; font-weight: bold;">void</span> <span style="color: #0066bb; font-weight: bold;">main</span><span style="color: #333333;">(</span>String<span style="color: #333333;">[]</span> args<span style="color: #333333;">)</span> <span style="color: #008800; font-weight: bold;">throws</span> IOException <span style="color: #333333;">{</span>
String srcFilesFullPath <span style="color: #333333;">=</span> <span style="background-color: #fff0f0;">""</span><span style="color: #333333;">;</span>
String targetFilesLocation <span style="color: #333333;">=</span> <span style="background-color: #fff0f0;">""</span><span style="color: #333333;">;</span>
JSch javaSSHChannel <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> JSch<span style="color: #333333;">();</span>
<span style="color: #008800; font-weight: bold;">try</span> <span style="color: #333333;">{</span>
Session sshSession <span style="color: #333333;">=</span> javaSSHChannel<span style="color: #333333;">.</span><span style="color: #0000cc;">getSession</span><span style="color: #333333;">(</span>userNameRmtMachine<span style="color: #333333;">,</span>
hostNumRmtMachine<span style="color: #333333;">,</span> sshPortNumRmtMachine<span style="color: #333333;">);</span>
sshSession<span style="color: #333333;">.</span><span style="color: #0000cc;">setPassword</span><span style="color: #333333;">(</span>passwordRmtMachine<span style="color: #333333;">);</span>
Properties configProperties <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> Properties<span style="color: #333333;">();</span>
configProperties<span style="color: #333333;">.</span><span style="color: #0000cc;">put</span><span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"StrictHostKeyChecking"</span><span style="color: #333333;">,</span> <span style="background-color: #fff0f0;">"no"</span><span style="color: #333333;">);</span>
sshSession<span style="color: #333333;">.</span><span style="color: #0000cc;">setConfig</span><span style="color: #333333;">(</span>configProperties<span style="color: #333333;">);</span>
sshSession<span style="color: #333333;">.</span><span style="color: #0000cc;">connect</span><span style="color: #333333;">();</span>
<span style="color: #888888;">// Channels opening</span>
Channel openSftpChannel <span style="color: #333333;">=</span> sshSession<span style="color: #333333;">.</span><span style="color: #0000cc;">openChannel</span><span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"sftp"</span><span style="color: #333333;">);</span><span style="color: #888888;">// channel sftp connection</span>
Channel openExecChannel <span style="color: #333333;">=</span> sshSession<span style="color: #333333;">.</span><span style="color: #0000cc;">openChannel</span><span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"exec"</span><span style="color: #333333;">);</span><span style="color: #888888;">// channel for Executing your command</span>
ChannelSftp sftpChannel <span style="color: #333333;">=</span> <span style="color: #333333;">(</span>ChannelSftp<span style="color: #333333;">)</span> openSftpChannel<span style="color: #333333;">;</span>
sftpChannel<span style="color: #333333;">.</span><span style="color: #0000cc;">connect</span><span style="color: #333333;">();</span>
<span style="color: #888888;">// copying source files</span>
File srcFilesFolder <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> File<span style="color: #333333;">(</span>srcFilesForTransfr<span style="color: #333333;">);</span>
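ArrayList<span style="color: #333333;"><</span>String<span style="color: #333333;">></span> listOfFiles <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> ArrayList<span style="color: #333333;"><</span>String<span style="color: #333333;">>();</span>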
listOfFiles <span style="color: #333333;">=</span> filesListFromFolder<span style="color: #333333;">(</span>srcFilesFolder<span style="color: #333333;">);</span>
<span style="color: #008800; font-weight: bold;">for</span> <span style="color: #333333;">(</span>String fileName <span style="color: #333333;">:</span> listOfFiles<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
srcFilesFullPath <span style="color: #333333;">=</span> srcFilesForTransfr <span style="color: #333333;">+</span> fileName<span style="color: #333333;">;</span>
targetFilesLocation <span style="color: #333333;">=</span> tgtPathRmtMachine <span style="color: #333333;">+</span> fileName<span style="color: #333333;">;</span>
sftpChannel<span style="color: #333333;">.</span><span style="color: #0000cc;">put</span><span style="color: #333333;">(</span>srcFilesFullPath<span style="color: #333333;">,</span> targetFilesLocation<span style="color: #333333;">);</span>
<span style="color: #333333;">}</span>
sftpChannel<span style="color: #333333;">.</span><span style="color: #0000cc;">disconnect</span><span style="color: #333333;">();</span>
<span style="color: #888888;">// Command Executor</span>
ChannelExec channelExec <span style="color: #333333;">=</span> <span style="color: #333333;">(</span>ChannelExec<span style="color: #333333;">)</span> openExecChannel<span style="color: #333333;">;</span>
channelExec<span style="color: #333333;">.</span><span style="color: #0000cc;">setCommand</span><span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"ls -l "</span> <span style="color: #333333;">+</span> tgtPathRmtMachine<span style="color: #333333;">);</span>
channelExec<span style="color: #333333;">.</span><span style="color: #0000cc;">setErrStream</span><span style="color: #333333;">(</span>System<span style="color: #333333;">.</span><span style="color: #0000cc;">err</span><span style="color: #333333;">);</span>
channelExec<span style="color: #333333;">.</span><span style="color: #0000cc;">connect</span><span style="color: #333333;">();</span>
BufferedReader bufRdr <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> BufferedReader<span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">new</span> InputStreamReader<span style="color: #333333;">(</span>channelExec<span style="color: #333333;">.</span><span style="color: #0000cc;">getInputStream</span><span style="color: #333333;">()));</span>
String line <span style="color: #333333;">=</span> <span style="background-color: #fff0f0;">""</span><span style="color: #333333;">;</span>
<span style="color: #008800; font-weight: bold;">while</span><span style="color: #333333;">((</span>line <span style="color: #333333;">=</span> bufRdr<span style="color: #333333;">.</span><span style="color: #0000cc;">readLine</span><span style="color: #333333;">())</span> <span style="color: #333333;">!=</span> <span style="color: #008800; font-weight: bold;">null</span><span style="color: #333333;">){</span>
System<span style="color: #333333;">.</span><span style="color: #0000cc;">out</span><span style="color: #333333;">.</span><span style="color: #0000cc;">println</span><span style="color: #333333;">(</span>line<span style="color: #333333;">);</span>
<span style="color: #333333;">}</span>
channelExec<span style="color: #333333;">.</span><span style="color: #0000cc;">disconnect</span><span style="color: #333333;">();</span>
sshSession<span style="color: #333333;">.</span><span style="color: #0000cc;">disconnect</span><span style="color: #333333;">();</span>
<span style="color: #333333;">}</span> <span style="color: #008800; font-weight: bold;">catch</span> <span style="color: #333333;">(</span>JSchException e<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
e<span style="color: #333333;">.</span><span style="color: #0000cc;">printStackTrace</span><span style="color: #333333;">();</span>
<span style="color: #333333;">}</span> <span style="color: #008800; font-weight: bold;">catch</span> <span style="color: #333333;">(</span>SftpException e<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
e<span style="color: #333333;">.</span><span style="color: #0000cc;">printStackTrace</span><span style="color: #333333;">();</span>
<span style="color: #333333;">}</span>
System<span style="color: #333333;">.</span><span style="color: #0000cc;">out</span><span style="color: #333333;">.</span><span style="color: #0000cc;">println</span><span style="color: #333333;">(</span><span style="background-color: #fff0f0;">"Test"</span><span style="color: #333333;">);</span>
<span style="color: #333333;">}</span> <span style="color: #888888;">// end of main method</span>
<span style="color: #008800; font-weight: bold;">private</span> <span style="color: #008800; font-weight: bold;">static</span> ArrayList<span style="color: #333333;"><</span>String<span style="color: #333333;">></span> <span style="color: #0066bb; font-weight: bold;">filesListFromFolder</span><span style="color: #333333;">(</span>File srcFilesFolder<span style="color: #333333;">)</span> <span style="color: #333333;">{</span>
ArrayList<span style="color: #333333;"><</span>String<span style="color: #333333;">></span> fileList <span style="color: #333333;">=</span> <span style="color: #008800; font-weight: bold;">new</span> ArrayList<span style="color: #333333;"><</span>String<span style="color: #333333;">>();</span>
<span style="color: #008800; font-weight: bold;">for</span><span style="color: #333333;">(</span><span style="color: #008800; font-weight: bold;">final</span> File fileEntry <span style="color: #333333;">:</span> srcFilesFolder<span style="color: #333333;">.</span><span style="color: #0000cc;">listFiles</span><span style="color: #333333;">()){</span>
<span style="color: #008800; font-weight: bold;">if</span><span style="color: #333333;">(</span>fileEntry<span style="color: #333333;">.</span><span style="color: #0000cc;">isDirectory</span><span style="color: #333333;">()){</span>
filesListFromFolder<span style="color: #333333;">(</span>fileEntry<span style="color: #333333;">);</span>
<span style="color: #333333;">}</span><span style="color: #008800; font-weight: bold;">else</span> <span style="color: #333333;">{</span>
fileList<span style="color: #333333;">.</span><span style="color: #0000cc;">add</span><span style="color: #333333;">(</span>fileEntry<span style="color: #333333;">.</span><span style="color: #0000cc;">getName</span><span style="color: #333333;">());</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span>
<span style="color: #008800; font-weight: bold;">return</span> fileList<span style="color: #333333;">;</span>
<span style="color: #333333;">}</span>
<span style="color: #333333;">}</span> <span style="color: #888888;">// end of class</span></pre>
</div>
Naveen Kumarhttp://www.blogger.com/profile/04291894728506693191noreply@blogger.com0