ITechShree-Data-Analytics-Technologies

Sqoop Import Using Java: An Example





First of all, download the Hadoop common jar, the Cloudera Sqoop jars, and the JDBC driver for your RDBMS (here, PostgreSQL), and add them to your build path.
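
If you build with Maven, you can declare these as dependencies instead of adding jars by hand. A rough sketch, assuming Sqoop 1.4.4, Hadoop 2.4.0, and the PostgreSQL JDBC driver (adjust the versions to match your cluster; some Sqoop builds also require a Hadoop-specific classifier such as hadoop200):

<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>2.4.0</version>
</dependency>
<dependency>
  <groupId>org.apache.sqoop</groupId>
  <artifactId>sqoop</artifactId>
  <version>1.4.4</version>
</dependency>
<dependency>
  <groupId>org.postgresql</groupId>
  <artifactId>postgresql</artifactId>
  <version>9.3-1102-jdbc41</version>
</dependency>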



package example.sqoop; // note: package names beginning with "java." are reserved and rejected by the JVM

import java.io.IOException;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.tool.ImportTool;

public class JavaSqoopImport {

    private static String location = "";
    private static final SqoopOptions sqoopOptions = new SqoopOptions();

    private static void setUpConnection() throws IOException {
        // Build a date-based target directory name such as
        // /<hdfs_path_destination>/Jan/day05_ (month and day are sliced
        // out of Date.toString(): "EEE MMM dd HH:mm:ss zzz yyyy").
        Date currentDate = Calendar.getInstance().getTime();
        String dateString = currentDate.toString();
        String month = dateString.substring(4, 7);
        String day = dateString.substring(8, 10);
        location = "/<hdfs_path_destination>/" + month + "/day" + day + "_";

        // Load the cluster configuration before creating the FileSystem,
        // otherwise FileSystem.get() falls back to the local file system.
        Configuration config = new Configuration();
        config.addResource(new Path("/hadoop_installation_folder/hadoop-2.4.0/etc/hadoop/core-site.xml"));
        config.addResource(new Path("/hadoop_installation_folder/hadoop-2.4.0/etc/hadoop/hdfs-site.xml"));

        FileSystem fs = FileSystem.get(config);
        // Probe suffixes 0..4 and use the first target directory that does not exist yet.
        for (int i = 0; i <= 4; i++) {
            Path destinationPath = new Path(location + i);
            if (!fs.exists(destinationPath)) {
                location = location + i;
                System.out.println("Target directory in HDFS: " + location);
                break;
            }
        }
        // Do not close fs here: FileSystem.get() returns a cached, shared
        // instance that the Sqoop import job still needs.

        sqoopOptions.setConnectString("jdbc:postgresql://<ip_address>:<port_number>/<database_name>");
        sqoopOptions.setUsername("<username>");
        sqoopOptions.setPassword("<password>");
        sqoopOptions.setNumMappers(1);

        // A free-form query import must contain the literal token $CONDITIONS,
        // which Sqoop replaces with a split condition for each mapper.
        sqoopOptions.setSqlQuery("SELECT * FROM testdb.employee_test_table WHERE $CONDITIONS");

        sqoopOptions.setConf(config);
        sqoopOptions.setTargetDir(location);
        // Hadoop installation root (not the etc/hadoop configuration directory).
        sqoopOptions.setHadoopMapRedHome("/hadoop_installation_folder/hadoop-2.4.0");
        sqoopOptions.setExplicitInputDelims(true);
        sqoopOptions.setExplicitOutputDelims(true);
        sqoopOptions.setHiveDropDelims(true);
        sqoopOptions.setFieldsTerminatedBy('\t');
        sqoopOptions.setLinesTerminatedBy('\n');
    }

    private static int runSqoopImport() {
        int res = new ImportTool().run(sqoopOptions);
        if (res != 0) {
            throw new RuntimeException("Sqoop import failed, return code: " + res);
        }
        return res;
    }

    public static void main(String[] args) {
        try {
            setUpConnection();
            runSqoopImport();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Now create a jar file for this code and run it on the Hadoop cluster.
The data from the employee table will be imported into the HDFS destination directory.
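
For example, a sketch assuming the placeholder package, class, and jar names used above, with the Sqoop and JDBC driver jars made visible to the hadoop command via HADOOP_CLASSPATH:

# make the Sqoop and PostgreSQL driver jars available on the classpath
export HADOOP_CLASSPATH=/path/to/sqoop-1.4.4.jar:/path/to/postgresql-jdbc.jar

# launch the import
hadoop jar sqoop-import-example.jar example.sqoop.JavaSqoopImport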


For security purposes, you could keep the RDBMS connection URL, username, and password in a properties file, read that file in the code, and pass the values to SqoopOptions instead of hard-coding them.
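
A minimal sketch of that approach, assuming a local file named db.properties with keys jdbc.url, jdbc.user, and jdbc.password (all of these names are illustrative):

import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;

// Load credentials from a properties file instead of hard-coding them.
Properties props = new Properties();
try (FileInputStream in = new FileInputStream("db.properties")) {
    props.load(in);
}

sqoopOptions.setConnectString(props.getProperty("jdbc.url"));
sqoopOptions.setUsername(props.getProperty("jdbc.user"));
sqoopOptions.setPassword(props.getProperty("jdbc.password"));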


Now you have an idea of how the Sqoop import command works from Java.
Please try the other Sqoop commands as well.

Next I shall move to Apache Flume.

See you in my next blog!!
